In [51]:
import pandas as pd
import numpy as np
def load_data():
    import pandas as pd
    import os
    df = pd.read_csv("../model/total_data.csv")

    y = df["Phase"]
    x = df.drop(columns=["Phase"])
    return x, y

def make_train_test_split(x, y):

    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler

    (x_train, x_test, y_train, y_test) = train_test_split(
        x,
        y,
        test_size=0.25,
        random_state=123456,
    )

    X_train = x_train.apply(pd.to_numeric, errors='coerce')
    X_test = x_test.apply(pd.to_numeric, errors='coerce')
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    X_train_scaled = pd.DataFrame(X_train_scaled).fillna(0)
    X_test_scaled = pd.DataFrame(X_test_scaled).fillna(0)


    return X_train_scaled, X_test_scaled, y_train, y_test

def eval_metrics(y_true, y_pred):

    from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)

    return mse, mae, r2

def report(estimator, mse, mae, r2):

    print(estimator, ":", sep="")
    print(f"  MSE: {mse}")
    print(f"  MAE: {mae}")
    print(f"  R2: {r2}")

def set_tracking_uri():

    import mlflow

    mlflow.set_tracking_uri('sqlite:///mlruns.db')


def run():
    #
    # Entrena un modelo sklearn ElasticNet
    #
    import sys
    from sklearn.ensemble import RandomForestClassifier
    import mlflow


    x, y = load_data()
    x_train, x_test, y_train, y_test = make_train_test_split(x, y)

    n_estimators = 100
    max_depth = 10
    verbose = 1

    set_tracking_uri()
    print('Tracking directory:', mlflow.get_tracking_uri())

    with mlflow.start_run():
        estimator = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
        estimator.fit(x_train, y_train)
        mse, mae, r2 = eval_metrics(y_test, y_pred=estimator.predict(x_test))
        if verbose > 0:
            report(estimator, mse, mae, r2)

        mlflow.log_param("n_estimators", n_estimators)
        mlflow.log_param("max_depth", max_depth)

        mlflow.log_metric("mse", mse)
        mlflow.log_metric("mae", mae)
        mlflow.log_metric("r2", r2)

        mlflow.sklearn.log_model(estimator, "model")

if __name__ == "__main__":
    run()

  df = pd.read_csv("../model/total_data.csv")
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


Tracking directory: sqlite:///mlruns.db


2023/03/26 22:22:45 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2023/03/26 22:22:46 INFO mlflow.store.db.utils: Updating database tables
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
INFO  [alembic.runtime.migration] Running upgrade  -> 451aebb31d03, add metric step
INFO  [alembic.runtime.migration] Running upgrade 451aebb31d03 -> 90e64c465722, migrate user column to tags
INFO  [alembic.runtime.migration] Running upgrade 90e64c465722 -> 181f10493468, allow nulls for metric values
INFO  [alembic.runtime.migration] Running upgrade 181f10493468 -> df50e92ffc5e, Add Experiment Tags Table
INFO  [alembic.runtime.migration] Running upgrade df50e92ffc5e -> 7ac759974ad8, Update run tags with larger limit
INFO  [alembic.runtime.migration] Running upgrade 7ac759974ad8 -> 89d4b8295536, create latest metrics table
INFO  [89d4b8295536_create_latest_metrics_table_py] Migration complete!
INFO  

RandomForestClassifier(max_depth=10, random_state=42):
  MSE: 0.17144638403990026
  MAE: 0.1701995012468828
  R2: 0.5961669274354724


In [132]:
import os
import pandas as pd
import re 
import simulator
import mlflow
from sklearn.preprocessing import StandardScaler

totalx, totaly = load_data()
totalx_train, totalx_test, totaly_train, totaly_test = make_train_test_split(totalx, totaly)

d = {'Formula': ['C10Mg90','Al90Ni10'],'Phase':['CRA','BMG']}
df=pd.DataFrame(data=d)
df=simulator.clean_data(df)
#Leer el archivo de "TablaPeriodica.csv"
df_tabla_periodica = pd.read_csv("../Inputs/TablaPeriodica.csv", sep=";")
for i in range(1, 9):
    df[f"Elem{i}"]=df[f'Elem{i}'].astype(object)
    # Realizar el "merge" entre df1 y df2 en función de las columnas de elementos
    df =df.merge(df_tabla_periodica, left_on=f"Elem{i}", right_on="Element", how="left")
print('Cantidad de registros:', len(df))
print('Cantidad de columnas:', len(df.columns))
#Eliminar las columnas que no se necesitan
df = df.drop(columns=["Elem1", "Elem2", "Elem3", "Elem4", "Elem5", "Elem6", "Elem7", "Elem8","Element_x", "Formula", "CantElemen"])
df_total = df.copy()
#Tipos de la columna "Phase"
print(df_total["Phase"].unique())
#Reemplazar BMG por 0, RMG por 1 y CRA por 2
df_total["Phase"] = df_total["Phase"].replace({"BMG": 0, "RMG": 1, "CRA": 2})
#Tipos de la columna "Phase"
print(df_total["Phase"].unique())
y = df_total["Phase"]
#Las características de las columnas
X = df_total.drop(columns=["Phase"])
from sklearn.model_selection import train_test_split
#Dividir el conjunto de datos en entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.5)
X_train = X_train.apply(pd.to_numeric, errors='coerce')
X_test = X_test.apply(pd.to_numeric, errors='coerce')
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(totalx_train)
X_test_scaled = scaler.transform(X_test)
X_train_scaled = pd.DataFrame(X_train_scaled).fillna(0)
X_test_scaled = pd.DataFrame(X_test_scaled).fillna(0)
#X_train_scaled = scaler.fit_transform(X_train_scaled)
#X_test_scaled = scaler.transform(X_test_scaled)
print(X_train)
print(X_test_scaled)
model_name = "Phase"
model_version = 1
model = mlflow.pyfunc.load_model(
    model_uri=f"models:/{model_name}/{model_version}"
)


final=model.predict(X_test_scaled)

df2=pd.DataFrame(final,columns=["Phase"])

df2["Phase"] = df2["Phase"].replace({0:"BMG", 1:"RMG", 2:"CRA"})
df2.head()
tr=df2.loc[0,'Phase']
tr





  df = pd.read_csv("../model/total_data.csv")
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
  df =df.merge(df_tabla_periodica, left_on=f"Elem{i}", right_on="Element", how="left")
  df =df.merge(df_tabla_periodica, left_on=f"Elem{i}", right_on="Element", how="left")
  df =df.merge(df_tabla_periodica, left_on=f"Elem{i}", right_on="Element", how="left")


Cantidad de registros: 2
Cantidad de columnas: 211
['CRA' 'BMG']
[2 0]
   Compo1  Compo2  Compo3  Compo4  Compo5  Compo6  Compo7  Compo8  Eea (ev)_x  \
0      10      90       0       0       0       0       0       0       1.262   

   I1 (ev)_x  ...  dVEC_y  XP_y  XM_y  Cp (J/molK)_y  K (W/m)/K 300K_y  W_y  \
0      11.26  ...     NaN   NaN   NaN            NaN               NaN  NaN   

   D_y  Hf (kJ/mol)_y  LP_y  Tb (K)_y  
0  NaN            NaN   NaN       NaN  

[1 rows x 196 columns]
    0     1             2             3             4             5    \
0  90.0  10.0 -7.015336e-17  2.030755e-17 -2.953826e-17  4.430738e-18   

            6    7       8      9    ...  186  187  188  189  190  191  192  \
0  5.907651e-18  0.0  0.4328  5.986  ...  0.0  0.0  0.0  0.0  0.0  0.0  0.0   

   193  194  195  
0  0.0  0.0  0.0  

[1 rows x 196 columns]




'RMG'

In [130]:
def predict():

    import mlflow

    x, y = load_data()
    x_train, x_test, y_train, y_test = make_train_test_split(x, y)

    model_name = "Phase"
    model_version = 1

    model = mlflow.pyfunc.load_model(
        model_uri=f"models:/{model_name}/{model_version}"
    )
    print(x_test[0:10])

    return model.predict(x_test)


predict()

  df = pd.read_csv("../model/total_data.csv")
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


        0         1         2        3         4         5         6    \
0 -0.850255  1.429578 -0.609759 -0.31219 -0.213643 -0.139041 -0.066768   
1 -0.957873  1.465348 -0.523279 -0.31219 -0.213643 -0.139041 -0.066768   
2 -0.491526  0.714198 -0.177360 -0.31219 -0.213643 -0.139041 -0.066768   
3  0.620534 -0.537718 -0.004400 -0.31219 -0.213643 -0.139041 -0.066768   
4 -0.527399 -0.537718  1.379277 -0.31219 -0.213643 -0.139041 -0.066768   
5 -0.132797  0.714198 -0.609759 -0.31219 -0.213643 -0.139041 -0.066768   
6 -0.814382 -0.323104  1.465756 -0.31219 -0.213643 -0.139041 -0.066768   
7  0.570312  0.048894 -0.652999 -0.31219 -0.213643 -0.139041 -0.066768   
8  0.297678 -0.001183 -0.263840 -0.31219 -0.213643 -0.139041 -0.066768   
9  1.732594 -0.788102 -1.042158 -0.31219 -0.213643 -0.139041 -0.066768   

        7         8         9    ...  186  187  188  189  190  191  192  193  \
0 -0.032516  1.414217  2.925878  ...  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   
1 -0.032516 -0.578413  0.

array([1, 1, 1, ..., 2, 2, 1])

In [142]:
import modellist

modellist.devolver_phase('Ag8C92')

  df = pd.read_csv("../model/total_data.csv")
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


UnboundLocalError: local variable 'element' referenced before assignment