In [12]:
import warnings
warnings.filterwarnings('ignore')

#### Evaluar métricas de desempeño de los modelos entrenados con parámetros por defecto

In [13]:
import pandas as pd
from joblib import load
import numpy as np

from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, matthews_corrcoef, confusion_matrix

In [14]:
def predict_class(scaler_instance, model, val_data, test_data):
    """Scale the validation and test features, then predict labels for both.

    Parameters
    ----------
    scaler_instance : object exposing ``transform(array)``
        Applied to the raw ``.values`` array of each split.
    model : object exposing ``predict(X=...)``
        Called once per scaled split.
    val_data, test_data : objects exposing ``.values``
        Feature tables for the validation and test splits.

    Returns
    -------
    tuple
        ``(validation_predictions, test_predictions)``.
    """
    # Both splits are scaled first (validation, then test) ...
    scaled_splits = [scaler_instance.transform(split.values) for split in (val_data, test_data)]
    # ... and only afterwards predicted, again validation before test.
    val_labels, test_labels = (model.predict(X=features) for features in scaled_splits)

    return val_labels, test_labels

In [15]:
def metrics_model(predict_val, y_val, dataset, scaler, model):
    """Summarise one set of predictions as a single-row metrics table.

    Parameters
    ----------
    predict_val : array-like
        Predicted labels.
    y_val : array-like
        True labels the predictions are scored against.
    dataset, scaler, model : object
        Identifiers stored verbatim in the ``dataset``, ``scaler`` and
        ``model`` columns of the result.

    Returns
    -------
    pandas.DataFrame
        One row with columns ``dataset``, ``scaler``, ``model``, ``acc``,
        ``recall``, ``precision``, ``f1``, ``mcc`` and ``cm``. Recall,
        precision and F1 use ``average='weighted'``; ``cm`` holds the
        confusion matrix object itself.
    """
    # Every metric receives the same truth/prediction pair by keyword.
    labels = {"y_pred": predict_val, "y_true": y_val}

    accuracy = accuracy_score(**labels)
    recall = recall_score(average='weighted', **labels)
    precision = precision_score(average='weighted', **labels)
    f1 = f1_score(average='weighted', **labels)
    mcc = matthews_corrcoef(**labels)
    conf_matrix = confusion_matrix(**labels)

    header = ["dataset", "scaler", "model", "acc", "recall", "precision", "f1", "mcc", "cm"]
    row = [dataset, scaler, model, accuracy, recall, precision, f1, mcc, conf_matrix]

    return pd.DataFrame([row], columns=header)

##### Cargar escalador

In [16]:
# Map a display name to each fitted scaler restored from disk.
# NOTE(review): joblib's load is pickle-based, so only trusted files should be loaded here.
scalers = dict(RobustScaler=load("data_practico1/scaler_robust.joblib"))

##### Cargar modelos entrenados

In [17]:
# Restore each trained model from disk, keyed by the short name used in the results table.
# NOTE(review): joblib's load is pickle-based, so only trusted files should be loaded here.
models = {
    model_key: load(model_path)
    for model_key, model_path in (
        ("KNN", "results/knn_model.joblib"),
        ("DT", "results/dt_model.joblib"),
        ("GradientBoosting", "results/gradientBoosting_model.joblib"),
        ("ExtraTrees", "results/extraTrees_model.joblib"),
    )
}

##### Leer datos y respuestas de validación y testeo

In [18]:
# Feature tables for the validation and test splits (read in that order).
val_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in ("data_practico1/val_data.csv", "data_practico1/test_data.csv")
)

# Matching label arrays for the same two splits.
y_val, y_test = (
    np.load(npy_path)
    for npy_path in ("data_practico1/y_val.npy", "data_practico1/y_test.npy")
)

##### Aplicar métricas a los modelos

In [19]:
# Collect one single-row metrics frame per (scaler, model, split) combination.
results = []

for scaler_name, scaler_instance in scalers.items():
    for model_name, model in models.items():
        predict_val, predict_test = predict_class(scaler_instance, model, val_data, test_data)
        # Score the validation split first, then the test split.
        results.extend(
            metrics_model(split_pred, split_true, split_label, scaler_name, model_name)
            for split_label, split_pred, split_true in (
                ("Validation", predict_val, y_val),
                ("Test", predict_test, y_test),
            )
        )

In [20]:
# Stack the per-run rows into one table with a fresh 0..n-1 index.
df_result = pd.concat(results, ignore_index=True)

In [21]:
# Display the combined metrics table (one row per dataset/scaler/model combination).
df_result

Unnamed: 0,dataset,scaler,model,acc,recall,precision,f1,mcc,cm
0,Validation,RobustScaler,KNN,0.596958,0.596958,0.565368,0.574011,0.040554,"[[800, 229], [407, 142]]"
1,Test,RobustScaler,KNN,0.566705,0.566705,0.535348,0.544525,-0.006359,"[[419, 141], [239, 78]]"
2,Validation,RobustScaler,DT,0.530418,0.530418,0.5392,0.534413,-0.015537,"[[636, 393], [348, 201]]"
3,Test,RobustScaler,DT,0.530217,0.530217,0.537182,0.533388,-0.002607,"[[343, 217], [195, 122]]"
4,Validation,RobustScaler,GradientBoosting,0.647655,0.647655,0.534834,0.519348,-0.007444,"[[1016, 13], [543, 6]]"
5,Test,RobustScaler,GradientBoosting,0.629418,0.629418,0.515256,0.504957,-0.019543,"[[546, 14], [311, 6]]"
6,Validation,RobustScaler,ExtraTrees,0.647655,0.647655,0.570639,0.530845,0.02288,"[[1005, 24], [532, 17]]"
7,Test,RobustScaler,ExtraTrees,0.632839,0.632839,0.541002,0.508601,0.002156,"[[548, 12], [310, 7]]"
