#### Evaluar métricas de desempeño de los modelos entrenados con parámetros por defecto

In [65]:
import pandas as pd
from joblib import load
import numpy as np

from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, matthews_corrcoef, confusion_matrix

In [66]:
def splitting_binary_data(df):
    """Split *df* into its two-valued columns and every other column.

    A column is treated as binary when ``df.nunique()`` reports exactly two
    distinct values for it.

    Returns:
        A ``(df_binary, df_non_binary)`` tuple of DataFrames. The non-binary
        column labels come from ``Index.difference``, which sorts its result
        when the labels are comparable, so they need not follow the original
        column order.
    """
    distinct_counts = df.nunique()
    two_valued = df.columns[distinct_counts == 2]
    remaining = df.columns.difference(two_valued)
    return df[two_valued], df[remaining]

In [67]:
def _scale_non_binary(scaler, data):
    """Return *data* with its non-binary columns passed through ``scaler.transform``."""
    binary_part, non_binary_part = splitting_binary_data(data)
    scaled_part = pd.DataFrame(
        scaler.transform(non_binary_part),
        columns=non_binary_part.columns,
        # Keep the input's row labels: ``pd.concat(axis=1)`` aligns on the
        # index, so a fresh RangeIndex here would misalign rows (and inject
        # NaNs) whenever *data* does not carry a default 0..n-1 index.
        index=non_binary_part.index,
    )
    return pd.concat([binary_part, scaled_part], axis=1)


def predict_class(scaler, model, val_data, test_data):
    """Scale the non-binary features of both splits and predict their classes.

    Each split is divided with ``splitting_binary_data``; only the non-binary
    columns go through ``scaler.transform``. The binary columns are then
    placed first, followed by the scaled columns, and the result is passed to
    ``model.predict``.

    Args:
        scaler: Object exposing ``transform``; it is not fitted here.
        model: Object exposing ``predict(X=...)``.
        val_data: DataFrame with the validation features.
        test_data: DataFrame with the test features.

    Returns:
        A ``(val_predictions, test_predictions)`` tuple holding whatever
        ``model.predict`` returns for each split.
    """
    val_scaled = _scale_non_binary(scaler, val_data)
    test_scaled = _scale_non_binary(scaler, test_data)

    val_predictions = model.predict(X=val_scaled)
    test_predictions = model.predict(X=test_scaled)

    return val_predictions, test_predictions


In [68]:
def metrics_model(predict_val, y_val, dataset, scaler, model, average="weighted"):
    """Compute classification metrics and return them as a one-row DataFrame.

    Args:
        predict_val: Predicted labels (used as ``y_pred``).
        y_val: Ground-truth labels (used as ``y_true``).
        dataset: Label stored in the ``dataset`` column.
        scaler: Label stored in the ``scaler`` column.
        model: Label stored in the ``model`` column.
        average: Averaging mode forwarded to ``recall_score``,
            ``precision_score`` and ``f1_score``. Defaults to ``"weighted"``,
            the value that was previously hard-coded.

    Returns:
        A DataFrame with a single row and the columns ``dataset``, ``scaler``,
        ``model``, ``acc``, ``recall``, ``precision``, ``f1``, ``mcc`` and
        ``cm`` (the confusion matrix stored as one cell).

    Note:
        For single-label targets, support-weighted recall is numerically
        equal to accuracy, so ``acc`` and ``recall`` coincide under the
        default. Pass e.g. ``average="macro"`` to get a recall that carries
        independent information.
    """
    row = {
        "dataset": dataset,
        "scaler": scaler,
        "model": model,
        "acc": accuracy_score(y_true=y_val, y_pred=predict_val),
        "recall": recall_score(y_true=y_val, y_pred=predict_val, average=average),
        "precision": precision_score(y_true=y_val, y_pred=predict_val, average=average),
        "f1": f1_score(y_true=y_val, y_pred=predict_val, average=average),
        "mcc": matthews_corrcoef(y_true=y_val, y_pred=predict_val),
        "cm": confusion_matrix(y_true=y_val, y_pred=predict_val),
    }

    # Dict insertion order fixes the column order of the resulting frame.
    return pd.DataFrame([row])

##### Cargar escalador

In [69]:
# Scalers loaded from disk with joblib, keyed by the name that is later
# written to the "scaler" column of the metrics table.
scalers = dict(RobustScaler=load("results/scaler_robust.joblib"))

##### Cargar modelos entrenados

In [70]:
# Models loaded from disk with joblib, keyed by the short name that is later
# written to the "model" column of the metrics table.
models = {
    name: load(path)
    for name, path in (
        ("KNN", "results/knn_model.joblib"),
        ("DT", "results/dt_model.joblib"),
        ("SVM", "results/svm_model.joblib"),
        ("ADA", "results/ada_model.joblib"),
        ("RF", "results/rf_model.joblib"),
    )
}

##### Leer datos y respuestas de validación y testeo

In [71]:
# Validation split: feature table plus the labels used as ground truth.
val_data = pd.read_csv("process_dataset/val_data.csv")
y_val = np.load("process_dataset/y_val.npy")

# Test split: feature table plus the labels used as ground truth.
test_data = pd.read_csv("process_dataset/test_data.csv")
y_test = np.load("process_dataset/y_test.npy")

##### Aplicar métricas a los modelos

In [72]:
# Collect one single-row metrics frame per (scaler, model, split) combination.
results = []

for scaler_name, scaler_instance in scalers.items():
    for model_name, model in models.items():
        predict_val, predict_test = predict_class(scaler_instance, model, val_data, test_data)
        # Validation row first, then the test row, for each model.
        results.extend(
            metrics_model(predictions, truth, split, scaler_name, model_name)
            for split, predictions, truth in (
                ("Validation", predict_val, y_val),
                ("Test", predict_test, y_test),
            )
        )

In [73]:
# Stack the per-model rows into one table with a fresh 0..n-1 index.
df_result = pd.concat(results, ignore_index=True)

In [74]:
# Display the combined metrics table: one row per (dataset, scaler, model).
df_result

Unnamed: 0,dataset,scaler,model,acc,recall,precision,f1,mcc,cm
0,Validation,RobustScaler,KNN,0.512389,0.512389,0.514236,0.512702,0.025664,"[[279, 256], [295, 300]]"
1,Test,RobustScaler,KNN,0.501592,0.501592,0.501613,0.501601,0.003063,"[[154, 156], [157, 161]]"
2,Validation,RobustScaler,DT,0.525664,0.525664,0.526398,0.525926,0.050098,"[[275, 260], [276, 319]]"
3,Test,RobustScaler,DT,0.528662,0.528662,0.529503,0.527873,0.058586,"[[177, 133], [163, 155]]"
4,Validation,RobustScaler,SVM,0.499115,0.499115,0.500806,0.499454,-0.001208,"[[270, 265], [301, 294]]"
5,Test,RobustScaler,SVM,0.496815,0.496815,0.497327,0.496218,-0.005491,"[[165, 145], [171, 147]]"
6,Validation,RobustScaler,ADA,0.487611,0.487611,0.491805,0.486378,-0.019003,"[[292, 243], [336, 259]]"
7,Test,RobustScaler,ADA,0.523885,0.523885,0.524523,0.523372,0.048736,"[[173, 137], [162, 156]]"
8,Validation,RobustScaler,RF,0.5,0.5,0.503547,0.499559,0.004248,"[[290, 245], [320, 275]]"
9,Test,RobustScaler,RF,0.496815,0.496815,0.497923,0.493092,-0.004242,"[[181, 129], [187, 131]]"


In [64]:
# NOTE(review): this cell's execution counter (64) is lower than that of the
# cells above, so the table that follows is presumably output kept from an
# earlier run — confirm before comparing it with the latest results.
df_result

Unnamed: 0,dataset,scaler,model,acc,recall,precision,f1,mcc,cm
0,Validation,RobustScaler,KNN,0.496559,0.496559,0.496572,0.496563,-0.006903,"[[255, 257], [255, 250]]"
1,Test,RobustScaler,KNN,0.532743,0.532743,0.532618,0.532354,0.065084,"[[141, 139], [125, 160]]"
2,Validation,RobustScaler,DT,0.477876,0.477876,0.477897,0.477882,-0.044256,"[[245, 267], [264, 241]]"
3,Test,RobustScaler,DT,0.515044,0.515044,0.514889,0.514734,0.029675,"[[137, 143], [131, 154]]"
4,Validation,RobustScaler,SVM,0.498525,0.498525,0.498358,0.498164,-0.003327,"[[269, 243], [267, 238]]"
5,Test,RobustScaler,SVM,0.518584,0.518584,0.518527,0.51853,0.036976,"[[142, 138], [134, 151]]"
6,Validation,RobustScaler,ADA,0.47296,0.47296,0.472439,0.471813,-0.054965,"[[266, 246], [290, 215]]"
7,Test,RobustScaler,ADA,0.507965,0.507965,0.508094,0.507928,0.016105,"[[145, 135], [143, 142]]"
8,Validation,RobustScaler,RF,0.492625,0.492625,0.491897,0.48935,-0.01606,"[[293, 219], [297, 208]]"
9,Test,RobustScaler,RF,0.488496,0.488496,0.488655,0.48839,-0.022754,"[[141, 139], [150, 135]]"


In [54]:
# NOTE(review): this cell's execution counter (54) is lower than that of the
# cells above, so the table that follows is presumably output kept from an
# earlier run — confirm before comparing it with the latest results.
df_result

Unnamed: 0,dataset,scaler,model,acc,recall,precision,f1,mcc,cm
0,Validation,RobustScaler,KNN,0.588732,0.588732,0.557632,0.56982,-0.022497,"[[737, 234], [350, 99]]"
1,Test,RobustScaler,KNN,0.6109,0.6109,0.580075,0.592352,0.000884,"[[429, 123], [184, 53]]"
2,Validation,RobustScaler,DT,0.571831,0.571831,0.587342,0.578597,0.045512,"[[636, 335], [273, 176]]"
3,Test,RobustScaler,DT,0.555133,0.555133,0.573052,0.563145,-0.015726,"[[359, 193], [158, 79]]"
4,Validation,RobustScaler,SVM,0.683803,0.683803,0.467586,0.555393,0.0,"[[971, 0], [449, 0]]"
5,Test,RobustScaler,SVM,0.69962,0.69962,0.489468,0.575973,0.0,"[[552, 0], [237, 0]]"
6,Validation,RobustScaler,ADA,0.678873,0.678873,0.560403,0.559243,-0.005227,"[[959, 12], [444, 5]]"
7,Test,RobustScaler,ADA,0.705957,0.705957,0.726951,0.594859,0.111863,"[[550, 2], [230, 7]]"
8,Validation,RobustScaler,RF,0.683803,0.683803,0.626397,0.560497,0.029753,"[[967, 4], [445, 4]]"
9,Test,RobustScaler,RF,0.697085,0.697085,0.564383,0.57705,-0.007845,"[[549, 3], [236, 1]]"
