# Evaluacón de modelos

In [23]:
import joblib
import os

import pandas as pd
from sklearn.linear_model._logistic import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix


from config import MODELS_PATH

In [4]:
MODELS = os.path.join(MODELS_PATH, "refit")
TESTSETS = os.path.join(MODELS_PATH, "testsets")

In [16]:
models_predictions = dict()

for model in os.listdir(MODELS):
    name, _ = os.path.splitext(model)
    _, dataset = name.split("_")

    dataset_path = os.path.join(TESTSETS, F"X_test_{dataset}.csv")
    X = pd.read_csv(dataset_path)

    model_path = os.path.join(MODELS, model)
    trained_model = joblib.load(model_path)
    predictions = {"y_hat": trained_model.predict(X)}

    for method in ["predict_proba", "predict_log_proba", "predict_joint_log_proba"]:
        if hasattr(trained_model, method):
            key = method.replace("predict_", "").strip()
            predictions.update(
                {f"y_hat_{key}": getattr(trained_model, method)(X)}
            )

    models_predictions[name] = predictions

In [19]:
for key, value in models_predictions.items():
    print("{0:18} --- {1}".format(key, list(value.keys())))

svm_log-odds       --- ['y_hat', 'y_hat_proba', 'y_hat_log_proba']
nb_log-odds        --- ['y_hat', 'y_hat_proba', 'y_hat_log_proba', 'y_hat_joint_log_proba']
lr_log-odds        --- ['y_hat', 'y_hat_proba', 'y_hat_log_proba']
lr_proporciones    --- ['y_hat', 'y_hat_proba', 'y_hat_log_proba']
nb_proporciones    --- ['y_hat', 'y_hat_proba', 'y_hat_log_proba', 'y_hat_joint_log_proba']
svm_proporciones   --- ['y_hat', 'y_hat_proba', 'y_hat_log_proba']


## Reportes

In [None]:
y_path = os.path.join(TESTSETS, "y_test.csv")
y = pd.read_csv(y_path, header=None)

In [24]:
report = dict()

for key, value in models_predictions.items():
    model, dataset = key.split("_")
    print(f"Reporte para modelo {model} con dataset {dataset}")
    print(classification_report(y, value["y_hat"]))
    report[key] = classification_report(y, value["y_hat"], output_dict=True)

Reporte para modelo svm con dataset log-odds
              precision    recall  f1-score   support

           0       0.88      0.39      0.54        18
           1       0.66      0.95      0.78        22

    accuracy                           0.70        40
   macro avg       0.77      0.67      0.66        40
weighted avg       0.75      0.70      0.67        40

Reporte para modelo nb con dataset log-odds
              precision    recall  f1-score   support

           0       0.59      0.89      0.71        18
           1       0.85      0.50      0.63        22

    accuracy                           0.68        40
   macro avg       0.72      0.69      0.67        40
weighted avg       0.73      0.68      0.67        40

Reporte para modelo lr con dataset log-odds
              precision    recall  f1-score   support

           0       1.00      0.44      0.62        18
           1       0.69      1.00      0.81        22

    accuracy                           0.75      

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Visualizaciones

In [None]:
# hacer matriz de confusión por ahora nomás