##### Validación cruzada

In [1]:
import numpy as np
import pandas as pd
from joblib import load

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import cross_validate

##### Cargar modelos entrenados

In [2]:
# Estimators deserialized from their joblib dumps, keyed by the short label
# that identifies each one in the results table.
# NOTE(review): joblib.load unpickles arbitrary objects; only load trusted files.
models = {
    label: load(dump_path)
    for label, dump_path in (
        ("KNN", "results/knn_model.joblib"),
        ("DT", "results/dt_model.joblib"),
        ("GradientBoosting", "results/gradientBoosting_model.joblib"),
        ("ExtraTrees", "results/extraTrees_model.joblib"),
    )
}

##### Leer datos y respuestas

In [3]:
# Tabular inputs for the train / validation / test splits, read from CSV.
train_data, val_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        "data_practico1/train_data.csv",
        "data_practico1/val_data.csv",
        "data_practico1/test_data.csv",
    )
)

# Target arrays for the same three splits, stored as .npy files.
y_train, y_val, y_test = (
    np.load(npy_path)
    for npy_path in (
        "data_practico1/y_train.npy",
        "data_practico1/y_val.npy",
        "data_practico1/y_test.npy",
    )
)

#### Ejecutar validación cruzada y calcular métricas de desempeño

In [4]:
# Metrics requested from cross_validate. Each key becomes the suffix of the
# "train_<key>" / "test_<key>" entries in its result, and each value is the
# name of the scikit-learn scorer to use.
scoring = {metric: metric for metric in ('accuracy', 'recall', 'precision', 'f1')}

In [5]:
# Evaluate each loaded model two ways and collect one summary row per model:
#   * 10-fold cross-validation on the training split, averaging the per-fold
#     scores. cross_validate names the held-out fold of each split "test_*",
#     so the "Test ..." columns are cross-validation scores, not scores on
#     test_data.
#   * a fit on the full training split, scored on the validation split.
results = []

# scoring key -> label used in the column names of the results table.
metric_labels = {
    'accuracy': 'accuracy',
    'recall': 'recall',
    'precision': 'precision',
    'f1': 'F1-score',
}

# scoring key -> function computing that metric from validation predictions.
validation_metrics = {
    'accuracy': accuracy_score,
    'recall': recall_score,
    'precision': precision_score,
    'f1': f1_score,
}

for model_name, model in models.items():

    scores = cross_validate(model, train_data, y_train, cv=10, scoring=scoring, return_train_score=True)

    # Fit in place on the whole training split. This overwrites the fitted
    # state of the estimator held in `models`.
    model.fit(train_data, y_train)

    # Predict the validation split once and derive every validation metric
    # from these predictions. For scikit-learn classifiers, accuracy_score on
    # them equals model.score(val_data, y_val), so no second prediction pass
    # is needed.
    val_predictions = model.predict(val_data)

    # Columns are grouped per metric as Train / Test / Validation.
    row = {'Model': model_name}
    for metric, label in metric_labels.items():
        row[f'Train {label}'] = np.mean(scores[f'train_{metric}'])
        row[f'Test {label}'] = np.mean(scores[f'test_{metric}'])
        row[f'Validation {label}'] = validation_metrics[metric](y_val, val_predictions)
    results.append(row)

results_df = pd.DataFrame(results)

In [6]:
# Show the per-model summary table as the cell output.
results_df

Unnamed: 0,Model,Train accuracy,Test accuracy,Validation accuracy,Train recall,Test recall,Validation recall,Train precision,Test precision,Validation precision,Train F1-score,Test F1-score,Validation F1-score
0,KNN,0.714102,0.577673,0.57161,0.454759,0.257829,0.238616,0.646923,0.374697,0.336761,0.534052,0.305182,0.279318
1,DT,1.0,0.540742,0.534221,1.0,0.387623,0.367942,1.0,0.369534,0.342373,1.0,0.378065,0.354697
2,GradientBoosting,0.666878,0.632847,0.647655,0.079436,0.019358,0.010929,0.953391,0.342143,0.315789,0.146524,0.036325,0.021127
3,ExtraTrees,1.0,0.629043,0.648923,1.0,0.025083,0.029144,1.0,0.313109,0.432432,1.0,0.046296,0.054608
