In [1]:
import joblib
import warnings
import sys
sys.path.append('../src/utils')

# Core
from utils_functions import *
pd.set_option('display.max_columns', None)
pd.options.display.float_format = '{:,.2f}'.format
warnings.simplefilter('ignore')
%load_ext autoreload
%autoreload 2

In [2]:
# Modelo final
final_model = joblib.load('../models/OptimizedRandomForestClassifier.pkl')

# Lectura de los subsets
X_train = pd.read_csv('../data/processed/X_train.csv')
X_val = pd.read_csv('../data/processed/X_val.csv')
X_test = pd.read_csv('../data/processed/X_test.csv')

y_train = pd.read_csv('../data/processed/y_train.csv')
y_val = pd.read_csv('../data/processed/y_val.csv')
y_test = pd.read_csv('../data/processed/y_test.csv')

X_train.drop(['hours-per-week'], axis=1, inplace=True)
X_val.drop(['hours-per-week'], axis=1, inplace=True)
X_test.drop(['hours-per-week'], axis=1, inplace=True)

In [3]:
from sklearn.metrics import log_loss

# Evaluar el mejor modelo en el conjunto de entrenamiento
y_pred_train = final_model.predict_proba(X_train)[:, 1]
train_log_loss = log_loss(y_train, y_pred_train)

# Evaluar el mejor modelo en el conjunto de validación
y_pred_val = final_model.predict_proba(X_val)[:, 1]
val_log_loss = log_loss(y_val, y_pred_val)

# Evaluar el mejor modelo en el conjunto de prueba
test_pred_val = final_model.predict_proba(X_test)[:, 1]
test_log_loss = log_loss(y_test, test_pred_val)

print('=== RESULTADOS FINALES ===')
print(f'Log-loss train set: {100*train_log_loss:0.2f}%')
print(f'Log-loss validation set: {100*val_log_loss:0.2f}%')
print(f'Log-loss test set: {100*test_log_loss:0.2f}%')

=== RESULTADOS FINALES ===
Log-loss train set: 35.85%
Log-loss validation set: 35.83%
Log-loss test set: 37.16%


In [4]:
from sklearn.metrics import roc_auc_score

# Evaluar el mejor modelo en el conjunto de entrenamiento
y_pred_train = final_model.predict_proba(X_train)[:, 1]
train_roc_auc = roc_auc_score(y_train, y_pred_train)

# Evaluar el mejor modelo en el conjunto de validación
y_pred_val = final_model.predict_proba(X_val)[:, 1]
val_roc_auc = roc_auc_score(y_val, y_pred_val)

# Evaluar el mejor modelo en el conjunto de prueba
test_pred_val = final_model.predict_proba(X_test)[:, 1]
test_roc_auc = roc_auc_score(y_test, test_pred_val)

print('=== RESULTADOS FINALES ===')
print(f'ROC-AUC train set: {100*train_roc_auc:0.2f}%')
print(f'ROC-AUC validation set: {100*val_roc_auc:0.2f}%')
print(f'ROC-AUC test set: {100*test_roc_auc:0.2f}%')

=== RESULTADOS FINALES ===
ROC-AUC train set: 88.00%
ROC-AUC validation set: 87.89%
ROC-AUC test set: 86.87%
