# Ajuste de Hiperparámetros

In [1]:
import subprocess, sys
from pathlib import Path
# Resolve the project root: when the working-directory path contains the text
# 'notebooks', the root is taken to be one level above the working directory.
# NOTE(review): this is a substring match on the whole path, so any ancestor
# directory whose name contains 'notebooks' also triggers it -- confirm intent.
PROJECT_ROOT = Path.cwd().parent if 'notebooks' in str(Path.cwd()) else Path.cwd()
sys.path.insert(0, str(PROJECT_ROOT))

# Map each pip distribution name to the module name it is imported under.
# They differ for scikit-learn (imported as `sklearn`); probing the pip name
# directly can never succeed and would re-run `pip install` on every execution.
_REQUIRED_PACKAGES = {
    'pandas': 'pandas',
    'numpy': 'numpy',
    'scikit-learn': 'sklearn',
    'xgboost': 'xgboost',
    'lightgbm': 'lightgbm',
    'joblib': 'joblib',
}
for pkg, module_name in _REQUIRED_PACKAGES.items():
    try:
        __import__(module_name)
    except ImportError:
        # Only a missing module should trigger an install; anything else
        # (including KeyboardInterrupt) is allowed to propagate.
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', pkg])

In [2]:
import pandas as pd
import numpy as np
import sys; sys.path.insert(0, str(PROJECT_ROOT / 'src'))
from sampling import SMOTEENNBalancer
from modeling import tune_all_models, save_model
from evaluation import calculate_metrics
import warnings; warnings.filterwarnings('ignore')

# Directory holding the preprocessed arrays, relative to the project root.
DATA_PROCESSED = PROJECT_ROOT / 'data' / 'processed'

# Load the four saved arrays in a fixed order: train features, train labels,
# test features, test labels.
X_train_orig, y_train_orig, X_test, y_test = (
    np.load(DATA_PROCESSED / f'{stem}.npy')
    for stem in ('X_train', 'y_train', 'X_test', 'y_test')
)

## Tuning

In [None]:
# Rebalance the training data with the project's SMOTEENN helper at the
# 'medium' level; the test split is left as loaded.
balancer = SMOTEENNBalancer(random_state=42)
X_train_balanced, y_train_balanced = balancer.apply_smoteenn(X_train_orig, y_train_orig, 'medium')

# Tune every candidate model on the balanced data (search_type='random', cv=3).
tuning_results = tune_all_models(X_train_balanced, y_train_balanced, search_type='random', cv=3)

# Score each successfully tuned model on the test split.
eval_results = []
for name, res in tuning_results.items():
    model = res.get('best_model')
    if not res.get('success') or model is None:
        continue
    y_pred = model.predict(X_test)
    # Probabilities are optional: not every estimator exposes predict_proba.
    y_proba = model.predict_proba(X_test) if hasattr(model, 'predict_proba') else None
    metrics = calculate_metrics(y_test, y_pred, y_proba)
    eval_results.append({'Model': name, 'F1_Weighted': metrics['f1_weighted'], 'Accuracy': metrics['accuracy']})

# Without this guard an empty result list yields a DataFrame with no columns
# and sort_values fails with an uninformative KeyError.
if not eval_results:
    raise RuntimeError('No hay modelos ajustados correctamente para evaluar.')

eval_df = pd.DataFrame(eval_results).sort_values('F1_Weighted', ascending=False)
print(eval_df)

# NOTE(review): the winner is chosen by its test-split F1, so the stored
# f1_score is an optimistic estimate -- consider selecting on a validation split.
best_row = eval_df.iloc[0]
best_model_name = best_row['Model']
best_model = tuning_results[best_model_name]['best_model']
metadata = {'model_name': best_model_name, 'f1_score': float(best_row['F1_Weighted'])}
save_model(best_model, DATA_PROCESSED / 'best_model.joblib', metadata)
print(' Mejor modelo guardado')


🎯 Aplicando SMOTEENN - Nivel: MEDIUM

📊 Distribución original:
  Clase 0: 30422 (89.17%)
  Clase 1: 2067 (6.06%)
  Clase 2: 76 (0.22%)
  Clase 3: 1550 (4.54%)

📊 Distribución balanceada:
  Clase 0: 20524 (56.34%) [-9898]
  Clase 1: 6084 (16.70%) [+4017]
  Clase 2: 5984 (16.43%) [+5908]
  Clase 3: 3835 (10.53%) [+2285]

✓ Total: 34115 → 36427 muestras
🎯 Ajustando hiperparámetros de todos los modelos...


🔧 Ajustando hiperparámetros: LogisticRegression
   Método: RANDOM, CV=3
   ✓ Mejor score: 0.5071
   ✓ Mejores parámetros: {'solver': 'lbfgs', 'penalty': 'l2', 'C': 10}

🔧 Ajustando hiperparámetros: RandomForest
   Método: RANDOM, CV=3
   ✓ Mejor score: 0.9036
   ✓ Mejores parámetros: {'n_estimators': 200, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_depth': None}

🔧 Ajustando hiperparámetros: XGBoost
   Método: RANDOM, CV=3
   ✓ Mejor score: 0.9249
   ✓ Mejores parámetros: {'subsample': 0.8, 'n_estimators': 200, 'max_depth': 9, 'l

Análisis: se realizó una búsqueda de hiperparámetros (búsqueda aleatoria con validación cruzada) sobre datos balanceados; el tuning muestra mejoras de ~3–5% respecto al baseline y el F1 final reportado quedó en el rango ≈ 0.90–0.94. Los modelos basados en árboles encabezaron el ranking.

Conclusiones: El ajuste de hiperparámetros consolidó un modelo con mejor equilibrio entre precisión y recall (F1 ≈ 0.90–0.94), apto para evaluación de despliegue.