In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold, RandomizedSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
import lightgbm as lgb
import joblib
import os

In [2]:
# Load the processed dataset; the "Date" column is parsed as datetimes
df = pd.read_csv(
    "../../data/processed/SPLV_clean.csv",
    parse_dates=["Date"],
)

In [3]:
# Chronological train/test split around a single cutoff date.
# One boundary (`<` vs `>=`) makes the two subsets exact complements. Two separate
# bounds ('2025-02-28' / '2025-03-01') would silently drop any row whose timestamp
# falls after midnight on 2025-02-28. For date-only values the result is unchanged.
SPLIT_DATE = '2025-03-01'  # first date that belongs to the test set
# .copy() gives independent frames so later column assignments on either subset
# do not trigger pandas chained-assignment warnings.
train_df = df[df['Date'] < SPLIT_DATE].copy()
test_df = df[df['Date'] >= SPLIT_DATE].copy()

In [4]:
# Features and target: every column except the label, the raw OHLC prices
# and the date is used as a model input.
excluded_cols = ['target', 'Close', 'Open', 'High', 'Low', 'Date']
X_train, y_train = train_df.drop(columns=excluded_cols), train_df['target']
X_test, y_test = test_df.drop(columns=excluded_cols), test_df['target']

# Make sure the output folder for the serialized models exists
os.makedirs("../../models", exist_ok=True)

# Baseline estimators, all sharing the same seed so runs are repeatable
SEED = 42
models = {
    "DecisionTree": DecisionTreeClassifier(max_depth=5, random_state=SEED),
    "RandomForest": RandomForestClassifier(
        n_estimators=100, max_depth=10, random_state=SEED
    ),
    "AdaBoost": AdaBoostClassifier(n_estimators=100, random_state=SEED),
    "GradientBoosting": GradientBoostingClassifier(
        n_estimators=100, learning_rate=0.1, random_state=SEED
    ),
    "LightGBM": lgb.LGBMClassifier(
        n_estimators=100, learning_rate=0.05, num_leaves=31, random_state=SEED
    ),
    "XGBoost": XGBClassifier(
        n_estimators=100, learning_rate=0.05, max_depth=5, random_state=SEED
    ),
}

# Train a model, report its test-set metrics, and persist it to disk
def train_and_evaluate(model, X_train, X_test, y_train, y_test, model_name):
    """Fit ``model``, print its evaluation on the test set, save it, and return the metrics.

    Parameters
    ----------
    model : object
        Estimator exposing ``fit(X, y)`` and ``predict(X)``.
    X_train, y_train
        Training features and labels passed to ``model.fit``.
    X_test, y_test
        Held-out features and labels used for every reported metric.
    model_name : str
        Label used in the printed header and in the saved file name.

    Returns
    -------
    dict
        ``{"model", "accuracy", "precision_macro", "recall_macro"}`` so the
        results of several models can be collected and compared. Callers that
        ignore the return value are unaffected.

    Side effects
    ------------
    Prints accuracy, macro precision/recall, the confusion matrix, the
    classification report and (when the fitted model has
    ``feature_importances_``) one line per feature. Writes the fitted model to
    ``../../models/modeloSPLV_{model_name}.pkl`` via ``joblib.dump``.
    """
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    # zero_division=0 reports 0 for a class with no predicted (or no true) samples,
    # the same value as the default but without an UndefinedMetricWarning.
    prec = precision_score(y_test, y_pred, average='macro', zero_division=0)
    rec = recall_score(y_test, y_pred, average='macro', zero_division=0)

    print(f"\n===== {model_name} =====")
    print(f"Accuracy: {acc:.4f}")
    print(f"Precision (macro): {prec:.4f}")
    print(f"Recall (macro): {rec:.4f}")
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))
    print("Classification Report:")
    print(classification_report(y_test, y_pred, zero_division=0))

    # Feature importances, when the fitted model exposes them
    if hasattr(model, 'feature_importances_'):
        importances = model.feature_importances_
        # Inputs without column labels (e.g. plain arrays) get positional names
        feature_names = getattr(X_train, 'columns', None)
        if feature_names is None:
            feature_names = [f"feature_{i}" for i in range(len(importances))]
        print("Feature Importances:")
        for name, val in zip(feature_names, importances):
            print(f"{name}: {val:.4f}")

    # Persist the fitted model; create the target folder first so the function
    # does not depend on another cell having made it.
    model_path = f"../../models/modeloSPLV_{model_name}.pkl"
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    joblib.dump(model, model_path)

    return {
        "model": model_name,
        "accuracy": acc,
        "precision_macro": prec,
        "recall_macro": rec,
    }

# Train, evaluate and save every base model in turn
for model_name in models:
    model = models[model_name]
    train_and_evaluate(
        model=model,
        X_train=X_train,
        X_test=X_test,
        y_train=y_train,
        y_test=y_test,
        model_name=model_name,
    )


===== DecisionTree =====
Accuracy: 0.5714
Precision (macro): 0.2963
Recall (macro): 0.4706
Confusion Matrix:
[[16  1]
 [11  0]]
Classification Report:
              precision    recall  f1-score   support

           0       0.59      0.94      0.73        17
           1       0.00      0.00      0.00        11

    accuracy                           0.57        28
   macro avg       0.30      0.47      0.36        28
weighted avg       0.36      0.57      0.44        28

Feature Importances:
day_of_week: 0.0000
month: 0.0000
is_month_end: 0.0000
price_diff: 0.0000
pct_diff: 0.0000
return_daily: 0.1403
return_lag_1: 0.0000
return_lag_2: 0.0000
return_lag_3: 0.0280
return_lag_4: 0.0240
return_lag_5: 0.0000
sma_5: 0.0000
sma_10: 0.1610
rolling_std_return_5: 0.2324
RSI_5: 0.1437
MACD: 0.2707
MACD_signal: 0.0000
bb_middle: 0.0000
bb_upper: 0.0000
bb_lower: 0.0000
volume_outlier: 0.0000
price_above_SMA50: 0.0000
RSI_overbought: 0.0000
MACD_above_signal: 0.0000

===== RandomForest =====
Ac