In [54]:
# Adaptado para series temporales: TimeSeriesSplit + Early Stopping

import pandas as pd
import numpy as np
from sklearn.model_selection import TimeSeriesSplit, RandomizedSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, classification_report, f1_score
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
import lightgbm as lgb
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
import joblib

# Cargar datos
path = '/Users/melaniealvarez/Documents/Octavo semestres/Data Mining/trading/project_trading_DT/data/processed/TLT_clean.csv'
df = pd.read_csv(path)

# Separar entrenamiento y prueba
train_df = df[df['Date'] <= '2025-02-28']
test_df = df[df['Date'] >= '2025-03-01']

selected_features = [  # Copiar de tu script de generación de features
    "day_of_week", "is_month_end", "month",
    "price_diff", "pct_diff", "log_vol",
    "return_lag_1", "return_lag_2", "return_lag_3", "return_lag_4", "return_lag_5",
    "sma_5", "ema_5", "rolling_std_return_5",
    "rsi_5", "macd", "macd_signal", "macd_above_signal",
    "return_volatility_ratio",
    "us10y_yield", "fed_rate", "cpi", "unemployment_rate"
]

X_train = train_df[selected_features].copy()
y_train = train_df['target']
X_test = test_df[selected_features].copy()
y_test = test_df['target']

# Validación para early stopping (10% final del entrenamiento)
val_size = int(len(X_train) * 0.1)
X_train_base, X_val = X_train.iloc[:-val_size], X_train.iloc[-val_size:]
y_train_base, y_val = y_train.iloc[:-val_size], y_train.iloc[-val_size:]

# Funciones

from xgboost.callback import EarlyStopping

from lightgbm import early_stopping


def train_and_evaluate(model, X_train, X_test, y_train, y_test, model_name, X_val=None, y_val=None):
    if isinstance(model, XGBClassifier) and X_val is not None:
        model.set_params(early_stopping_rounds=10, eval_metric="error")
        model.fit(
            X_train,
            y_train,
            eval_set=[(X_val, y_val)],
            verbose=False
        )
    elif isinstance(model, lgb.LGBMClassifier) and X_val is not None:
        model.fit(
            X_train,
            y_train,
            eval_set=[(X_val, y_val)],
            eval_metric="error",
            callbacks=[early_stopping(10)]
        )
    elif isinstance(model, CatBoostClassifier) and X_val is not None:
        model.fit(
            X_train,
            y_train,
            eval_set=(X_val, y_val),
            early_stopping_rounds=10
        )
    else:
        model.fit(X_train, y_train)

    # Predecir probabilidades
    y_proba = model.predict_proba(X_test)[:, 1]

    # Buscar mejor threshold entre 0.3 y 0.7
    best_thresh = 0.5
    best_macro_prec = 0
    for thresh in np.arange(0.3, 0.71, 0.05):
        y_pred_temp = (y_proba > thresh).astype(int)
        macro_prec = precision_score(y_test, y_pred_temp, average="macro", zero_division=0)
        if macro_prec > best_macro_prec:
            best_macro_prec = macro_prec
            best_thresh = thresh

    print(f"✅ Mejor threshold para {model_name}: {best_thresh:.2f} con precisión macro: {best_macro_prec:.4f}")

    # Usar el mejor threshold para predicción final
    y_pred = (y_proba > best_thresh).astype(int)

    print(f'{model_name} Accuracy: {accuracy_score(y_test, y_pred):.4f}')
    print(f'{model_name} Precision (Macro): {precision_score(y_test, y_pred, average="macro"):.4f}')
    print(f'{model_name} Recall (Macro): {recall_score(y_test, y_pred, average="macro"):.4f}')
    print(f'Confusion Matrix:\n{confusion_matrix(y_test, y_pred)}')
    print(f'Classification Report:\n{classification_report(y_test, y_pred)}')

    if hasattr(model, 'feature_importances_'):
        print("Feature Importances:")
        for name, importance in zip(X_train.columns, model.feature_importances_):
            print(f"{name}: {importance:.4f}")

    model.best_threshold = best_thresh

    return model


from sklearn.metrics import make_scorer, precision_score

# Métrica personalizada para solo la clase positiva (label 1)
positive_precision = make_scorer(precision_score, pos_label=1, average='binary', zero_division=0)

def hyperparameter_tuning(model, X, y, param_grid):
    tscv = TimeSeriesSplit(n_splits=10)
    search = RandomizedSearchCV(
        model,
        param_distributions=param_grid,
        n_iter=20,
        cv=tscv,
        scoring=positive_precision,  # Aquí cambiamos la métrica
        n_jobs=-1,
        random_state=42
    )
    search.fit(X, y)
    print(f"Best params for {model.__class__.__name__}: {search.best_params_}")
    return search.best_estimator_


# Modelos y grids
models = {
    "XGBoost": XGBClassifier(random_state=42),
    "LightGBM": lgb.LGBMClassifier(random_state=42),
    "CatBoost": CatBoostClassifier(random_state=42, verbose=0),
    "RandomForest": RandomForestClassifier(random_state=42),
    "DecisionTree": DecisionTreeClassifier(random_state=42),
    "GradientBoosting": GradientBoostingClassifier(random_state=42),
    "AdaBoost": AdaBoostClassifier(random_state=42)
}

param_grids = {
    "XGBoost": {
        "n_estimators": [100, 300, 500],
        "learning_rate": [0.01, 0.1],
        "max_depth": [3, 5, 10]
    },
    "LightGBM": {
        "n_estimators": [100, 300, 500],
        "learning_rate": [0.01, 0.1],
        "num_leaves": [31, 50, 70]
    },
    "CatBoost": {
        "iterations": [500, 1000],
        "learning_rate": [0.01, 0.1],
        "depth": [6, 10]
    },
    "RandomForest": {
        "n_estimators": [100, 300],
        "max_depth": [10, 20],
        "min_samples_split": [2, 5]
    },
    "DecisionTree": {
        "max_depth": [3, 5, 10],
        "min_samples_split": [2, 5],
        "min_samples_leaf": [1, 5]
    },
    "GradientBoosting": {
        "n_estimators": [100, 200],
        "learning_rate": [0.01, 0.1],
        "max_depth": [3, 5]
    },
    "AdaBoost": {
        "n_estimators": [50, 100],
        "learning_rate": [0.01, 0.1]
    }
}

# Entrenamiento
best_model = None
best_score = -np.inf

trained_models = {}

for name, model in models.items():
    print(f"\nTraining {name}...")
    tuned_model = hyperparameter_tuning(model, X_train_base, y_train_base, param_grids[name])
    final_model = train_and_evaluate(tuned_model, X_train_base, X_test, y_train_base, y_test, name, X_val=X_val, y_val=y_val)
    
    trained_models[name] = final_model  # ✅ guardar el modelo entrenado

    # Guardar modelo + columnas de entrenamiento
    model_path = f"/Users/melaniealvarez/Documents/Octavo semestres/Data Mining/trading/project_trading_DT/models/TLT_{name}.pkl"
    model_bundle = {
        "model": final_model,
        "features": selected_features
    }
    joblib.dump(model_bundle, model_path)
    print(f"💾 Modelo + features guardados en: {model_path}")


    # Evaluar precisión y guardar mejor modelo
    score = precision_score(y_test, final_model.predict(X_test), average='macro')
    if score > best_score:
        best_score = score
        best_model = final_model


print(f"\nBest Model: {best_model} with macro precision {best_score:.4f}")

# 🧾 Resumen de métricas por modelo
print("\n📋 Resumen de modelos:\n")
summary_data = []

for name, model in trained_models.items():
    y_proba = model.predict_proba(X_test)[:, 1]
    threshold = getattr(model, 'best_threshold', 0.5)  # Por si acaso
    y_pred = (y_proba > threshold).astype(int)

    precision_macro = precision_score(y_test, y_pred, average='macro', zero_division=0)
    recall_macro = recall_score(y_test, y_pred, average='macro', zero_division=0)
    f1_macro = f1_score(y_test, y_pred, average='macro', zero_division=0)

    summary_data.append({
        "Modelo": name,
        "Threshold": round(threshold, 2),
        "Precisión Macro": round(precision_macro, 4),
        "Recall Macro": round(recall_macro, 4),
        "F1-score Macro": round(f1_macro, 4)
    })

# Mostrar como tabla ordenada por precisión macro
import pandas as pd
summary_df = pd.DataFrame(summary_data)
summary_df = summary_df.sort_values(by="Precisión Macro", ascending=False)
print(summary_df.to_string(index=False))




Training XGBoost...




Best params for XGBClassifier: {'n_estimators': 300, 'max_depth': 10, 'learning_rate': 0.1}
✅ Mejor threshold para XGBoost: 0.60 con precisión macro: 0.8269
XGBoost Accuracy: 0.7000
XGBoost Precision (Macro): 0.8269
XGBoost Recall (Macro): 0.6538
Confusion Matrix:
[[17  0]
 [ 9  4]]
Classification Report:
              precision    recall  f1-score   support

           0       0.65      1.00      0.79        17
           1       1.00      0.31      0.47        13

    accuracy                           0.70        30
   macro avg       0.83      0.65      0.63        30
weighted avg       0.80      0.70      0.65        30

Feature Importances:
day_of_week: 0.0414
is_month_end: 0.0702
month: 0.0352
price_diff: 0.0418
pct_diff: 0.0441
log_vol: 0.0392
return_lag_1: 0.0402
return_lag_2: 0.0385
return_lag_3: 0.0406
return_lag_4: 0.0389
return_lag_5: 0.0423
sma_5: 0.0373
ema_5: 0.0569
rolling_std_return_5: 0.0417
rsi_5: 0.0416
macd: 0.0426
macd_signal: 0.0433
macd_above_signal: 0.0373
ret



[LightGBM] [Info] Number of positive: 466, number of negative: 402
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001115 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3927
[LightGBM] [Info] Number of data points in the train set: 868, number of used features: 23
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.536866 -> initscore=0.147734
[LightGBM] [Info] Start training from score 0.147734
[LightGBM] [Info] Number of positive: 624, number of negative: 533
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001318 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3948
[LightGBM] [Info] Number of data points in the train set: 1157, number of used features: 23
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.539326 -> initscore=0.157629
[LightGBM] [Info] Start training from score 0.157629
[LightGBM] [Info] Number of



Best params for CatBoostClassifier: {'learning_rate': 0.01, 'iterations': 500, 'depth': 6}
✅ Mejor threshold para CatBoost: 0.50 con precisión macro: 0.4667
CatBoost Accuracy: 0.4667
CatBoost Precision (Macro): 0.4667
CatBoost Recall (Macro): 0.4661
Confusion Matrix:
[[8 9]
 [7 6]]
Classification Report:
              precision    recall  f1-score   support

           0       0.53      0.47      0.50        17
           1       0.40      0.46      0.43        13

    accuracy                           0.47        30
   macro avg       0.47      0.47      0.46        30
weighted avg       0.48      0.47      0.47        30

Feature Importances:
day_of_week: 3.7138
is_month_end: 0.4344
month: 3.1911
price_diff: 7.7147
pct_diff: 4.6418
log_vol: 8.3710
return_lag_1: 6.7294
return_lag_2: 3.3759
return_lag_3: 0.2591
return_lag_4: 2.7900
return_lag_5: 8.2466
sma_5: 5.0942
ema_5: 4.2434
rolling_std_return_5: 3.4348
rsi_5: 13.1011
macd: 5.2882
macd_signal: 2.0582
macd_above_signal: 0.5861
ret



Best params for RandomForestClassifier: {'n_estimators': 100, 'min_samples_split': 2, 'max_depth': 10}
✅ Mejor threshold para RandomForest: 0.65 con precisión macro: 0.7931
RandomForest Accuracy: 0.6000
RandomForest Precision (Macro): 0.7931
RandomForest Recall (Macro): 0.5385
Confusion Matrix:
[[17  0]
 [12  1]]
Classification Report:
              precision    recall  f1-score   support

           0       0.59      1.00      0.74        17
           1       1.00      0.08      0.14        13

    accuracy                           0.60        30
   macro avg       0.79      0.54      0.44        30
weighted avg       0.77      0.60      0.48        30

Feature Importances:
day_of_week: 0.0238
is_month_end: 0.0021
month: 0.0254
price_diff: 0.0562
pct_diff: 0.0579
log_vol: 0.0590
return_lag_1: 0.0617
return_lag_2: 0.0569
return_lag_3: 0.0567
return_lag_4: 0.0540
return_lag_5: 0.0572
sma_5: 0.0471
ema_5: 0.0494
rolling_std_return_5: 0.0510
rsi_5: 0.0616
macd: 0.0504
macd_signal: 0.050



Best params for DecisionTreeClassifier: {'min_samples_split': 2, 'min_samples_leaf': 1, 'max_depth': 5}
✅ Mejor threshold para DecisionTree: 0.60 con precisión macro: 0.8036
DecisionTree Accuracy: 0.6333
DecisionTree Precision (Macro): 0.8036
DecisionTree Recall (Macro): 0.5769
Confusion Matrix:
[[17  0]
 [11  2]]
Classification Report:
              precision    recall  f1-score   support

           0       0.61      1.00      0.76        17
           1       1.00      0.15      0.27        13

    accuracy                           0.63        30
   macro avg       0.80      0.58      0.51        30
weighted avg       0.78      0.63      0.54        30

Feature Importances:
day_of_week: 0.0000
is_month_end: 0.0000
month: 0.0000
price_diff: 0.0515
pct_diff: 0.0000
log_vol: 0.1176
return_lag_1: 0.0836
return_lag_2: 0.0000
return_lag_3: 0.0451
return_lag_4: 0.0746
return_lag_5: 0.0615
sma_5: 0.0999
ema_5: 0.1907
rolling_std_return_5: 0.0000
rsi_5: 0.1152
macd: 0.0420
macd_signal: 0.00



Best params for GradientBoostingClassifier: {'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.1}
✅ Mejor threshold para GradientBoosting: 0.70 con precisión macro: 0.6827
GradientBoosting Accuracy: 0.6333
GradientBoosting Precision (Macro): 0.6827
GradientBoosting Recall (Macro): 0.5860
Confusion Matrix:
[[16  1]
 [10  3]]
Classification Report:
              precision    recall  f1-score   support

           0       0.62      0.94      0.74        17
           1       0.75      0.23      0.35        13

    accuracy                           0.63        30
   macro avg       0.68      0.59      0.55        30
weighted avg       0.67      0.63      0.57        30

Feature Importances:
day_of_week: 0.0204
is_month_end: 0.0027
month: 0.0079
price_diff: 0.0789
pct_diff: 0.0498
log_vol: 0.0831
return_lag_1: 0.0789
return_lag_2: 0.0666
return_lag_3: 0.0861
return_lag_4: 0.0447
return_lag_5: 0.0640
sma_5: 0.0374
ema_5: 0.0535
rolling_std_return_5: 0.0531
rsi_5: 0.0741
macd: 0.0506
m



Best params for AdaBoostClassifier: {'n_estimators': 100, 'learning_rate': 0.1}




✅ Mejor threshold para AdaBoost: 0.50 con precisión macro: 0.4861
AdaBoost Accuracy: 0.5000
AdaBoost Precision (Macro): 0.4861
AdaBoost Recall (Macro): 0.4864
Confusion Matrix:
[[10  7]
 [ 8  5]]
Classification Report:
              precision    recall  f1-score   support

           0       0.56      0.59      0.57        17
           1       0.42      0.38      0.40        13

    accuracy                           0.50        30
   macro avg       0.49      0.49      0.49        30
weighted avg       0.50      0.50      0.50        30

Feature Importances:
day_of_week: 0.0500
is_month_end: 0.0000
month: 0.0000
price_diff: 0.1200
pct_diff: 0.0000
log_vol: 0.1700
return_lag_1: 0.0900
return_lag_2: 0.0200
return_lag_3: 0.1200
return_lag_4: 0.0200
return_lag_5: 0.0100
sma_5: 0.0500
ema_5: 0.0600
rolling_std_return_5: 0.0200
rsi_5: 0.0700
macd: 0.0200
macd_signal: 0.0800
macd_above_signal: 0.0000
return_volatility_ratio: 0.0600
us10y_yield: 0.0000
fed_rate: 0.0000
cpi: 0.0400
unemployme

In [55]:
ratio = (y_train == 0).sum() / (y_train == 1).sum()
ratio

0.9169831795984807


💰 Simulando inversión con modelo: XGBoost | Threshold: 0.5

💰 Simulando inversión con modelo: XGBoost | Threshold: 0.6

💰 Simulando inversión con modelo: XGBoost | Threshold: 0.7

💰 Simulando inversión con modelo: LightGBM | Threshold: 0.5

💰 Simulando inversión con modelo: LightGBM | Threshold: 0.6

💰 Simulando inversión con modelo: LightGBM | Threshold: 0.7

💰 Simulando inversión con modelo: CatBoost | Threshold: 0.5

💰 Simulando inversión con modelo: CatBoost | Threshold: 0.6

💰 Simulando inversión con modelo: CatBoost | Threshold: 0.7

💰 Simulando inversión con modelo: RandomForest | Threshold: 0.5

💰 Simulando inversión con modelo: RandomForest | Threshold: 0.6

💰 Simulando inversión con modelo: RandomForest | Threshold: 0.7

💰 Simulando inversión con modelo: DecisionTree | Threshold: 0.5

💰 Simulando inversión con modelo: DecisionTree | Threshold: 0.6

💰 Simulando inversión con modelo: DecisionTree | Threshold: 0.7

💰 Simulando inversión con modelo: GradientBoosting | Threshold:

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df["Close_next"] = test_df["Close"].shift(-1)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df.dropna(inplace=True)
