In [5]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from sklearn.model_selection import TimeSeriesSplit
import joblib
import optuna  # For hyperparameter tuning

# Tickers
tickers = ["BNB-USD", "BTC-USD", "ETH-USD", "XRP-USD"]

# Prepare data with feature engineering
def prepare_data(df):
    features = ["Open", "High", "Low", "Close", "Volume", "Lag1", "Lag7", "SMA7", "RSI14", "MACD", "Returns", "Volatility"]
    X = df[features].values
    y = df["Target"].values
    return X, y, features

# Objective function for Optuna hyperparameter tuning
def objective(trial, X_train_scaled, y_train, X_test_scaled, y_test):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 500),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'learning_rate': trial.suggest_float('learning_rate', 0.005, 0.1, log=True),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
        'random_state': 42
    }
    
    model = XGBRegressor(**params)
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    mse = mean_squared_error(y_test, y_pred)
    return mse

# Train and evaluate XGBoost model
for ticker in tickers:
    print(f"\nTraining XGBoost for {ticker}...")
    
    # Load data
    df = pd.read_csv(f"{ticker}_processed.csv", index_col="Date", parse_dates=True)
    X, y, feature_names = prepare_data(df)
    
    # Split data (80% train, 20% test, no shuffle for time series)
    train_size = int(len(X) * 0.8)
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]
    
    # Scale features
    scaler_X = StandardScaler()
    X_train_scaled = scaler_X.fit_transform(X_train)
    X_test_scaled = scaler_X.transform(X_test)
    
    # Hyperparameter tuning with Optuna
    study = optuna.create_study(direction='minimize')
    study.optimize(lambda trial: objective(trial, X_train_scaled, y_train, X_test_scaled, y_test), n_trials=30)
    best_params = study.best_params
    print(f"Best parameters for {ticker}: {best_params}")
    
    # Train final model with best parameters
    model = XGBRegressor(**best_params)
    model.fit(X_train_scaled, y_train)
    
    # Predict and evaluate
    y_pred = model.predict(X_test_scaled)
    mse = mean_squared_error(y_test, y_pred)
    mape = mean_absolute_percentage_error(y_test, y_pred) * 100
    test_dates = df.index[train_size:]
    actual_series = pd.Series(y_test, index=test_dates)
    pred_series = pd.Series(y_pred, index=test_dates)
    actual_direction = (actual_series.shift(-1) > actual_series).iloc[:-1].astype(int)
    predicted_direction = (pred_series.shift(-1) > pred_series).iloc[:-1].astype(int)
    directional_accuracy = (actual_direction == predicted_direction).mean() * 100
    
    print(f"XGBoost Results for {ticker}:")
    print(f"MSE: {mse:.2f}")
    print(f"MAPE: {mape:.2f}%")
    print(f"Directional Accuracy: {directional_accuracy:.2f}%")
    
    # Feature importance
    importance = model.feature_importances_
    for feat, imp in zip(feature_names, importance):
        print(f"Feature {feat}: {imp:.4f}")
    
    # Save model and scaler
    joblib.dump(model, f"{ticker}_xgboost_model.pkl")
    joblib.dump(scaler_X, f"{ticker}_scaler_X_xgb.pkl")
    print(f"Model and scaler saved for {ticker}")

[I 2025-03-11 02:26:44,761] A new study created in memory with name: no-name-2cb05c60-0bfc-4fc0-9e79-12cf2393248e



Training XGBoost for BNB-USD...


[I 2025-03-11 02:26:45,311] Trial 0 finished with value: 2521.2303507776387 and parameters: {'n_estimators': 342, 'max_depth': 14, 'learning_rate': 0.013354108105531136, 'subsample': 0.6935739745886809, 'colsample_bytree': 0.6798556588365028}. Best is trial 0 with value: 2521.2303507776387.
[I 2025-03-11 02:26:45,817] Trial 1 finished with value: 2074.9031376856124 and parameters: {'n_estimators': 365, 'max_depth': 9, 'learning_rate': 0.017979624336556574, 'subsample': 0.931905949910842, 'colsample_bytree': 0.6190569286804392}. Best is trial 1 with value: 2074.9031376856124.
[I 2025-03-11 02:26:46,800] Trial 2 finished with value: 2585.954021709555 and parameters: {'n_estimators': 425, 'max_depth': 13, 'learning_rate': 0.021280498770927872, 'subsample': 0.7384478994797443, 'colsample_bytree': 0.9201298328062288}. Best is trial 1 with value: 2074.9031376856124.
[I 2025-03-11 02:26:46,915] Trial 3 finished with value: 11719.369826680993 and parameters: {'n_estimators': 226, 'max_depth': 

Best parameters for BNB-USD: {'n_estimators': 191, 'max_depth': 3, 'learning_rate': 0.06831844403803894, 'subsample': 0.750525867157928, 'colsample_bytree': 0.7604137412503714}
XGBoost Results for BNB-USD:
MSE: 1305.47
MAPE: 4.21%
Directional Accuracy: 52.14%
Feature Open: 0.2025
Feature High: 0.4523
Feature Low: 0.1163
Feature Close: 0.1029
Feature Volume: 0.0009
Feature Lag1: 0.0353
Feature Lag7: 0.0017
Feature SMA7: 0.0847
Feature RSI14: 0.0011
Feature MACD: 0.0007
Feature Returns: 0.0007
Feature Volatility: 0.0008
Model and scaler saved for BNB-USD

Training XGBoost for BTC-USD...


[I 2025-03-11 02:26:51,920] Trial 0 finished with value: 864983926.1659667 and parameters: {'n_estimators': 284, 'max_depth': 11, 'learning_rate': 0.00990590316297125, 'subsample': 0.9704539771645309, 'colsample_bytree': 0.701664926976546}. Best is trial 0 with value: 864983926.1659667.
[I 2025-03-11 02:26:52,708] Trial 1 finished with value: 846804518.459035 and parameters: {'n_estimators': 170, 'max_depth': 15, 'learning_rate': 0.03581100790096955, 'subsample': 0.9898381131816445, 'colsample_bytree': 0.871705157074335}. Best is trial 1 with value: 846804518.459035.
[I 2025-03-11 02:26:52,953] Trial 2 finished with value: 880210091.1920576 and parameters: {'n_estimators': 239, 'max_depth': 7, 'learning_rate': 0.08514636624095977, 'subsample': 0.7457748034031528, 'colsample_bytree': 0.893367142919416}. Best is trial 1 with value: 846804518.459035.
[I 2025-03-11 02:26:53,179] Trial 3 finished with value: 785393197.8990865 and parameters: {'n_estimators': 170, 'max_depth': 9, 'learning_r

Best parameters for BTC-USD: {'n_estimators': 388, 'max_depth': 3, 'learning_rate': 0.014132988171124437, 'subsample': 0.6073793893110279, 'colsample_bytree': 0.608514008942836}
XGBoost Results for BTC-USD:
MSE: 722784827.29
MAPE: 25.20%
Directional Accuracy: 45.71%
Feature Open: 0.2924
Feature High: 0.1235
Feature Low: 0.1804
Feature Close: 0.1658
Feature Volume: 0.0006
Feature Lag1: 0.2032
Feature Lag7: 0.0068
Feature SMA7: 0.0235
Feature RSI14: 0.0014
Feature MACD: 0.0008
Feature Returns: 0.0009
Feature Volatility: 0.0007
Model and scaler saved for BTC-USD

Training XGBoost for ETH-USD...


[I 2025-03-11 02:27:00,082] Trial 0 finished with value: 19991.24959377321 and parameters: {'n_estimators': 483, 'max_depth': 9, 'learning_rate': 0.007755052480125277, 'subsample': 0.6687351983549851, 'colsample_bytree': 0.968322322165359}. Best is trial 0 with value: 19991.24959377321.
[I 2025-03-11 02:27:00,177] Trial 1 finished with value: 20682.44575024877 and parameters: {'n_estimators': 339, 'max_depth': 3, 'learning_rate': 0.00768903809720403, 'subsample': 0.8976875637485717, 'colsample_bytree': 0.8678732285916584}. Best is trial 0 with value: 19991.24959377321.
[I 2025-03-11 02:27:00,400] Trial 2 finished with value: 18951.521182370103 and parameters: {'n_estimators': 195, 'max_depth': 8, 'learning_rate': 0.08717145173442437, 'subsample': 0.7303554268964572, 'colsample_bytree': 0.981617922322066}. Best is trial 2 with value: 18951.521182370103.
[I 2025-03-11 02:27:00,721] Trial 3 finished with value: 36854.44764341319 and parameters: {'n_estimators': 254, 'max_depth': 11, 'lear

Best parameters for ETH-USD: {'n_estimators': 419, 'max_depth': 4, 'learning_rate': 0.016318972624234675, 'subsample': 0.7159220430878502, 'colsample_bytree': 0.7206079163500086}
XGBoost Results for ETH-USD:
MSE: 16576.59
MAPE: 3.33%
Directional Accuracy: 46.43%
Feature Open: 0.0474
Feature High: 0.1844
Feature Low: 0.3089
Feature Close: 0.4047
Feature Volume: 0.0016
Feature Lag1: 0.0250
Feature Lag7: 0.0022
Feature SMA7: 0.0181
Feature RSI14: 0.0030
Feature MACD: 0.0020
Feature Returns: 0.0015
Feature Volatility: 0.0012
Model and scaler saved for ETH-USD

Training XGBoost for XRP-USD...


[I 2025-03-11 02:27:12,603] Trial 0 finished with value: 2.2860067452442716 and parameters: {'n_estimators': 413, 'max_depth': 13, 'learning_rate': 0.029443751583677284, 'subsample': 0.724527914601244, 'colsample_bytree': 0.9690667117959306}. Best is trial 0 with value: 2.2860067452442716.
[I 2025-03-11 02:27:13,735] Trial 1 finished with value: 2.2673195241285864 and parameters: {'n_estimators': 468, 'max_depth': 13, 'learning_rate': 0.01273017098648033, 'subsample': 0.9212748124031308, 'colsample_bytree': 0.8333641477844607}. Best is trial 1 with value: 2.2673195241285864.
[I 2025-03-11 02:27:13,901] Trial 2 finished with value: 2.3219044487267437 and parameters: {'n_estimators': 488, 'max_depth': 4, 'learning_rate': 0.009875848703126425, 'subsample': 0.7140663171608429, 'colsample_bytree': 0.6059027753594435}. Best is trial 1 with value: 2.2673195241285864.
[I 2025-03-11 02:27:14,332] Trial 3 finished with value: 2.298082284147266 and parameters: {'n_estimators': 248, 'max_depth': 1

Best parameters for XRP-USD: {'n_estimators': 370, 'max_depth': 11, 'learning_rate': 0.02119005296838839, 'subsample': 0.9386790695277416, 'colsample_bytree': 0.8547520028374415}
XGBoost Results for XRP-USD:
MSE: 2.27
MAPE: 52.13%
Directional Accuracy: 50.00%
Feature Open: 0.0138
Feature High: 0.1860
Feature Low: 0.0202
Feature Close: 0.7046
Feature Volume: 0.0041
Feature Lag1: 0.0232
Feature Lag7: 0.0056
Feature SMA7: 0.0137
Feature RSI14: 0.0088
Feature MACD: 0.0067
Feature Returns: 0.0036
Feature Volatility: 0.0098
Model and scaler saved for XRP-USD
