<a href="https://colab.research.google.com/github/lydianzr/xgboost/blob/main/xgboost.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [None]:
# Read the historical price data from CSV into a DataFrame
df = pd.read_csv(filepath_or_buffer='cleaned_binance_linear_BTCUSDT_1d.csv')

In [None]:
# Use the existing 'datetime' column as the row index.
# The assignment is guarded and not done in place so this cell can be re-run
# safely: once 'datetime' has become the index it is no longer a column, and
# an unconditional set_index('datetime') would raise a KeyError.
if df.index.name != 'datetime':
    df = df.set_index('datetime')
df.head()

Unnamed: 0_level_0,start_time,close,high,low,open,volume,Year,Month,Day,DayOfWeek,...,RSI_14,MACD,MACD_Signal,Bollinger_Middle,Bollinger_Std,Bollinger_Upper,Bollinger_Lower,Daily_Return,Log_Return,Volatility_20d
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-10-19,1571443200000,7947.13,8108.54,7852.12,7945.57,78024.395,2019,10,19,5,...,51.893107,0.0,0.0,33870.7255,1665.594913,37941.088155,29129.206302,0.000456,0.000455,0.028914
2019-10-20,1571529600000,8218.23,8300.5,7871.17,7947.78,78663.915,2019,10,20,6,...,51.893107,21.626211,4.325242,33870.7255,1665.594913,37941.088155,29129.206302,0.034113,0.033544,0.028914
2019-10-21,1571616000000,8201.74,8334.57,8150.0,8218.23,66710.26016,2019,10,21,0,...,51.893107,37.007929,10.86178,33870.7255,1665.594913,37941.088155,29129.206302,-0.002007,-0.002009,0.028914
2019-10-22,1571702400000,8020.89,8290.0,8000.0,8201.0,69506.88,2019,10,22,1,...,51.893107,34.21061,15.531546,33870.7255,1665.594913,37941.088155,29129.206302,-0.02205,-0.022297,0.028914
2019-10-23,1571788800000,7465.0,8047.78,7172.76,8020.68,108484.139,2019,10,23,2,...,51.893107,-12.715389,9.882159,33870.7255,1665.594913,37941.088155,29129.206302,-0.069305,-0.071824,0.028914


In [None]:
# Date boundaries ('YYYY-MM-DD') for the training, testing and
# forward-testing windows, given as (start, end) pairs.
train_start_date, train_end_date = '2019-10-19', '2023-12-31'
# The test window ends the day before the forward window starts.
test_start_date, test_end_date = '2024-01-01', '2024-04-08'
# The forward window is meant to run to the end of the data.
forward_start_date, forward_end_date = '2024-04-09', '2025-04-09'

In [None]:
# 1. Create three target variables
def create_multi_target(df, periods=1, price_change_threshold=0.01):
    """Build three 3-class label columns from price, RSI and MACD data.

    All three targets share one encoding: 0 = Sell, 1 = Hold, 2 = Buy.

    * ``xgb_param1_target``: fractional change of ``close`` over the next
      ``periods`` rows. Above ``+price_change_threshold`` is Buy, below
      ``-price_change_threshold`` is Sell, anything else is Hold.
    * ``xgb_param2_target``: ``RSI_14`` above 70 (overbought) is Sell,
      below 30 (oversold) is Buy, anything else is Hold.
    * ``xgb_param3_target``: ``MACD`` above ``MACD_Signal`` is Buy, below
      it is Sell, equal is Hold.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``close``, ``RSI_14``, ``MACD`` and
        ``MACD_Signal``. The frame is not modified; labels are computed on
        an internal copy.
    periods : int, default 1
        Number of rows to look ahead for the price-change target.
    price_change_threshold : float, default 0.01
        Minimum absolute fractional price change that counts as Buy/Sell.

    Returns
    -------
    pandas.DataFrame
        The three target columns, indexed like ``df`` minus every row that
        contains a NaN in any column.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    """
    required = ('close', 'RSI_14', 'MACD', 'MACD_Signal')
    missing = [name for name in required if name not in df.columns]
    if missing:
        raise KeyError(f"create_multi_target: missing required column(s): {missing}")

    # Work on a copy so the caller's frame gains no columns and loses no rows.
    labeled = df.copy()

    # Price-change target; default is Hold.
    labeled['xgb_param1_target'] = 1
    future_price_change = (labeled['close'].shift(-periods) - labeled['close']) / labeled['close']
    labeled.loc[future_price_change > price_change_threshold, 'xgb_param1_target'] = 2   # significant rise = buy
    labeled.loc[future_price_change < -price_change_threshold, 'xgb_param1_target'] = 0  # significant drop = sell
    # NOTE: the last `periods` rows have no future price (NaN), so both
    # comparisons are False and those rows keep the default Hold label.

    # RSI target. Overbought must map to Sell (0) and oversold to Buy (2) so
    # the codes agree with the shared 0 = Sell / 2 = Buy encoding.
    labeled['xgb_param2_target'] = 1
    labeled.loc[labeled['RSI_14'] > 70, 'xgb_param2_target'] = 0  # overbought = sell
    labeled.loc[labeled['RSI_14'] < 30, 'xgb_param2_target'] = 2  # oversold = buy

    # MACD target: position of the MACD line relative to its signal line.
    labeled['xgb_param3_target'] = 1
    labeled.loc[labeled['MACD'] > labeled['MACD_Signal'], 'xgb_param3_target'] = 2  # MACD above signal = buy
    labeled.loc[labeled['MACD'] < labeled['MACD_Signal'], 'xgb_param3_target'] = 0  # MACD below signal = sell

    # Drop rows with a NaN in any column, then return only the labels.
    labeled = labeled.dropna()
    return labeled[['xgb_param1_target', 'xgb_param2_target', 'xgb_param3_target']]

In [None]:
# Compute the labels on a copy of the data, then attach them column-wise
# (aligned on the index) and keep only rows without any missing value.
multi_target_df = create_multi_target(df.copy())
df_with_targets = pd.concat(
    [df, multi_target_df],
    axis='columns',
).dropna(axis='index', how='any')

In [None]:
# Slice the labelled data into independent copies for the training,
# testing and forward-testing date windows (label-based .loc slices).
train_df, test_df, forward_df = (
    df_with_targets.loc[window_start:window_end].copy()
    for window_start, window_end in (
        (train_start_date, train_end_date),
        (test_start_date, test_end_date),
        (forward_start_date, forward_end_date),
    )
)

In [None]:
# Every column except the three target columns is used as a model feature.
features = list(filter(
    lambda name: name not in ('xgb_param1_target', 'xgb_param2_target', 'xgb_param3_target'),
    train_df.columns,
))
# Feature matrices for each of the three date windows.
X_train, X_test, X_forward = (
    window[features] for window in (train_df, test_df, forward_df)
)

In [None]:
# 2. Train three separate XGBoost models with 3-class classification.
# One identically configured classifier is fitted per target column, in
# order; fit() returns the estimator itself, so each name is bound to its
# fitted model.
model1, model2, model3 = (
    XGBClassifier(
        n_estimators=100,
        random_state=42,
        objective='multi:softmax',
        num_class=3,
    ).fit(X_train, train_df[target_name])
    for target_name in ('xgb_param1_target', 'xgb_param2_target', 'xgb_param3_target')
)
# Show the last fitted model as the cell's output.
model3

In [None]:
# 3. Predict class labels for the forward-testing window with each model
y_pred_forward_1, y_pred_forward_2, y_pred_forward_3 = (
    fitted_model.predict(X_forward)
    for fitted_model in (model1, model2, model3)
)

In [None]:
# One row per forward-window date ('YYYY-MM-DD') with each model's prediction
forward_signals_df = pd.DataFrame(dict(
    date=pd.to_datetime(forward_df.index).strftime('%Y-%m-%d'),
    xgb_param1_signal=y_pred_forward_1,
    xgb_param2_signal=y_pred_forward_2,
    xgb_param3_signal=y_pred_forward_3,
))

In [None]:
# Write the forward-testing signals to CSV without the row index
forward_signals_df.to_csv(path_or_buf='xgb_forward_signals.csv', index=False)

In [None]:
# Predict class labels for every labelled row. This frame includes the
# rows of the training window, so those predictions are in-sample.
X_all = df_with_targets.loc[:, features]
y_pred_all_1, y_pred_all_2, y_pred_all_3 = (
    fitted_model.predict(X_all)
    for fitted_model in (model1, model2, model3)
)

In [None]:
# 4. One row per date ('YYYY-MM-DD') of the whole labelled dataset with each
# model's prediction
all_signals_df = pd.DataFrame(dict(
    date=pd.to_datetime(df_with_targets.index).strftime('%Y-%m-%d'),
    xgb_param1_signal=y_pred_all_1,
    xgb_param2_signal=y_pred_all_2,
    xgb_param3_signal=y_pred_all_3,
))

In [None]:
# Save the signals for the entire dataset to xgb_all_signals.csv
all_signals_path = 'xgb_all_signals.csv'
all_signals_df.to_csv(all_signals_path, index=False)

# Build the message from the same path variable so the reported file name
# always matches the file that was actually written.
print(f"\nSignals for the entire dataset saved to: {all_signals_path}")
print(all_signals_df.head())


Signals for the entire dataset saved to: xgb_forward_signals.csv
         date  xgb_param1_signal  xgb_param2_signal  xgb_param3_signal
0  2019-10-19                  2                  1                  1
1  2019-10-20                  1                  1                  2
2  2019-10-21                  0                  1                  2
3  2019-10-22                  0                  1                  2
4  2019-10-23                  1                  1                  0
