In [1]:
from api.BinanceData import BinanceData
from utils.CleanData import CleanData
from variables.Returns import Returns
from variables.Target import Target
from variables.TradingIndicators import TradingIndicators
import pandas as pd
from models.Models import CryptoModel

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the data.
# Keep the client instance under its own name so the imported BinanceData class
# is not shadowed; rebinding `BinanceData` to an instance makes this cell fail
# on a second run with "'BinanceData' object is not callable".
binance_client = BinanceData()
data = binance_client.load_data("BTCUSDT", days=1095)  # 1095 days = 3 years of data
print(data.head(2))
print(len(data))

       open_time            open            high             low  \
0  1664658000000  19250.95000000  19261.54000000  19248.87000000   
1  1664658300000  19260.30000000  19262.33000000  19244.58000000   

            close        volume     close_time quote_asset_volume  \
0  19259.68000000  269.59853000  1664658299999   5191135.49695430   
1  19250.17000000  238.35668000  1664658599999   4588603.20906890   

   number_of_trades taker_buy_base   taker_buy_quote ignore  
0              5443   169.61903000  3266112.94285090      0  
1              5358   113.42291000  2183540.30969950      0  
315345


In [3]:
# Data cleaning: run the raw klines frame through CleanData.
cleaner = CleanData()
cleaned_data = cleaner.clean_klines_data(data)
# Show the index name, then the last three rows.
print(cleaned_data.index.name, cleaned_data.tail(3), sep="\n")

open_time
                          high        low      close     volume  \
open_time                                                         
2025-09-30 20:50:00  114714.91  114356.12  114695.65  121.35967   
2025-09-30 20:55:00  114723.57  114575.75  114626.36   69.78428   
2025-09-30 21:00:00  114719.97  114626.35  114714.40    9.12324   

                     quote_asset_volume  number_of_trades  taker_buy_quote  
open_time                                                                   
2025-09-30 20:50:00        1.389826e+07             17930     9.499265e+06  
2025-09-30 20:55:00        7.999537e+06             11303     1.561878e+06  
2025-09-30 21:00:00        1.046338e+06              2038     4.682245e+05  


In [6]:
# Export the cleaned data to a CSV file so later work can read it from disk
# instead of querying the Binance API every time.
print(cleaned_data.shape[0])
cleaned_data.to_csv("../data/clean_dataset.csv", index=True)

315345


In [4]:
# Reload the cleaned dataset from the CSV file; the first column becomes the index.
data = pd.read_csv("../data/clean_dataset.csv", index_col=0)

# Return variables and the target (the target is computed from the "return" column).
data["return"] = Returns.return_(data['close'])
data["return_10"] = Returns.return_10(data['close'])
data["target"] = Target.compute(data["return"])

# Add the technical indicators one after another; each step receives the frame
# returned by the previous step.
data = (
    data
    .pipe(TradingIndicators.add_sma, price_col="close", window=20, new_col="SMA_20")
    .pipe(TradingIndicators.add_ema, price_col="close", window=12, new_col="EMA_12")
    .pipe(TradingIndicators.add_macd, price_col="close")
    .pipe(TradingIndicators.add_bollinger_bands_width, price_col="close")
    .pipe(TradingIndicators.add_rsi, price_col="close")
    .pipe(
        TradingIndicators.add_atr,
        high_col="high",
        low_col="low",
        close_col="close",
        window=14,
        new_col="ATR_14",
    )
    .pipe(TradingIndicators.add_high_low_range, high_col="high", low_col="low")
    .pipe(TradingIndicators.add_buy_pressure, high_col="high", low_col="low", close_col="close")
    .pipe(
        TradingIndicators.add_volume_pressure,
        taker_buy_col="taker_buy_quote",
        total_volume_col="quote_asset_volume",
    )
    .pipe(TradingIndicators.add_realized_volatility, returns_col="return", window=14)
)

# Convert the index labels read back from the CSV into datetimes.
data.index = pd.to_datetime(data.index)

# Last three rows, class balance of the target, and the resulting column list.
print(data.tail(3), data["target"].value_counts(), data.columns, sep="\n")

                          high        low      close     volume  \
open_time                                                         
2025-09-30 20:50:00  114714.91  114356.12  114695.65  121.35967   
2025-09-30 20:55:00  114723.57  114575.75  114626.36   69.78428   
2025-09-30 21:00:00  114719.97  114626.35  114714.40    9.12324   

                     quote_asset_volume  number_of_trades  taker_buy_quote  \
open_time                                                                    
2025-09-30 20:50:00        1.389826e+07             17930     9.499265e+06   
2025-09-30 20:55:00        7.999537e+06             11303     1.561878e+06   
2025-09-30 21:00:00        1.046338e+06              2038     4.682245e+05   

                       return  return_10  target  ...         EMA_12  \
open_time                                         ...                  
2025-09-30 20:50:00  0.002508   0.003294       1  ...  114353.695316   
2025-09-30 20:55:00 -0.000604   0.003843       0  ...  11

In [5]:
# Remove these raw columns before building the lagged feature set.
data = data.drop(columns=["high", "low", "close", "quote_asset_volume", "taker_buy_quote"])

# Feature engineering: add lagged copies of every column except the target.
# range(1, 2) yields only lag 1; widen the range to generate further lags.
feature_columns = [name for name in data.columns if name != "target"]
lagged_features = {
    f"{name}_lag_{lag}": data[name].shift(lag)
    for name in feature_columns
    for lag in range(1, 2)
}
data = data.assign(**lagged_features)

# Discard every row that contains a missing value.
data = data.dropna()
print(data.columns)

Index(['volume', 'number_of_trades', 'return', 'return_10', 'target', 'SMA_20',
       'EMA_12', 'MACD', 'MACD_Signal', 'BB_width', 'RSI_14', 'ATR_14',
       'High_Low_Range', 'Buy_Pressure', 'Volume_Pressure',
       'Realized_Volatility', 'volume_lag_1', 'number_of_trades_lag_1',
       'return_lag_1', 'return_10_lag_1', 'SMA_20_lag_1', 'EMA_12_lag_1',
       'MACD_lag_1', 'MACD_Signal_lag_1', 'BB_width_lag_1', 'RSI_14_lag_1',
       'ATR_14_lag_1', 'High_Low_Range_lag_1', 'Buy_Pressure_lag_1',
       'Volume_Pressure_lag_1', 'Realized_Volatility_lag_1'],
      dtype='object')


In [None]:
# Apply the XGBoost model with rolling cross-validation through the CryptoModel class.
import warnings

# Silence every warning category from this point on.
warnings.filterwarnings(action="ignore")

model = CryptoModel(data, lag=1)
results = model.rolling_xgboost(
    months_train=4,
    weeks_test=4,
    n_trials=50,
)