In [1]:
import pandas as pd

from api.BinanceData import BinanceData
from models.Models import CryptoModel
from utils.CleanData import CleanData
from variables.QuantitativeFunc import QuantitativeFunc
from variables.Target import Target
from variables.TradingIndicators import TradingIndicators

In [2]:
# Data import: fetch 365 days of BTCUSDT data through the BinanceData loader.
# The loader instance gets its own name on purpose: rebinding `BinanceData`
# to an instance would shadow the imported class, and re-running this cell
# would then fail because the instance is not callable.
binance_client = BinanceData()
data = binance_client.load_data("BTCUSDT", days=365)
print(data.head(2))
print(len(data))

       open_time            open            high             low  \
0  1727171400000  63515.97000000  63538.00000000  63500.00000000   
1  1727171700000  63528.20000000  63534.48000000  63502.79000000   

            close       volume     close_time quote_asset_volume  \
0  63528.21000000  26.93131000  1727171699999   1710653.13372570   
1  63524.50000000  28.93329000  1727171999999   1837785.64615810   

   number_of_trades taker_buy_base  taker_buy_quote ignore  
0              5091    13.83107000  878584.25460150      0  
1              5940    11.14070000  707634.39202440      0  
105120


In [3]:
# Data cleaning: run the raw klines through the project's CleanData helper
cleaner = CleanData()
cleaned_data = cleaner.clean_klines_data(data)

# Quick look at the result: the index name first, then the last three rows
for preview in (cleaned_data.index.name, cleaned_data.tail(3)):
    print(preview)

open_time
                          high        low      close    volume  \
open_time                                                        
2025-09-24 09:35:00  112837.42  112657.80  112804.42  49.03181   
2025-09-24 09:40:00  112804.42  112740.00  112740.00  14.32038   
2025-09-24 09:45:00  112740.01  112690.88  112690.89  11.95958   

                     quote_asset_volume  number_of_trades  taker_buy_quote  
open_time                                                                   
2025-09-24 09:35:00        5.528530e+06              6203     4.861913e+06  
2025-09-24 09:40:00        1.614813e+06              2609     2.264333e+05  
2025-09-24 09:45:00        1.348144e+06              1484     2.220709e+05  


In [6]:
# Export the cleaned data to an Excel file so that later work can read it from
# disk instead of querying the Binance API every time
n_rows = len(cleaned_data)
print(n_rows)

export_path = "../data/clean_dataset.xlsx"
cleaned_data.to_excel(export_path, index=True)

105120


In [2]:
# Load the data back from the Excel export (the first column becomes the index)
data = pd.read_excel("../data/clean_dataset.xlsx", index_col=0)

# Return variables and the classification target
data["return"] = QuantitativeFunc.return_(data['close'])
data["return_10"] = QuantitativeFunc.return_10(data['close'])
# NOTE(review): the target is computed from the same-row `return`; if the model
# later keeps `return` among its predictors this leaks the label — confirm.
data["target"] = Target.compute(data["return"], threshold=0.001)

# Technical indicators, applied one after another in this exact order;
# each step receives the frame produced by the previous one.
indicator_steps = [
    (TradingIndicators.add_ema, {"price_col": "close", "window": 12, "new_col": "EMA_12"}),
    (TradingIndicators.add_macd, {"price_col": "close"}),
    (TradingIndicators.add_bollinger_bands, {"price_col": "close"}),
    (TradingIndicators.add_rsi, {"price_col": "close"}),
    (
        TradingIndicators.add_atr,
        {"high_col": "high", "low_col": "low", "close_col": "close", "window": 14, "new_col": "ATR_14"},
    ),
    (TradingIndicators.add_high_low_range, {"high_col": "high", "low_col": "low"}),
    (TradingIndicators.add_buy_pressure, {"high_col": "high", "low_col": "low", "close_col": "close"}),
    (
        TradingIndicators.add_volume_pressure,
        {"taker_buy_col": "taker_buy_quote", "total_volume_col": "quote_asset_volume"},
    ),
    (TradingIndicators.add_realized_volatility, {"returns_col": "return", "window": 14}),
]
for add_indicator, params in indicator_steps:
    data = add_indicator(data, **params)

# Summary: first two rows, class balance of the target, resulting columns
for summary in (data.head(2), data["target"].value_counts(), data.columns):
    print(summary)

                         high       low     close    volume  \
open_time                                                     
2024-09-24 09:50:00  63538.00  63500.00  63528.21  26.93131   
2024-09-24 09:55:00  63534.48  63502.79  63524.50  28.93329   

                     quote_asset_volume  number_of_trades  taker_buy_quote  \
open_time                                                                    
2024-09-24 09:50:00        1.710653e+06              5091    878584.254601   
2024-09-24 09:55:00        1.837786e+06              5940    707634.392024   

                       return  return_10  target  ...  MACD_Signal  \
open_time                                         ...                
2024-09-24 09:50:00       NaN        NaN       0  ...     0.000000   
2024-09-24 09:55:00 -0.000058        NaN       0  ...    -0.059191   

                     Bollinger_SMA  Bollinger_Upper  Bollinger_Lower  RSI_14  \
open_time                                                                

In [3]:
# Hand the feature table to the project's model wrapper.
# CryptoModel is imported together with the other dependencies in the
# notebook's first (imports) cell rather than in the middle of the notebook.
crypto_model = CryptoModel(data)

# Logistic regression with test_size=0.2.
# An XGBoost alternative was previously tried here via
# crypto_model.xgboost_classification(test_size=0.2) and is currently disabled.
# NOTE(review): `target` is derived from the `return` column in the feature
# cell; near-perfect scores from this call would point to label leakage —
# confirm that CryptoModel excludes same-bar return columns from the predictors.
logit_model = crypto_model.logistic_regression(test_size=0.2)

print(logit_model)

=== Logistic Regression Metrics ===
Accuracy : 0.9933
Precision: 0.9387
Recall   : 1.0000
F1-score : 0.9684
Confusion Matrix:
[[18720   141]
 [    0  2160]]
LogisticRegression(class_weight='balanced', max_iter=1000, random_state=42)
