# Projet 9 — Signaux de trading BTC–USDC : RandomForest vs TabNet

Ce notebook suit le **plan prévisionnel** :  
- Données Binance (BTC–USDC)  
- Cible réaliste basée sur un ROI futur et frais  
- Baseline **RandomForestClassifier**  
- Modèle récent **TabNet** (IBM TSFM) utilisé comme **forecaster**, puis conversion en signal de trading  
- Essai Modèle récent **Tiny Time Mixer (TTM)** (IBM TSFM) utilisé comme **forecaster**, puis conversion en signal de trading mais trop long quand beaucoup de donnée. Code commenté
- Évaluation **ML + trading** (PnL, drawdown)

> Remarque : pour garder une comparaison simple et robuste, TTM est utilisé ici en *forecasting* du `Close` puis transformé en signal `Buy/No-trade` via un seuil de ROI.


## 0) Paramètres

In [179]:
# Paramètres globaux (à ajuster si besoin)
PAIR = "BTC/USDC"
TIMEFRAME = "1h"          # "15m" ou "1h"
START_DATE = "2015-01-01"
END_DATE = "2025-12-15"
CAPITAL_INIT = 1000
# Fenêtre et horizon
LOOKBACK = 512            # contexte pour TTM
HORIZON_STEPS = 24        # 24h si TIMEFRAME = 1h ; 16 si 4h en 15m ; etc.

# Trading/label
FEE_ROUNDTRIP = 0.002     # ~0.20% frais achat+vente (spot, simplifié)
THRESH = FEE_ROUNDTRIP    # seuil ROI minimal pour décider "Buy"

# Split temporel (chronologique)
TRAIN_RATIO = 0.80
VALID_RATIO = 0.10        # test = reste

RANDOM_SEED = 42


## 1) Installations & imports & Functions

### 1.1 Imports

#### Local imports

In [180]:
# Ajout du répertoire racine au sys.path pour s'assurer que le module utils est trouvé
import sys
import os
import importlib

# Obtenir le répertoire de travail actuel (où se trouve le notebook)
project_root = os.getcwd()
if project_root not in sys.path:
    sys.path.insert(0, project_root)

# Forcer le rechargement du module utils pour éviter les problèmes de cache du kernel Jupyter
if 'utils' in sys.modules:
    importlib.reload(sys.modules['utils'])

# Import des classes depuis utils
from backtest import Backtest
from utils import plot_backtest, prepare_data_advanced_features ,prepare_data_min_features

In [181]:

# Si vous êtes sur Kaggle / Colab : décommentez selon l'environnement.

# !pip -q install ccxt ta scikit-learn matplotlib pandas numpy torch transformers accelerate

# --- TSFM (IBM) : on reproduit l'approche du notebook Kaggle TTM ---
# Le notebook Kaggle clone IBM/tsfm et importe tsfm_public.
# Ici on fait pareil pour être aligné.
import os, sys, subprocess, textwrap

TSFM_DIR = "tsfm"
# if not os.path.isdir(TSFM_DIR):
#     subprocess.check_call(["bash","-lc", f"git clone --depth 1 --branch v0.2.9 https://github.com/IBM/tsfm.git {TSFM_DIR}"])

# # Use sys.executable to ensure we install in the same Python environment as the notebook kernel
# subprocess.check_call([sys.executable, "-m", "pip", "-q", "install", "-e", TSFM_DIR])

import math
import numpy as np
import pandas as pd
import joblib

import matplotlib.pyplot as plt

from sklearn.metrics import (
    classification_report, confusion_matrix, roc_auc_score, precision_recall_curve, auc
)
from sklearn.ensemble import RandomForestClassifier

import ta

import torch
from transformers import set_seed

set_seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)


### 1.1 Functions utils

#### Backtest Class

In [182]:
# class Trader:
#     def __init__(self, row: pd.Series, idx: int, idx_entry: int, signal: np.ndarray, capital: float, portfolio: float, position: float, qty: float, entry_price: float, exit_price: float, fee_roundtrip=0.002, pct_capital=1, debug=False, trade_list=[]):
#         self.row = row
#         self.idx = idx
#         self.signal = signal
#         self.fee_roundtrip = fee_roundtrip
#         self.pct_capital = pct_capital
#         self.capital = capital    
#         self.portfolio = portfolio
#         self.position = position
#         self.qty = qty
#         self.entry_price = entry_price
#         self.exit_price = exit_price
#         self.debug = debug
#         self.idx_entry = idx_entry
#         self.trade_list = trade_list
#         self.timestamp_entry = None
#         self.max_drawdown_pct = 0


#     def _buy(self):
#         self.qty = self.pct_capital * self.capital / self.row["Close"]
#         position_value = self.qty * self.row["Close"]
#         self.position = position_value  # Montant investi dans la position
#         self.entry_price = self.row["Close"]
#         buy_fees = self.fee_roundtrip * position_value / 2
#         self.capital -= (position_value + buy_fees)
#         self.portfolio = position_value  # Portfolio = valeur de la position
#         self.idx_entry = self.idx
#         self.timestamp_entry = self.row["Timestamp"]
#         if self.debug:
#             print(f"Idx: {self.idx} / Buy: {self.qty:.8f} @ {self.entry_price:.2f}")
#         return True

#     def _sell(self):
#         sell_value = self.qty * self.row["Close"]
#         sell_fees = self.fee_roundtrip * sell_value / 2
#         PnL = self.qty * (self.row["Close"] - self.entry_price)
#         PnL_net = PnL - sell_fees
#         capital_before_sell = self.portfolio+self.capital
#         self.capital += sell_value - sell_fees
#         self.position = 0  # Plus de position ouverte
#         self.exit_price = self.row["Close"]
#         self.portfolio = 0  # Portfolio vide après vente
#         self.max_drawdown_pct = (PnL/capital_before_sell)*100
#         self.trade_list.append({
#             "idx": self.idx,
#             "idx_entry": self.idx_entry,
#             "Timestamp": self.row["Timestamp"],
#             "Timestamp_entry": self.timestamp_entry,
#             "qty": self.qty,
#             "entry_price": self.entry_price,
#             "exit_price": self.exit_price,
#             "PnL": PnL,
#             "PnL_net": PnL_net,
#             "Capital": self.capital,
#             "MaxDrawDown": self.max_drawdown_pct,
#         })

#         if self.debug:
#             print(f"Idx: {self.idx} / Sell: {self.qty:.8f} @ {self.exit_price:.2f}")
#             print(f"PnL: {PnL:.2f}")
#             print(f"PnL net (après frais): {PnL_net:.2f}")
#             print(f"Portfolio: {self.portfolio:.2f}")
#             print(f"Capital: {self.capital:.2f}")
#         return True

#     def run(self):
#         # Conversion du signal en int (gère les cas numpy array et scalar)
#         sig = int(self.signal) if isinstance(self.signal, (np.ndarray, np.generic)) else int(self.signal)
        
#         # Mise à jour du portfolio si position ouverte (valeur actuelle de la position)
#         if self.position > 0:
#             self.portfolio = self.qty * self.row["Close"]
        
#         if self.debug:
#             print(f"Idx: {self.idx} / Signal: {sig} / Position: {self.position:.2f} / Portfolio: {self.portfolio:.2f}")
        
#         # Achat : signal=1 et pas de position ouverte
#         if sig == 1 and self.position == 0:
#             self._buy()
#         # Vente : signal=0 et position ouverte (on vend dès que le signal passe à 0)
#         elif sig == 0 and self.position > 0 and self.idx >= self.idx_entry + HORIZON_STEPS:
#             self._sell()
        
#         return self.portfolio, self.capital, self.position, self.qty, self.entry_price, self.exit_price, self.trade_list     


# class Backtest:
#     def __init__(self, df_bt: pd.DataFrame, signal: np.ndarray, fee_roundtrip=0.002, pct_capital=1, capital_init=1000, debug=False):
#         self.df_bt = df_bt
#         self.signal = signal
#         self.fee_roundtrip = fee_roundtrip
#         self.pct_capital = pct_capital
#         self.capital_init = capital_init  # Sauvegarder le capital initial
#         self.capital = capital_init
#         self.position = 0
#         self.qty = 0
#         self.entry_price = 0
#         self.exit_price = 0
#         self.portfolio = 0
#         self.debug = debug
#         self.idx_entry = 0
#         self.trade_list = []
#         self.max_drawdown_pct = 0
#         self.run()
#         self.print_stats()

#     def run(self):
#         trader = Trader([], 0, 0, 0, self.capital, self.portfolio, self.position, self.qty, self.entry_price, self.exit_price, self.fee_roundtrip, self.pct_capital, debug=self.debug, trade_list=self.trade_list)
#         last_idx = None
#         for i, row in self.df_bt.iterrows():
#             trader.row = row
#             trader.idx = i
#             trader.signal = self.signal[i]
#             trader.run()
#             self.portfolio = trader.portfolio
#             self.capital = trader.capital   
#             self.position = trader.position
#             self.qty = trader.qty
#             self.entry_price = trader.entry_price
#             self.exit_price = trader.exit_price
#             self.idx_entry = trader.idx_entry
#             self.timestamp_entry = trader.timestamp_entry
#             last_idx = i

#         # Clôture forcée si position ouverte en fin de backtest
#         if self.position > 0 and last_idx is not None:
#             last_row = self.df_bt.iloc[last_idx]
#             sell_value = self.qty * last_row["Close"]
#             sell_fees = self.fee_roundtrip * sell_value / 2
#             PnL = self.qty * (last_row["Close"] - self.entry_price)
#             PnL_net = PnL - sell_fees
#             self.capital += sell_value - sell_fees
#             self.portfolio = 0
#             self.position = 0
#             self.trade_list.append({
#                 "idx": last_idx,
#                 "idx_entry": self.idx_entry,
#                 "Timestamp": last_row["Timestamp"],
#                 "Timestamp_entry": self.timestamp_entry,
#                 "qty": self.qty,
#                 "entry_price": self.entry_price,
#                 "exit_price": last_row["Close"],
#                 "PnL": PnL,
#                 "PnL_net": PnL_net,
#                 "Capital": self.capital,
#                 "MaxDrawDown": self.max_drawdown_pct,
#             })
#             self.qty = 0
#             self.entry_price = 0
#             self.exit_price = 0
        
#         days = (self.df_bt.iloc[-1]["Timestamp"] - self.df_bt.iloc[0]["Timestamp"]).days
#         if days <= 0:
#             days = 1  # Avoid division by zero
#         self.days = days
#         self.PnL = self.capital - self.capital_init
#         self.ROI_pct = self.PnL / self.capital_init *100
#         self.ROI_day_pct = self.PnL / self.capital_init / days * 100
#         # Calculate annualized ROI: convert ROI_pct from percentage to decimal first
#         roi_decimal = self.ROI_pct / 100
#         if roi_decimal <= -1:
#             # If we lost more than 100%, return -100%
#             self.ROI_annualized_pct = -100.0
#         else:
#             self.ROI_annualized_pct = ((1 + roi_decimal) ** (365.0 / days) - 1) * 100
#         self.df_trades = pd.DataFrame(self.trade_list)
#         self.win_rates = self.df_trades["PnL"].apply(lambda x: x > 0).mean()*100
#         self.nb_trades = len(self.df_trades)
#         self.nb_trades_by_day = self.nb_trades / days
#         self.max_drawdown_pct = self.df_trades["MaxDrawDown"].max()
#         return self.portfolio, self.capital, self.position, self.qty, self.entry_price, self.exit_price, self.trade_list
    
#     def print_stats(self):
#         print(f"Days: {self.days}")
#         print(f"Portfolio: {self.portfolio}")
#         print(f"Capital: {self.capital}")
#         print(f"PnL: {self.capital - CAPITAL_INIT}")
#         print(f"Position: {self.position}")
#         print(f"ROI: {self.ROI_pct:.2f}%")
#         print(f"ROI annualized: {self.ROI_annualized_pct:.2f}%")
#         print(f"ROI day: {self.ROI_day_pct:.2f}%")
#         print(f"Win rate: {self.win_rates:.2f}%")
#         print(f"Nb trades: {self.nb_trades}")
#         print(f"Nb trades par jour: {self.nb_trades_by_day:.2f}")
#         print(f"Max DrawDown: {self.max_drawdown_pct:.2f}%")


#### Plot backtest

In [183]:
# import plotly.graph_objs as go
# from plotly.subplots import make_subplots


# def plot_backtest(backtester):
#     # On suppose que trades_df == backtester.df_trades déjà généré avec l'algo ci-dessus
#     trades_df = backtester.df_trades

#     # Pour le graphique, récupérer le temps et close price
#     df_curves = backtester.df_bt.reset_index(drop=True)
#     df_curves["Timestamp_entry"] = df_curves["Timestamp"]
#     df_curves = pd.merge(df_curves, trades_df[["Timestamp", "exit_price","Capital"]], on="Timestamp", how="left")
#     df_curves = pd.merge(df_curves, trades_df[["Timestamp_entry", "entry_price"]], on="Timestamp_entry", how="left")
#     df_curves["Capital"] = df_curves["Capital"].ffill().fillna(backtester.capital_init)

#     timestamps = df_curves["Timestamp"]
#     close_prices = df_curves["Close"]
#     capital_curve = df_curves["Capital"]
#     buy_time = df_curves["Timestamp_entry"]
#     buy_price = df_curves["entry_price"]
#     sell_time = df_curves["Timestamp"]
#     sell_price = df_curves["exit_price"]

#     # Créer un subplot avec 2 graphiques (prix en haut, capital en bas)
#     fig = make_subplots(
#         rows=2, cols=1,
#         shared_xaxes=True,
#         vertical_spacing=0.1,
#         subplot_titles=('Cours Close avec signaux Buy/Sell', 'Évolution du Capital'),
#         row_heights=[0.6, 0.4]
#     )

#     # Graphique 1 : Prix avec signaux
#     fig.add_trace(
#         go.Scatter(
#             x=timestamps,
#             y=close_prices,
#             mode='lines',
#             name='Close',
#             line=dict(color='blue')
#         ),
#         row=1, col=1
#     )

#     fig.add_trace(
#         go.Scatter(
#             x=buy_time,
#             y=buy_price,
#             mode='markers',
#             marker=dict(color='green', symbol='triangle-up', size=10),
#             name='Buy'
#         ),
#         row=1, col=1
#     )

#     fig.add_trace(
#         go.Scatter(
#             x=sell_time,
#             y=sell_price,
#             mode='markers',
#             marker=dict(color='red', symbol='triangle-down', size=10),
#             name='Sell'
#         ),
#         row=1, col=1
#     )

#     # Graphique 2 : Capital
#     fig.add_trace(
#         go.Scatter(
#             x=timestamps,
#             y=capital_curve,
#             mode='lines',
#             name='Capital',
#             line=dict(color='purple', width=2)
#         ),
#         row=2, col=1
#     )

#     # Ligne de référence pour le capital initial
#     fig.add_hline(
#         y=backtester.capital_init,
#         line_dash="dash",
#         line_color="gray",
#         annotation_text=f"Capital initial: {backtester.capital_init:.2f}",
#         row=2, col=1
#     )

#     fig.update_layout(
#         title='Cours Close avec signaux Buy/Sell et Évolution du Capital',
#         height=800,
#         width=1200,
#         showlegend=True,
#         legend=dict(x=0, y=1)
#     )

#     fig.update_xaxes(title_text="Timestamp", row=2, col=1)
#     fig.update_yaxes(title_text="Prix", row=1, col=1)
#     fig.update_yaxes(title_text="Capital", row=2, col=1)

#     fig.show()

## 2) Chargement des données Binance (ccxt) + cache local

In [184]:

import ccxt
from datetime import datetime, timezone
from pandas import to_datetime

def to_ms(dt: datetime) -> int:
    return int(dt.replace(tzinfo=timezone.utc).timestamp() * 1000)

def fetch_ohlcv_binance(pair: str, timeframe: str, start_date: int, end_date: int, limit=1000):
    ex = ccxt.binance({"enableRateLimit": True})
    all_rows = []
    since = to_ms(to_datetime(start_date))
    end_ms = to_ms(to_datetime(end_date))

    while since < end_ms:
        batch = ex.fetch_ohlcv(pair, timeframe=timeframe, since=since, limit=limit)
        if not batch:
            break
        all_rows.extend(batch)
        # avance d'un pas après le dernier timestamp
        since = batch[-1][0] + 1

        # sécurité anti-boucle
        if len(batch) < 10:
            break

    df = pd.DataFrame(all_rows, columns=["Timestamp", "Open", "High", "Low", "Close", "Volume"])
    df["Timestamp"] = pd.to_datetime(df["Timestamp"], unit="ms", utc=True)
    df = df.drop_duplicates(subset=["Timestamp"]).sort_values("Timestamp").reset_index(drop=True)
    return df

CACHE = f"btc_usdc_{TIMEFRAME}_{START_DATE}_{END_DATE}.csv"
if os.path.isfile(CACHE):
    df = pd.read_csv(CACHE, parse_dates=["Timestamp"])
    if df["Timestamp"].dt.tz is None:
        df["Timestamp"] = df["Timestamp"].dt.tz_localize("UTC")
else:
    df = fetch_ohlcv_binance(PAIR, TIMEFRAME, START_DATE, END_DATE)
    df.to_csv(CACHE, index=False)

df.head(), df.tail(), df.shape


(                  Timestamp     Open     High      Low    Close     Volume
 0 2018-12-15 03:00:00+00:00  3200.00  3312.32  3000.00  3225.97   2.374006
 1 2018-12-15 04:00:00+00:00  3225.97  3228.10  3205.58  3228.10   2.410518
 2 2018-12-15 05:00:00+00:00  3228.10  3228.10  3204.06  3222.87   3.514068
 3 2018-12-15 06:00:00+00:00  3225.68  3225.80  3199.87  3199.87   2.220411
 4 2018-12-15 07:00:00+00:00  3199.88  3220.15  3191.44  3205.42  46.164846,
                       Timestamp      Open      High       Low     Close  \
 57427 2025-12-17 13:00:00+00:00  87009.40  87837.43  86800.00  87605.29   
 57428 2025-12-17 14:00:00+00:00  87605.29  89673.26  87139.53  89653.85   
 57429 2025-12-17 15:00:00+00:00  89653.85  90352.81  87124.00  87222.12   
 57430 2025-12-17 16:00:00+00:00  87213.00  87759.81  86144.49  86964.42   
 57431 2025-12-17 17:00:00+00:00  86964.42  87085.00  86237.26  86550.76   
 
            Volume  
 57427   308.98080  
 57428   768.17850  
 57429  1167.59820  
 

## 3) Préparation : nettoyage, features RF, labels

In [185]:
# def clean_data(df):
#     df = df.copy()
#     df = df.dropna().reset_index(drop=True)
#     return df

# def calculate_features_pct_change(df):
#     df = df.copy()
#     df["Close_pct_change"] = df["Close"].pct_change()
#     df["High_pct_change"] = df["High"].pct_change()
#     df["Low_pct_change"] = df["Low"].pct_change()          
#     df["Volume_pct_change"] = df["Volume"].pct_change()
#     features_cols = ["Close_pct_change", "High_pct_change", "Low_pct_change", "Volume_pct_change"]
#     return df, features_cols

# def calculate_features_technical(df):
#     df = df.copy()
#     close = df["Close"]
#     high = df["High"]
#     low  = df["Low"]
#     vol  = df["Volume"]
#     open = df["Open"]

#     df["logret_1"] = np.log(close).diff()
#     df["logret_5"] = np.log(close).diff(5)
#     df["logret_20"] = np.log(close).diff(20)

#     df["vol_20"] = df["logret_1"].rolling(20).std()
#     df["vol_50"] = df["logret_1"].rolling(50).std()

#     df["ma20"] = close.rolling(20).mean()
#     df["ma50"] = close.rolling(50).mean()
#     df["ema20"] = close.ewm(span=20, adjust=False).mean()
#     df["ema50"] = close.ewm(span=50, adjust=False).mean()

#     df["ma_diff"] = df["ma20"] - df["ma50"]
#     df["ema_diff"] = df["ema20"] - df["ema50"]

#     df["rsi14"] = ta.momentum.rsi(close, window=14)
#     macd = ta.trend.MACD(close)
#     df["macd"] = macd.macd()
#     df["macd_signal"] = macd.macd_signal()

#     df["atr14"] = ta.volatility.average_true_range(high, low, close, window=14) / close
#     df["adx14"] = ta.trend.adx(high, low, close, window=14)

#     df["hl_range"] = (high - low) / close
#     df["np_range"] = (high - low) / (close - open+1e-6)
#     features_cols = []

#     # On retire les lignes avec NaNs (features + futur)
#     features_cols = ["logret_1", "logret_5", "logret_20", "vol_20", 
#     "vol_50", "ma20", "ma50", "ema20", "ema50", "ma_diff", "ema_diff", 
#     "rsi14", "macd", "macd_signal", "atr14", "adx14", "hl_range", "np_range"]
    
#     return df, features_cols

# def calculate_label(df):
#     df = df.copy()
#     # --- Label : ROI futur à HORIZON_STEPS ---
#     df["future_close"] = df["Close"].shift(-HORIZON_STEPS)
#     df["roi_H"] = (df["future_close"] - df["Close"]) / df["Close"]
#     df["y"] = (df["roi_H"] > THRESH).astype(int)
#     return df

# def prepare_data_min_features(df):
#     # Nettoyage basique
#     df = clean_data(df)
#     df, features_cols = calculate_features_pct_change(df)
#     df = calculate_label(df)
#     df_model = df.dropna(subset=features_cols + ["y"]).reset_index(drop=True)
#     df_model["Volume_pct_change"] = df_model["Volume_pct_change"].replace([np.inf, -np.inf], 0)

#     return df_model, features_cols


# def prepare_data_advanced_features(df):
#     # Nettoyage basique
#     df = clean_data(df)
#     df,features_cols = calculate_features_technical(df)
#     df = calculate_label(df)
#     df_model = df.dropna(subset=features_cols + ["y"]).reset_index(drop=True)
#     return df_model, features_cols
    


## 4) Baseline — RandomForestClassifier (Feature mini)

In [186]:
df_model,feature_cols = prepare_data_min_features(df,HORIZON_STEPS,THRESH)
# Split temporel
n = len(df_model)
train_end = int(n * TRAIN_RATIO)
valid_end = int(n * (TRAIN_RATIO + VALID_RATIO))

train_df = df_model.iloc[:train_end].copy()
valid_df = df_model.iloc[train_end:valid_end].copy()
test_df  = df_model.iloc[valid_end:].copy()

print(f"train_df.shape: {train_df.shape}")
print(f"valid_df.shape: {valid_df.shape}")
print(f"test_df.shape: {test_df.shape}")
print(f"df_model['y'].value_counts(normalize=True): {df_model['y'].value_counts(normalize=True)}")
print(f"Train/Valid/Test ratio: {train_end}/{valid_end}/{n-valid_end}")


X_train = train_df[feature_cols].values
y_train = train_df["y"].values

X_valid = valid_df[feature_cols].values
y_valid = valid_df["y"].values

X_test  = test_df[feature_cols].values
y_test  = test_df["y"].values

rf = RandomForestClassifier(
    n_estimators=800,
    max_depth=20,
    min_samples_leaf=10,
    class_weight="balanced",
    random_state=RANDOM_SEED,
    n_jobs=-1
)
rf.fit(X_train, y_train)

# Probabilités pour AUC/PR-AUC
p_test = rf.predict_proba(X_test)[:,1]
pred_test_rf_simple = (p_test >= 0.5).astype(int)

print("RandomForest — Classification report (test):")
print(classification_report(y_test, pred_test_rf_simple, digits=4))

cm = confusion_matrix(y_test, pred_test_rf_simple)
print("Confusion matrix:\n", cm)

try:
    roc = roc_auc_score(y_test, p_test)
    print("ROC-AUC:", roc)
except Exception as e:
    print("ROC-AUC non calculable:", e)

prec, rec, _ = precision_recall_curve(y_test, p_test)
pr_auc = auc(rec, prec)
print("PR-AUC:", pr_auc)


train_df.shape: (45944, 13)
valid_df.shape: (5743, 13)
test_df.shape: (5744, 13)
df_model['y'].value_counts(normalize=True): y
0    0.522732
1    0.477268
Name: proportion, dtype: float64
Train/Valid/Test ratio: 45944/51687/5744
RandomForest — Classification report (test):
              precision    recall  f1-score   support

           0     0.5501    0.7030    0.6172      3155
           1     0.4527    0.2993    0.3604      2589

    accuracy                         0.5211      5744
   macro avg     0.5014    0.5012    0.4888      5744
weighted avg     0.5062    0.5211    0.5015      5744

Confusion matrix:
 [[2218  937]
 [1814  775]]
ROC-AUC: 0.5040921024522254
PR-AUC: 0.46070281008843295


In [187]:
# Ajoute gridSearch cv sur tscv sur les hyperparamètres de RF

from sklearn.model_selection import GridSearchCV, TimeSeriesSplit

# Définition des hyperparamètres à explorer
param_grid = {
    'n_estimators': [800],
    'max_depth': [10, 20, 25],
    'min_samples_leaf': [10,50],
    'class_weight': ['balanced'],
    'random_state': [RANDOM_SEED]
}

# TimeSeries cross-validator 
tscv = TimeSeriesSplit(n_splits=5)

rf_model = RandomForestClassifier(n_jobs=-1)

# GridSearch avec CV temporel
grid = GridSearchCV(
    estimator=rf_model,
    param_grid=param_grid,
    cv=tscv,
    scoring='roc-auc',
    n_jobs=-1,
    verbose=2,
)

grid.fit(X_train, y_train)

print("Best RF params via GridSearchCV/TS-CV :")
print(grid.best_params_)
print("Best RF score:", grid.best_score_)

# Vous pouvez choisir le meilleur modèle ainsi:
best_rf = grid.best_estimator_

# Et l'utiliser comme à la place de rf :
# best_rf.fit(X_train, y_train)
# etc.



InvalidParameterError: The 'scoring' parameter of GridSearchCV must be a str among {'fowlkes_mallows_score', 'balanced_accuracy', 'jaccard_micro', 'top_k_accuracy', 'mutual_info_score', 'adjusted_mutual_info_score', 'r2', 'precision_samples', 'jaccard_samples', 'precision_micro', 'f1_weighted', 'precision_weighted', 'neg_root_mean_squared_error', 'rand_score', 'recall_samples', 'homogeneity_score', 'neg_brier_score', 'completeness_score', 'average_precision', 'f1_samples', 'neg_median_absolute_error', 'd2_absolute_error_score', 'recall_weighted', 'normalized_mutual_info_score', 'precision', 'recall', 'recall_macro', 'accuracy', 'jaccard', 'neg_log_loss', 'f1', 'jaccard_macro', 'f1_micro', 'neg_mean_squared_error', 'neg_negative_likelihood_ratio', 'precision_macro', 'neg_mean_absolute_error', 'adjusted_rand_score', 'jaccard_weighted', 'roc_auc_ovr_weighted', 'matthews_corrcoef', 'v_measure_score', 'neg_root_mean_squared_log_error', 'recall_micro', 'positive_likelihood_ratio', 'roc_auc_ovo', 'roc_auc_ovr', 'f1_macro', 'neg_mean_squared_log_error', 'neg_mean_gamma_deviance', 'neg_max_error', 'roc_auc', 'roc_auc_ovo_weighted', 'neg_mean_poisson_deviance', 'explained_variance', 'neg_mean_absolute_percentage_error'}, a callable, an instance of 'list', an instance of 'tuple', an instance of 'dict' or None. Got 'roc-auc' instead.

### Backtest simple (baseline RF)
Stratégie : **spot long-only**. Si prédiction=1 → être long sur l’horizon (ici simplifié pas-à-pas : position à 1 tant que le signal vaut 1). Frais appliqués lors des changements de position.

In [None]:
backtester_rf_simple = Backtest(test_df.reset_index(), pred_test_rf_simple, fee_roundtrip=0, pct_capital=1, capital_init=CAPITAL_INIT, debug=False)


In [None]:
plot_backtest(backtester_rf_simple)

## 4bis) RandomForet feature advanced

In [None]:
df_model,feature_cols = prepare_data_advanced_features(df,HORIZON_STEPS,THRESH)
# Split temporel
n = len(df_model)
train_end = int(n * TRAIN_RATIO)
valid_end = int(n * (TRAIN_RATIO + VALID_RATIO))

train_df = df_model.iloc[:train_end].copy()
valid_df = df_model.iloc[train_end:valid_end].copy()
test_df  = df_model.iloc[valid_end:].copy()

print(f"train_df.shape: {train_df.shape}")
print(f"valid_df.shape: {valid_df.shape}")
print(f"test_df.shape: {test_df.shape}")
print(f"df_model['y'].value_counts(normalize=True): {df_model['y'].value_counts(normalize=True)}")
print(f"Train/Valid/Test ratio: {train_end}/{valid_end}/{n-valid_end}")


X_train = train_df[feature_cols].values
y_train = train_df["y"].values

X_valid = valid_df[feature_cols].values
y_valid = valid_df["y"].values

X_test  = test_df[feature_cols].values
y_test  = test_df["y"].values

rf = RandomForestClassifier(
    n_estimators=800,
    max_depth=20,
    min_samples_leaf=10,
    class_weight="balanced",
    random_state=RANDOM_SEED,
    n_jobs=-1
)
rf.fit(X_train, y_train)

# Probabilités pour AUC/PR-AUC
p_test = rf.predict_proba(X_test)[:,1]
pred_test_rf_advanced = (p_test >= 0.5).astype(int)

print("RandomForest — Classification report (test):")
print(classification_report(y_test, pred_test_rf_advanced, digits=4))

cm = confusion_matrix(y_test, pred_test_rf_advanced)
print("Confusion matrix:\n", cm)

try:
    roc = roc_auc_score(y_test, p_test)
    print("ROC-AUC:", roc)
except Exception as e:
    print("ROC-AUC non calculable:", e)

prec, rec, _ = precision_recall_curve(y_test, p_test)
pr_auc = auc(rec, prec)
print("PR-AUC:", pr_auc)


In [None]:
backtester_rf_advanced = Backtest(test_df.reset_index(), pred_test_rf_advanced, fee_roundtrip=0, pct_capital=1, capital_init=CAPITAL_INIT, debug=False)


In [None]:
plot_backtest(backtester_rf_advanced)

## 5) Modèle récent — TabNetClassifier

In [None]:
from pytorch_tabnet.tab_model import TabNetClassifier
from sklearn.preprocessing import StandardScaler

df_model,feature_cols = prepare_data_min_features(df,HORIZON_STEPS,THRESH)
# Split temporel
n = len(df_model)
train_end = int(n * TRAIN_RATIO)
valid_end = int(n * (TRAIN_RATIO + VALID_RATIO))

train_df = df_model.iloc[:train_end].copy()
valid_df = df_model.iloc[train_end:valid_end].copy()
test_df  = df_model.iloc[valid_end:].copy()

print(f"train_df.shape: {train_df.shape}")
print(f"valid_df.shape: {valid_df.shape}")
print(f"test_df.shape: {test_df.shape}")
print(f"df_model['y'].value_counts(normalize=True): {df_model['y'].value_counts(normalize=True)}")
print(f"Train/Valid/Test ratio: {train_end}/{valid_end}/{n-valid_end}")


X_train = train_df[feature_cols].values
y_train = train_df["y"].values

X_valid = valid_df[feature_cols].values
y_valid = valid_df["y"].values

X_test  = test_df[feature_cols].values
y_test  = test_df["y"].values

# Scaling (fit sur train uniquement) - CRITIQUE pour TabNet
scaler = StandardScaler()

X_train_s = scaler.fit_transform(X_train)
X_valid_s = scaler.transform(X_valid)
X_test_s  = scaler.transform(X_test)

y_train_tab = y_train
y_valid_tab = y_valid
y_test_tab  = y_test

# Retour aux hyperparamètres qui fonctionnaient mieux (TEST 1 était pire)
tabnet = TabNetClassifier(
    n_d=32, n_a=32, 
    n_steps=5,
    gamma=1.5,
    lambda_sparse=1e-4,
    optimizer_fn=torch.optim.Adam,
    optimizer_params=dict(lr=2e-3),
    mask_type="sparsemax",
    seed=RANDOM_SEED,
    verbose=10
)

tabnet.fit(
    X_train_s, y_train_tab,
    eval_set=[(X_valid_s, y_valid_tab)],
    eval_name=["valid"],
    eval_metric=["auc"],
    max_epochs=200,
    patience=50,
    batch_size=1024,
    virtual_batch_size=256,
    num_workers=0,
    drop_last=False
)


# Évaluation classification (test)
p_test_tab = tabnet.predict_proba(X_test_s)[:, 1]
pred_test_tab = (p_test_tab >= 0.5).astype(int)

print("\n" + "="*60)
print("TabNet - Classification report (test)")
print("="*60)
print(classification_report(y_test_tab, pred_test_tab, digits=4))

cm_tab = confusion_matrix(y_test_tab, pred_test_tab)
print("Confusion matrix:\n", cm_tab)

try:
    roc_tab = roc_auc_score(y_test_tab, p_test_tab)
    print("ROC-AUC:", roc_tab)
except Exception as e:
    print("ROC-AUC non calculable:", e)

prec_tab, rec_tab, _ = precision_recall_curve(y_test_tab, p_test_tab)
pr_auc_tab = auc(rec_tab, prec_tab)
print("PR-AUC:", pr_auc_tab)

In [None]:
# Sauvegarde du modèle TabNet au format pkl
joblib.dump(tabnet, "models/tabnet_model.pkl")

In [None]:
tabnet = joblib.load("models/tabnet_model.pkl")    

# Évaluation classification (test)
p_test_tab = tabnet.predict_proba(X_test_s)[:, 1]
pred_test_tab = (p_test_tab >= 0.5).astype(int)

print("\n" + "="*60)
print("TabNet - Classification report (test)")
print("="*60)
print(classification_report(y_test_tab, pred_test_tab, digits=4))

cm_tab = confusion_matrix(y_test_tab, pred_test_tab)
print("Confusion matrix:\n", cm_tab)

try:
    roc_tab = roc_auc_score(y_test_tab, p_test_tab)
    print("ROC-AUC:", roc_tab)
except Exception as e:
    print("ROC-AUC non calculable:", e)

prec_tab, rec_tab, _ = precision_recall_curve(y_test_tab, p_test_tab)
pr_auc_tab = auc(rec_tab, prec_tab)
print("PR-AUC:", pr_auc_tab)
backtester_tabnet = Backtest(test_df.reset_index(), pred_test_tab, fee_roundtrip=0, pct_capital=1, capital_init=CAPITAL_INIT, debug=False)
plot_backtest(backtester_tabnet)

In [None]:
#Cellule — Tableau comparaison RF vs TabNet (version simple, basée sur capital/PnL)
capital_init = CAPITAL_INIT

# RF (si tu utilises déjà `backtester` pour RF)
pnl_rf_simple = backtester_rf_simple.capital - capital_init
pnl_rf_advanced = backtester_rf_advanced.capital - capital_init
pnl_tabnet = backtester_tabnet.capital - capital_init

results_bt = pd.DataFrame([
    {"Model": "RandomForest Simple", "CapitalFinal": backtester_rf_simple.capital, "PnL": pnl_rf_simple},
    {"Model": "RandomForest Advanced", "CapitalFinal": backtester_rf_advanced.capital, "PnL": pnl_rf_advanced},
    {"Model": "TabNet", "CapitalFinal": backtester_tabnet.capital, "PnL": pnl_tabnet},
])

results_bt

## 6) Modèle récent — Tiny Time Mixer (TTM) : forecasting → signal

On reproduit l’approche du notebook Kaggle **Tiny Time Mixer** (TSFM) :  
- on prépare le dataset pour forecasting (cible = `Close`)  
- on fine-tune léger TTM (optionnel, mais recommandé)  
- on produit une prévision à `HORIZON_STEPS` et on convertit en signal :  
`Buy` si ROI prédit > seuil (frais).


In [None]:
# # Ensure tsfm_public is installed in the current Python environment
# import sys
# import subprocess
# import os

# try:
#     import tsfm_public
#     print(f"✓ tsfm_public is installed (version: {tsfm_public.__version__})")
# except ImportError:
#     print("Installing tsfm_public in current Python environment...")
#     TSFM_DIR = "tsfm"
#     if not os.path.isdir(TSFM_DIR):
#         subprocess.check_call(["bash","-lc", f"git clone --depth 1 --branch v0.2.9 https://github.com/IBM/tsfm.git {TSFM_DIR}"])
#     subprocess.check_call([sys.executable, "-m", "pip", "-q", "install", "-e", TSFM_DIR])
#     print("✓ Installation complete. Please restart the kernel and re-run this cell.")



In [None]:
# # Import from submodules directly (following official notebook pattern)
# # Import from specific modules to avoid circular import issues
# from tsfm_public.toolkit.time_series_preprocessor import TimeSeriesPreprocessor, get_datasets
# from tsfm_public.models.tinytimemixer import TinyTimeMixerForPrediction
# from tsfm_public.toolkit.time_series_forecasting_pipeline import TimeSeriesForecastingPipeline

# # Import transformers utilities
# from transformers import EarlyStoppingCallback, Trainer, TrainingArguments
# from torch.optim import AdamW
# from torch.optim.lr_scheduler import OneCycleLR

# print("✓ Imports successful")


### 6.1) Préparation des données pour TTM

In [None]:

# # Pour rester simple : TTM voit Open/High/Low comme observables, Close comme target
# timestamp_column = "Timestamp"
# target_columns = ["nextClose_pct_change"]

# df_model,feature_cols = prepare_data_min_features(df,HORIZON_STEPS,THRESH)
# df_model["nextClose_pct_change"] = (df_model["Close"].shift(-1) -df_model["Close"]) / df_model["Close"]
# observable_columns = feature_cols

# # On prend df_model (brut) mais on se limite à la zone où le futur existe (sinon label NaN)
# df_ttm = df_model.dropna(subset=observable_columns+target_columns).copy()

# # Préprocesseur TSFM (semblable au notebook Kaggle)
# # Note: context_length et prediction_length sont passés au preprocessor
# # Le modèle pré-entraîné utilise prediction_length=96, on doit l'utiliser ici aussi
# # On utilisera ensuite seulement les HORIZON_STEPS premiers pas pour le signal de trading
# tsp = TimeSeriesPreprocessor(
#     timestamp_column=timestamp_column,
#     id_columns=[],
#     target_columns=target_columns,
#     observable_columns=observable_columns,
#     freq=TIMEFRAME,
#     context_length=LOOKBACK,
#     prediction_length=96  # Doit correspondre au modèle pré-entraîné
# )

# # Split indices sur df_ttm (chronologique)
# data_length = len(df_ttm)
# train_start_index = 0
# train_end_index = round(data_length * TRAIN_RATIO)

# eval_start_index = train_end_index - LOOKBACK
# eval_end_index   = round(data_length * (TRAIN_RATIO + VALID_RATIO))

# test_start_index = eval_end_index - LOOKBACK
# test_end_index   = data_length

# split_config = {
#     "train": [train_start_index, train_end_index],
#     "valid": [eval_start_index, eval_end_index],
#     "test":  [test_start_index, test_end_index],
# }

# train_dataset, valid_dataset, test_dataset = get_datasets(
#     tsp,
#     df_ttm,
#     split_config=split_config
# )

# print(f"train_dataset: {len(    train_dataset)}")
# print(f"valid_dataset: {len(valid_dataset)}")
# print(f"test_dataset: {len(test_dataset)}")


### 6.2) Chargement du modèle TTM

In [None]:

# Checkpoint public utilisé par TSFM (le notebook Kaggle utilise TinyTimeMixerForPrediction)
# Remarque : selon l'environnement, le téléchargement HF peut prendre du temps.
# Le checkpoint est entraîné avec prediction_length=96, on doit l'utiliser tel quel
# On utilisera ensuite seulement les HORIZON_STEPS premiers pas de la prédiction
model = TinyTimeMixerForPrediction.from_pretrained(
    "ibm-granite/granite-timeseries-ttm-r2",  # checkpoint TTM (prediction_length=96)
    context_length=LOOKBACK,
    prediction_length=96  # Le checkpoint utilise 96, on doit respecter cette valeur
)

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
print(f"✓ Modèle chargé sur {device}")
print(f"  - context_length: {model.config.context_length}")
print(f"  - prediction_length: {model.config.prediction_length}")
print(f"  - On utilisera les {HORIZON_STEPS} premiers pas de la prédiction")


### 6.3) Fine-tuning léger (optionnel mais recommandé)

In [None]:

# Entraînement léger sur CPU possible mais plus lent.
# Vous pouvez passer SKIP_FINETUNE=True pour tester rapidement en zero-shot.

SKIP_FINETUNE = False

if not SKIP_FINETUNE:
    # Arguments d'entraînement
    training_args = TrainingArguments(
        output_dir="ttm_finetune_out",
        learning_rate=5e-5,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        num_train_epochs=5,
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model="eval_loss",
        greater_is_better=False,
        logging_steps=50,
        seed=RANDOM_SEED,
        fp16=torch.cuda.is_available(),
        report_to="none"
    )

    optimizer = AdamW(model.parameters(), lr=training_args.learning_rate)
    steps_per_epoch = max(1, len(train_dataset) // training_args.per_device_train_batch_size)
    scheduler = OneCycleLR(
        optimizer,
        max_lr=training_args.learning_rate,
        steps_per_epoch=steps_per_epoch,
        epochs=training_args.num_train_epochs,
    )

    early_stopping = EarlyStoppingCallback(early_stopping_patience=2)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=valid_dataset,
        callbacks=[early_stopping],
        optimizers=(optimizer, scheduler)
    )

    trainer.train()


### 6.4) Prédiction sur test → signal Buy/No-trade

In [None]:
# Sauvegarde du modèle TinyTimeMixer pour une réutilisation en production

import torch

# Sauvegarde complète de l'état du modèle (architecture + poids)
#torch.save(model, "ttm_model_full.pt")

# OU, plus communément en production :
# Sauvegarde seulement des poids (state_dict)
torch.save(model.state_dict(), "ttm_model_state_dict.pt")

# Pour recharger plus tard :
# - Recharger tout (si architecture inchangée) :
#   model = torch.load("ttm_model_full.pt")
# - OU, pour charger les poids dans une nouvelle instance :
#   model = TinyTimeMixerForPrediction(...)  # instancier avec la même architecture
#   model.load_state_dict(torch.load("ttm_model_state_dict.pt"))


In [None]:

forecast_pipeline = TimeSeriesForecastingPipeline(
    model=model,
    device=device,
    timestamp_column=timestamp_column,
    id_columns=[],
    target_columns=target_columns,
    observable_columns=observable_columns,
    freq=TIMEFRAME
)

# Prévisions : on obtient une table avec Close réel et Close_prediction (format pipeline TSFM)
ttm_forecast = forecast_pipeline(tsp.preprocess(df_ttm.iloc[test_start_index:test_end_index].copy()))
ttm_forecast.head(), ttm_forecast.tail()



In [None]:
# Traçage de la courbe Close réel et Close_prediction, gestion des séquences éventuelles (list/array)
def extract_scalar_for_plot(x, index=0):
    try:
        if isinstance(x, (list, tuple, np.ndarray)):
            if len(x) > index:
                return float(x[index])
            elif len(x) > 0:
                return float(x[0])
            else:
                return np.nan
        else:
            return float(x)
    except Exception:
        return np.nan

# Extraction sécurisée des valeurs pour l'affichage
close_y = [extract_scalar_for_plot(x) for x in ttm_forecast["nextClose_pct_change"]]
close_pred_y = [extract_scalar_for_plot(x) for x in ttm_forecast["nextClose_pct_change_prediction"]]
timestamps = ttm_forecast["Timestamp"]

plt.figure(figsize=(15, 5))
plt.plot(timestamps, close_y, label="Close réel", color="blue")
plt.plot(timestamps, close_pred_y, label="Close prédit", color="red")
plt.legend()
plt.show()


In [None]:
y_pred_ttm = (pd.DataFrame(close_pred_y) >= 0).astype(int)
y_pred_ttm

In [None]:

y_pred_ttm.value_counts()

### Backtest simple (TTM)

In [None]:
type(y_pred_ttm.values)

In [None]:
y_pred_ttm.values

In [None]:
test_df.reset_index().shape

In [None]:
backtester_ttm = Backtest(test_df.reset_index(), y_pred_ttm.values, fee_roundtrip=0, pct_capital=1, capital_init=CAPITAL_INIT, debug=False)
plot_backtest(backtester_ttm)



## 7 Comparaison synthétique + courbes d'equity

In [None]:
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt

# Récupérer les courbes d'equity des différents backtest
backtests = [
    ("RandomForest Simple", backtester_rf_simple),
    ("RandomForest Advanced", backtester_rf_advanced),
    ("TabNet", backtester_tabnet),
    ("TTM", backtester_ttm),
]

plt.figure(figsize=(15,5))

for name, bt in backtests:
    # Capital curve déjà stockée dans backtester.df_trades ou reconstruite depuis les trades si besoin
    df_curve = bt.df_bt.copy().reset_index(drop=True)
    # On merge le capital atteint lors de chaque trade, puis ffill pour combler entre trades
    df_curve = df_curve.merge(bt.df_trades[["Timestamp", "Capital"]], on="Timestamp", how="left")
    df_curve["Capital"] = df_curve["Capital"].ffill().fillna(bt.capital_init)
    plt.plot(df_curve["Timestamp"], df_curve["Capital"], label=name)

plt.title("Courbes d'equity des différents backtests")
plt.xlabel("Timestamp")
plt.ylabel("Capital")
plt.legend()
plt.show()


## 8 Notes pour le dashboard Streamlit
- Charger les derniers points OHLCV
- Calculer features RF et prédiction RF
- Exécuter TabNet pipeline sur la fenêtre récente et générer signal
- Afficher signal + courbe prix + métriques

### Prédiction avec TabNet

In [None]:

# Charger les derniers points OHLCV
from datetime import timedelta

last_data = fetch_ohlcv_binance("BTCUSDC", "1h", format(datetime.now()-timedelta(days=7),"%Y-%m-%d"), format(datetime.now(),"%Y-%m-%d"))
# Appliquer le pipeline de préprocessing des données
last_data_model, features_cols_model = prepare_data_min_features(last_data,HORIZON_STEPS,THRESH)
# Charger le modèle TabNet


tabnet_model =joblib.load("tabnet_model.pkl")
# Faire les prédictions
last_data_model_s  = scaler.transform(last_data_model[feature_cols].values)
tabnet_pred_tab = tabnet_model.predict_proba(last_data_model_s)[:, 1]
tabnet_pred = (tabnet_pred_tab >= 0.5).astype(int)


In [None]:
# Charger le modèle TabNet
tabnet_model =joblib.load("tabnet_model.pkl")

In [None]:
last_data_model['Timestamp'].iloc[-1].timestamp()

In [None]:
(datetime.utcnow()-timedelta(hours=1))

In [None]:
last_data_model['Timestamp'].iloc[-1]

In [None]:
to_datetime(datetime.utcnow()-timedelta(minutes=5),utc=True)>last_data_model['Timestamp'].iloc[-1]

In [None]:
backtester_tabnet_pred = Backtest(last_data_model.reset_index(), tabnet_pred, fee_roundtrip=0, pct_capital=1, capital_init=CAPITAL_INIT, debug=False)
plot_backtest(backtester_tabnet_pred)

### Trading live

In [None]:
print(f"Nous sommes le {format(datetime.now(), '%d/%m/%Y')},  il est {format(datetime.now(), '%H:%M:%S')}")
last_data = fetch_ohlcv_binance("BTCUSDC", "1h", format(datetime.now()-timedelta(days=7),"%Y-%m-%d"), format(datetime.now(),"%Y-%m-%d"))
last_data_model, features_cols_model = prepare_data_min_features(last_data,HORIZON_STEPS,THRESH)
last_data_model_s  = scaler.transform(last_data_model[feature_cols].values)
tabnet_pred_tab = tabnet_model.predict_proba(last_data_model_s)[:, 1]
tabnet_pred = (tabnet_pred_tab >= 0.5).astype(int)
print(f"Le cours du Bitcoin est actuellement de {last_data_model['Close'].iloc[-1]} USDC")
if (to_datetime(datetime.utcnow()-timedelta(minutes=5),utc=True)  < last_data_model['Timestamp'].iloc[-1]) :
    print(f"Le signal TabNet pour les 5 premières minutes de l'heure est : {'Buy' if tabnet_pred[-1] == 1 else 'No-trade or Sell'}")
else:
    print(f"Le signal TabNet n'est pas disponible pour trader, il faut attendre {(last_data_model['Timestamp'].iloc[-1]+timedelta(minutes=60)-to_datetime(datetime.utcnow(),utc=True))} minutes")

### Prediction avec TTM

In [None]:
# Charger les dernières données OHLCV (7 derniers jours)
from datetime import datetime, timedelta

last_data = fetch_ohlcv_binance("BTCUSDC", "1h",
                                format(datetime.now()-timedelta(days=90), "%Y-%m-%d"),
                                format(datetime.now(), "%Y-%m-%d"))

# Appliquer le pipeline de préprocessing du TTM (même que dans le notebook)
# Le TTM utilise prepare_data_min_features, pas advanced_features
last_data_TTM, features_cols_TTM = prepare_data_min_features(last_data,HORIZON_STEPS,THRESH)

# Ajouter la colonne nextClose_pct_change (requise par le preprocessor TTM)
# Pour les prédictions, on peut mettre NaN ou 0, car on ne l'utilise pas
last_data_TTM["nextClose_pct_change"] = (last_data_TTM["Close"].shift(-1) - last_data_TTM["Close"]) / last_data_TTM["Close"]

# Définir les colonnes pour le preprocessor TTM (même configuration que dans le notebook)
timestamp_column = "Timestamp"
target_columns = ["nextClose_pct_change"]
observable_columns = features_cols_TTM

# Créer le preprocessor TTM (même configuration que dans le notebook)
tsp = TimeSeriesPreprocessor(
    timestamp_column=timestamp_column,
    id_columns=[],
    target_columns=target_columns,
    observable_columns=observable_columns,
    freq=TIMEFRAME,
    context_length=LOOKBACK,
    prediction_length=96  # Doit correspondre au modèle pré-entraîné
)

# Charger le modèle TTM sauvegardé
device = "cuda" if torch.cuda.is_available() else "cpu"
ttm_model = TinyTimeMixerForPrediction.from_pretrained(
    "ibm-granite/granite-timeseries-ttm-r2",  # checkpoint TTM (prediction_length=96)
    context_length=LOOKBACK,
    prediction_length=96  # Le checkpoint utilise 96, on doit respecter cette valeur
)
ttm_model.load_state_dict(torch.load("ttm_model_state_dict.pt"))
ttm_model.to(device)
ttm_model.eval()  # Mode évaluation

# Créer le pipeline de forecasting
forecast_pipeline = TimeSeriesForecastingPipeline(
    model=ttm_model,
    device=device,
    timestamp_column=timestamp_column,
    id_columns=[],
    target_columns=target_columns,
    observable_columns=observable_columns,
    freq=TIMEFRAME
)

# Faire les prédictions sur les dernières données
# Le pipeline nécessite des données préprocessées
last_data_preprocessed = tsp.preprocess(last_data_TTM.copy())
ttm_forecast = forecast_pipeline(last_data_preprocessed)

# Extraction sécurisée des valeurs pour l'affichage
close_pred_y = [extract_scalar_for_plot(x) for x in ttm_forecast["nextClose_pct_change_prediction"]]
timestamps = ttm_forecast["Timestamp"]

plt.figure(figsize=(15, 5))
plt.plot(timestamps, close_pred_y, label="Close prédit", color="red")
plt.legend()
plt.show()

y_pred_ttm = (pd.DataFrame(close_pred_y) >= 0).astype(int)
last_data_TTM_pred = last_data_TTM.tail(24*7).copy()
y_pred_ttm_pred = y_pred_ttm.tail(24*7)
print(f"y_pred_ttm_pred.shape: {y_pred_ttm_pred.shape}")
print(f"last_data_TTM_pred.shape: {last_data_TTM_pred.shape}")

In [None]:
backtester_ttm = Backtest(last_data_TTM_pred.reset_index(), y_pred_ttm_pred.values, fee_roundtrip=0, pct_capital=1, capital_init=CAPITAL_INIT, debug=False)
plot_backtest(backtester_ttm)