Ce code entraîne un modèle simple (régression logistique) pour prédire les périodes de stress de marché sur un panier de cryptos. Ici, « stress » signifie que la volatilité réalisée future du portefeuille équipondéré (fenêtre de 5 jours, `H_STRESS`) dépasse le quantile 85 % (`Q_STRESS`) mesuré sur la période d’entraînement.

Ensuite, au lieu de couper l’exposition brutalement, on utilise la probabilité de stress prédite par le modèle pour piloter un gate (un interrupteur avec hystérésis) : il réduit l’exposition seulement quand le stress est vraiment probable, et il évite de basculer on/off tous les jours.

Enfin, on combine ce gate avec une allocation inverse-volatilité + vol targeting pour obtenir des poids finaux plus “risk managed”.

In [None]:
import pandas as pd
import numpy as np

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, average_precision_score

# ---------------------------------------------------------------------------
# Parameters
# ---------------------------------------------------------------------------

# Universe and data location.
SYMBOLS = [
    "BTCUSDT",
    "ETHUSDT",
    "BNBUSDT",
    "SOLUSDT",
    "XRPUSDT",
    "DOGEUSDT",
]
DATA_DIR = "binance_public_data"

# Date range kept after loading (inclusive label slice on the timestamp index).
START, END = "2018-01-01", "2025-12-31"

# FREQ: periods per year (presumably an annualisation factor; not used in this
# section -- confirm against later cells).
FREQ = 365
# TEST_DAYS: length, in calendar days, of the hold-out window at the end of the sample.
TEST_DAYS = 365

# Volatility estimators.
WIN_VOL = 20  # rolling-std window (rows)
LAM = 0.94    # EWMA decay; the smoothing factor used is alpha = 1 - LAM

# Momentum lookbacks (rows).
WIN_M1, WIN_M2, WIN_M3 = (5, 20, 60)

# Stress label: realised portfolio vol over an H_STRESS-day window, flagged when
# it reaches the Q_STRESS quantile of the training sample.
H_STRESS = 5
Q_STRESS = 0.85  # quantile level, i.e. roughly 1 - Q_STRESS of training rows are "stress"

def load_symbol(sym):
    """Load one symbol's daily candles and attach close-to-close log returns.

    Reads ``{DATA_DIR}/{sym}_1d_2021_2025.csv``, indexes it by its
    ``timestamp`` column (parsed as dates), sorts it, keeps the rows between
    ``START`` and ``END`` and adds a ``ret`` column holding
    ``log(close_t / close_{t-1})``.

    Args:
        sym: Symbol name as it appears in the file name, e.g. ``"BTCUSDT"``.

    Returns:
        A DataFrame sorted by timestamp containing the CSV columns plus
        ``ret``. Rows whose return is NaN (at least the first row, which has
        no previous close) are dropped.
    """
    fn = f"{DATA_DIR}/{sym}_1d_2021_2025.csv"  # adjust to your local file naming
    df = pd.read_csv(fn, parse_dates=["timestamp"], index_col="timestamp").sort_index()
    # Take an explicit copy of the date slice: adding a column to a slice of
    # another frame triggers pandas' SettingWithCopyWarning.
    df = df.loc[START:END].copy()
    df["ret"] = np.log(df["close"] / df["close"].shift(1))
    return df.dropna(subset=["ret"])

# Load every symbol once; each frame holds the CSV columns plus "ret".
dfs = {}
for s in SYMBOLS:
    dfs[s] = load_symbol(s)

# Common calendar: keep only the timestamps present for every symbol.
idx = None
for frame in dfs.values():
    if idx is None:
        idx = frame.index
    else:
        idx = idx.intersection(frame.index)


def _panel(column, rows):
    """Build a wide frame (rows x symbols) from one per-symbol column."""
    return pd.DataFrame({sym: dfs[sym].loc[rows, column] for sym in SYMBOLS})


# Returns define the final row set; the price/volume panels are aligned to it.
rets = _panel("ret", idx).dropna()
close = _panel("close", rets.index)
high = _panel("high", rets.index)
low = _panel("low", rets.index)
openp = _panel("open", rets.index)
volu = _panel("volume", rets.index)

# Per-asset feature matrices (one column per symbol).

# EWMA volatility: square root of the exponentially weighted mean of squared
# returns, with smoothing factor alpha = 1 - LAM.
_sq_rets = rets.pow(2)
vol_ewma = np.sqrt(_sq_rets.ewm(alpha=1 - LAM).mean())
# Rolling volatility: standard deviation of returns over the last WIN_VOL rows.
vol_roll = rets.rolling(window=WIN_VOL).std()

# Price momentum over the three lookbacks.
mom5, mom20, mom60 = (
    close.pct_change(periods=n) for n in (WIN_M1, WIN_M2, WIN_M3)
)

# Candle shape: |close - open| as a fraction of the high-low range.
# A zero range becomes NaN so the ratio is NaN rather than a division by zero.
range_ = high.sub(low).replace(0, np.nan)
body_ = close.sub(openp).abs()
body_ratio = body_.div(range_)

# Volume relative to its own 20-row rolling mean.
vol_norm = volu.div(volu.rolling(window=20).mean())

# Baseline weights used only to build the stress label: equal weight per symbol.
w_eq = pd.DataFrame(1.0, index=rets.index, columns=SYMBOLS)
w_eq = w_eq.div(w_eq.sum(axis=1), axis=0)

# Daily return of the equal-weight portfolio (weights lagged by one row).
port_ret = (w_eq.shift(1) * rets).sum(axis=1)

# Forward-looking realised volatility of the portfolio.
# rolling(H).std() at row t covers rows t-H+1..t, so the series must be shifted
# by -H_STRESS for row t to describe rows t+1..t+H_STRESS. Shifting by only -1
# would leave H_STRESS-1 of the observations at or before t, i.e. the "future"
# label would mostly restate returns the features already contain.
realised_future_vol = port_ret.rolling(H_STRESS).std().shift(-H_STRESS)

# Chronological train/test split; the stress threshold is taken from the
# training part only.
cutoff_test = rets.index.max() - pd.Timedelta(days=TEST_DAYS)
train_mask = rets.index < cutoff_test
test_mask  = rets.index >= cutoff_test

thr = realised_future_vol[train_mask].quantile(Q_STRESS)

# 1.0 = stress, 0.0 = calm, NaN = label unknown (forward window incomplete).
# A comparison against NaN is False, so without the explicit mask the unknown
# rows would silently be labelled "calm" and survive any later dropna().
y = (realised_future_vol >= thr).astype(float).where(realised_future_vol.notna())

# Daily feature table: one row per date, cross-sectional aggregates only
# (assets are not stacked).
X = pd.DataFrame(
    {
        # risk: average rolling vol, average EWMA vol, dispersion of rolling vol across assets
        "avg_vol20": vol_roll.mean(axis=1),
        "avg_ewma": vol_ewma.mean(axis=1),
        "cs_disp": vol_roll.std(axis=1),
        # market momentum proxy: BTC only
        "btc_mom20": mom20["BTCUSDT"],
        "btc_mom60": mom60["BTCUSDT"],
        # volume proxy: average normalised volume
        "avg_vol_norm": vol_norm.mean(axis=1),
        # range proxy: average candle body / range ratio
        "avg_body_ratio": body_ratio.mean(axis=1),
    },
    index=rets.index,
)

# Put features and label side by side and keep only fully populated rows.
_label = y.rename("stress")
ds = pd.concat([X, _label], axis=1).dropna()

_feature_cols = [c for c in ds.columns if c != "stress"]
X_all = ds[_feature_cols]
y_all = ds["stress"]

# Chronological split at cutoff_test.
X_train = X_all[X_all.index < cutoff_test]
y_train = y_all[y_all.index < cutoff_test]
X_test  = X_all[X_all.index >= cutoff_test]
y_test  = y_all[y_all.index >= cutoff_test]

# Purge the training rows adjacent to the cutoff before fitting: the label
# looks up to H_STRESS rows ahead, so the last H_STRESS training rows are
# labelled with returns that fall inside the test period.
# Assumes X_all is in chronological order -- confirm if the index is ever
# built differently.
# X_train / y_train themselves are left untouched so that p_train and the
# printed stress rate keep covering the whole pre-cutoff sample.
n_fit = max(len(X_train) - H_STRESS, 0)
X_fit = X_train.iloc[:n_fit]
y_fit = y_train.iloc[:n_fit]

# Standardise the features, then a class-balanced logistic regression.
clf = Pipeline([
    ("scaler", StandardScaler()),
    ("clf", LogisticRegression(max_iter=4000, class_weight="balanced", C=1.0))
])

clf.fit(X_fit, y_fit)

# Probability of the positive ("stress") class.
p_train = clf.predict_proba(X_train)[:, 1]
p_test  = clf.predict_proba(X_test)[:, 1]

print("stress rate train:", float(y_train.mean()), "test:", float(y_test.mean()))
print("AUC test:", float(roc_auc_score(y_test, p_test)))
print("AP  test:", float(average_precision_score(y_test, p_test)))

# Stress probability for every date that has a complete feature row.
# The features are scored directly instead of stitching p_train / p_test into
# a pre-allocated series: a probability only needs features, so a date must not
# lose its probability merely because no label is available for it.
X_scored = X.dropna()
p_stress = pd.Series(
    clf.predict_proba(X_scored)[:, 1],
    index=X_scored.index,
    dtype=float,
).sort_index()

print("proba dates:", p_stress.index.min(), "->", p_stress.index.max(), "n", len(p_stress))


stress rate train: 0.13581129378127232 test: 0.10655737704918032

AUC test: 0.8448208264722027

AP  test: 0.5585310452959118

proba dates: 2021-03-03 00:00:00 -> 2025-12-31 00:00:00 n 1765