Ce bloc prépare les fonctions de base utilisées dans le projet : chargement des séries de prix (OHLCV) depuis les CSV Binance, calcul des rendements log, création de features de volatilité (EWMA, volatilité rolling, et un indicateur de régime via un z-score). 
On définit aussi des fonctions pour mesurer la performance d’un portefeuille (rendement annualisé, volatilité annualisée, Sharpe) et le risque (maximum drawdown). 
Ces outils servent ensuite à construire et comparer des stratégies.

In [None]:
import pandas as pd
import numpy as np

# 0) params
SYMBOLS = ["BTCUSDT", "ETHUSDT", "BNBUSDT", "SOLUSDT", "XRPUSDT", "DOGEUSDT"]  # + doge
DATA_DIR = "binance_public_data"
START = "2021-01-01"
END   = "2025-12-31"


# 1) load data
def load_symbol(sym):
    # 1a) path csv
    fn = f"{DATA_DIR}/{sym}_1d_2021_2025.csv"

    # 1b) read + index temps
    df = pd.read_csv(fn, parse_dates=["timestamp"], index_col="timestamp").sort_index()

    # 1c) filtre dates
    df = df.loc[START:END]

    # 1d) log returns
    df["ret"] = np.log(df["close"] / df["close"].shift(1))

    # 1e) drop na
    return df.dropna(subset=["ret"])


# 2) vol features
def add_vol_features(df, lam=0.94, win=20):
    # 2a) ewma vol
    df["vol_ewma"] = np.sqrt((df["ret"] ** 2).ewm(alpha=1 - lam).mean())

    # 2b) rolling vol
    df["vol_roll20"] = df["ret"].rolling(win).std()

    # 2c) regime vol (zscore)
    m = df["vol_ewma"].rolling(252).mean()
    s = df["vol_ewma"].rolling(252).std()
    df["vol_z"] = (df["vol_ewma"] - m) / s

    return df


# 3) drawdown
def max_drawdown(equity):
    # 3a) peak
    peak = equity.cummax()

    # 3b) drawdown
    dd = equity / peak - 1.0

    return float(dd.min())


# 4) perf stats
def perf_stats(port_ret, freq=365):
    # 4a) clean
    port_ret = port_ret.dropna()

    # 4b) equity (simple comp)
    equity = (1 + port_ret).cumprod()

    # 4c) mean / vol annualisé
    mu = port_ret.mean() * freq
    sig = port_ret.std() * np.sqrt(freq)

    # 4d) sharpe
    sharpe = float(mu / sig) if sig > 0 else np.nan

    # 4e) max dd
    mdd = max_drawdown(equity)

    # 4f) ann return (geom)
    ann_ret = float(equity.iloc[-1] ** (freq / len(port_ret)) - 1) if len(port_ret) > 0 else np.nan

    return {
        "ann_return": ann_ret,
        "ann_vol": float(sig),
        "sharpe": sharpe,
        "max_dd": mdd,
    }

Les rendements sont calculés en log-returns, ce qui est standard en finance pour éviter certains effets de composition. Les features de volatilité combinent une mesure réactive (EWMA) et une mesure plus lissée (rolling). Le z-score de volatilité sert à détecter des périodes de stress (volatilité anormalement élevée) afin de pouvoir réduire l’exposition dans les stratégies. Enfin, les métriques de performance donnent une première lecture du couple rendement/risque.

In [None]:
import io
import zipfile
import requests
import pandas as pd
from pathlib import Path

# 1) parametres globaux
SYMBOLS = [
    "BTCUSDT",
    "ETHUSDT",
    "BNBUSDT",
    "SOLUSDT",
    "XRPUSDT",
    "DOGEUSDT"
]

INTERVAL = "1d"
YEARS = [2021, 2022, 2023, 2024, 2025]

# 2) dossier sortie
OUT_DIR = Path("binance_public_data")
OUT_DIR.mkdir(exist_ok=True)

# 3) url binance vision
BASE = "https://data.binance.vision/data/spot/monthly/klines"

# 4) colonnes officielle binance
BINANCE_COLS = [
    "open_time", "open", "high", "low", "close", "volume",
    "close_time", "quote_asset_volume", "number_of_trades",
    "taker_buy_base_volume", "taker_buy_quote_volume", "ignore"
]


def to_datetime_open_time(x):
    # 5) timestamp -> date
    x = int(x)
    if x > 10_000_000_000_000:
        return pd.to_datetime(x, unit="us", utc=True).tz_convert(None)
    return pd.to_datetime(x, unit="ms", utc=True).tz_convert(None)


def download_month(symbol, year, month):
    # 6) telechargement mensuel
    m = f"{month:02d}"
    url = f"{BASE}/{symbol}/{INTERVAL}/{symbol}-{INTERVAL}-{year}-{m}.zip"

    r = requests.get(url, timeout=3)
    if r.status_code != 200:
        return None

    # unzip
    z = zipfile.ZipFile(io.BytesIO(r.content))
    csv_name = [n for n in z.namelist() if n.endswith(".csv")][0]
    raw_csv = z.read(csv_name)

    df = pd.read_csv(io.BytesIO(raw_csv), header=None)
    df.columns = BINANCE_COLS

    # conversion float
    for c in ["open", "high", "low", "close", "volume"]:
        df[c] = pd.to_numeric(df[c], errors="coerce")

    # index temps
    df["timestamp"] = df["open_time"].apply(to_datetime_open_time)
    df = df.set_index("timestamp").sort_index()

    return df[["open", "high", "low", "close", "volume"]]


def download_symbol(symbol):
    # boucle annees / mois
    parts = []

    for y in YEARS:
        for m in range(1, 13):
            df_m = download_month(symbol, y, m)
            if df_m is not None and not df_m.empty:
                parts.append(df_m)

    if not parts:
        return None

    df = pd.concat(parts).sort_index()
    df = df[~df.index.duplicated(keep="first")]
    df = df.loc["2021-01-01":"2025-12-31"]

    return df


# 8) download univers complet
all_data = {}

for sym in SYMBOLS:
    print("download", sym)

    df = download_symbol(sym)
    if df is None or df.empty:
        print("skip", sym)
        continue

    print("dates", df.index.min(), "->", df.index.max(), "n", len(df))

    out_path = OUT_DIR / f"{sym}_{INTERVAL}_2021_2025.csv"
    df.to_csv(out_path)

    all_data[sym] = df
    print("saved", out_path.as_posix())

download BTCUSDT
dates 2021-01-01 00:00:00 -> 2025-12-31 00:00:00 n 1826
saved binance_public_data/BTCUSDT_1d_2021_2025.csv
download ETHUSDT
dates 2021-01-01 00:00:00 -> 2025-12-31 00:00:00 n 1826
saved binance_public_data/ETHUSDT_1d_2021_2025.csv
download BNBUSDT
dates 2021-01-01 00:00:00 -> 2025-12-31 00:00:00 n 1826
saved binance_public_data/BNBUSDT_1d_2021_2025.csv
download SOLUSDT
dates 2021-01-01 00:00:00 -> 2025-12-31 00:00:00 n 1826
saved binance_public_data/SOLUSDT_1d_2021_2025.csv
download XRPUSDT
dates 2021-01-01 00:00:00 -> 2025-12-31 00:00:00 n 1826
saved binance_public_data/XRPUSDT_1d_2021_2025.csv
download DOGEUSDT
dates 2021-01-01 00:00:00 -> 2025-12-31 00:00:00 n 1826
saved binance_public_data/DOGEUSDT_1d_2021_2025.csv

In [None]:
# backtest
def run_backtest(weights, rets, cost_bps=10):
    # weights: t weights used for t->t+1
    # costs: turnover
    w = weights.shift(1).fillna(0.0)
    w = w.clip(lower=0)  # long-only
    w = w.div(w.sum(axis=1).replace(0, np.nan), axis=0).fillna(0.0)

    port_gross = (w * rets).sum(axis=1)

    # turnover
    turnover = w.diff().abs().sum(axis=1).fillna(0.0)
    costs = turnover * (cost_bps / 10000.0)

    port_net = port_gross - costs
    return port_net, turnover

# data panel
dfs = {}
for s in SYMBOLS:
    df = load_symbol(s)
    df = add_vol_features(df)
    dfs[s] = df

# align dates
common_idx = None
for s, df in dfs.items():
    common_idx = df.index if common_idx is None else common_idx.intersection(df.index)

# returns matrix
rets = pd.DataFrame({s: dfs[s].loc[common_idx, "ret"] for s in SYMBOLS}).dropna()

# vols matrix
vol = pd.DataFrame({s: dfs[s].loc[rets.index, "vol_ewma"] for s in SYMBOLS})
volz = pd.DataFrame({s: dfs[s].loc[rets.index, "vol_z"] for s in SYMBOLS})

# buy&hold equal weight
w_bh = pd.DataFrame(1.0, index=rets.index, columns=rets.columns)

# strat 1: inv-vol
# inverse vol
w_invvol = 1.0 / vol.replace(0, np.nan)
w_invvol = w_invvol.fillna(0.0)

# strat 2: low-vol filter + equal weight
# quantile vol cross-section
q = vol.quantile(0.5, axis=1)
w_lowvol = (vol.le(q, axis=0)).astype(float)

# strat 3: risk-off regime
# if avg z-score high -> cash
avg_z = volz.mean(axis=1)
risk_on = (avg_z < 1.0).astype(float)  # threshold
w_regime = w_invvol.mul(risk_on, axis=0)

# run
bt = {}

bt["buy_hold_eq"] = run_backtest(w_bh, rets, cost_bps=0)
bt["inv_vol"]     = run_backtest(w_invvol, rets, cost_bps=10)
bt["low_vol"]     = run_backtest(w_lowvol, rets, cost_bps=10)
bt["regime_iv"]   = run_backtest(w_regime, rets, cost_bps=10)

# summary
rows = []
for name, (r, to) in bt.items():
    st = perf_stats(r, freq=365)
    st["turnover_mean"] = float(to.mean())
    st["turnover_med"] = float(to.median())
    st["name"] = name
    rows.append(st)

summary = pd.DataFrame(rows).set_index("name").sort_values("sharpe", ascending=False)
print(summary)

# equity plots
import matplotlib.pyplot as plt

plt.figure(figsize=(12,5))
for name, (r, _) in bt.items():
    eq = (1 + r.fillna(0)).cumprod()
    plt.plot(eq.index, eq, label=name)
plt.legend()
plt.title("Naive strategies equity (net)")
plt.tight_layout()
plt.show()


             ann_return   ann_vol    sharpe    max_dd  turnover_mean  \
name                                                                   
buy_hold_eq    0.207873  0.747665  0.634738 -0.885516       0.000548   
low_vol        0.205705  0.651823  0.617138 -0.819649       0.057169   
inv_vol        0.190756  0.697918  0.607007 -0.859771       0.020368   
regime_iv     -0.079206  0.510737  0.097390 -0.820377       0.040366   

             turnover_med  
name                       
buy_hold_eq      0.000000  
low_vol          0.000000  
inv_vol          0.010914  
regime_iv        0.007068  

![Image test 5](image/image_test_5.png)

Sur cette période, les trois stratégies “toujours investies” (buy&hold, low-vol, inv-vol) affichent un rendement annualisé positif et un Sharpe autour de 0.60, ce qui indique que l’exposition au marché crypto sur la période est globalement rémunératrice. 

La stratégie low-vol réduit légèrement la volatilité (ann_vol plus faible) tout en gardant un rendement proche du buy&hold, mais avec un turnover moyen plus élevé (donc plus sensible aux coûts si on augmente les frais). 

À l’inverse, la stratégie “regime_iv” est plus défensive (volatilité plus basse) mais sa performance devient négative, ce qui suggère que le filtre risk-off coupe trop souvent l’exposition pendant des phases où le marché remonte, ou que le seuil (avg_z < 1.0) est trop strict. Une amélioration naturelle serait de calibrer le seuil, ou d’autoriser une allocation partielle en cash plutôt que 0/1.