Dans cette partie, on calcule d’abord les rendements log (log-returns) à partir des prix de clôture.

Ensuite, on construit plusieurs estimateurs de volatilité “classiques” (rolling, EWMA, Parkinson, Garman-Klass). 

Puis on estime une volatilité conditionnelle via des modèles ARCH/GARCH (GARCH, GJR-GARCH et EGARCH) avec deux choix de distribution (normale et Student-t). 

Enfin, on compare ces prédictions de volatilité à la volatilité réalisée (définie comme l’écart-type des rendements sur un horizon futur) à l’aide de deux métriques simples : la MSE et la corrélation.

In [None]:
import numpy as np
import pandas as pd
from arch import arch_model

# 0) global parameters
WIN = 20              # rolling-window length (rows) used by the simple estimators
LAM = 0.94            # EWMA decay factor; the smoothing alpha used is 1 - LAM
ARCH_WINDOW = 750     # ~3 years of daily data (roughly)
HORIZONS = (5, 20)    # 1 week / 1 month


def add_returns(df):
    """Return a copy of ``df`` with a ``ret`` column of close-to-close log returns.

    The input frame is left untouched. ``ret`` is ``log(close_t / close_{t-1})``;
    the first row has no previous close, so its value is NaN.
    """
    # 1) log returns, computed on a copy so the caller's frame is not mutated
    out = df.copy()
    close = out["close"]
    previous_close = close.shift(1)
    out["ret"] = np.log(close / previous_close)
    return out


def add_vol_simple(df):
    """Add the four baseline volatility estimators to ``df`` and return it.

    The frame is modified in place (no copy is taken). It must already hold
    the columns ``ret``, ``open``, ``high``, ``low`` and ``close``. Columns
    written, in this order: ``vol_rolling_20``, ``vol_ewma``,
    ``vol_parkinson``, ``vol_gk``. The window length comes from the module
    constant ``WIN`` and the EWMA decay from ``LAM``.
    """
    returns = df["ret"]

    # 2a) rolling standard deviation of returns
    df["vol_rolling_20"] = returns.rolling(WIN).std()

    # 2b) EWMA of squared returns (alpha = 1 - LAM), then square root
    squared_returns = returns ** 2
    ewma_variance = squared_returns.ewm(alpha=1 - LAM).mean()
    df["vol_ewma"] = np.sqrt(ewma_variance)

    # 2c) Parkinson: rolling mean of the squared log high/low range,
    #     scaled by 1 / (4 ln 2)
    log_range = np.log(df["high"] / df["low"])
    parkinson_variance = (log_range ** 2).rolling(WIN).mean() / (4 * np.log(2))
    df["vol_parkinson"] = np.sqrt(parkinson_variance)

    # 2d) Garman-Klass from OHLC, all logs taken relative to the open
    open_price = df["open"]
    log_high = np.log(df["high"] / open_price)
    log_low = np.log(df["low"] / open_price)
    log_close = np.log(df["close"] / open_price)
    per_bar_variance = (
        0.5 * (log_high - log_low) ** 2
        - (2 * np.log(2) - 1) * (log_close ** 2)
    )
    smoothed_variance = per_bar_variance.rolling(WIN).mean()
    # the rolling mean can dip below zero; clip before the square root
    df["vol_gk"] = np.sqrt(smoothed_variance.clip(lower=0))

    return df


def arch_forecast_vol(ret, model="GARCH", dist="normal", window=ARCH_WINDOW):
    """Walk-forward one-step-ahead volatility forecasts from an ARCH-family model.

    Parameters
    ----------
    ret : pandas Series of returns (NaNs are dropped before fitting).
    model : "GARCH", "GJR" or "EGARCH".
    dist : error distribution name forwarded to ``arch_model``.
    window : size of the first training sample. The training sample is
        *expanding*: the forecast stored at position ``i`` is fitted on all
        observations before ``i``, starting with ``i = window``.

    Returns
    -------
    pandas Series indexed like ``ret.dropna()``, in the same (decimal) scale
    as ``ret``. Positions before ``window`` are NaN. If there are not more
    than ``window + 10`` observations, the whole series is NaN.

    Raises
    ------
    ValueError
        If ``model`` is not one of the supported names (only checked once
        there is enough data to fit).
    """
    # 3a) work in percent (arch prefers returns scaled to %)
    scaled = (ret.dropna() * 100.0).copy()
    n_obs = len(scaled)
    forecasts = pd.Series(index=scaled.index, dtype=float)

    # 3b) guard: not enough data to start the walk-forward
    if n_obs <= window + 10:
        return forecasts

    # 3d) model choice, expressed as keyword sets for arch_model
    specs = {
        "GARCH": {"vol": "Garch", "p": 1, "q": 1},
        "GJR": {"vol": "Garch", "p": 1, "o": 1, "q": 1},
        "EGARCH": {"vol": "EGarch", "p": 1, "q": 1},
    }
    spec = specs.get(model)
    if spec is None:
        raise ValueError("bad model")

    # 3c) expanding-window loop: refit on everything strictly before `pos`
    for pos in range(window, n_obs):
        history = scaled.iloc[:pos]

        # 3e) fit + 1-step-ahead forecast
        fitted = arch_model(history, dist=dist, **spec).fit(disp="off")
        next_variance = fitted.forecast(horizon=1).variance.values[-1, 0]

        # 3f) scale back (% -> decimal)
        forecasts.iloc[pos] = np.sqrt(next_variance) / 100.0

    return forecasts


def add_vol_arch(df):
    """Add the six ARCH-family volatility forecast columns to ``df`` and return it.

    ``df`` is modified in place. For each distribution (normal, then
    Student-t) and each model (GARCH, GJR, EGARCH) the forecasts from
    ``arch_forecast_vol(df["ret"], model, dist)`` are stored in
    ``vol_<model>_<n|t>``.
    """
    # 4) arch/garch vol: normal-distribution columns first, then Student-t,
    #    so the column order matches the explicit one-by-one assignment
    distributions = (("normal", "n"), ("t", "t"))
    models = (("GARCH", "garch"), ("GJR", "gjr"), ("EGARCH", "egarch"))

    for dist_name, suffix in distributions:
        for model_name, tag in models:
            column = "vol_" + tag + "_" + suffix
            df[column] = arch_forecast_vol(df["ret"], model_name, dist_name)

    return df


def bench(df, symbol, horizons=None, min_obs=250):
    """Score every volatility column of ``df`` against forward realized volatility.

    For each horizon ``h`` the target at row ``t`` is the standard deviation
    of the ``h`` returns that follow ``t`` (rows ``t+1 .. t+h``), so it never
    overlaps with the returns a predictor at row ``t`` may already have used.

    Parameters
    ----------
    df : DataFrame holding ``ret`` plus the ten ``vol_*`` prediction columns.
    symbol : label copied into the ``symbol`` column of the output.
    horizons : iterable of horizons in rows; defaults to the module-level
        ``HORIZONS`` when None.
    min_obs : minimum number of aligned (prediction, target) pairs needed to
        report a model at a given horizon.

    Returns
    -------
    DataFrame with one row per (horizon, model) and the columns ``symbol``,
    ``h``, ``model``, ``mse``, ``corr``, ``n``. Empty when nothing qualifies.

    Raises
    ------
    KeyError
        If ``ret`` or one of the expected prediction columns is missing.
    """
    if horizons is None:
        horizons = HORIZONS

    # 5b) list of volatility models to score
    pred_cols = [
        "vol_rolling_20", "vol_ewma", "vol_parkinson", "vol_gk",
        "vol_garch_n", "vol_gjr_n", "vol_egarch_n",
        "vol_garch_t", "vol_gjr_t", "vol_egarch_t",
    ]

    rows = []

    for h in horizons:
        # 5a) forward realized vol: rolling(h).std() at row t+h covers rows
        #     t+1..t+h, so shifting by -h aligns that future window on row t.
        #     (Shifting by only -1 would leave h-1 past returns in the target
        #     and leak information already seen by the predictors.)
        rv = df["ret"].rolling(h).std().shift(-h).rename("rv")

        for col in pred_cols:
            tmp = pd.concat([df[col].rename("pred"), rv], axis=1).dropna()

            # 5c) minimum sample size
            if len(tmp) < min_obs:
                continue

            # 5d) metrics
            err = tmp["pred"] - tmp["rv"]
            rows.append({
                "symbol": symbol,
                "h": h,
                "model": col,
                "mse": float(np.mean(err ** 2)),
                "corr": float(tmp["pred"].corr(tmp["rv"])),
                "n": len(tmp),
            })

    return pd.DataFrame(rows)


# 6) run over the whole universe
# NOTE(review): `all_data` is not defined in this cell; presumably a dict
# mapping symbol -> OHLC DataFrame built earlier in the notebook — confirm.
results = []

for sym, df0 in all_data.items():
    print("bench", sym)

    # 6a) returns + simple vol estimators
    df = add_returns(df0)
    df = add_vol_simple(df)

    # 6b) garch / arch (slow)
    print("arch", sym)
    df = add_vol_arch(df)

    # 6c) final benchmark
    df = df.dropna(subset=["ret"])
    res = bench(df, sym)

    # keep only symbols for which at least one model was scored
    if not res.empty:
        results.append(res)

# 7) concat + csv output
results_df = pd.concat(results, ignore_index=True) if results else pd.DataFrame()

if results_df.empty:
    print("no results")
else:
    # sorted by ascending mse, so head(3) below keeps the three lowest-MSE
    # models of each (symbol, horizon) group
    results_df = results_df.sort_values(["h", "symbol", "mse"])

    print("\n=== best per symbol / horizon ===")
    print(results_df.groupby(["symbol", "h"]).head(3))

    results_df.to_csv("vol_benchmark_2021_2025_binance_public.csv", index=False)
    print("\nsaved vol_benchmark_2021_2025_binance_public.csv")


bench BTCUSDT
arch BTCUSDT
/tmp/ipython-input-1472060719.py:66: ConvergenceWarning: The optimizer returned code 9. The message is:
Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

  res = am.fit(disp="off")
/tmp/ipython-input-1472060719.py:66: ConvergenceWarning: The optimizer returned code 9. The message is:
Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

  res = am.fit(disp="off")
/tmp/ipython-input-1472060719.py:66: ConvergenceWarning: The optimizer returned code 9. The message is:
Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

  res = am.fit(disp="off")
/tmp/ipython-input-1472060719.py:66: ConvergenceWarning: The optimizer returned code 9. The message is:
Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

  res = am.fit(disp="off")
/tmp/ipython-input-1472060719.py:66: ConvergenceWarning: The optimizer returned code 9. The message is:
Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

  res = am.fit(disp="off")
/tmp/ipython-input-1472060719.py:66: ConvergenceWarning: The optimizer returned code 9. The message is:
Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

  res = am.fit(disp="off")
/tmp/ipython-input-1472060719.py:66: ConvergenceWarning: The optimizer returned code 9. The message is:
Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

  res = am.fit(disp="off")
/tmp/ipython-input-1472060719.py:66: ConvergenceWarning: The optimizer returned code 9. The message is:
Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

  res = am.fit(disp="off")
/tmp/ipython-input-1472060719.py:66: ConvergenceWarning: The optimizer returned code 9. The message is:
Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

  res = am.fit(disp="off")
/tmp/ipython-input-1472060719.py:66: ConvergenceWarning: The optimizer returned code 9. The message is:
Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

  res = am.fit(disp="off")
/tmp/ipython-input-1472060719.py:66: ConvergenceWarning: The optimizer returned code 9. The message is:
Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

  res = am.fit(disp="off")
/tmp/ipython-input-1472060719.py:66: ConvergenceWarning: The optimizer returned code 9. The message is:
Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

  res = am.fit(disp="off")
/tmp/ipython-input-1472060719.py:66: ConvergenceWarning: The optimizer returned code 9. The message is:
Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

  res = am.fit(disp="off")
/tmp/ipython-input-1472060719.py:66: ConvergenceWarning: The optimizer returned code 9. The message is:
Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

  res = am.fit(disp="off")
/tmp/ipython-input-1472060719.py:66: ConvergenceWarning: The optimizer returned code 9. The message is:
Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

  res = am.fit(disp="off")
bench ETHUSDT
arch ETHUSDT
bench BNBUSDT
arch BNBUSDT
bench SOLUSDT
arch SOLUSDT
bench XRPUSDT
arch XRPUSDT
bench PENGUUSDT
arch PENGUUSDT

=== best per symbol / horizon ===

        symbol   h           model       mse      corr     n

44     BNBUSDT   5     vol_garch_n  0.000139  0.714936  1074

45     BNBUSDT   5       vol_gjr_n  0.000139  0.715667  1074

46     BNBUSDT   5    vol_egarch_n  0.000142  0.699800  1074

6      BTCUSDT   5    vol_egarch_n  0.000117  0.609112  1074

4      BTCUSDT   5     vol_garch_n  0.000118  0.575491  1074

5      BTCUSDT   5       vol_gjr_n  0.000119  0.514045  1074

26     ETHUSDT   5    vol_egarch_n  0.000216  0.540083  1074

24     ETHUSDT   5     vol_garch_n  0.000220  0.543532  1074

25     ETHUSDT   5       vol_gjr_n  0.000221  0.545699  1074

101  PENGUUSDT   5        vol_ewma  0.000755  0.553713   375

100  PENGUUSDT   5  vol_rolling_20  0.000841  0.414695   359

102  PENGUUSDT   5   vol_parkinson  0.003080  0.108505   360

67     SOLUSDT   5     vol_garch_t  0.000339  0.690644  1074

68     SOLUSDT   5       vol_gjr_t  0.000341  0.685763  1074

64     SOLUSDT   5     vol_garch_n  0.000343  0.714909  1074

88     XRPUSDT   5       vol_gjr_t  0.000533  0.712795  1074

87     XRPUSDT   5     vol_garch_t  0.000538  0.709706  1074

84     XRPUSDT   5     vol_garch_n  0.000543  0.736638  1074

50     BNBUSDT  20  vol_rolling_20  0.000012  0.989284  1805

52     BNBUSDT  20   vol_parkinson  0.000039  0.973069  1806

56     BNBUSDT  20    vol_egarch_n  0.000041  0.879787  1074

10     BTCUSDT  20  vol_rolling_20  0.000005  0.981309  1805

11     BTCUSDT  20        vol_ewma  0.000014  0.946722  1806

18     BTCUSDT  20       vol_gjr_t  0.000024  0.824239  1074

30     ETHUSDT  20  vol_rolling_20  0.000009  0.982145  1805

31     ETHUSDT  20        vol_ewma  0.000033  0.938120  1806

34     ETHUSDT  20     vol_garch_n  0.000039  0.874174  1074

104  PENGUUSDT  20  vol_rolling_20  0.000030  0.951676   359

105  PENGUUSDT  20        vol_ewma  0.000079  0.880743   360

106  PENGUUSDT  20   vol_parkinson  0.001799  0.472589   360

70     SOLUSDT  20  vol_rolling_20  0.000021  0.982458  1805

71     SOLUSDT  20        vol_ewma  0.000082  0.933336  1806

72     SOLUSDT  20   vol_parkinson  0.000100  0.946799  1806

90     XRPUSDT  20  vol_rolling_20  0.000032  0.977433  1805

91     XRPUSDT  20        vol_ewma  0.000099  0.929760  1806

92     XRPUSDT  20   vol_parkinson  0.000197  0.897891  1806

saved vol_benchmark_2021_2025_binance_public.csv
