# 📉 Financial Volatility Modeling — GARCH Toolkit  
**by Lyra**

> 实战取向：改参数即可复用；默认 yfinance，无需 token。可选 TuShare（需 token）与 CSV。

Outputs: `./figures/`（图）与 `./outputs/`（预测/指标）。

## 0. Parameters（仅需修改本节即可复用）

In [None]:

# ---- Edit here ----
SYMBOL = "600036.SS"   # China Merchants Bank in Yahoo format; other tickers such as AAPL or 0700.HK also work
START  = "2018-01-01"
END    = "2025-01-01"

DATA_SOURCE = "yfinance"   # "yfinance" (default) | "tushare" | "csv"
CSV_PATH    = "zhaoshang.csv"  # only used when DATA_SOURCE="csv"

# GARCH specification
MEAN_MODEL = "Constant"    # "Zero" | "Constant" | "AR(1)"
VOL_MODEL  = "GARCH"       # "ARCH" | "GARCH"
P, Q       = 1, 1          # ARCH/GARCH orders (GARCH(1,1) is the common choice)
DIST       = "normal"      # "normal" | "t" (Student's t distribution)

# Forecast horizon (days)
FORECAST_STEPS = 20

# Output folders
FIG_DIR = "figures"
OUT_DIR = "outputs"
# --------------------

import os, math, numpy as np, pandas as pd, matplotlib.pyplot as plt
from pathlib import Path

# Render floats with six decimals in DataFrame output. The option lives in
# the "display" namespace; assigning to an unknown top-level option name
# makes pandas raise OptionError.
pd.options.display.float_format = "{:.6f}".format
plt.rcParams["figure.figsize"] = (10, 6)

# Create the output folders up front so later savefig/to_csv calls succeed.
Path(FIG_DIR).mkdir(parents=True, exist_ok=True)
Path(OUT_DIR).mkdir(parents=True, exist_ok=True)

# Echo the active configuration so a saved run records its settings.
print("SYMBOL:", SYMBOL, "| Window:", START, "→", END, "| Source:", DATA_SOURCE)
print("Spec:", f"{MEAN_MODEL} + {VOL_MODEL}({P},{Q})", "| Dist:", DIST, "| Forecast:", FORECAST_STEPS, "d")


## 1. 数据读取（默认 yfinance；可选 TuShare 或 CSV）

In [None]:

def load_yfinance(symbol, start, end):
    """Download daily prices for ``symbol`` from Yahoo Finance.

    Args:
        symbol: Ticker in Yahoo format (e.g. "AAPL", "600036.SS").
        start: Start date forwarded to ``yfinance.download``.
        end: End date forwarded to ``yfinance.download``.

    Returns:
        DataFrame with columns ``datetime`` and ``close``. ``close`` is the
        adjusted close when the download contains it, otherwise the plain
        close.

    Raises:
        ImportError: If yfinance is not installed.
        ValueError: If the download is empty or has no usable price column.
    """
    try:
        import yfinance as yf
    except ImportError as exc:
        raise ImportError("缺少 yfinance，请先安装：pip install yfinance") from exc

    # Recent yfinance releases default to auto_adjust=True, which drops the
    # "Adj Close" column; request it explicitly so the result does not
    # depend on the installed version.
    data = yf.download(symbol, start=start, end=end, progress=False, auto_adjust=False)
    if data is None or len(data) == 0:
        raise ValueError("yfinance 未返回数据，请检查代码与日期范围。")

    # Recent releases return (field, ticker) MultiIndex columns even for a
    # single ticker; keep only the field level so lookups by name work.
    if isinstance(data.columns, pd.MultiIndex):
        data = data.copy()
        data.columns = data.columns.get_level_values(0)

    if "Adj Close" in data.columns:
        price_col = "Adj Close"
    elif "Close" in data.columns:
        price_col = "Close"
    else:
        raise ValueError("yfinance 返回的数据缺少价格列（Adj Close/Close）。")

    # Rename positionally so the result does not depend on the index name.
    df = data[[price_col]].reset_index()
    df.columns = ["datetime", "close"]
    return df

def load_tushare(symbol, start, end):
    """Load daily close prices for ``symbol`` from TuShare Pro.

    The API token is read from the ``TUSHARE_TOKEN`` environment variable,
    falling back to ``TS_TOKEN`` when the former is unset or empty.

    Args:
        symbol: Stock code. A Yahoo-style Shanghai suffix (``.SS``) is
            converted to the ``.SH`` suffix; other codes are passed through
            unchanged.
        start: Start date, anything ``pd.Timestamp`` accepts.
        end: End date, anything ``pd.Timestamp`` accepts.

    Returns:
        DataFrame with columns ``datetime`` and ``close``, sorted by date.

    Raises:
        ValueError: If no token is configured or the query returns no rows.
        ImportError: If tushare is not installed.
    """
    import os

    # `or` (rather than a getenv default) so that an empty TUSHARE_TOKEN
    # still falls through to TS_TOKEN.
    token = (os.getenv("TUSHARE_TOKEN") or os.getenv("TS_TOKEN") or "").strip()
    if not token:
        raise ValueError("缺少 TuShare Token，请设置环境变量 TUSHARE_TOKEN。")
    try:
        import tushare as ts
    except ImportError as exc:
        raise ImportError("缺少 tushare，请先安装：pip install tushare") from exc
    ts.set_token(token)
    pro = ts.pro_api(token)

    s = pd.Timestamp(start).strftime("%Y%m%d")
    e = pd.Timestamp(end).strftime("%Y%m%d")

    # The notebook's default SYMBOL is Yahoo-style ("600036.SS"); map the
    # Shanghai suffix to ".SH" before querying TuShare.
    if symbol.upper().endswith(".SS"):
        ts_code = symbol[:-3] + ".SH"
    else:
        ts_code = symbol

    df = pro.daily(ts_code=ts_code, start_date=s, end_date=e)
    if df is None or len(df) == 0:
        raise ValueError("TuShare 未返回数据，请检查 ts_code 与日期。")

    df["trade_date"] = pd.to_datetime(df["trade_date"], format="%Y%m%d", errors="coerce")
    df = df.rename(columns={"trade_date": "datetime"})[["datetime", "close"]]
    # Drop rows whose date failed to parse or whose price is missing, the
    # same way load_csv does.
    df = df.sort_values("datetime").dropna().reset_index(drop=True)
    return df

def load_csv(csv_path):
    """Read a price CSV and normalise it to ``datetime`` / ``close`` columns.

    The date column is the first of ``datetime``, ``trade_date``, ``date``,
    ``Date`` found in the file; a ``trade_date`` column is parsed as
    ``YYYYMMDD``. The price column is the first of ``close``, ``Close``,
    ``Adj Close``, ``adj_close``, ``AdjClose`` found in the file.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        DataFrame with columns ``datetime`` and ``close``, sorted by date,
        with rows containing missing values removed and a fresh index.

    Raises:
        ValueError: If no date column or no price column is present.
    """
    frame = pd.read_csv(csv_path)
    available = frame.columns

    date_candidates = ("datetime", "trade_date", "date", "Date")
    date_col = next((name for name in date_candidates if name in available), None)
    if date_col is None:
        raise ValueError("CSV 需包含日期列：datetime/trade_date/date/Date")

    # trade_date is a compact YYYYMMDD value (often read as an integer), so
    # it is stringified and parsed with an explicit format.
    if date_col == "trade_date":
        parsed = pd.to_datetime(frame[date_col].astype(str), format="%Y%m%d", errors="coerce")
    else:
        parsed = pd.to_datetime(frame[date_col], errors="coerce")
    frame[date_col] = parsed

    price_candidates = ("close", "Close", "Adj Close", "adj_close", "AdjClose")
    price_col = next((name for name in price_candidates if name in available), None)
    if price_col is None:
        raise ValueError("CSV 需包含价格列：close/Adj Close 等")

    tidy = frame[[date_col, price_col]].rename(
        columns={date_col: "datetime", price_col: "close"}
    )
    tidy = tidy.sort_values("datetime")
    tidy = tidy.dropna()
    return tidy.reset_index(drop=True)

# Dispatch to the loader selected by DATA_SOURCE. Each entry defers the call
# so that only the chosen source is actually queried.
_LOADERS = {
    "yfinance": lambda: load_yfinance(SYMBOL, START, END),
    "tushare": lambda: load_tushare(SYMBOL, START, END),
    "csv": lambda: load_csv(CSV_PATH),
}
if DATA_SOURCE not in _LOADERS:
    raise ValueError("DATA_SOURCE 必须为 'yfinance' | 'tushare' | 'csv'")
raw = _LOADERS[DATA_SOURCE]()

# Keep only rows inside [START, END] (both ends inclusive), ordered by date.
_in_window = raw["datetime"].between(START, END)
raw = raw[_in_window].copy().sort_values("datetime")

_first_day = raw["datetime"].min().date()
_last_day = raw["datetime"].max().date()
print("Data window:", _first_day, "→", _last_day, "| points:", len(raw))
raw.head()


## 2. 计算收益率（Log Return）与基础统计

In [None]:

# Daily log returns r_t = ln(P_t / P_{t-1}). The first row has no previous
# price, so its return is NaN and is dropped from `ret`.
df = raw.copy()
df["ret"] = np.log(df["close"] / df["close"].shift(1))

# Index the return series by trading date rather than by row position, so
# the model output and the plots built from `ret.index` get a date axis.
ret = df.set_index("datetime")["ret"].dropna()
if ret.empty:
    raise ValueError("收益率序列为空，请检查数据源与日期范围。")

# Sample mean and standard deviation, annualized with 252 trading days
# (mean scales linearly, volatility with the square root of time).
daily_mu, daily_sig = ret.mean(), ret.std(ddof=1)
ann_mu, ann_vol = daily_mu*252, daily_sig*np.sqrt(252)

print(f"Daily mean: {daily_mu:.6f}, Daily vol: {daily_sig:.6f}")
print(f"Annualized return: {ann_mu:.4%}, Annualized volatility: {ann_vol:.4%}")
ret.describe()


## 3. 模型设定与拟合（ARCH / GARCH）

In [None]:

try:
    from arch import arch_model
except Exception:
    raise ImportError("缺少 arch 包，请先安装：pip install arch")

# Lower-cased MEAN_MODEL -> (mean name passed to arch_model, number of lags).
_MEAN_SPECS = {
    "zero": ("Zero", 0),
    "constant": ("Constant", 0),
    "ar(1)": ("ARX", 1),
}

# Normalize and validate each setting in turn (mean, then volatility, then
# distribution) so the first invalid one is the one reported.
mean = MEAN_MODEL.lower()
if mean not in _MEAN_SPECS:
    raise ValueError("MEAN_MODEL 仅支持 'Zero' | 'Constant' | 'AR(1)'")

vol = VOL_MODEL.upper()
if vol not in {"ARCH", "GARCH"}:
    raise ValueError("VOL_MODEL 仅支持 'ARCH' | 'GARCH'")

dist = DIST.lower()
if dist not in {"normal", "t"}:
    raise ValueError("DIST 仅支持 'normal' 或 't'")

_mean_name, _n_lags = _MEAN_SPECS[mean]

# The model is fitted on returns multiplied by 100 (percent units); later
# cells divide volatilities by 100 to undo this scaling.
am = arch_model(
    ret * 100,
    mean=_mean_name,
    lags=_n_lags,
    vol=vol,
    p=P,
    q=Q,
    dist=dist,
)
res = am.fit(update_freq=10, disp="off")
print(res.summary())


## 4. 条件波动率（Conditional Volatility）可视化

In [None]:

# The model was fitted on returns scaled by 100, so divide by 100 to get
# back to return units, then annualize with sqrt(252).
cond_vol = res.conditional_volatility / 100.0 * np.sqrt(252)
vol_ts = pd.Series(cond_vol, index=ret.index)

# Draw and save the annualized conditional volatility path.
fig, ax = plt.subplots()
ax.plot(vol_ts.index, vol_ts.values, label="Annualized Conditional Volatility")
ax.set_title("Annualized Conditional Volatility (GARCH)")
ax.legend()
ax.grid(True)
fig.tight_layout()
fig.savefig(f"{FIG_DIR}/garch_conditional_vol_annualized.png", dpi=200)
plt.show()


## 5. 波动率预测（Variance/Volatility Forecast）

In [None]:

# Multi-step variance forecast; the last row of the forecast table holds the
# path that starts from the final observation.
fcast = res.forecast(horizon=FORECAST_STEPS, reindex=False)
variance = fcast.variance.iloc[-1].to_numpy()

# Variance is in squared-percent units (model fitted on ret * 100): take the
# square root, rescale by 100, then annualize with sqrt(252).
_daily_sigma_pct = np.sqrt(variance)
vol_forecast = _daily_sigma_pct / 100.0 * np.sqrt(252)

_steps = np.arange(1, FORECAST_STEPS + 1)
fc_df = pd.DataFrame({"step": _steps, "annualized_vol": vol_forecast})

_forecast_path = f"{OUT_DIR}/forecast_annualized_vol.csv"
fc_df.to_csv(_forecast_path, index=False, encoding="utf-8")
fc_df.head()


## 6. 模型诊断（残差与分布）

In [None]:

# Standardized residuals of the fitted model, used for both diagnostic plots.
std_resid = res.std_resid

# Time-series view of the standardized residuals.
fig, ax = plt.subplots()
ax.plot(std_resid, label="Standardized Residuals")
ax.set_title("Standardized Residuals")
ax.legend()
ax.grid(True)
fig.tight_layout()
fig.savefig(f"{FIG_DIR}/standardized_residuals.png", dpi=200)
plt.show()

# Density histogram of the same residuals, with missing values removed.
fig, ax = plt.subplots()
ax.hist(std_resid.dropna(), bins=60, density=True)
ax.set_title("Standardized Residuals — Distribution")
ax.grid(True)
fig.tight_layout()
fig.savefig(f"{FIG_DIR}/standardized_resid_hist.png", dpi=200)
plt.show()


## 7. 导出核心指标

In [None]:

# Latest available annualized conditional volatility, or None when the
# series has no valid values.
_valid_vol = vol_ts.dropna()
_last_ann_vol = float(_valid_vol.iloc[-1]) if len(_valid_vol) > 0 else None

# One-row summary of the run: configuration plus headline statistics.
summ = {
    "symbol": SYMBOL,
    "start": START,
    "end": END,
    "mean_model": MEAN_MODEL,
    "vol_model": VOL_MODEL,
    "p": P,
    "q": Q,
    "dist": DIST,
    "daily_mean": float(ret.mean()),
    "daily_vol": float(ret.std(ddof=1)),
    "ann_vol_last": _last_ann_vol,
    "forecast_steps": FORECAST_STEPS,
    "note": "volatility in annualized terms",
}

_summary_path = f"{OUT_DIR}/garch_summary.csv"
pd.DataFrame([summ]).to_csv(_summary_path, index=False, encoding="utf-8")
print("Saved:", _summary_path)


## 💬 Personal Reflection（个人总结）
GARCH 的核心价值在于识别波动聚类与风险 regime。我的做法是优先保证流程可复用：统一数据接口、固定建模步骤、导出标准化结果。实际投研中，我会根据残差诊断调整分布与阶数，让模型更贴近资产特性。