In [None]:
# VaR Backtesting (Historical VaR)

This notebook backtests Historical VaR estimates by measuring how often realized portfolio losses exceed the VaR threshold (violations).

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path

# Portfolio allocation: ticker symbol -> weight applied to that ticker's return.
WEIGHTS = dict(
    SPY=0.20,
    QQQ=0.20,
    IWM=0.15,
    TLT=0.20,
    GLD=0.15,
    HYG=0.10,
)

# Price file location, relative to the notebook/ directory this notebook lives in.
DATA_PATH = Path("../data/prices.csv")

In [None]:
# Load the long-format price table, parse the date column, and order rows by
# ticker then date so that per-ticker calculations see prices chronologically.
prices = (
    pd.read_csv(DATA_PATH)
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    .sort_values(["ticker", "date"])
    .reset_index(drop=True)
)

# Quick sanity output: table dimensions and the set of tickers found in the file.
print("Rows, cols:", prices.shape)
print("Tickers:", sorted(prices["ticker"].unique()))
prices.head()

In [None]:
# Daily simple return per ticker, computed from consecutive adj_close values
# within each ticker group. The first row of every ticker has no previous
# price, so its return is NaN and is dropped below.
prices["ret"] = prices.groupby("ticker")["adj_close"].pct_change()
returns = prices.dropna(subset=["ret"]).copy()

# Attach the portfolio weight of each row's ticker; a ticker present in the
# data but absent from WEIGHTS maps to NaN and is rejected.
returns["weight"] = returns["ticker"].map(WEIGHTS)
if returns["weight"].isna().any():
    missing = returns.loc[returns["weight"].isna(), "ticker"].unique()
    raise ValueError(f"Missing weights for tickers: {missing}")

# Reverse check: a ticker listed in WEIGHTS but with no return rows would
# silently drop out of the sum below, leaving a portfolio whose weights no
# longer add up to the intended total.
absent = sorted(set(WEIGHTS) - set(returns["ticker"]))
if absent:
    raise ValueError(f"No return data for weighted tickers: {absent}")

returns["w_ret"] = returns["ret"] * returns["weight"]

# Portfolio return per date = sum of weighted ticker returns on that date.
# NOTE(review): a date on which only some tickers have a return still yields a
# (partial) sum here - confirm all tickers share the same trading calendar.
portfolio = (
    returns.groupby("date")["w_ret"].sum()
    .rename("portfolio_ret")
    .to_frame()
    .sort_index()
)

# Loss is the negated return, so a positive value means the portfolio lost money.
portfolio["loss"] = -portfolio["portfolio_ret"]
portfolio.head()

In [None]:
# Fixed VaR thresholds, expressed as daily loss fractions, for the 95% and 99%
# confidence levels. They are compared against portfolio["loss"] further down.
# NOTE(review): hard-coded values; presumably produced by a separate VaR
# estimation step - confirm their source before reusing with new data.
var_95, var_99 = 0.0109, 0.0197

(var_95, var_99)

In [None]:
# Build the violation series: store each threshold as a constant column, then
# flag (1/0) every date on which the realized loss strictly exceeds it.
thresholds = {"95": var_95, "99": var_99}

# Threshold columns first, so the column order is VaR_95, VaR_99, hit_95, hit_99.
for level, threshold in thresholds.items():
    portfolio["VaR_" + level] = threshold

for level in thresholds:
    exceeded = portfolio["loss"].gt(portfolio["VaR_" + level])
    portfolio["hit_" + level] = exceeded.astype(int)

preview_cols = ["loss", "VaR_95", "hit_95", "VaR_99", "hit_99"]
portfolio[preview_cols].head()

In [None]:
# Violation summary
def summarize_hits(hit: pd.Series, alpha: float) -> dict:
    """Summarize VaR violations against the rate implied by the confidence level.

    Parameters
    ----------
    hit : pd.Series
        Violation indicator with one entry per observation; its sum is taken
        as the number of violations.
    alpha : float
        VaR confidence level (e.g. 0.95).

    Returns
    -------
    dict
        ``obs`` (number of observations), ``violations`` (count of hits),
        ``violation_rate`` (violations / obs) and ``expected_rate``
        (1 - alpha). ``violation_rate`` is NaN when ``hit`` is empty.
    """
    n = int(hit.shape[0])
    x = int(hit.sum())
    # An empty series has no defined violation rate; report NaN rather than
    # failing with ZeroDivisionError.
    rate = x / n if n else float("nan")
    expected = 1 - alpha
    return {"obs": n, "violations": x, "violation_rate": rate, "expected_rate": expected}

# One summary row per confidence level, built from the violation flags.
summary = pd.DataFrame(
    [summarize_hits(portfolio["hit_95"], 0.95),
     summarize_hits(portfolio["hit_99"], 0.99)],
    index=["VaR 95%", "VaR 99%"]
)

# Display copy with the two rate columns rendered as percentages; the counts
# stay as integers and `summary` itself keeps its numeric values.
# Series.map is used per column because DataFrame.applymap is deprecated
# since pandas 2.1.
rate_cols = ["violation_rate", "expected_rate"]
summary.assign(**{col: summary[col].map("{:.2%}".format) for col in rate_cols})

In [None]:
## Kupiec Test Function
from scipy.stats import chi2
import numpy as np

def kupiec_test(n, x, alpha):
    """
    Kupiec Proportion of Failures (POF) test.

    Compares the observed violation frequency ``x / n`` with the frequency
    ``1 - alpha`` implied by the VaR confidence level, using the likelihood
    ratio

        LR = -2 * log( (1-p)^(n-x) * p^x / ((1-phat)^(n-x) * phat^x) )

    which is referred to a chi-square distribution with 1 degree of freedom.

    Parameters
    ----------
    n : int
        Number of observations; must be positive.
    x : int
        Number of VaR violations; must satisfy ``0 <= x <= n``.
    alpha : float
        VaR confidence level, strictly between 0 and 1 (e.g. 0.95).

    Returns
    -------
    tuple
        ``(lr_pof, p_value)``: the LR statistic and its chi-square(1) p-value.

    Raises
    ------
    ValueError
        If ``n``, ``x`` or ``alpha`` is outside the ranges above.
    """
    if n <= 0:
        raise ValueError("n must be a positive number of observations")
    if not 0 <= x <= n:
        raise ValueError("x must satisfy 0 <= x <= n")
    p = 1 - alpha
    if not 0 < p < 1:
        raise ValueError("alpha must be strictly between 0 and 1")
    phat = x / n

    def _log_term(count, prob_null, prob_obs):
        # count * log(prob_null / prob_obs), with the convention 0 * log(0) = 0
        # (prob_obs is 0 only when count is 0).
        if count == 0:
            return 0.0
        return count * (np.log(prob_null) - np.log(prob_obs))

    # Work in log space: the raw powers (1-p)**(n-x) * p**x underflow to 0.0
    # for large n, which turned the ratio into 0.0 / 0.0.
    log_ratio = _log_term(x, p, phat) + _log_term(n - x, 1 - p, 1 - phat)

    # The statistic is non-negative in exact arithmetic; clamp tiny negative
    # values caused by rounding when phat is (almost) equal to p.
    lr_pof = max(-2 * log_ratio, 0.0)

    # Survival function instead of 1 - cdf: same value, better precision in
    # the upper tail.
    p_value = chi2.sf(lr_pof, df=1)

    return lr_pof, p_value


In [None]:
### Apply Kupiec Test to My Results
# Take the sample size and violation counts from the backtest columns instead
# of hard-coding numbers copied from an earlier output, so the test stays in
# sync whenever the data or the VaR thresholds change.
n = int(portfolio.shape[0])
x_95 = int(portfolio["hit_95"].sum())
x_99 = int(portfolio["hit_99"].sum())

lr_95, p_95 = kupiec_test(n, x_95, 0.95)
lr_99, p_99 = kupiec_test(n, x_99, 0.99)

# One row per confidence level: LR statistic and its chi-square(1) p-value.
results = pd.DataFrame(
    {
        "LR statistic": [lr_95, lr_99],
        "p-value": [p_95, p_99],
    },
    index=["VaR 95%", "VaR 99%"]
)

results


In [None]:
###  Violation timeline plot (visual sanity check)

import matplotlib.pyplot as plt
import os

# ensure figures directory exists
os.makedirs("../figures", exist_ok=True)

loss = portfolio["loss"].dropna()

# In-sample historical VaR thresholds (empirical loss quantiles), used for
# this plot only. They get their own names so that the var_95 / var_99
# thresholds set earlier in the notebook are not overwritten when this cell
# runs (re-running the violation cell afterwards would otherwise silently use
# different thresholds).
plot_var_95 = loss.quantile(0.95)
plot_var_99 = loss.quantile(0.99)

# Violation flags: dates on which the loss strictly exceeds each threshold
viol_95 = loss > plot_var_95
viol_99 = loss > plot_var_99

# Plot: losses + VaR lines + violation markers
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(loss.index, loss.values, linewidth=1, alpha=0.7, label="Daily loss")
ax.axhline(plot_var_95, linewidth=1.5, linestyle="--", label=f"VaR 95% = {plot_var_95:.2%}")
ax.axhline(plot_var_99, linewidth=1.5, linestyle="--", label=f"VaR 99% = {plot_var_99:.2%}")

# Mark violations
ax.scatter(loss.index[viol_95], loss[viol_95], s=20, label="VaR 95% violations")
ax.scatter(loss.index[viol_99], loss[viol_99], s=30, label="VaR 99% violations")

ax.set_title("VaR Violations Timeline (Historical VaR)")
ax.set_xlabel("Date")
ax.set_ylabel("Loss")
ax.legend()
fig.tight_layout()

# save figure
fig.savefig(
    "../figures/var_violation_timeline.png",
    dpi=300,
    bbox_inches="tight"
)

plt.show()


