# 📉 ES & VaR Scenario Analysis

This notebook computes **Value-at-Risk (VaR)** and **Expected Shortfall (ES)** across historical, Monte Carlo, and stress-test scenarios.

**Files (optional):**
- `./data/pnl.csv` with a column `pnl` (cumulative PnL) or `ret` (daily returns), plus an optional `date` column. If the file is missing, synthetic data is generated.
- `./data/returns.csv` wide matrix of returns (dates x tickers) for multi-asset Monte Carlo; optional.

All charts use matplotlib. The notebook requires NumPy, pandas, and matplotlib; SciPy is optional and only used for the parametric VaR/ES section.

In [None]:
import os, math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
try:
    from scipy.stats import norm
    HAVE_SCIPY = True
except Exception:
    HAVE_SCIPY = False

# Default size (in inches) for every figure created below.
plt.rcParams.update({'figure.figsize': (10, 4)})

# Optional input files; the loaders generate synthetic data when a file is absent.
PNL_PATH = './data/pnl.csv'          # columns: date (optional), pnl or ret
RETURNS_PATH = './data/returns.csv'  # wide returns matrix for the multi-asset MC

# Seed NumPy's global RNG so the demo data and simulations are reproducible.
np.random.seed(42)

def load_pnl():
    """Return the PnL series that the rest of the notebook analyses.

    When ``PNL_PATH`` exists it is read as CSV. A ``date`` column, if present,
    is parsed and used to sort the rows chronologically. A ``pnl`` column is
    returned as floats unchanged; otherwise a ``ret`` column is compounded on
    a starting equity of 1,000,000 and the PnL relative to that start is
    returned.

    When the file is absent, 1000 normally distributed returns (mean 0.0005,
    std 0.02) are drawn from NumPy's global RNG and compounded the same way.

    Raises:
        ValueError: the CSV has neither a ``pnl`` nor a ``ret`` column.
    """
    start_equity = 1_000_000.0

    if not os.path.exists(PNL_PATH):
        # Demo PnL built from normal returns.
        demo_rets = np.random.normal(0.0005, 0.02, 1000)
        demo_equity = start_equity * (1.0 + demo_rets).cumprod()
        return pd.Series(demo_equity - start_equity)

    frame = pd.read_csv(PNL_PATH)
    if 'date' in frame.columns:
        frame['date'] = pd.to_datetime(frame['date'])
        frame = frame.sort_values('date').reset_index(drop=True)

    if 'pnl' in frame.columns:
        return frame['pnl'].astype(float)
    if 'ret' in frame.columns:
        growth = (1.0 + frame['ret'].astype(float)).cumprod()
        return start_equity * growth - start_equity
    raise ValueError('pnl.csv must have column pnl or ret')

def load_returns():
    """Return a wide DataFrame of asset returns (rows: dates, columns: assets).

    When ``RETURNS_PATH`` exists it is read as CSV with the first column as a
    parsed date index, sorted by index, and missing values replaced by 0.0.

    Otherwise a synthetic one-factor panel is generated from NumPy's global
    RNG: 750 business days starting 2023-01-01 for five tickers, each equal to
    a small drift plus 0.6 times a common market shock plus idiosyncratic
    noise.
    """
    if os.path.exists(RETURNS_PATH):
        df = pd.read_csv(RETURNS_PATH, index_col=0, parse_dates=True)
        return df.sort_index().fillna(0.0)

    # Demo multi-asset returns: market factor + idiosyncratic noise.
    dates = pd.date_range('2023-01-01', periods=750, freq='B')
    cols = ['AAPL', 'MSFT', 'SPX', 'TLT', 'GLD']
    n_obs = len(dates)
    # The market factor is drawn first, then one noise vector per ticker in
    # list order, so the seeded random stream is consumed in a fixed order.
    mkt = np.random.normal(0, 0.009, n_obs)
    data = {}
    for c in cols:
        eps = np.random.normal(0, 0.012, n_obs)
        mu = 0.0002 if c == 'TLT' else 0.0004  # TLT gets the lower drift
        data[c] = mu + 0.6 * mkt + eps
    return pd.DataFrame(data, index=dates)

# Load both inputs (or their synthetic stand-ins) and report their sizes.
# The PnL is loaded first so the seeded RNG is consumed in a fixed order.
pnl = load_pnl()
R = load_returns()
print('PnL len:', len(pnl), '| Returns shape:', R.shape)

# Quick visual check of the PnL path.
pnl.plot(title='PnL (cumulative)')
plt.show()

## Functions: VaR & ES

In [None]:
def var_es_from_returns(rets, alpha=0.99):
    """Return the empirical VaR and Expected Shortfall of a return sample.

    Both figures are expressed as returns, so losses are negative: VaR is the
    empirical ``(1 - alpha)`` quantile of ``rets`` and ES is the mean of every
    observation at or below that quantile.

    Args:
        rets: array-like of returns.
        alpha: confidence level, e.g. 0.99 for the 1% left tail.

    Returns:
        Tuple ``(var, es)``. Empty input gives ``(nan, nan)``. When no
        observation lies at or below the quantile (the quantile is NaN because
        the sample contains NaN), ES falls back to the quantile itself.
    """
    rets = np.asarray(rets, dtype=float)
    if rets.size == 0:
        return float('nan'), float('nan')
    q = np.percentile(rets, (1 - alpha) * 100.0)
    tail = rets[rets <= q]
    # Test the number of tail observations, not the length of the boolean
    # mask: the mask is as long as the sample even when nothing qualifies.
    es = tail.mean() if tail.size > 0 else q
    return q, es

def pnl_to_returns(pnl_series, base=1_000_000.0):
    """Convert a cumulative PnL path into period-over-period simple returns.

    The equity curve is ``base + pnl`` and the returns are its percentage
    changes. The first observation has no predecessor and is dropped.

    Args:
        pnl_series: cumulative PnL as a pandas Series or any 1-D array-like.
        base: starting equity added to the PnL. The default matches the
            1,000,000 starting equity that ``load_pnl`` compounds from.

    Returns:
        1-D NumPy array of returns, one element shorter than the input
        (empty when the input has fewer than two observations).
    """
    eq = base + np.asarray(pnl_series, dtype=float)
    rets = pd.Series(eq).pct_change().dropna().values
    return rets

def print_var_es(label, rets, levels=(0.95, 0.99)):
    """Print the VaR and ES of ``rets`` at each confidence level.

    Args:
        label: heading printed above the figures.
        rets: array-like of returns, passed to ``var_es_from_returns``.
        levels: iterable of confidence levels (fractions such as 0.95).
    """
    print(f'--- {label} ---')
    for a in levels:
        var, es = var_es_from_returns(rets, a)
        # ':g' keeps "95%" / "99%" for whole-percent levels and shows
        # fractional ones faithfully (0.975 -> "97.5%"), where int() would
        # truncate them to a misleading "97%".
        print(f'{a * 100:g}% VaR: {var:.4f} | ES: {es:.4f}')

## Historical (Non-parametric) VaR/ES

In [None]:
# Turn the PnL path into returns and report the empirical VaR/ES.
rets_hist = pnl_to_returns(pnl)
print_var_es('Historical', rets_hist)

# Histogram of the realised returns.
plt.figure()
plt.hist(rets_hist, bins=60, alpha=0.7)  # type: ignore
plt.title('Historical Return Distribution')
plt.show()

## Parametric VaR/ES (Gaussian) — optional

In [None]:
if not HAVE_SCIPY:
    print('SciPy not available -> skipping parametric VaR/ES')
else:
    # Fit a normal distribution to the historical returns.
    mu, sd = np.mean(rets_hist), np.std(rets_hist)  # type: ignore
    for a in (0.95, 0.99):
        tail_prob = 1 - a
        z = norm.ppf(tail_prob)  # left-tail standard-normal quantile
        var = mu + sd * z
        # ES for normal: mu - sd * phi(z)/(1-a)
        es = mu - sd * (norm.pdf(z) / tail_prob)
        print(f'Parametric Normal {int(a*100)}% VaR: {var:.4f} | ES: {es:.4f}')

## Monte Carlo (Single-asset) — horizon aggregation

In [None]:
# Normal model calibrated to the historical returns.
mu = np.mean(rets_hist)  # type: ignore
sd = np.std(rets_hist)  # type: ignore
n_sims, horizon = 20000, 10

# One row per simulated path, one column per day; the horizon return is
# approximated by the sum of the daily returns.
paths = np.random.normal(loc=mu, scale=sd, size=(n_sims, horizon))
agg = np.sum(paths, axis=1)
print_var_es(f'Monte Carlo {horizon}d (single-asset)', agg)

plt.figure()
plt.hist(agg, bins=60, alpha=0.7)
plt.title(f'Monte Carlo {horizon}-day PnL (returns sum)')
plt.show()

## Monte Carlo (Multi-asset, correlated)

In [None]:
def mc_correlated(R, n_sims=20000, horizon=10, weights=None):
    """Simulate horizon-aggregated portfolio returns with correlated assets.

    Daily asset returns are modelled as multivariate normal with the sample
    mean and covariance of ``R``. For every simulation the daily portfolio
    returns are summed over ``horizon`` days.

    Args:
        R: DataFrame of asset returns (rows: observations, columns: assets).
        n_sims: number of simulated paths.
        horizon: number of days summed per path.
        weights: optional 1-D array-like of portfolio weights, one per column
            of ``R``. Defaults to equal weights.

    Returns:
        1-D NumPy array of length ``n_sims`` with the summed portfolio returns.

    Raises:
        ValueError: ``weights`` does not have one entry per asset.
    """
    mu = R.mean().values
    S = R.cov().values
    n = len(mu)

    # Cholesky factor; fall back to an eigen-decomposition with the
    # eigenvalues floored when the covariance is not positive definite.
    try:
        L = np.linalg.cholesky(S)
    except np.linalg.LinAlgError:
        evals, evecs = np.linalg.eigh(S)
        evals = np.clip(evals, 1e-10, None)
        L = evecs @ np.diag(np.sqrt(evals))

    if weights is None:
        w = np.ones(n) / n
    else:
        w = np.asarray(weights, dtype=float)
        if w.shape != (n,):
            raise ValueError(f'weights must have shape ({n},), got {w.shape}')

    # Draw every shock in one call instead of n_sims * horizon separate calls.
    z = np.random.normal(size=(n_sims, horizon, n))
    # w @ (mu + L @ z) == w @ mu + (L.T @ w) @ z, so each daily portfolio
    # return only needs the projection of the shocks onto L.T @ w.
    loadings = L.T @ w
    return horizon * float(w @ mu) + (z @ loadings).sum(axis=1)

# 20,000 equally weighted portfolio paths over a 10-day horizon.
agg_multi = mc_correlated(R, n_sims=20000, horizon=10)
print_var_es('Monte Carlo (multi-asset, 10d)', agg_multi)

plt.figure()
plt.hist(agg_multi, bins=60, alpha=0.7)
plt.title('MC Multi-asset 10d PnL (returns sum)')
plt.show()

## Stress Scenarios (what-if shocks)

In [None]:
def shock_series(rets, drop=0.03, days=20):
    """Return a copy of ``rets`` with its first ``days`` entries lowered by ``drop``.

    The input is left untouched. When the series is shorter than ``days``,
    the shock instead covers one fifth of its length (at least one entry).
    """
    shocked = rets.copy()
    n_obs = len(shocked)
    n_days = days if n_obs >= days else max(1, n_obs // 5)
    shocked[:n_days] = shocked[:n_days] - drop
    return shocked

# Each scenario lowers the first `days` historical returns by `drop`.
scenarios = {
    'COVID-like (20d -3%)': {'drop': 0.03, 'days': 20},
    'Flash Crash (2d -7%)': {'drop': 0.07, 'days': 2},
    'Slow Bear (60d -0.5%)': {'drop': 0.005, 'days': 60},
}

for name, params in scenarios.items():
    # Shock a copy so the historical returns stay intact for the next scenario.
    r2 = shock_series(rets_hist.copy(), **params)  # type: ignore
    print_var_es(name, r2)

    plt.figure()
    plt.hist(r2, bins=60, alpha=0.7)  # type: ignore
    plt.title(f'Returns under {name}')
    plt.show()

## Liquidity Haircut + Correlation Breakdown (MC)

In [None]:
def apply_corr_break(R, vol_scale=1.5, corr_break=0.4):
    """Build a stressed covariance matrix from the sample statistics of ``R``.

    The sample covariance is split into volatilities and a correlation matrix.
    The correlation matrix is blended with the identity
    (``(1 - corr_break) * C + corr_break * I``), which shrinks every
    off-diagonal correlation by the factor ``1 - corr_break``, and the
    volatilities are multiplied by ``vol_scale``.

    Args:
        R: DataFrame of asset returns (rows: observations, columns: assets).
        vol_scale: multiplier applied to every asset volatility.
        corr_break: blend weight on the identity matrix; 0 keeps the sample
            correlations, 1 removes all cross-asset correlation.

    Returns:
        Tuple ``(mu, S2)``: the unchanged sample mean vector and the stressed
        covariance matrix.
    """
    mu = R.mean().values
    S = R.cov().values
    d = np.sqrt(np.diag(S))

    # Divide exactly instead of padding the denominator with an epsilon, which
    # would bias every correlation (and the unit diagonal) downwards. Pairs
    # involving a zero-volatility asset get correlation 0.
    denom = np.outer(d, d)
    C = np.divide(S, denom, out=np.zeros_like(S, dtype=float), where=denom > 0)
    np.fill_diagonal(C, 1.0)

    C2 = (1 - corr_break) * C + corr_break * np.eye(len(mu))
    d2 = d * vol_scale
    S2 = np.outer(d2, d2) * C2
    return mu, S2

# Stressed inputs: volatilities doubled, correlations blended halfway towards
# the identity matrix.
# NOTE(review): no explicit liquidity haircut is applied in this cell; the
# stress consists of the volatility scaling and the correlation blend only.
mu2, S2 = apply_corr_break(R, vol_scale=2.0, corr_break=0.5)
n = len(mu2)

# Cholesky factor of the stressed covariance; fall back to an
# eigen-decomposition with floored eigenvalues if it is not positive definite.
try:
    L2 = np.linalg.cholesky(S2)
except np.linalg.LinAlgError:
    e, V = np.linalg.eigh(S2)
    e = np.clip(e, 1e-12, None)
    L2 = V @ np.diag(np.sqrt(e))

n_sims, horizon = 20000, 10
w = np.ones(n) / n  # equally weighted portfolio

# Draw every shock in one call instead of n_sims * horizon separate calls.
z = np.random.normal(size=(n_sims, horizon, n))
# w @ (mu2 + L2 @ z) == w @ mu2 + (L2.T @ w) @ z, summed over the horizon.
acc = horizon * float(w @ mu2) + (z @ (L2.T @ w)).sum(axis=1)  # type: ignore

print_var_es('Liquidity+CorrBreak MC 10d', acc)
plt.figure()
plt.hist(acc, bins=60, alpha=0.7)
plt.title('Liquidity Haircut + Corr Breakdown (MC 10d)')
plt.show()