# 02 — Correlation Analysis

Static full-period Pearson correlation heatmap of monthly returns, rolling 12-month correlation charts for key asset pairs (with macro-regime annotations), and a table of pairwise correlations by sub-period.

**Key macro regimes annotated:**
- **May 2013** — US Fed Taper Tantrum (bond sell-off)
- **Sep 2018** — IL&FS default (credit crisis in India)
- **Mar 2020** — COVID-19 crash
- **Jan–Dec 2022** — Global rate hike cycle

In [None]:
import sqlite3
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
from pathlib import Path

# Notebook-wide plotting defaults.
sns.set_theme(style='whitegrid')
plt.rcParams['figure.dpi'] = 120

# SQLite database queried for NAV history (path is relative to the working directory).
DB_PATH = Path('../funds.db')

# Inclusive bounds of the analysis window, bound as parameters into the NAV query.
START_DATE = '2013-01-01'
END_DATE = '2025-12-31'

# One scheme per asset class: `scheme_code` selects rows in the `nav` table,
# `label` is the human-readable name used on chart axes.
ASSETS = {
    'equity':    {'scheme_code': 120716, 'label': 'Passive Equity'},
    'gold':      {'scheme_code': 111954, 'label': 'Gold ETF'},
    'gilt':      {'scheme_code': 119116, 'label': 'Govt Bonds'},
    'corp_bond': {'scheme_code': 118987, 'label': 'Corp Bonds'},
    'short_dur': {'scheme_code': 118780, 'label': 'Short Duration'},
    'liquid':    {'scheme_code': 119568, 'label': 'Liquid'},
}

# sqlite3.connect() creates an empty database when the file is missing, which
# would only surface later as a confusing "no such table" error, so fail early.
if not DB_PATH.exists():
    raise FileNotFoundError(f'NAV database not found: {DB_PATH.resolve()}')

conn = sqlite3.connect(DB_PATH)
nav_frames = {}
try:
    for asset_id, meta in ASSETS.items():
        df = pd.read_sql_query(
            'SELECT date, nav FROM nav WHERE scheme_code = ? AND date >= ? AND date <= ? ORDER BY date',
            conn, params=(meta['scheme_code'], START_DATE, END_DATE),
            parse_dates=['date'], index_col='date',
        )
        # Name each series after its asset id so it becomes the column name below.
        nav_frames[asset_id] = df['nav'].rename(asset_id)
finally:
    # Release the connection even when one of the queries raises.
    conn.close()

# Align all schemes on the union of their dates; forward-fill carries the last
# known NAV over dates on which a scheme has no row.
nav_df = pd.DataFrame(nav_frames).ffill()
# Month-end NAV per asset, keeping only months in which every asset has a value.
monthly_nav = nav_df.resample('ME').last().dropna()
# Simple month-over-month returns; the first month has no predecessor and is dropped.
monthly_ret = monthly_nav.pct_change().dropna()
print(f'Monthly return matrix: {monthly_ret.shape}')

In [None]:
# ── Full-period static correlation heatmap ───────────────────────────────
# Pairwise Pearson correlation (the DataFrame.corr default) of monthly returns.
corr = monthly_ret.corr()
labels = [ASSETS[c]['label'] for c in corr.columns]

# True cells are hidden by seaborn: mask the strictly-upper triangle so only the
# lower triangle and the diagonal are drawn (the matrix is symmetric).
mask = np.triu(np.ones_like(corr, dtype=bool), k=1)

# Report the window actually covered by the return matrix instead of a
# hard-coded range: the first month is lost to pct_change() and the sample is
# trimmed to the months in which every asset has data.
if monthly_ret.empty:
    period_text = 'no data'
else:
    first_month = monthly_ret.index[0].strftime('%b %Y')
    last_month = monthly_ret.index[-1].strftime('%b %Y')
    period_text = f'{first_month} – {last_month}'

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    corr,
    mask=mask,
    annot=True, fmt='.2f',
    # Diverging palette centred on zero with the full [-1, 1] correlation range.
    cmap='RdBu_r', center=0, vmin=-1, vmax=1,
    xticklabels=labels, yticklabels=labels,
    linewidths=0.5, linecolor='white',
    square=True, ax=ax,
)
ax.set_title(
    f'Full-Period Pearson Correlation of Monthly Returns\n({period_text})',
    fontsize=12, fontweight='bold',
)
fig.tight_layout()
plt.show()

# Headline pairs printed to three decimals for quick reference.
print('\nKey observations:')
print(f"  Gold vs Equity:    {corr.loc['gold','equity']:.3f}")
print(f"  Gilt vs Corp Bond: {corr.loc['gilt','corp_bond']:.3f}")
print(f"  Equity vs Liquid:  {corr.loc['equity','liquid']:.3f}")
print(f"  Corp Bond vs Short Dur: {corr.loc['corp_bond','short_dur']:.3f}")

In [None]:
# ── Rolling 12-month correlation ─────────────────────────────────────────
# (first asset id, second asset id, panel title) for each pair to chart.
PAIRS = [
    ('equity', 'gold',      'Equity vs Gold'),
    ('equity', 'gilt',      'Equity vs Govt Bonds'),
    ('equity', 'corp_bond', 'Equity vs Corp Bonds'),
    ('gilt',   'corp_bond', 'Govt Bonds vs Corp Bonds'),
    ('gold',   'gilt',      'Gold vs Govt Bonds'),
]

# (start month, end month, legend label, shading colour) for each macro regime.
REGIMES = [
    ('2013-05', '2013-08', 'Taper Tantrum',      '#FEF3C7'),
    ('2018-09', '2018-12', 'IL&FS Default',      '#FEE2E2'),
    ('2020-02', '2020-05', 'COVID-19 Crash',     '#DBEAFE'),
    ('2022-01', '2022-12', 'Rate Hike Cycle',    '#F3E8FF'),
]

# Line colours, one per panel (reused cyclically if there are more pairs).
COLORS = ['#3B82F6','#F59E0B','#6366F1','#10B981','#EC4899']

# 'YYYY-MM' parses to the first day of the month, so push each end bound to the
# month end; otherwise the regime's final month is left unshaded even though
# the monthly observations sit on month-end dates.
regime_spans = [
    (pd.to_datetime(start), pd.to_datetime(end) + pd.offsets.MonthEnd(0), regime_color)
    for start, end, _, regime_color in REGIMES
]

# squeeze=False always returns a 2-D array of axes, so the code below also
# works when PAIRS holds a single entry.
fig, axes = plt.subplots(
    len(PAIRS), 1, figsize=(13, 4 * len(PAIRS)), sharex=True, squeeze=False,
)
axes = axes[:, 0]

for i, (ax, (a, b, title)) in enumerate(zip(axes, PAIRS)):
    color = COLORS[i % len(COLORS)]
    # Trailing 12-observation correlation; the incomplete leading windows are NaN and dropped.
    rolling_corr = monthly_ret[a].rolling(12).corr(monthly_ret[b]).dropna()

    # Shade regimes behind the data (zorder=0).
    for span_start, span_end, regime_color in regime_spans:
        ax.axvspan(span_start, span_end, color=regime_color, alpha=0.6, zorder=0)

    ax.plot(rolling_corr.index, rolling_corr.values, color=color, linewidth=1.8)
    # Dashed grey line marks zero correlation; dotted line marks this pair's average.
    ax.axhline(0, color='gray', linestyle='--', linewidth=0.8)
    ax.axhline(rolling_corr.mean(), color=color, linestyle=':', linewidth=1, alpha=0.6)
    ax.set_ylim(-1, 1)
    ax.set_ylabel('Correlation')
    ax.set_title(f'Rolling 12M Correlation: {title}', fontsize=10, fontweight='bold')
    ax.yaxis.set_major_locator(plt.MultipleLocator(0.5))

# Regime legend (drawn once, on the top panel).
patches = [mpatches.Patch(color=c, label=l, alpha=0.6) for _, _, l, c in REGIMES]
axes[0].legend(handles=patches, loc='upper right', fontsize=8)

axes[-1].set_xlabel('Date')
fig.suptitle('Rolling 12-Month Pairwise Correlations (Monthly Returns)', fontsize=13, fontweight='bold', y=1.005)
fig.tight_layout()
plt.show()

In [None]:
# ── Regime sub-period correlation tables ─────────────────────────────────
# Row label -> (first month, last month) of each sub-period.
periods = {
    'Full 2013–2025':    ('2013-01', '2025-12'),
    'Pre-COVID 2013–19': ('2013-01', '2019-12'),
    'COVID 2020':        ('2020-01', '2020-12'),
    'Rate Hikes 2022':   ('2022-01', '2022-12'),
    'Post-COVID 2021–25':('2021-01', '2025-12'),
}

# Column headers such as "Equity / Corp bond". Both legs are formatted the same
# way so an asset id containing an underscore reads cleanly in either position.
pair_labels = [
    ' / '.join(asset_id.replace('_', ' ').capitalize() for asset_id in (a, b))
    for a, b, _ in PAIRS
]

results = {}
for period_label, (s, e) in periods.items():
    # Label-based slice of the month-indexed returns between the two 'YYYY-MM' bounds.
    subset = monthly_ret.loc[s:e]
    # Pearson correlation of each pair within this sub-period only.
    results[period_label] = {
        label: subset[a].corr(subset[b])
        for (a, b, _), label in zip(PAIRS, pair_labels)
    }

# One row per period, one column per asset pair.
df_regimes = pd.DataFrame.from_dict(results, orient='index').round(3)
print('\nCorrelation by regime period:')
df_regimes