In [None]:
# Task 3 – Part 2: Real government yields (FRED) + PCA + screeplot (fixed)
!pip -q install pandas_datareader

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas_datareader import data as pdr
from datetime import date, timedelta
from numpy.linalg import eigh

# e) Collect daily constant-maturity Treasury yields (FRED) for ~6–7 months
end = date.today()
start = end - timedelta(days=210)   # 210 calendar days (30 weeks) back, to get enough business days for PCA

# Column label (maturity) -> FRED series code
fred_series = {
    "3M":  "DGS3MO",
    "2Y":  "DGS2",
    "5Y":  "DGS5",
    "10Y": "DGS10",
    "30Y": "DGS30"
}

# Robust download: fetch each series on its own, rename it to its maturity label,
# then align everything on the date index with a single concat.
frames = []
for label, fred_code in fred_series.items():
    s = pdr.DataReader(fred_code, "fred", start, end)      # returns a Series/DataFrame with a single column
    if isinstance(s, pd.DataFrame):
        # If FRED returns a 1-col DataFrame, squeeze to Series
        s = s.iloc[:, 0]
    s = s.rename(label)
    frames.append(s)

yields = pd.concat(frames, axis=1).sort_index()
# Keep only dates on which every maturity has a value. Missing days (e.g. holidays)
# are deliberately NOT forward-filled: a filled row repeats the previous day's yields,
# so the later .diff() would contain fabricated all-zero "changes" that distort the
# correlation matrix the PCA is built on.
yields = yields.dropna(how="any")

# Fail early with a clear message: at least 3 complete rows are needed so that the
# differenced data has 2+ observations and a correlation matrix can be estimated.
if len(yields) < 3:
    raise ValueError(
        f"Only {len(yields)} complete daily observation(s) between {start} and {end}; "
        "need at least 3 to compute yield changes and their correlations."
    )

# f) Daily yield changes: first differences, discarding rows that contain NaN
#    (the first row always does, since it has no previous day to difference against)
dy = yields.diff().dropna()

# g) PCA on correlation matrix
#    np.corrcoef treats each row as one variable, hence the transpose.
C_real = np.corrcoef(dy.T)
#    eigh reports eigenvalues in ascending order; reorder so the largest comes first
#    and permute the eigenvector columns with the same index.
eigvals_r, eigvecs_r = eigh(C_real)
idx = np.argsort(eigvals_r)[::-1]
eigvals_r, eigvecs_r = eigvals_r[idx], eigvecs_r[:, idx]

# h) Variance explained: each eigenvalue as a share of the eigenvalue total
var_exp_r = eigvals_r / np.sum(eigvals_r)
print("Variance explained (real yields):", var_exp_r.round(3))

# i) Screeplot: explained-variance share per principal component (components numbered from 1)
component_numbers = range(1, len(var_exp_r) + 1)
fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(component_numbers, var_exp_r, marker="o")
ax.set_xticks(component_numbers)   # one tick per component
ax.set_xlabel("Principal Component")
ax.set_ylabel("Variance Explained")
ax.set_title("Screeplot – Real Gov’t Yield Changes (FRED)")
fig.tight_layout()
plt.show()

# j) Brief comparison notes: a header preceded by a blank line, then one bullet per line
comparison_notes = (
    "- Simulated uncorrelated data: PCs explain roughly equal variance.",
    "- Real Treasury yields: PC1 usually dominates (level), followed by slope/curvature PCs.",
)
print("\nComparison notes:")
for note in comparison_notes:
    print(note)
