<a href="https://colab.research.google.com/github/shatlykgurdov/3.1.2/blob/main/gwp1_tasks_2-4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ==========================================================
# MScFE 600 – Group Work Project 1
# Tasks 2–4 Combined Notebook
# ==========================================================


# ==========================================================
# Task 2 – Yield Curve Modeling
# ==========================================================
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.interpolate import CubicSpline
from scipy.optimize import curve_fit

# (a) Example maturities and yields
# Each pair below is (maturity in years, yield in percent); the pairs are
# transposed into the two parallel float arrays used by the rest of Task 2.
maturities, yields_pct = (
    np.array(column, dtype=float)
    for column in zip(
        (0.5, 4.5),
        (2, 4.7),
        (5, 4.9),
        (10, 5.1),
        (20, 5.3),
        (30, 5.4),
    )
)

# (b) Stable Nelson–Siegel model
def _ns_terms_stable(tau, lam):
    """Return the two Nelson–Siegel loading terms for maturities ``tau``.

    With ``x = tau / lam`` the terms are::

        term1 = (1 - exp(-x)) / x
        term2 = term1 - exp(-x)

    ``term1`` is 0/0 at ``x == 0``, so for ``|x| < 1e-6`` it is replaced by
    its Taylor expansion ``1 - x/2 + x**2/6``; elsewhere ``expm1`` is used to
    avoid cancellation in ``1 - exp(-x)``.

    Parameters
    ----------
    tau : scalar or array-like
        Maturities. Scalars, lists and arrays of any shape are accepted.
    lam : float
        Decay parameter; must be non-zero (not validated here).

    Returns
    -------
    (term1, term2)
        Two float results with the same shape as ``tau`` (0-d for a scalar).
    """
    # Coerce first so Python scalars and lists work, not only ndarrays.
    x = np.asarray(tau, dtype=float) / lam
    # Boolean-mask assignment needs at least one dimension; the original
    # shape is restored afterwards.
    flat = np.atleast_1d(x)
    small = np.abs(flat) < 1e-6
    regular = ~small

    term1 = np.empty_like(flat)
    term1[regular] = (-np.expm1(-flat[regular])) / flat[regular]
    term1[small] = 1 - flat[small]/2 + flat[small]**2/6
    term1 = term1.reshape(np.shape(x))

    term2 = term1 - np.exp(-x)
    return term1, term2

def nelson_siegel(tau, beta0, beta1, beta2, lam):
    """Evaluate the Nelson–Siegel curve at maturities ``tau``.

    Computes ``beta0 + beta1 * term1 + beta2 * term2`` where ``term1`` and
    ``term2`` are the loadings returned by ``_ns_terms_stable(tau, lam)``.
    The signature (independent variable first, then parameters) is the one
    ``scipy.optimize.curve_fit`` expects for a model function.

    Parameters
    ----------
    tau : scalar or array-like
        Maturities at which to evaluate the curve.
    beta0, beta1, beta2 : float
        Coefficients of the constant, ``term1`` and ``term2`` components.
    lam : float
        Decay parameter passed through to the loading terms.

    Returns
    -------
    Value(s) of the curve, shaped like ``tau``.
    """
    t1, t2 = _ns_terms_stable(tau, lam)
    return beta0 + beta1 * t1 + beta2 * t2

# Starting guess: level at the average observed yield, mild negative slope,
# mild positive curvature, decay of two years.
p0 = [yields_pct.mean(), -1.0, 1.0, 2.0]
# (lower, upper) box constraints in the order beta0, beta1, beta2, lam.
bounds = (
    [0.0, -10.0, -10.0, 0.05],
    [20.0, 10.0, 10.0, 50.0],
)

params, _ = curve_fit(
    nelson_siegel,
    maturities,
    yields_pct,
    p0=p0,
    bounds=bounds,
    maxfev=20000,
)
beta0, beta1, beta2, lam = params

# (c) Cubic Spline
# Interpolating spline through every observed point.
cs = CubicSpline(maturities, yields_pct, bc_type='not-a-knot')

# (d) Plot comparison
# Evaluate both models on a dense grid spanning the observed maturities.
tau_fit = np.linspace(np.min(maturities), np.max(maturities), 400)
ns_fit = nelson_siegel(tau_fit, beta0, beta1, beta2, lam)
spline_fit = cs(tau_fit)

plt.figure(figsize=(9, 6))
plt.scatter(maturities, yields_pct, label="Observed Yields", zorder=3)
plt.plot(tau_fit, ns_fit, label="Nelson–Siegel Fit", linewidth=2)
plt.plot(tau_fit, spline_fit, label="Cubic Spline Fit", linestyle="--", linewidth=2)
plt.xlabel("Maturity (Years)")
plt.ylabel("Yield (%)")
plt.title("Task 2: Yield Curve — Nelson–Siegel vs Cubic Spline")
plt.legend()
plt.tight_layout()
plt.show()

# Report the fitted parameters, one per line.
print(
    "Nelson–Siegel parameters:",
    f"  β0 (level)     = {beta0:.6f}",
    f"  β1 (slope)     = {beta1:.6f}",
    f"  β2 (curvature) = {beta2:.6f}",
    f"  λ  (decay)     = {lam:.6f}",
    sep="\n",
)


# ==========================================================
# Task 3 – PCA Analysis
# ==========================================================
from numpy.linalg import eigh

# Part 1: Simulated uncorrelated series
# 250 observations of five independent N(0, 0.01^2) series, seeded so the
# draw is reproducible.
np.random.seed(0)
X = np.random.normal(loc=0, scale=0.01, size=(250, 5))
cols = ["Y" + str(k) for k in range(1, 6)]
df_sim = pd.DataFrame(data=X, columns=cols)

# PCA via eigen-decomposition of the correlation matrix. eigh returns the
# eigenvalues in ascending order, so they are re-sorted largest-first.
C = np.corrcoef(df_sim.T)
eigvals, eigvecs = eigh(C)
idx = np.argsort(eigvals)[::-1]
eigvals, eigvecs = eigvals[idx], eigvecs[:, idx]
var_exp = eigvals / np.sum(eigvals)

print("\nTask 3a–d: Variance explained (simulated):", np.round(var_exp, 3))

# Scree plot: share of variance per component, numbered from 1.
plt.figure(figsize=(6, 4))
plt.plot(range(1, len(var_exp) + 1), var_exp, marker="o")
plt.xlabel("Principal Component")
plt.ylabel("Variance Explained")
plt.title("Task 3a–d: Screeplot – Simulated (Uncorrelated)")
plt.tight_layout()
plt.show()


# Part 2: Realistic yield data (FRED if available, otherwise simulated fallback)
from datetime import date, timedelta
try:
    import pandas_datareader.data as pdr
    # Request the last 210 calendar days of daily observations.
    end = date.today(); start = end - timedelta(days=210)
    # Column label -> FRED series code.
    fred_series = {"3M":"DGS3MO","2Y":"DGS2","5Y":"DGS5","10Y":"DGS10","30Y":"DGS30"}
    frames = []
    for label, code in fred_series.items():
        s = pdr.DataReader(code, "fred", start, end)
        # DataReader may hand back a one-column frame; reduce it to a Series.
        if isinstance(s, pd.DataFrame): s = s.iloc[:,0]
        frames.append(s.rename(label))
    # Forward-fill gaps, then drop rows that are still incomplete.
    yields = pd.concat(frames, axis=1).ffill().dropna()
    # At least three levels are needed to get two differences, the minimum
    # for a correlation matrix; otherwise use the fallback below.
    if len(yields) < 3:
        raise ValueError("FRED returned too few complete observations")
    source = "FRED"
except Exception as exc:
    # Deliberate best-effort: any failure (missing package, no network, bad
    # response) switches to simulated data, but the reason is reported
    # instead of being swallowed silently.
    print(f"FRED data unavailable ({type(exc).__name__}: {exc}); using simulated fallback.")
    np.random.seed(123)
    n = 126
    # Three independent random-walk factors built from Gaussian shocks.
    epsL = np.random.normal(0, 0.015, n); epsS = np.random.normal(0, 0.02, n); epsC = np.random.normal(0, 0.012, n)
    L, S, Cc = np.cumsum(epsL), np.cumsum(epsS), np.cumsum(epsC)
    # Per-tenor loadings on (L, S, Cc) and per-tenor base yield level.
    load = {"3M":(1,1,-0.2),"2Y":(1,0.4,0.4),"5Y":(1,0,1),"10Y":(1,-0.3,0.6),"30Y":(1,-0.6,-0.1)}
    base = {"3M":5.2,"2Y":4.9,"5Y":4.7,"10Y":4.6,"30Y":4.5}
    # Build each tenor column in one vectorised expression. The tenor labels
    # come from `load`, so the Task 2 `maturities` array is not overwritten.
    Y = pd.DataFrame(
        {tenor: base[tenor] + a*L + b*S + c*Cc for tenor, (a, b, c) in load.items()},
        index=np.arange(n),
        dtype=float,
    )
    yields = Y
    source = "Simulated fallback"

# PCA on daily yield changes via the correlation matrix. eigh returns the
# eigenvalues in ascending order, so they are re-sorted largest-first.
dy = yields.diff().dropna()
C_real = np.corrcoef(dy.T)
eigvals_r, eigvecs_r = eigh(C_real)
idx = eigvals_r.argsort()[::-1]
eigvals_r = eigvals_r[idx]; eigvecs_r = eigvecs_r[:, idx]
var_exp_r = eigvals_r / eigvals_r.sum()

print(f"Task 3e–j ({source}): Variance explained:", np.round(var_exp_r, 3))

plt.figure(figsize=(6,4))
plt.plot(range(1, len(var_exp_r)+1), var_exp_r, marker="o")
plt.xlabel("Principal Component"); plt.ylabel("Variance Explained")
plt.title(f"Task 3e–j: Screeplot – Gov’t Yield Changes ({source})")
plt.tight_layout(); plt.show()


# ==========================================================
# Task 4 – ETF Analysis
# ==========================================================
import yfinance as yf

tickers = ["SPY", "TLT", "GLD"]
START, END = "2022-01-01", "2023-01-01"

df = yf.download(tickers, start=START, end=END, auto_adjust=True, progress=False)

# A multi-ticker download has two-level columns (field, ticker); a flat
# frame is treated as a single-ticker result and relabelled with the ticker.
if isinstance(df.columns, pd.MultiIndex):
    prices = df['Close']
else:
    prices = df[['Close']] if 'Close' in df.columns else df.copy()
    if prices.shape[1] == 1: prices.columns = [tickers[0]]

# One ticker with no data would make dropna() below discard every row, so
# drop all-NaN columns and fail loudly if nothing usable is left, rather
# than printing tables of NaN.
prices = prices.dropna(axis=1, how="all")
if prices.empty:
    raise RuntimeError(f"No price data downloaded for {tickers} between {START} and {END}")

# fill_method=None: compute returns from the prices as given, without the
# deprecated implicit forward-fill; rows with gaps are removed by dropna().
returns = prices.pct_change(fill_method=None).dropna()

# Annualise daily statistics with 252 trading days per year.
mean_ann = returns.mean() * 252
vol_ann  = returns.std() * np.sqrt(252)
# Mean over volatility with no risk-free rate subtracted (i.e. rf = 0).
sharpe   = mean_ann / vol_ann

metrics = pd.DataFrame({
    "Mean Return (ann.)": mean_ann,
    "Volatility (ann.)": vol_ann,
    "Sharpe Ratio": sharpe
}).round(4)

# Compute the correlation matrix once and reuse it for printing and plotting.
corr = returns.corr()

print("\nTask 4 – Annualized Performance Metrics:\n", metrics)
print("\nTask 4 – Correlation Matrix:\n", corr.round(3))

plt.figure(figsize=(6,5))
# Pin the colour scale to [-1, 1] so the diverging colormap is centred on
# zero correlation instead of on the midpoint of whatever values occur.
plt.imshow(corr, cmap="coolwarm", interpolation="none", vmin=-1, vmax=1)
plt.xticks(range(len(corr)), corr.columns, rotation=45)
plt.yticks(range(len(corr)), corr.columns)
plt.colorbar(label="Correlation")
plt.title("Task 4: ETF Correlation Matrix (Daily Returns)")
plt.tight_layout(); plt.show()
