In [4]:
import pandas as pd
import numpy as np

data = pd.read_csv("Bonds.csv")
data["Maturity Date"] = pd.to_datetime(data["Maturity Date"])
data["Trade Date"] = pd.to_datetime(data["Trade Date"])

data

Unnamed: 0,ISIN,Coupon,Issue Date,Maturity Date,Trade Date,Close Price
0,CA135087E679,1.50%,7/21/2015,2026-06-01,2026-01-05,99.680
1,CA135087E679,1.50%,7/21/2015,2026-06-01,2026-01-06,99.690
2,CA135087E679,1.50%,7/21/2015,2026-06-01,2026-01-07,99.690
3,CA135087E679,1.50%,7/21/2015,2026-06-01,2026-01-08,99.700
4,CA135087E679,1.50%,7/21/2015,2026-06-01,2026-01-09,99.700
...,...,...,...,...,...,...
415,CA135087XG49,5.75%,10/15/2001,2033-06-01,2026-01-12,116.340
416,CA135087XG49,5.75%,10/15/2001,2033-06-01,2026-01-13,116.290
417,CA135087XG49,5.75%,10/15/2001,2033-06-01,2026-01-14,116.533
418,CA135087XG49,5.75%,10/15/2001,2033-06-01,2026-01-15,116.950


In [5]:
selected_bonds = data[
    data["Maturity Date"].isin(
        pd.to_datetime(
            [
                "2026-09-01",
                "2027-03-01",
                "2027-09-01",
                "2028-03-01",
                "2028-09-01",
                "2029-03-01",
                "2029-09-01",
                "2030-03-01",
                "2030-09-01",
                "2031-03-01",
            ]
        )
    )
].copy()

selected_bonds["Maturity Period"] = (
    selected_bonds["Maturity Date"].rank(method="dense").astype(int)
)

selected_bonds["Coupon"] = (
    selected_bonds["Coupon"].str.rstrip("%").astype(float).astype(float)
) / 2

selected_bonds

Unnamed: 0,ISIN,Coupon,Issue Date,Maturity Date,Trade Date,Close Price,Maturity Period
60,CA135087L930,0.500,4/16/2021,2026-09-01,2026-01-05,99.150,1
61,CA135087L930,0.500,4/16/2021,2026-09-01,2026-01-06,99.150,1
62,CA135087L930,0.500,4/16/2021,2026-09-01,2026-01-07,99.170,1
63,CA135087L930,0.500,4/16/2021,2026-09-01,2026-01-08,99.160,1
64,CA135087L930,0.500,4/16/2021,2026-09-01,2026-01-09,99.190,1
...,...,...,...,...,...,...,...
345,CA135087T388,1.375,4/10/2025,2030-09-01,2026-01-12,99.210,9
346,CA135087T388,1.375,4/10/2025,2030-09-01,2026-01-13,99.185,9
347,CA135087T388,1.375,4/10/2025,2030-09-01,2026-01-14,99.355,9
348,CA135087T388,1.375,4/10/2025,2030-09-01,2026-01-15,99.365,9


# Compute dirty price

In [7]:
def compute_dirty_price(df):
    """
    Compute the dirty price of bonds in a dataframe.

    Dirty Price = Clean Price + Accrued Interest

    Parameters:
    -----------
    df : pd.DataFrame
        DataFrame containing bond data with columns:
        - ISIN: Bond identifier
        - Coupon: Semi-annual coupon rate (as percentage)
        - Issue Date: Bond issue date
        - Maturity Date: Bond maturity date
        - Trade Date: Trade date
        - Close Price: Clean price of the bond
        - Maturity Period: Period identifier

    Returns:
    --------
    pd.DataFrame : Cleaned dataframe with added 'Dirty Price' column
    """
    # Create a copy to avoid modifying the original
    df_clean = df.copy()

    # Ensure date columns are datetime
    df_clean["Issue Date"] = pd.to_datetime(df_clean["Issue Date"])
    df_clean["Maturity Date"] = pd.to_datetime(df_clean["Maturity Date"])
    df_clean["Trade Date"] = pd.to_datetime(df_clean["Trade Date"])

    # Calculate dirty price for each bond
    dirty_prices = []

    for idx, row in df_clean.iterrows():
        trade_date = row["Trade Date"]
        issue_date = row["Issue Date"]
        maturity_date = row["Maturity Date"]
        clean_price = row["Close Price"]
        coupon_rate = row["Coupon"]  # Already as semi-annual percentage

        # Find the last coupon date before trade date
        # Coupons are paid semi-annually (every 6 months)
        last_coupon_date = issue_date
        current_coupon_date = issue_date

        while current_coupon_date < trade_date:
            last_coupon_date = current_coupon_date
            current_coupon_date = current_coupon_date + pd.DateOffset(months=6)

        # Calculate days since last coupon payment
        days_since_last_coupon = (trade_date - last_coupon_date).days

        # Calculate total days in the coupon period
        next_coupon_date = last_coupon_date + pd.DateOffset(months=6)
        days_in_period = (next_coupon_date - last_coupon_date).days

        # Calculate accrued interest
        accrued_interest = coupon_rate * (days_since_last_coupon / days_in_period)

        # Dirty Price = Clean Price + Accrued Interest
        dirty_price = clean_price + accrued_interest
        dirty_prices.append(dirty_price)

    # Add dirty price column
    df_clean["Dirty Price"] = dirty_prices

    return df_clean

In [8]:
cleaned_bonds = compute_dirty_price(selected_bonds)
cleaned_bonds.to_csv("Selected_Bonds.csv", index=False)

In [9]:
import math
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, date
from dateutil.relativedelta import relativedelta

selected_bonds = pd.read_csv("Selected_Bonds.csv")


def build_lists(selected_bonds):
    """
    Build lists of bond in a specified format for processing.
    Return a dictionary of list of tuples, where each key is a trade date and
    each list is a list of bonds represented as tuples of
    (price, coupon_rate, maturity_period).
    """
    bond_dict = {}
    for trade_date in selected_bonds["Trade Date"].unique():
        bonds_on_date = selected_bonds[selected_bonds["Trade Date"] == trade_date]
        bond_list = []
        for _, row in bonds_on_date.iterrows():
            bond_tuple = (
                row["Dirty Price"],
                row["Coupon"],
                row["Maturity Period"],
            )
            bond_list.append(bond_tuple)
        bond_dict[trade_date] = bond_list
    return bond_dict


bond_dict = build_lists(selected_bonds)
bond_dict

{'2026-01-05': [(99.37252747252748, 0.5, 1),
  (98.8815934065934, 0.625, 2),
  (99.53071978021978, 1.375, 10),
  (100.62262430939226, 1.375, 3),
  (102.46076923076924, 1.75, 4),
  (102.01857142857143, 1.625, 5),
  (104.55307692307692, 2.0, 6),
  (103.07576923076924, 1.75, 7),
  (100.20316483516484, 1.375, 8),
  (99.82228021978024, 1.375, 9)],
 '2026-01-06': [(99.37527472527474, 0.5, 1),
  (98.91502747252748, 0.625, 2),
  (99.44827472527471, 1.375, 10),
  (100.71022099447514, 1.375, 3),
  (102.5203846153846, 1.75, 4),
  (102.0975, 1.625, 5),
  (104.63406593406592, 2.0, 6),
  (103.1953846153846, 1.75, 7),
  (100.14071978021978, 1.375, 8),
  (99.74983516483516, 1.375, 9)],
 '2026-01-07': [(99.39802197802196, 0.5, 1),
  (98.94846153846152, 0.625, 2),
  (99.62082967032968, 1.375, 10),
  (100.697817679558, 1.375, 3),
  (102.53, 1.75, 4),
  (102.09642857142858, 1.625, 5),
  (104.65505494505494, 2.0, 6),
  (103.245, 1.75, 7),
  (100.28827472527472, 1.375, 8),
  (99.91739010989012, 1.375, 9)],


In [14]:
def calculate_ytm(bonds, face=100, annualize=True, periods_per_year=2):
    """
    Calculate yield to maturity for a list of bonds.
    Each bond is represented as a tuple of (price, coupon_rate, maturity_period).

    Parameters:
    -----------
    bonds : list of tuples
        Each tuple is (dirty_price, semi-annual_coupon, maturity_period)
    face : float
        Face value of the bond (default 100)
    annualize : bool
        If True, returns annualized YTM using Bond Equivalent Yield (BEY = per-period rate × periods_per_year)
        If False, returns per-period YTM (default True for annual rates)
    periods_per_year : int
        Number of compounding periods per year (default 2 for semi-annual)

    Returns:
    --------
    numpy array of YTMs (annual rates if annualize=True, per-period rates if annualize=False)

    Note:
    -----
    The calculation uses semi-annual compounding internally, then converts to annual
    rate at the end by multiplying by 2 (Bond Equivalent Yield convention).
    """

    def price_from_ytm(y, c, T):
        """Calculate the price of a bond given its yield to maturity (ytm), coupon rate, and maturity.
        It use the formula of present value of future cash flows.
        PV = sum_{t=1..T} c/(1+y)^t + face/(1+y)^T

        Note: y, c, and T should all be in the same period (e.g., semi-annual)
        """
        if y <= -0.999999999999:
            return np.inf
        df = 1.0 / (1.0 + y)
        pv_coupons = c * (df * (1.0 - df**T) / (1.0 - df)) if c != 0 else 0.0
        pv_face = face * (df**T)
        return pv_coupons + pv_face

    ytms = np.zeros(len(bonds), dtype=float)

    for i, (price, coupon_rate, maturity) in enumerate(
        sorted(bonds, key=lambda x: x[2])
    ):
        P = float(price)
        c = float(coupon_rate)
        T = int(maturity)

        # Handle degenerate cases
        if T <= 0:
            ytms[i] = np.nan
            continue
        if P <= 0:
            ytms[i] = np.nan
            continue

        # Define root function
        def f(y):
            return price_from_ytm(y, c, T) - P

        # Bracket the root for bisection
        lo, hi = -0.95, 1.0  # start with -95% to 100%
        f_lo, f_hi = f(lo), f(hi)

        # Expand hi if needed
        if f_lo * f_hi > 0:
            for new_hi in [2.0, 5.0, 10.0]:
                f_hi = f(new_hi)
                if f_lo * f_hi <= 0:
                    hi = new_hi
                    break

        # If still not bracketed, return NaN (or you can raise)
        if f_lo * f_hi > 0:
            ytms[i] = np.nan
            continue

        # Bisection solve
        for _ in range(200):
            mid = 0.5 * (lo + hi)
            f_mid = f(mid)

            if abs(f_mid) < 1e-12:
                ytms[i] = mid
                break

            if f_lo * f_mid <= 0:
                hi = mid
                f_hi = f_mid
            else:
                lo = mid
                f_lo = f_mid
        else:
            ytms[i] = 0.5 * (lo + hi)

    # Annualize if requested (using Bond Equivalent Yield convention)
    if annualize:
        ytms = ytms * periods_per_year

    return ytms


# Calculate annualized YTM (using semi-annual compounding, then multiplying by 2)
calculate_ytm(bond_dict["2026-01-05"])

array([0.02269184, 0.02388481, 0.02325228, 0.0223505 , 0.02413112,
       0.02417458, 0.02575366, 0.0269608 , 0.02792301, 0.02851372])

In [15]:
ytm_dict = {}
for date_key, bonds in bond_dict.items():
    ytm_dict[date_key] = calculate_ytm(bonds)

ytm_dict

{'2026-01-05': array([0.02269184, 0.02388481, 0.02325228, 0.0223505 , 0.02413112,
        0.02417458, 0.02575366, 0.0269608 , 0.02792301, 0.02851372]),
 '2026-01-06': array([0.02263592, 0.02354166, 0.02265751, 0.02204885, 0.02380801,
        0.02390063, 0.02540068, 0.02712639, 0.0280957 , 0.02869238]),
 '2026-01-07': array([0.02217304, 0.02319869, 0.02274169, 0.02200022, 0.0238124 ,
        0.02382968, 0.02525441, 0.02673529, 0.02769652, 0.02831864]),
 '2026-01-08': array([0.0223206 , 0.02306093, 0.02248658, 0.02185047, 0.02365315,
        0.02369117, 0.02531449, 0.02688747, 0.02785711, 0.02844298]),
 '2026-01-09': array([0.0216545 , 0.02302572, 0.02250288, 0.02185241, 0.02365753,
        0.02368782, 0.02537459, 0.02665027, 0.02763677, 0.02823188]),
 '2026-01-12': array([0.02169026, 0.02292011, 0.02221269, 0.02160554, 0.02342536,
        0.02347517, 0.02517167, 0.02672794, 0.02768992, 0.02838405]),
 '2026-01-13': array([0.02143108, 0.0227824 , 0.02229679, 0.02170855, 0.0235115 ,
      

In [16]:
import numpy as np
import plotly.graph_objects as go

# If you already have ytms computed per date, make sure each ytm list
# corresponds to bonds sorted by maturity for that same date.

fig = go.Figure()

# Sort dates so lines appear in order
for date_key in sorted(bond_dict.keys()):
    bonds = bond_dict[date_key]

    # Sort bonds by maturity to get consistent x ordering
    bonds_sorted = sorted(bonds, key=lambda x: x[2])
    maturities = [b[2] / 2.0 for b in bonds_sorted]

    ytms = ytm_dict[date_key]

    fig.add_trace(
        go.Scatter(
            x=maturities,
            y=np.array(ytms, dtype=float) * 100.0,  # remove *100 if ytms already in %
            mode="lines+markers",
            name=date_key,
        )
    )

fig.update_layout(
    title="YTM Curves (Superimposed by Trade Date)",
    xaxis_title="Maturity (Years)",
    yaxis_title="YTM (%)",
)
fig.show()

In [18]:
def bootstrap_spot_rate_curve(bonds, face=100.0, annualize=True, periods_per_year=2):
    """
    Bootstraps spot rates from bonds using the bootstrap method.

    Parameters:
    -----------
    bonds : list of tuples
        Each tuple is (dirty_price, semi-annual_coupon, maturity_period)
        maturity_periods = 1,2,...  (e.g., half-year periods)
    face : float
        Face value of the bond (default 100)
    annualize : bool
        If True, returns annualized spot rates (per-period rate × periods_per_year)
        If False, returns per-period spot rates (default True for annual rates)
    periods_per_year : int
        Number of compounding periods per year (default 2 for semi-annual)

    Returns:
    --------
    numpy array of spot rates (annual rates if annualize=True, per-period rates if annualize=False)

    Note:
    -----
    The calculation uses semi-annual compounding internally, then converts to annual
    rate at the end by multiplying by 2 (Bond Equivalent Yield convention).
    """
    bonds_sorted = sorted(bonds, key=lambda x: x[2])
    n = len(bonds_sorted)
    spot_rates = np.zeros(n, dtype=float)

    for i, (price, coupon_per_period, maturity) in enumerate(bonds_sorted):
        P = float(price)
        c = float(coupon_per_period)
        T = int(maturity)

        # Cashflows at periods 1..T
        cash_flows = np.array([c] * (T - 1) + [face + c], dtype=float)

        # PV of known (earlier) cashflows using already-bootstrapped spot rates
        pv_known = 0.0
        for t in range(1, T):  # 1..T-1
            s_t = spot_rates[t - 1]  # spot for period t (per-period rate)
            pv_known += cash_flows[t - 1] / ((1.0 + s_t) ** t)

        # Remaining PV allocated to the last cashflow at period T
        pv_last = P - pv_known

        if pv_last <= 0:
            print(
                f"Warning: Implied PV for last cashflow is non-positive at maturity {T}. "
                f"Using previous spot rate as fallback."
            )
            spot_rates[T - 1] = spot_rates[T - 2] if T - 2 >= 0 else np.nan
            continue

        # Solve discount factor at T: pv_last = CF_T / (1+s_T)^T
        cf_T = cash_flows[-1]
        spot_rate_T = (cf_T / pv_last) ** (1.0 / T) - 1.0

        spot_rates[T - 1] = spot_rate_T

    # Annualize if requested (using Bond Equivalent Yield convention)
    if annualize:
        spot_rates = spot_rates * periods_per_year

    return spot_rates


bootstrap_spot_rate_curve(bond_dict["2026-01-05"])

array([0.02269184, 0.02388856, 0.02324906, 0.02232376, 0.02416969,
       0.02421603, 0.02586929, 0.02710045, 0.02810253, 0.02871391])

In [19]:
fig = go.Figure()  # reset every run

for date_key in sorted(bond_dict.keys()):
    bonds_sorted = sorted(bond_dict[date_key], key=lambda x: x[2])

    spot_rates = bootstrap_spot_rate_curve(bonds_sorted)
    maturities = [b[2] for b in bonds_sorted]  # or b[2]/2.0 for half-year periods

    fig.add_trace(
        go.Scatter(
            x=maturities,
            y=np.array(spot_rates, dtype=float) * 100.0,
            mode="lines+markers",
            name=str(date_key).strip(),
        )
    )

fig.update_layout(
    title="Bootstrapped Spot Rate Curves (Superimposed by Trade Date)",
    xaxis_title="Maturity (Years)",
    yaxis_title="Spot Rate (%)",
)
fig.show()

### Calculate Forward Rates

In [None]:
def forward_rates_1y_start_from_spot(
    spot_rates,
    periods_per_year=2,
    start_years=1.0,
    horizons_years=(1, 2, 3, 4),
    spot_rates_annualized=True,
    annualize_output=True,
):
    """
    Calculate forward rates starting at a specified time, given spot rates.

    Parameters:
    -----------
    spot_rates : array
        Array of spot rates for periods k=1..N
    periods_per_year : int
        Number of compounding periods per year (default 2 for semi-annual)
    start_years : float
        Starting time in years (default 1.0)
    horizons_years : tuple
        Forward horizons in years (default (1, 2, 3, 4))
    spot_rates_annualized : bool
        If True, input spot_rates are annual rates that need to be converted to per-period
        If False, input spot_rates are already per-period rates (default True)
    annualize_output : bool
        If True, returns annualized forward rates (per-period rate × periods_per_year)
        If False, returns per-period forward rates (default True)

    Returns:
    --------
    dict : { "1y-1y": f, "1y-2y": f, ... } where f is the forward rate

    Note:
    -----
    The calculation uses semi-annual compounding internally, then converts to annual
    rate at the end by multiplying by 2 (Bond Equivalent Yield convention).
    """
    # Convert to per-period rates if input is annualized
    if spot_rates_annualized:
        spot_rates_per_period = spot_rates / periods_per_year
    else:
        spot_rates_per_period = spot_rates

    # Convert spot rates -> discount factors D_k
    # D_k = 1 / (1+s_k)^k
    D = np.array(
        [
            1.0 / ((1.0 + spot_rates_per_period[k - 1]) ** k)
            for k in range(1, len(spot_rates_per_period) + 1)
        ],
        dtype=float,
    )

    a = int(
        round(start_years * periods_per_year)
    )  # start index in periods (e.g., 1y -> 2 periods)
    forwards = {}

    for h in horizons_years:
        b = int(round((start_years + h) * periods_per_year))  # end in periods
        if b > len(D):
            forwards[f"1y-{h}y"] = np.nan
            continue

        # Forward discount factor over (a,b): DF_fwd = D_b / D_a
        DF_fwd = D[b - 1] / D[a - 1]

        # Per-period forward rate over n = (b-a) periods: (1+f)^n = 1/DF_fwd
        n = b - a
        f = (DF_fwd ** (-1.0 / n)) - 1.0

        # Annualize if requested
        if annualize_output:
            f = f * periods_per_year

        forwards[f"1y-{h}y"] = f

    return forwards

In [18]:
# ---- Plot forward curves superimposed by date ----
labels_order = ["1y-1y", "1y-2y", "1y-3y", "1y-4y"]
x_years = [1, 2, 3, 4]  # horizon length on x-axis (years)

fig = go.Figure()

for date_key in sorted(bond_dict.keys()):
    bonds_sorted = sorted(bond_dict[date_key], key=lambda x: x[2])

    # per-period spot rates from your fixed bootstrap
    spot_rates = bootstrap_spot_rate_curve(bonds_sorted)

    fwd = forward_rates_1y_start_from_spot(
        spot_rates, periods_per_year=2, start_years=1.0, horizons_years=(1, 2, 3, 4)
    )
    y = [fwd[k] * 100.0 for k in labels_order]  # percent

    fig.add_trace(
        go.Scatter(x=x_years, y=y, mode="lines+markers", name=str(date_key).strip())
    )

fig.update_layout(
    title="Forward Rates Starting in 1 Year (Superimposed by Trade Date)",
    xaxis_title="Forward Horizon (Years)  [1y-1y, 1y-2y, 1y-3y, 1y-4y]",
    yaxis_title="Forward Rate (%)",
)
fig.show()

#### YTM COV Matrix

In [21]:
def log_return_matrix(level_matrix):
    """
    level_matrix: shape (T, K) with strictly positive levels
    returns: shape (T-1, K) with log returns log(r_{t+1}/r_t)
    """
    X = np.asarray(level_matrix, dtype=float)
    if np.any(X <= 0):
        raise ValueError("All rates must be > 0 to take log-returns.")
    return np.log(X[1:, :] / X[:-1, :])


# ---- Build yield (YTM) matrix across dates ----
dates = sorted(bond_dict.keys())

# Use maturities 1..5 periods (or change this selection to match your Xi=1..5 definition)
# If your "maturity" is in half-year periods, 1..5 corresponds to 0.5y..2.5y.
maturity_targets = [1, 2, 3, 4, 5]

yield_levels = []
for d in dates:
    bonds_sorted = sorted(bond_dict[d], key=lambda x: x[2])

    ytms = np.array(calculate_ytm(bonds_sorted), dtype=float)  # decimals
    mats = np.array([b[2] for b in bonds_sorted], dtype=int)

    row = []
    for m in maturity_targets:
        idx = np.where(mats == m)[0]
        if len(idx) != 1:
            row.append(np.nan)
        else:
            row.append(ytms[idx[0]])
    yield_levels.append(row)

yield_levels = np.array(yield_levels, dtype=float)

# Drop dates with missing yields for these maturities
valid_rows = ~np.any(np.isnan(yield_levels), axis=1)
yield_levels = yield_levels[valid_rows, :]
yield_dates = [d for d, ok in zip(dates, valid_rows) if ok]

# Log-return time series Xi,j
yield_logrets = log_return_matrix(yield_levels)  # shape (T-1, 5)
cov_yields = np.cov(yield_logrets, rowvar=False, ddof=1)

cov_yields_df = pd.DataFrame(
    cov_yields,
    index=[f"X{i}" for i in maturity_targets],
    columns=[f"X{i}" for i in maturity_targets],
)

print("Covariance matrix of yield log-returns (Xi, i=1..5):")
print(cov_yields_df)

Covariance matrix of yield log-returns (Xi, i=1..5):
          X1        X2        X3        X4        X5
X1  0.000127 -0.000005 -0.000070 -0.000040 -0.000041
X2 -0.000005  0.000103  0.000032  0.000021  0.000030
X3 -0.000070  0.000032  0.000095  0.000050  0.000053
X4 -0.000040  0.000021  0.000050  0.000039  0.000035
X5 -0.000041  0.000030  0.000053  0.000035  0.000035


#### Forward COV Matrix

In [20]:
# ---- Build forward-rate matrix across dates ----
fwd_labels = ["1y-1y", "1y-2y", "1y-3y", "1y-4y"]

fwd_levels = []
fwd_dates = []
for d in dates:
    bonds_sorted = sorted(bond_dict[d], key=lambda x: x[2])
    spot = bootstrap_spot_rate_curve(bonds_sorted)

    fwd = forward_rates_1y_start_from_spot(
        spot_rates=spot,
        periods_per_year=2,
        start_years=1.0,
        horizons_years=(1, 2, 3, 4),
    )

    row = [fwd[k] for k in fwd_labels]
    if any((v is None) or np.isnan(v) for v in row):
        continue
    fwd_levels.append(row)
    fwd_dates.append(d)

fwd_levels = np.array(fwd_levels, dtype=float)

fwd_logrets = log_return_matrix(fwd_levels)  # shape (T-1, 4)
cov_forwards = np.cov(fwd_logrets, rowvar=False, ddof=1)

cov_forwards_df = pd.DataFrame(
    cov_forwards,
    index=fwd_labels,
    columns=fwd_labels,
)

print("\nCovariance matrix of forward-rate log-returns (1y-1y ... 1y-4y):")
print(cov_forwards_df)


Covariance matrix of forward-rate log-returns (1y-1y ... 1y-4y):
          1y-1y     1y-2y     1y-3y     1y-4y
1y-1y  0.000212  0.000110  0.000012 -0.000008
1y-2y  0.000110  0.000059  0.000005 -0.000006
1y-3y  0.000012  0.000005  0.000152  0.000129
1y-4y -0.000008 -0.000006  0.000129  0.000115


### Eigen Value and Vector

In [22]:
def eig_sorted(cov, labels):
    """
    Returns eigenvalues/eigenvectors sorted by descending eigenvalue.
    Eigenvectors are columns of V.
    """
    cov = np.asarray(cov, dtype=float)
    w, V = np.linalg.eigh(cov)  # for symmetric matrices (covariance)
    idx = np.argsort(w)[::-1]  # descending
    w = w[idx]
    V = V[:, idx]

    eigvals = pd.Series(w, index=[f"PC{i+1}" for i in range(len(w))], name="eigenvalue")
    eigvecs = pd.DataFrame(V, index=labels, columns=[f"PC{i+1}" for i in range(len(w))])
    return eigvals, eigvecs


def first_pc_one_sentence(eigvals, eigvecs, label_name="rates"):
    """
    Produces a single-sentence interpretation of the first principal component.
    """
    lam1 = float(eigvals.iloc[0])
    v1 = eigvecs.iloc[:, 0].values
    # Normalize sign so the largest-magnitude loading is positive (purely for readability)
    if v1[np.argmax(np.abs(v1))] < 0:
        v1 = -v1
    loadings = ", ".join([f"{lab}:{val:+.3f}" for lab, val in zip(eigvecs.index, v1)])
    return (
        f"The largest eigenvalue (PC1) is {lam1:.6g}, meaning the dominant source of variance in "
        f"daily log-returns of {label_name} is a single common factor whose loadings across maturities are "
        f"[{loadings}]."
    )


# ---- Use with your covariance matrices from Part 5 ----
# cov_yields_df, cov_forwards_df are from your previous step

# Labels for yields and forwards
yield_labels = list(cov_yields_df.index)  # e.g., ["X1","X2","X3","X4","X5"]
fwd_labels = list(cov_forwards_df.index)  # e.g., ["1y-1y","1y-2y","1y-3y","1y-4y"]

# Eigen-decompositions
eigvals_y, eigvecs_y = eig_sorted(cov_yields_df.values, yield_labels)
eigvals_f, eigvecs_f = eig_sorted(cov_forwards_df.values, fwd_labels)

print("=== YIELDS: eigenvalues ===")
print(eigvals_y)
print("\n=== YIELDS: eigenvectors (columns = PCs) ===")
print(eigvecs_y)

print("\n=== FORWARDS: eigenvalues ===")
print(eigvals_f)
print("\n=== FORWARDS: eigenvectors (columns = PCs) ===")
print(eigvecs_f)

# One-sentence interpretations for PC1
print("\nPC1 interpretation (yields):")
print(first_pc_one_sentence(eigvals_y, eigvecs_y, label_name="yields"))

print("\nPC1 interpretation (forwards):")
print(first_pc_one_sentence(eigvals_f, eigvecs_f, label_name="forward rates"))

=== YIELDS: eigenvalues ===
PC1    2.447447e-04
PC2    1.047855e-04
PC3    3.808890e-05
PC4    1.018811e-05
PC5    6.223221e-07
Name: eigenvalue, dtype: float64

=== YIELDS: eigenvectors (columns = PCs) ===
         PC1       PC2       PC3       PC4       PC5
X1 -0.591919  0.492382 -0.637242 -0.019544  0.027061
X2  0.274447  0.863596  0.409465 -0.041033 -0.097653
X3  0.575575  0.010572 -0.516684 -0.600313 -0.203135
X4  0.339855  0.034987 -0.330743  0.760665 -0.441897
X5  0.357103  0.102109 -0.223400  0.242793  0.867868

=== FORWARDS: eigenvalues ===
PC1    0.000271
PC2    0.000262
PC3    0.000002
PC4    0.000001
Name: eigenvalue, dtype: float64

=== FORWARDS: eigenvectors (columns = PCs) ===
            PC1       PC2       PC3       PC4
1y-1y -0.834444  0.293866  0.241071  0.399038
1y-2y -0.433571  0.161123 -0.345938 -0.816322
1y-3y -0.289732 -0.698363 -0.596686  0.268906
1y-4y -0.178256 -0.632433  0.682772 -0.319494

PC1 interpretation (yields):
The largest eigenvalue (PC1) is 0.00024