<a href="https://colab.research.google.com/github/bbcx-investments/notebooks/blob/main/fixed_income/principal_components.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from pandas_datareader import DataReader as pdr

# Maturities (in years) of the constant-maturity U.S. Treasury series.
# Both the FRED series IDs and the display labels are derived from this one
# list so the two can never get out of step.
maturities = ['1', '2', '3', '5', '10', '20', '30']
files = ['DGS' + x for x in maturities]
columns = [x + '-Year' for x in maturities]

# reading market yield on U.S. treasury securities data from FRED
# Download every series first and join them side by side with a single
# concat, rather than re-concatenating a growing frame (seeded with None)
# on every pass through a loop.
daily = pd.concat([pdr(f, 'fred', start=1920) for f in files], axis=1)
daily.index.name = 'date'
# scale by 100 so that the changes computed below are in basis points
daily = daily * 100

# computing the monthly changes in yields (basis points) for each maturity securities
df = (
    daily.reset_index()
    # label every daily observation with its calendar month as a 'YYYY-MM' string
    .assign(month=lambda t: t['date'].dt.to_period('M').astype(str))
    .groupby('month')
    # first available (non-missing) quote of each month, taken per series
    .first()
    .drop(columns=['date'])
    # month-over-month change
    .diff()
    # keep only the months for which the 30-year change is defined
    .dropna(subset=['DGS30'])
)

# concat preserved the order of `files`, so the labels line up positionally
df.columns = columns

# the monthly changes in yields (basis points)
df

Unnamed: 0_level_0,1-Year,2-Year,3-Year,5-Year,10-Year,20-Year,30-Year
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1977-03,7.0,-4.0,-6.0,8.0,8.0,13.0,14.0
1977-04,-20.0,-14.0,-12.0,-7.0,-6.0,-5.0,-6.0
1977-05,28.0,13.0,6.0,2.0,2.0,2.0,2.0
1977-06,15.0,9.0,0.0,-7.0,-7.0,-6.0,-5.0
1977-07,-13.0,-12.0,-13.0,-16.0,-4.0,-10.0,-17.0
...,...,...,...,...,...,...,...
2022-02,38.0,40.0,35.0,26.0,18.0,14.0,11.0
2022-03,13.0,13.0,8.0,-7.0,-9.0,0.0,-1.0
2022-04,81.0,113.0,114.0,99.0,67.0,41.0,33.0
2022-05,38.0,29.0,32.0,46.0,60.0,66.0,63.0


In [None]:
# example year range (both the start year and the stop year are included)
start = '1992'
stop = '2021'
# The index of df holds 'YYYY-MM' strings, so a label slice df.loc[start:stop]
# compares them lexicographically: every '2021-MM' sorts after '2021' and the
# whole stop year would be silently left out. Selecting on the 4-character
# year prefix keeps the stop year in the sample.
row_year = df.index.str[:4]
df_1 = df.loc[(row_year >= start) & (row_year <= stop)]
# covariance matrix of the changes in yields
# NOTE(review): DataFrame.cov excludes missing values pairwise, and only the
# 30-Year gaps were dropped upstream. If another maturity still has gaps in
# this window, the matrix is built from unequal samples and may not be
# positive semi-definite -- confirm, or use df_1.dropna() here.
df1_cov = df_1.cov()
# eigendecomposition of the covariance matrix
# A covariance matrix is symmetric, so use eigh: it returns real eigenvalues
# and orthonormal eigenvectors, whereas eig makes no promise about ordering.
# eigh returns eigenvalues in ascending order, so reorder to descending to
# make "Factor 1" the direction that explains the most variance.
# The sign of each eigenvector remains arbitrary.
eigval, eigvec = np.linalg.eigh(df1_cov.to_numpy())
order = np.argsort(eigval)[::-1]
D = eigval[order]        # eigenvalues, largest first
C = eigvec[:, order]     # matching eigenvectors, one per column

# --------
# The loadings on the j-th factor are the elements in the j-th eigenvector, i.e., the j-th column of C
# --------
col_name_C = ['Factor ' + str(x + 1) for x in range(len(columns))]
row_name_C = columns
df_C = pd.DataFrame(C, index=row_name_C, columns=col_name_C)
df_C

Unnamed: 0,Factor 1,Factor 2,Factor 3,Factor 4,Factor 5,Factor 6,Factor 7
1-Year,0.342274,-0.548618,-0.621212,-0.414567,-0.040773,-0.100567,-0.110996
2-Year,0.424227,-0.331084,0.067092,0.504026,0.444021,0.499713,0.070734
3-Year,0.444899,-0.185949,0.293032,0.166461,-0.645002,-0.198104,0.445688
5-Year,0.432166,0.023086,0.394574,-0.008544,0.238187,-0.527237,-0.567649
10-Year,0.385865,0.325011,0.274403,-0.67043,0.068798,0.449076,0.119579
20-Year,0.29627,0.476899,-0.388081,0.293637,-0.423383,0.260057,-0.448418
30-Year,0.285498,0.470347,-0.377006,0.103006,0.380047,-0.391982,0.496423
