In [1]:
# Numerical and tabular data libraries used throughout the notebook.
import numpy as np
import pandas as pd
# NOTE(review): `plt` is not referenced in any cell visible here; the charts use cufflinks' `.iplot`.
import matplotlib.pyplot as plt
# cufflinks adds the `.iplot` plotting method used on DataFrames/Series below.
import cufflinks as cf
# Configure cufflinks with offline=True (presumably so charts render locally
# without a Plotly cloud account - confirm against the cufflinks docs).
cf.set_config_file(offline=True)
# Used to standardise each rate column before the PCA.
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the daily Treasury rates CSV and key the rows by the 'Sr no' column.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not run elsewhere unless the file lives at exactly this location.

df = pd.read_csv(
    "E:/Personal Projects/PCA on US Interest rates/daily-treasury-rates.csv"
).set_index('Sr no')
df.head()

Unnamed: 0_level_0,1 Mo,3 Mo,6 Mo,1 Yr,2 Yr,3 Yr,5 Yr,7 Yr,10 Yr,20 Yr,30 Yr
Sr no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,0.02,0.02,0.11,0.25,0.66,1.07,1.61,1.92,2.12,2.41,2.69
2,0.02,0.03,0.1,0.26,0.68,1.06,1.57,1.85,2.04,2.32,2.6
3,0.02,0.03,0.1,0.25,0.65,1.02,1.5,1.78,1.97,2.25,2.52
4,0.02,0.03,0.09,0.25,0.62,1.0,1.47,1.76,1.96,2.25,2.52
5,0.01,0.03,0.08,0.23,0.62,1.0,1.5,1.81,2.03,2.33,2.59


In [13]:
# Interactive chart of the raw rates: one line per column of `df`,
# drawn against the 'Sr no' index.

df.iplot(
    title='US Daily Treasury Par Yield Curve Rates',
    yTitle='Interest rate in %',
)

In [4]:
# Standardise every rate column to zero mean and unit variance so that
# no single maturity dominates the principal components.
scaler = StandardScaler()

# Keep the original 'Sr no' index as well as the column names: rebuilding the
# frame from the bare array would otherwise reset the index to 0..n-1 and the
# scaled data would no longer line up row-for-row with `df`.
df1 = pd.DataFrame(scaler.fit_transform(df), index=df.index, columns=df.columns)
df1.iplot(title='US Daily Treasury Par Yield Curve Rates - Scaled',yTitle='Scaled Interest Rate in %')


In [5]:
# Covariance matrix of the standardised rates (columns are the variables,
# hence rowvar=False). Because the inputs are standardised this is effectively
# the correlation matrix; the diagonal comes out as n/(n-1) rather than exactly
# 1 because np.cov uses the sample (ddof=1) estimator by default.

cov_matrix_df1 = np.cov(df1, rowvar=False)

# Label rows/columns with the tenor names for display only;
# `cov_matrix_df1` itself stays a plain ndarray for the eigen-decomposition.
pd.DataFrame(cov_matrix_df1, index=df1.columns, columns=df1.columns)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,1.004,0.928867,0.881066,0.840346,0.763968,0.694401,0.436444,0.331586,0.227609,0.104818,0.197971
1,0.928867,1.004,0.954428,0.914515,0.835313,0.758359,0.48496,0.380489,0.276599,0.148481,0.24697
2,0.881066,0.954428,1.004,0.96592,0.886939,0.783499,0.503516,0.416915,0.339557,0.221138,0.333658
3,0.840346,0.914515,0.96592,1.004,0.931275,0.838977,0.596678,0.530306,0.468648,0.354778,0.464621
4,0.763968,0.835313,0.886939,0.931275,1.004,0.960866,0.769743,0.691503,0.61084,0.488431,0.573197
5,0.694401,0.758359,0.783499,0.838977,0.960866,1.004,0.893201,0.804536,0.69365,0.557006,0.58959
6,0.436444,0.48496,0.503516,0.596678,0.769743,0.893201,1.004,0.975216,0.896466,0.79448,0.758103
7,0.331586,0.380489,0.416915,0.530306,0.691503,0.804536,0.975216,1.004,0.974068,0.911951,0.875845
8,0.227609,0.276599,0.339557,0.468648,0.61084,0.69365,0.896466,0.974068,1.004,0.976446,0.958976
9,0.104818,0.148481,0.221138,0.354778,0.488431,0.557006,0.79448,0.911951,0.976446,1.004,0.978695


In [6]:
# Eigen-decomposition of the covariance matrix.
# A covariance matrix is symmetric, so use `eigh` rather than the general
# `eig`: it returns real eigenvalues and orthonormal eigenvectors, whereas
# `eig` makes no such promise and can leak tiny complex parts from round-off.
# Eigenvectors are only defined up to sign, so individual columns may come out
# flipped relative to another solver.

eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix_df1)

# Order components from largest to smallest eigenvalue: the larger the
# eigenvalue, the more of the total variance that component explains.
# The same permutation is applied to the eigenvector columns to keep them paired.

sorted_index = eigenvalues.argsort()[::-1]
eigenvalues = eigenvalues[sorted_index]
eigenvectors = eigenvectors[:,sorted_index]

df_eigenvalues = pd.DataFrame({"Eigenvalues": eigenvalues})
df_eigenvectors = pd.DataFrame(eigenvectors)

# Share of total variance per component, expressed in percent.
df_eigenvalues['Explained Proportion'] = 100 * df_eigenvalues['Eigenvalues'] / df_eigenvalues['Eigenvalues'].sum()

df_eigenvalues['Explained Proportion'].iplot(kind='bar', title='Percentage of overall variation explained by the eigenvalues')

In [7]:
# Plot the loadings of the four leading eigenvectors (columns 0-3 of the
# sorted eigenvector matrix), one line per eigenvector.

first_4_eigenvectors = pd.DataFrame(
    eigenvectors[:, :4],
    columns=['EV1', 'EV2', 'EV3', 'EV4'],
)

first_4_eigenvectors.iplot(title="First four eigenvectors")


In [8]:
# Sanity check: (rows, columns) of the scaled data before projecting it.
df1.shape

(251, 11)

In [9]:
# Sanity check: the eigenvector matrix must have as many rows as df1 has columns.
df_eigenvectors.shape

(11, 11)

In [10]:
# Project the scaled observations onto the eigenvectors: column k of `PC`
# holds the scores along the k-th eigenvector (in sorted order).
PC = pd.DataFrame(df1.to_numpy() @ df_eigenvectors.to_numpy())

PC.head()



Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,0.619991,0.221795,1.325901,0.015269,0.020449,-0.15891,-0.099757,-0.227633,0.117944,0.095022,0.037881
1,1.156795,-0.508662,1.411687,0.219423,-0.08274,-0.024545,0.062506,-0.210163,0.012818,0.109428,0.000281
2,1.986935,-1.061889,1.219134,0.227019,-0.068604,-0.017798,0.05311,-0.166547,0.029552,0.06114,0.003237
3,2.270158,-1.089059,1.007142,0.136607,-0.028234,-0.034658,0.127043,-0.126374,0.108747,0.048519,0.002337
4,1.90765,-0.403042,0.955801,0.078183,0.118318,0.076928,0.087773,-0.173997,0.075091,-0.022586,-0.030929


In [11]:
# Keep the first score column as a one-column DataFrame and chart it.
PC1 = PC.iloc[:, [0]]
PC1.iplot(title="PC1")