In [None]:
# Optional setup: uncomment to install scikit-learn into the active kernel's environment.
# %pip install scikit-learn

In [None]:
# Standard library
import os
from datetime import datetime

# Data Sources
from fredapi import Fred

# Data Wrangling
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Visuals
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns

# Modeling 


# Apply matplotlib's 'fivethirtyeight' style sheet to the figures drawn below.
plt.style.use('fivethirtyeight')
# To list the other available style sheets, evaluate: plt.style.available

# Pull Data from FRED API 

In [None]:
# Read the FRED API key from the environment so the secret never lives in the
# notebook or its version history. Export FRED_API_KEY before starting Jupyter.
# NOTE(review): the key that used to be hardcoded in this cell should be treated
# as leaked and revoked/regenerated in the FRED account settings.
fred_api_key = os.environ.get('FRED_API_KEY')
if not fred_api_key:
    raise RuntimeError(
        "FRED_API_KEY is not set; export your FRED API key before running this notebook."
    )
fred = Fred(api_key=fred_api_key)

In [None]:
# 10-year Treasury (10YT) series, FRED id 'DGS10', from 1970-01-01 onward.
# frequency='m' requests monthly observations from FRED.
t10yr = fred.get_series('DGS10', observation_start="1970-01-01", frequency='m')

In [None]:
# GDP, FRED id 'GDP', from 1970-01-01 onward.
# No frequency is requested, so the series stays quarterly here; it is
# interpolated to monthly in the interpolation section further down.
gdp = fred.get_series('GDP', observation_start="1970-01-01")

In [None]:
# CPI, FRED id 'CPALTT01USM657N', monthly, from 1970-01-01 onward.
# NOTE(review): confirm whether this id is an index level or a period-over-period
# growth rate before interpreting its magnitude.
cpi = fred.get_series('CPALTT01USM657N', observation_start="1970-01-01")

In [None]:
# Unemployment, FRED id 'UNRATE', monthly, from 1970-01-01 onward.
unemp = fred.get_series('UNRATE', observation_start="1970-01-01")

In [None]:
# Delinquency rate on credit card loans, FRED id 'DRCCLACBS', from 1970-01-01 onward.
# Quarterly as fetched; interpolated to monthly further down.
default = fred.get_series('DRCCLACBS', observation_start="1970-01-01")

In [None]:
# Retail sales, FRED id 'MRTSSM44000USS', monthly, from 1970-01-01 onward.
retail = fred.get_series('MRTSSM44000USS', observation_start="1970-01-01")

In [None]:
# Producer price index (PPI), FRED id 'PPIACO', monthly, from 1970-01-01 onward.
ppi = fred.get_series('PPIACO', observation_start="1970-01-01")

In [None]:
# Personal savings, FRED id 'PSAVERT', monthly, from 1970-01-01 onward.
# NOTE(review): presumably a rate (percent) rather than a dollar amount — confirm.
saving = fred.get_series('PSAVERT', observation_start="1970-01-01")

In [None]:
# Median sales price of houses sold, FRED id 'MSPUS', from 1970-01-01 onward.
# Quarterly as fetched; interpolated to monthly further down.
housing = fred.get_series('MSPUS', observation_start="1970-01-01")

In [None]:
# Price of oil, FRED id 'DCOILWTICO', from 1970-01-01 onward.
# frequency='m' requests monthly observations from FRED.
oil = fred.get_series('DCOILWTICO', observation_start="1970-01-01", frequency='m')

In [None]:
# Federal funds rate, FRED id 'DFF', from 1970-01-01 onward.
# frequency='m' requests monthly observations from FRED.
ffr = fred.get_series('DFF', observation_start="1970-01-01", frequency='m')

In [None]:
# US dollar / euro exchange rate, FRED id 'DEXUSEU', requested at monthly frequency.
# NOTE(review): confirm the quote direction (USD per EUR vs. EUR per USD) before
# comparing it with the other exchange-rate series.
euexch = fred.get_series('DEXUSEU', observation_start="1970-01-01", frequency='m')

In [None]:
# US dollar / Chinese yuan exchange rate, FRED id 'DEXCHUS', requested at monthly frequency.
# NOTE(review): confirm the quote direction (CNY per USD vs. USD per CNY).
chexch = fred.get_series('DEXCHUS', observation_start="1970-01-01", frequency='m')

In [None]:
# US dollar / Russian ruble exchange rate, FRED id 'CCUSMA02RUM618N', requested at monthly frequency.
# NOTE(review): confirm the quote direction (RUB per USD vs. USD per RUB).
ruexch = fred.get_series('CCUSMA02RUM618N', observation_start="1970-01-01", frequency='m')

# Interpolate Quarterly Series to Monthly Frequency

In [None]:
# Plot GDP as fetched (quarterly), for comparison with the interpolated series in the next cell.
plt.plot(gdp)

In [None]:
# Upsample GDP to month-start ('MS') frequency and fill the newly created months
# with a second-order spline (order=2), then plot the monthly series.
# Note: this rebinds `gdp`, so the quarterly series is not available afterwards.
gdp = gdp.resample('MS').interpolate(method='spline', order=2)
plt.plot(gdp)

In [None]:
# Plot the credit card delinquency rate as fetched (quarterly).
plt.plot(default)

In [None]:
# Upsample the credit card delinquency rate to month-start ('MS') frequency and
# fill the newly created months by straight-line interpolation between the
# quarterly values, then plot the monthly series.
# `order` is only meaningful for the spline/polynomial methods, so it is not
# passed for method='linear'.
# Note: this rebinds `default`, so the quarterly series is not available afterwards.
default = default.resample('MS').interpolate(method='linear')
plt.plot(default)

In [None]:
# Plot the median house sales price as fetched (quarterly).
plt.plot(housing)

In [None]:
# Upsample the house price series to month-start ('MS') frequency and fill the
# newly created months with a second-order spline (order=2), then plot it.
# Note: this rebinds `housing`, so the quarterly series is not available afterwards.
housing = housing.resample('MS').interpolate(method='spline', order=2)
plt.plot(housing)

# Combine into Single Data Frame

In [None]:
# Assemble every series into one monthly frame whose rows follow the 10YT dates.
sample_end = datetime(2024, 3, 1)  # last month kept in the analysis sample

# The target series defines the row index and the first column.
data = t10yr.to_frame(name='t10yr')

# Convert the index to datetime so it can be filtered and joined on dates.
data.index = pd.to_datetime(data.index, format='%Y-%m-%d')

# Filter: keep observations up to and including the sample end.
data = data.loc[data.index <= sample_end, :]

# Features, in the column order the rest of the notebook relies on.
# Keying the Series by column name is robust to a Series carrying its own name;
# pd.DataFrame(series, columns=['x']) silently produces an all-NaN column when
# the Series is named something other than 'x'.
feature_series = pd.DataFrame({
    'gdp': gdp,
    'cpi': cpi,
    'unemp': unemp,
    'default': default,
    'retail': retail,
    'ppi': ppi,
    'saving': saving,
    'housing': housing,
    'oil': oil,
    'ffr': ffr,
    'euexch': euexch,
    'chexch': chexch,
    'ruexch': ruexch,
})

# Left join on the date index: the rows stay those of the 10YT series, and any
# month a feature does not cover is left as NaN.
data = data.join(feature_series)


# Election-year lookup: one row per year, flagged 1 when the year is divisible by four.
# NOTE(review): the lookup covers 2000-2024 only, while the series are fetched from
# 1970; the disabled merge below uses pandas' default inner join, so confirm the
# intended range before re-enabling it.
years = list(range(2000, 2025))
elections = [int(yr % 4 == 0) for yr in years]
election_yrs = pd.DataFrame({'yr': years, 'election_ind': elections})

# Join to data (currently disabled):
#index = data.index
#data['yr'] = index.year
#data = data.merge(election_yrs, on='yr')
#data = data.drop('yr', axis=1)
#data.index = index

# Preview the first rows of the combined frame.
data.head()

- **TODO: Add a description of each variable.**

In [None]:
# Summary statistics for every column of the combined frame.
data.describe()

In [None]:
# Number of missing values per column (months a series does not cover show up here).
data.isnull().sum()

In [None]:
# Column dtypes of the combined frame.
data.dtypes

# EDA

In [None]:
# Line plot of every series together with its rolling mean, one panel per column.
col_names = data.columns
num_vars = len(col_names)
rolling_window = 6  # number of monthly observations in the rolling mean

fig, axes = plt.subplots(num_vars, 1, figsize=(20, 50), squeeze=False)
for ax, name in zip(axes[:, 0], col_names):
    series = data[name]
    ax.plot(series, label=name)
    # Pass the axes explicitly instead of relying on pyplot's "current axes".
    series.rolling(rolling_window).mean().plot(ax=ax, label='Rolling Mean')
    ax.set_title(name)
    ax.set_xlabel('Date')
    ax.set_ylabel('Value')
    ax.legend()
fig.tight_layout(pad=3.0)
plt.show()

In [None]:
# Distribution of the monthly 10YT values within each calendar year.
box_fig, ax = plt.subplots(figsize=(20, 8))
sns.boxplot(x=data.index.year, y=data['t10yr'], ax=ax)
ax.set_title('Box Plot by Year 10YT', fontsize=30)

In [None]:
# Lag plots: scatter the 10YT series against each of its first `n_lags` lags to
# eyeball autocorrelation.
t10yr = data['t10yr']
n_lags = 8

# Column 't+1' is the unshifted series (label kept from the original notebook);
# column 't-k' is the series shifted down by k monthly observations.
df = pd.concat([t10yr.shift(lag) for lag in range(n_lags + 1)], axis=1)
cols = ['t+1'] + ['t-' + str(lag) for lag in range(1, n_lags + 1)]
df.columns = cols

# Draw on the figure created here. Selecting "figure 1" by number only hits the
# right figure when no earlier figure is still open.
n_grid_cols = 4
n_grid_rows = -(-n_lags // n_grid_cols)  # ceiling division
lag_fig = plt.figure(figsize=(10, 10))
for lag in range(1, n_lags + 1):
    ax = lag_fig.add_subplot(n_grid_rows, n_grid_cols, lag)
    ax.set_title('t+1 vs t-' + str(lag))
    ax.scatter(x=df['t+1'].values, y=df['t-' + str(lag)].values)
lag_fig.tight_layout(pad=2)
plt.show()

In [None]:
# Rescale every column of `data` to the [0, 1] range so series with very different
# magnitudes can be compared on a common axis.
# The scaler is fit on the whole sample here.
# NOTE(review): if these scaled values are later fed to a model, fit the scaler on
# the training window only.
scaler = MinMaxScaler(feature_range=(0,1))
data_scaled = scaler.fit_transform(data)

In [None]:
# Overlay each scaled series (red) on the scaled 10YT (grey), one panel per column.
col_names = data.columns

# Recompute the scaled array from the fitted scaler instead of reading
# `data_scaled`: later cells rebind that name to a DataFrame and drop columns,
# which would break the positional slicing below if this cell were re-run.
scaled_features = scaler.transform(data)
n_panels = scaled_features.shape[1]
target_scaled = scaled_features[:, 0]  # first column of `data` is t10yr

fig, axes = plt.subplots(n_panels, 1, figsize=(20, 50), squeeze=False)
for i, ax in enumerate(axes[:, 0]):
    ax.plot(data.index, scaled_features[:, i], label=col_names[i], c='red')
    ax.plot(data.index, target_scaled, label='10YT', c='grey')
    ax.set_title('10YT and ' + col_names[i])
    ax.set_xlabel('Date')
    ax.set_ylabel('Value')
    ax.legend()
fig.tight_layout(pad=3.0)
plt.show()

In [None]:
# Histograms of every scaled series.
# Build the frame from the fitted scaler so the cell gives the same result no
# matter how often it is run. Wrapping `data_scaled` itself would, after the
# column-drop cell below has run, bring the dropped columns back as all-NaN.
data_scaled = pd.DataFrame(scaler.transform(data), columns=data.columns)
data_scaled.hist(bins=30, figsize=(15, 10))

In [None]:
# Spearman correlation heatmap of the scaled series (lower triangle only).
spearman_corr = data_scaled.corr(method='spearman')  # computed once, reused below

# Boolean upper-triangle mask (diagonal included). Building it by position rather
# than from the correlation values means an exactly-zero coefficient in the upper
# triangle cannot slip through the mask.
matrix = np.triu(np.ones(spearman_corr.shape, dtype=bool))

plt.figure(figsize=(15,8))
heat_map = sns.heatmap(spearman_corr, annot=True, annot_kws={"size":14}, cmap='YlGnBu', mask=matrix)
heat_map.set_yticklabels(heat_map.get_yticklabels(), rotation=60)
heat_map.set_xticklabels(heat_map.get_xticklabels(), rotation=60)
heat_map.tick_params(labelsize=12)
heat_map.set_title('Heatmap Spearman Correlation')

In [None]:
# Drop gdp, retail and ppi, then redraw the Spearman heatmap on the remaining columns.
# NOTE(review): presumably these are removed because of their high correlation with
# other series in the heatmap above — confirm and record the rationale.
# errors='ignore' keeps the cell re-runnable: on a second run the columns are
# already gone and a plain drop would raise KeyError.
data_scaled = data_scaled.drop(columns=['gdp', 'retail', 'ppi'], errors='ignore')

spearman_corr = data_scaled.corr(method='spearman')  # computed once, reused below

# Boolean upper-triangle mask (diagonal included), built by position so that an
# exactly-zero coefficient cannot slip through the mask.
matrix = np.triu(np.ones(spearman_corr.shape, dtype=bool))

plt.figure(figsize=(15,8))
heat_map = sns.heatmap(spearman_corr, annot=True, annot_kws={"size":14}, cmap='YlGnBu', mask=matrix)
heat_map.set_yticklabels(heat_map.get_yticklabels(), rotation=60)
heat_map.set_xticklabels(heat_map.get_xticklabels(), rotation=60)
heat_map.tick_params(labelsize=12)
heat_map.set_title('Heatmap Spearman Correlation')

In [None]:
# Parallel-coordinates view of the scaled series, with each line coloured by its
# scaled 10YT value.
fig = px.parallel_coordinates(
    data_scaled,
    color='t10yr',
    color_continuous_scale=px.colors.diverging.Tealrose,
    title='Parallel Co-ordinates Plot',
)
fig.show()