# Analysis of American Economy 2000-2020

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
import seaborn as sns
# Lift pandas' row and column display limits so frames are rendered without truncation.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

We create a directory for the Graphs. In order to do so we must import the library os.

In [None]:
import os
# Create the output folder for the figures. exist_ok=True makes the call a no-op when
# the folder is already there, so the cell can be re-run safely.
os.makedirs('Graphs', exist_ok=True)

First we import the indexes that we have obtained, so as to begin the analysis.

This analysis will exclude the crisis period from 2007-12 to 2009-06. This period will be shown as a grey area in all the graphs.

## Money Index

### Money Supply

In [None]:
# Load the M1 series, using the parsed DATE column as the index.
M1 = pd.read_csv(
    'M1.csv',
    index_col='DATE',
    parse_dates=['DATE'],
)
M1.head()

In [None]:
# Plot M1 from 2000 onwards, with the 2007-12 to 2009-06 crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(M1['2000':], 'b', label='M1')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (Week)', ylabel='Billions of $', title='M1 Money Stock')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences;
# the file is written before plt.show() so it comes from the still-open figure.
fig.savefig('Graphs/M1_Money_Stock.png', bbox_inches='tight')
plt.show()

In [None]:
# Load the M2 series, using the parsed DATE column as the index.
M2 = pd.read_csv(
    'M2.csv',
    index_col='DATE',
    parse_dates=['DATE'],
)
M2.head()

In [None]:
# Plot M2 from 2000 onwards, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(M2['2000':], 'b', label='M2')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (Week)', ylabel='Billions of $', title='M2 Money Stock')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences;
# the file is written before plt.show() so it comes from the still-open figure.
fig.savefig('Graphs/M2_Money_Stock.png', bbox_inches='tight')
plt.show()

In [None]:
# Load the MZM series, using the parsed DATE column as the index.
MZM = pd.read_csv(
    'MZM.csv',
    index_col='DATE',
    parse_dates=['DATE'],
)
MZM.head()

In [None]:
# Plot MZM from 2000 onwards, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
# The line is labelled 'MZM' (it was mislabelled 'M1').
ax.plot(MZM['2000':], 'b', label='MZM')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (Week)', ylabel='Billions of $', title='MZM Money Stock')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences;
# the file is written before plt.show() so it comes from the still-open figure.
fig.savefig('Graphs/MZM_Money_Stock.png', bbox_inches='tight')
plt.show()

Plot the money supply

In [None]:
# Overlay M1, M2 and MZM from 2000 onwards on one axis, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(M1['2000':], 'b', label='M1')
ax.plot(M2['2000':], 'g', label='M2')
ax.plot(MZM['2000':], 'r', label='MZM')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (Week)', ylabel='Billions of $', title='Money Stock')
ax.legend(loc='best')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences;
# the file is written before plt.show() so it comes from the still-open figure.
fig.savefig('Graphs/Money_Stock.png', bbox_inches='tight')
plt.show()

In [None]:
# Monthly mean of the M2 column from 2000 onwards.
M2_MON = M2['M2'].loc['2000':].resample(rule='M').mean()
M2_MON.head()

In [None]:
# Monthly mean of the MZM column from 2000 onwards.
MZM_MON = MZM['MZM'].loc['2000':].resample(rule='M').mean()
MZM_MON.head()

In [None]:
# Plot the monthly averages of M2 and MZM, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(M2_MON, 'g', label='M2')
ax.plot(MZM_MON, 'r', label='MZM')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
# Both series are monthly means, so the x axis is labelled in months (not weeks).
ax.set(xlabel='time (Month)', ylabel='Billions of $',
       title='M2 - MZM Money Stock Monthly Average')
ax.legend(loc='best')
# Forward slash keeps the path portable and free of invalid escape sequences.
fig.savefig('Graphs/M2_MZM_Money_Stock_Mon_Avg.png', bbox_inches='tight')

In [None]:
# Natural logarithm of the monthly M2 averages.
M2_log = M2_MON.pipe(np.log)
M2_log.head()

In [None]:
# Natural logarithm of the monthly MZM averages.
MZM_log = MZM_MON.pipe(np.log)
MZM_log.head()

In [None]:
# Plot the log of the monthly M2 and MZM averages, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(M2_log, 'g', label='M2')
ax.plot(MZM_log, 'r', label='MZM')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
# Both series are built from monthly means, so the x axis is labelled in months (not weeks).
ax.set(xlabel='time (Month)', ylabel='Log Billions of $',
       title='Log M2 - MZM Money Stock')
ax.legend(loc='best')
# Forward slash keeps the path portable and free of invalid escape sequences.
fig.savefig('Graphs/M2_MZM_Money_Stock_Norm.png', bbox_inches='tight')

### Money Velocities

In [None]:
# Load the M1 velocity series, using the parsed DATE column as the index.
M1V = pd.read_csv(
    'M1V.csv',
    index_col='DATE',
    parse_dates=['DATE'],
)
M1V.head()

In [None]:
# Plot the velocity of M1 from 2000 onwards, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(M1V['2000':], 'b', label='M1V')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (Quarters)', ylabel='Ratio', title='Velocity of M1 Money Stock')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences;
# the file is written before plt.show() so it comes from the still-open figure.
fig.savefig('Graphs/Velocity_of_M1_Money.png', bbox_inches='tight')
plt.show()

In [None]:
# Load the M2 velocity series, using the parsed DATE column as the index.
M2V = pd.read_csv(
    'M2V.csv',
    index_col='DATE',
    parse_dates=['DATE'],
)
M2V.head()

In [None]:
# Plot the velocity of M2 from 2000 onwards, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(M2V['2000':], 'b', label='M2V')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (Quarters)', ylabel='Ratio', title='Velocity of M2 Money Stock')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences;
# the file is written before plt.show() so it comes from the still-open figure.
fig.savefig('Graphs/Velocity_of_M2_Money.png', bbox_inches='tight')
plt.show()

In [None]:
# Load the MZM velocity series, using the parsed DATE column as the index.
MZMV = pd.read_csv(
    'MZMV.csv',
    index_col='DATE',
    parse_dates=['DATE'],
)
MZMV.head()

In [None]:
# Plot the velocity of MZM from 2000 onwards, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(MZMV['2000':], 'b', label='MZMV')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (Quarters)', ylabel='Ratio', title='Velocity of MZM Money Stock')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences;
# the file is written before plt.show() so it comes from the still-open figure.
fig.savefig('Graphs/Velocity_of_MZM_Money.png', bbox_inches='tight')
plt.show()

Plot all money velocities in one graph

In [None]:
# Overlay the three money velocities from 2000 onwards, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(M1V['2000':], 'b', label='M1V')
ax.plot(M2V['2000':], 'r', label='M2V')
ax.plot(MZMV['2000':], 'g', label='MZMV')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (Quarters)', ylabel='Ratio', title='Velocities of all Money Stock')
ax.legend(loc='best')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences;
# the file is written before plt.show() so it comes from the still-open figure.
fig.savefig('Graphs/Velocity_of_Money.png', bbox_inches='tight')
plt.show()

In [None]:
# Overlay the M2 and MZM velocities from 2000 onwards, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(M2V['2000':], 'r', label='M2V')
ax.plot(MZMV['2000':], 'g', label='MZMV')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (Quarters)', ylabel='Ratio',
       title='Velocities of M2 & MZM Money Stock')
ax.legend(loc='best')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences;
# the file is written before plt.show() so it comes from the still-open figure.
fig.savefig('Graphs/Velocity_of_M2_MZM_Money.png', bbox_inches='tight')
plt.show()

Create a single dataframe from the M2 and MZM money velocity dataframes with the merge function.

Merge is only for 2 dataframes

In [None]:
# Outer-join the M2 and MZM velocity frames on DATE; indicator=True adds a column
# recording which frame each row came from.
VMONST = M2V.merge(MZMV, how='outer', on='DATE', indicator=True)
VMONST.head()

Create a single dataframe with all the money velocity values with the concat function

In [None]:
# Put the three velocity series side by side as columns; M1V is taken from 1980-11-03 onwards.
VMONSTA = pd.concat(
    [M1V.loc['1980-11-03':], M2V['M2V'], MZMV['MZMV']],
    axis='columns',
)
VMONSTA.head()

In [None]:
# Two stacked panels: M2/MZM money stock on top, their velocities below,
# each with the crisis window shaded grey.
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15, 10))

# Top panel: money stock.
ax[0].plot(M2['2000':], 'g', label='M2')
ax[0].plot(MZM['2000':], 'r', label='MZM')
ax[0].axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax[0].set(xlabel='time (Week)', ylabel='Billions of $',
          title='M2 - MZM Money Stock')
ax[0].legend(loc='best')
ax[0].grid()

# Bottom panel: velocities (the repeated ['2000':] slice on M2V was redundant and is dropped).
ax[1].plot(M2V['2000':], 'g', label='M2V')
ax[1].plot(MZMV['2000':], 'r', label='MZMV')
ax[1].axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax[1].set(xlabel='time (Quarters)', ylabel='Ratio',
          title='Velocities of M2 & MZM Money Stock')
ax[1].legend(loc='best')
ax[1].grid()

# Forward slash keeps the path portable and free of invalid escape sequences.
fig.savefig('Graphs/M2_MZM_Money_Stock_V.png', bbox_inches='tight')

### Money Multiplier

NOTICE:
The money multiplier stops at the end of 2019

In [None]:
# Load the money multiplier series, using the parsed DATE column as the index.
MULT = pd.read_csv(
    'MULT.csv',
    index_col='DATE',
    parse_dates=['DATE'],
)
MULT.head()

In [None]:
# Show the last rows to see where the series ends.
MULT.tail()

In [None]:
# Plot the M1 money multiplier from 2000 onwards, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(MULT['2000':], 'b', label='MULT')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (14 days)', ylabel='Ratio', title='M1 Money Multiplier')
ax.grid()
# JPEG optimisation goes through pil_kwargs: savefig no longer accepts a bare
# optimize= keyword. The forward slash keeps the path portable, and the file is
# written before plt.show() so it comes from the still-open figure.
fig.savefig('Graphs/M1_Money_Multiplier.jpg', pil_kwargs={'optimize': True},
            bbox_inches='tight')
plt.show()

In order to find the mean value of every month we will use the resample function with frequency "M" (month) and the "mean" function.

In [None]:
# Monthly mean of the MULT column from 2000 onwards.
MULT_MON = MULT['MULT'].loc['2000':].resample(rule='M').mean()
MULT_MON.head()

In [None]:
# Plot the monthly average of the M1 money multiplier, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(MULT_MON, 'b', label='MULT')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (Month)', ylabel='Ratio',
       title='M1 Money Multiplier Monthly Average')
ax.grid()
# JPEG optimisation goes through pil_kwargs: savefig no longer accepts a bare
# optimize= keyword. The forward slash keeps the path portable, and the file is
# written before plt.show() so it comes from the still-open figure.
fig.savefig('Graphs/M1_Money_Multiplier_Mon_Avg.jpg', pil_kwargs={'optimize': True},
            bbox_inches='tight')
plt.show()

## Consumers

### Consumer Price Index

In [None]:
# Load the CPALTT01USQ657N series, using the parsed DATE column as the index.
CPALT = pd.read_csv(
    'CPALTT01USQ657N.csv',
    index_col='DATE',
    parse_dates=['DATE'],
)
CPALT.head()

In [None]:
# Plot the CPALT series from 2000 onwards, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
# The line is labelled after the series it shows (it was mislabelled 'MULT').
ax.plot(CPALT['2000':], 'b', label='CPALT')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
# Title restored to "Consumer Price Index" (it had been garbled to "CONSLOANS").
ax.set(xlabel='time (Quarters)', ylabel='Growth Rate Previous Period',
       title='Consumer Price Index: Total All Items for the United States')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences;
# the file is written before plt.show() so it comes from the still-open figure.
fig.savefig('Graphs/CONSLOANS_Price_Index.png', bbox_inches='tight')
plt.show()

### Consumer Price Index for All Urban Consumers

In [None]:
# Load the CPILFESL series, using the parsed DATE column as the index.
CPIL = pd.read_csv(
    'CPILFESL.csv',
    index_col='DATE',
    parse_dates=['DATE'],
)
CPIL.head()

In [None]:
# Monthly mean of the CPILFESL column from 2000 onwards, divided by 100.
CPIL_MON = CPIL['CPILFESL'].loc['2000':].resample(rule='M').mean().div(100)
CPIL_MON.head()

In [None]:
# Move every observation one period earlier and drop the row left empty at the end.
CPIL_MON = CPIL_MON.shift(-1).dropna(axis=0)
CPIL_MON.head()

In [None]:
# Plot the monthly CPIL series, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
# The line is labelled 'CPIL' (the label had been doubled to 'CPILCPIL').
ax.plot(CPIL_MON, 'b', label='CPIL')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
# Title restored to "Consumer ... Consumers" (it had been garbled to "CONSLOANS").
ax.set(xlabel='time (months)', ylabel='Index 1982:1984=100',
       title='Consumer Price Index for All Urban Consumers: All Items Less Food and Energy in U.S. City Average')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences;
# the file is written before plt.show() so it comes from the still-open figure.
fig.savefig('Graphs/CONSLOANS_Price_Index_All.png', bbox_inches='tight')
plt.show()

### Harmonized Index of Consumer Prices

In [None]:
# Load the CP0000USM086NEST series indexed by DATE and give its column the short name CPUSM.
CPUSM = pd.read_csv(
    'CP0000USM086NEST.csv',
    index_col='DATE',
    parse_dates=['DATE'],
).rename(columns={'CP0000USM086NEST': 'CPUSM'})
CPUSM.head()

In [None]:
# Monthly mean of the whole CPUSM frame.
CPUSM_MON = CPUSM.resample(rule='M').mean()
CPUSM_MON.head()

In [None]:
# Move every observation one period earlier and drop the row left empty at the end.
CPUSM_MON = CPUSM_MON.shift(-1).dropna(axis=0)
CPUSM_MON.head()

In [None]:
# Plot the monthly CPUSM series, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(CPUSM_MON, 'b', label='CPUSM')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
# Title restored to "Consumer Prices" (it had been garbled to "CONSLOANS Prices").
ax.set(xlabel='time (Month)', ylabel='Index 2015=100',
       title='Harmonized Index of Consumer Prices')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences.
fig.savefig('Graphs/CONSLOANS_Price.png', bbox_inches='tight')

### Consumer Opinion Surveys: Confidence Indicators: Composite Indicators: OECD Indicator for the United States

In [None]:
# Load the CSCICP03USM665S series indexed by DATE and give its column the short name CSCICP.
CSCICP = pd.read_csv(
    'CSCICP03USM665S.csv',
    index_col='DATE',
    parse_dates=['DATE'],
).rename(columns={'CSCICP03USM665S': 'CSCICP'})
CSCICP.head()

In [None]:
# Monthly mean of CSCICP from 2000 onwards, divided by 100.
CSCICP_MON = CSCICP.loc['2000':].resample(rule='M').mean().div(100)
CSCICP_MON.head()

In [None]:
# Move every observation one period earlier and drop the row left empty at the end.
CSCICP_MON = CSCICP_MON.shift(-1).dropna(axis=0)
CSCICP_MON.head()

In [None]:
# Plot the monthly CSCICP series, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(CSCICP_MON, 'b', label='CSCICP')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
# Title restored to "Consumer Opinion Surveys" (it had been garbled to "CONSLOANS").
ax.set(xlabel='time (Month)', ylabel='Normalized (Normal=100)',
       title='Consumer Opinion Surveys: Confidence Indicators: ')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences.
fig.savefig('Graphs/CONSLOANS_Opinion_Surveys.png', bbox_inches='tight')

### Inflation, consumer prices for the United States

In [None]:
# Load the FPCPITOTLZGUSA series, using the parsed DATE column as the index.
INF = pd.read_csv(
    'FPCPITOTLZGUSA.csv',
    index_col='DATE',
    parse_dates=['DATE'],
)
INF.head()

In [None]:
# Plot the inflation series from 2000 onwards, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(INF['2000':], 'b', label='INF')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
# Title restored to "consumer prices" (it had been garbled to "CONSLOANS prices").
ax.set(xlabel='time (Year)', ylabel='Percent',
       title='Inflation, consumer prices for the United States')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences;
# the file is written before plt.show() so it comes from the still-open figure.
fig.savefig('Graphs/Inflation.png', bbox_inches='tight')
plt.show()

## Deposits

### Excess Reserves of Depository Institutions

In [None]:
# Load the EXCSRESNS series, using the parsed DATE column as the index.
EXCSR = pd.read_csv(
    'EXCSRESNS.csv',
    index_col='DATE',
    parse_dates=['DATE'],
)
EXCSR.head()

In [None]:
# Plot excess reserves from 2000 onwards, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(EXCSR['2000':], 'b', label='EXCSR')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
# y-axis label typo "Millinos" corrected.
ax.set(xlabel='time (Month)', ylabel='Millions of $',
       title='Excess Reserves of Depository Institutions')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences;
# the file is written before plt.show() so it comes from the still-open figure.
fig.savefig('Graphs/Excess_Reserves_of_Depository.png', bbox_inches='tight')
plt.show()

In [None]:
# Monthly mean of the EXCSRESNS column from 2000 onwards, divided by 100.
EXCSR_MON = EXCSR['EXCSRESNS'].loc['2000':].resample(rule='M').mean().div(100)
EXCSR_MON.head()

In [None]:
# Move every observation one period earlier and drop the row left empty at the end.
EXCSR_MON = EXCSR_MON.shift(-1).dropna(axis=0)
EXCSR_MON.head()

In [None]:
# The one-period shift is already applied in the previous cell. This cell was an exact
# copy of it, which moved EXCSR_MON two periods while every other series is moved one,
# so it now only displays the result.
EXCSR_MON.head()

In [None]:
# Natural logarithm of the monthly excess-reserves series.
EXCSR_log = EXCSR_MON.pipe(np.log)
EXCSR_log.head()

In [None]:
# Plot the log of excess reserves, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(EXCSR_log, 'b', label='EXCSR')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (Month)', ylabel='Log Millions of $',
       title='Log Excess Reserves of Depository Institutions')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences;
# the file is written before plt.show() so it comes from the still-open figure.
fig.savefig('Graphs/Excess_Reserves_of_Depository_Norm.png', bbox_inches='tight')
plt.show()

### Total Savings Deposits at all Depository Institutions

In [None]:
# Load the WSAVNS series, using the parsed DATE column as the index.
WSAV = pd.read_csv(
    'WSAVNS.csv',
    index_col='DATE',
    parse_dates=['DATE'],
)
WSAV.head()

In [None]:
# Plot total savings deposits from 2000 onwards, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(WSAV['2000':], 'b', label='WSAV')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (Week)', ylabel='Billions of $',
       title='Total Savings Deposits at all Depository Institutions')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences;
# the file is written before plt.show() so it comes from the still-open figure.
fig.savefig('Graphs/Total_Savings_Deposits.png', bbox_inches='tight')
plt.show()

In [None]:
# Monthly mean of the WSAVNS column from 2000 onwards.
WSAV_MON = WSAV['WSAVNS'].loc['2000':].resample(rule='M').mean()
WSAV_MON.head()

In [None]:
# Plot the monthly average of total savings deposits, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(WSAV_MON, 'b', label='WSAV_MON')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
# WSAV_MON is the plain monthly mean (no log has been taken), so the y axis is in Billions of $.
ax.set(xlabel='time (Month)', ylabel='Billions of $',
       title='Total Savings Deposits at all Depository Institutions Monthly Average')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences;
# the file is written before plt.show() so it comes from the still-open figure.
fig.savefig('Graphs/Total_Savings_Deposits_Mon_Avg.png', bbox_inches='tight')
plt.show()

In [None]:
# Natural logarithm of the monthly savings-deposit averages.
WSAV_log = WSAV_MON.pipe(np.log)
WSAV_log.head()

In [None]:
# Plot the log of the monthly savings-deposit averages, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(WSAV_log, 'b', label='WSAV_norm')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (Month)', ylabel='Log Billions of $',
       title='Log Total Savings Deposits at all Depository Institutions Monthly Average')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences;
# the file is written before plt.show() so it comes from the still-open figure.
fig.savefig('Graphs/Total_Savings_Deposits_Mon_Avg_Norm.png', bbox_inches='tight')
plt.show()

### Savings Deposits - Total

In [None]:
# Load the SAVINGSL series, using the parsed DATE column as the index.
SAVE = pd.read_csv(
    'SAVINGSL.csv',
    index_col='DATE',
    parse_dates=['DATE'],
)
SAVE.head()

In [None]:
# Monthly mean of SAVE from 2000 onwards.
SAVE_MON = SAVE.loc['2000':].resample(rule='M').mean()
SAVE_MON.head()

In [None]:
# Move every observation one period earlier and drop the row left empty at the end.
SAVE_MON = SAVE_MON.shift(-1).dropna(axis=0)
SAVE_MON.head()

In [None]:
# Plot the monthly savings-deposit series, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(SAVE_MON, 'b', label='SAVE')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (Month)', ylabel='Billions of U.S. Dollars',
       title='Savings Deposits - Total ')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences.
fig.savefig('Graphs/Savings_Deposits.png', bbox_inches='tight')

In [None]:
# Natural logarithm of the monthly savings-deposit series.
SAVE_log = SAVE_MON.pipe(np.log)
SAVE_log.head()

## Interest Rates

### Bank Prime Loan Rate

For the DPRIME variable we can observe that we have missing values. The basic problem is that all data are represented as object and not float values

In [None]:
# Load the DPRIME series, using the parsed DATE column as the index.
Dprime = pd.read_csv(
    'DPRIME.csv',
    index_col='DATE',
    parse_dates=['DATE'],
)
Dprime.head()

We replace the missing values, which were represented as "." in the original dataset, with the np.nan value

In [None]:
# Turn the "." placeholders into NaN. The result is assigned back to Dprime (it used to be
# bound to the misspelled name Drime and was therefore discarded). np.nan is used because
# the np.NaN alias no longer exists in NumPy 2.
Dprime = Dprime.replace(".", np.nan)
Dprime.head()

In [None]:
# Inspect the column dtypes via the frame's dtypes attribute.
Dprime.dtypes

We convert the string series to float values

In [None]:
# Parse the DPRIME column as numbers (anything unparseable becomes NaN),
# then display the dtypes to confirm the conversion.
Dprime = Dprime.assign(DPRIME=pd.to_numeric(Dprime['DPRIME'], errors='coerce'))
Dprime.dtypes

We then fill the NaN values with the previous values. This method is considered optimal for this dataset because there is continuity in the values of the data

In [None]:
# Fill each gap with the previous observation, as the accompanying text describes
# (the old back-fill used the *next* value). ffill() also replaces the deprecated
# fillna(method=...) form.
Dprime = Dprime.ffill()

In [None]:
# Plot the bank prime loan rate from 2000 onwards, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(Dprime['2000':], 'b', label='DPRIME')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (day)', ylabel='Percent', title='Bank Prime Loan Rate')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences;
# the file is written before plt.show() so it comes from the still-open figure.
fig.savefig('Graphs/Bank_Prime_Loan.png', bbox_inches='tight')
plt.show()

In [None]:
# Monthly mean of the DPRIME column between 2000 and 2020-02.
DPRIME_MON = Dprime['DPRIME'].loc['2000':'2020-02'].resample(rule='M').mean()
DPRIME_MON.head()

In [None]:
# Plot the monthly average of the bank prime loan rate, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(DPRIME_MON, 'b', label='DPRIME')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (Month)', ylabel='Percent',
       title='Bank Prime Loan Rate Monthly Average')
ax.grid()
# Saved under its own _Mon_Avg name: reusing Bank_Prime_Loan.png overwrote the daily plot.
# Forward slash keeps the path portable; saving precedes plt.show().
fig.savefig('Graphs/Bank_Prime_Loan_Mon_Avg.png', bbox_inches='tight')
plt.show()

### Effective Federal Funds Rate

In [None]:
# Load the FEDFUNDS series indexed by DATE and preview the rows from 2000 onwards.
FED = pd.read_csv(
    'FEDFUNDS.csv',
    index_col='DATE',
    parse_dates=['DATE'],
)
FED.loc['2000':].head()

In [None]:
# Plot the effective federal funds rate from 2000 onwards, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(FED['2000':], 'b', label='FED')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (Month)', ylabel='Percent', title='Effective Federal Funds Rate')
ax.grid()
# This is the un-averaged series, so it gets its own file name; the _Mon_Avg name it
# used before is the one the FED_MON plot also writes to, which overwrote this figure.
# Forward slash keeps the path portable; saving precedes plt.show().
fig.savefig('Graphs/Effective_Federal_Funds_Rate.png', bbox_inches='tight')
plt.show()

In [None]:
# Monthly mean of the FEDFUNDS column from 2000 onwards.
FED_MON = FED['FEDFUNDS'].loc['2000':].resample(rule='M').mean()
FED_MON.head()

In [None]:
# Move every observation one period earlier and drop the row left empty at the end.
FED_MON = FED_MON.shift(-1).dropna(axis=0)
FED_MON.head()

In [None]:
# Plot the monthly federal funds rate series, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(FED_MON, 'b', label='FED')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (Month)', ylabel='Percent', title='Effective Federal Funds Rate')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences;
# the file is written before plt.show() so it comes from the still-open figure.
fig.savefig('Graphs/Effective_Federal_Funds_Rate_Mon_Avg.png', bbox_inches='tight')
plt.show()

### 10-Year Treasury Constant Maturity Minus 3-Month Treasury Constant Maturity

This dataset has missing values

In [None]:
# Load the T10Y3M series, using the parsed DATE column as the index.
TYM = pd.read_csv('T10Y3M.csv', parse_dates=['DATE'], index_col='DATE')
# Check the Python type of the first value. .iloc makes the positional access explicit;
# a bare [0] on a date-indexed Series relies on a deprecated positional fallback.
type(TYM['T10Y3M'].iloc[0])

In order to fill the blank cells we will make use of the linear interpolation method

In [None]:
# Turn the "." placeholders into NaN (np.nan: the np.NaN alias no longer exists in NumPy 2),
# parse the column as numbers, then fill the gaps by linear interpolation along the rows.
TYM = TYM.replace(".", np.nan)
TYM['T10Y3M'] = pd.to_numeric(TYM['T10Y3M'], errors='coerce')
TYM = TYM.interpolate(method='linear', axis=0)
TYM.head()

In [None]:
# Check again that the first value now has a numeric type. .iloc makes the positional
# access explicit; a bare [0] on a date-indexed Series relies on a deprecated fallback.
type(TYM['T10Y3M'].iloc[0])

In [None]:
# Plot the 10-year minus 3-month spread from 2000 onwards, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(TYM['2000':], 'b', label='TYM')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (day)', ylabel='Percent',
       title='10-Year Treasury Constant Maturity Minus 3-Month Treasury Constant Maturity')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences;
# the file is written before plt.show() so it comes from the still-open figure.
fig.savefig('Graphs/Treasury_Constant_Maturity.png', bbox_inches='tight')
plt.show()

In [None]:
# Monthly mean of the T10Y3M column between 2000 and 2020-02, divided by 100.
TYM_MON = TYM['T10Y3M'].loc['2000':'2020-02'].resample(rule='M').mean().div(100)
TYM_MON.head()

In [None]:
# Show the last rows to see where the monthly series ends.
TYM_MON.tail()

In [None]:
# Plot the monthly average of the 10-year minus 3-month spread, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(TYM_MON, 'b', label='TYM')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (Month)', ylabel='Percent',
       title='10-Year Treasury Constant Maturity Minus 3-Month Treasury Constant Maturity Monthly Average')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences;
# the file is written before plt.show() so it comes from the still-open figure.
fig.savefig('Graphs/Treasury_Constant_Maturity_Mon_Avg.png', bbox_inches='tight')
plt.show()

## Loans

### Consumer Loans, All Commercial Banks

In [None]:
# Load the CONSUMER series indexed by DATE and rename its column to CONSLOANS.
CONS = pd.read_csv(
    'CONSUMER.csv',
    index_col='DATE',
    parse_dates=['DATE'],
).rename(columns={'CONSUMER': 'CONSLOANS'})
CONS.head()

In [None]:
# Monthly mean of CONS from 2000 onwards.
CONS_MON = CONS.loc['2000':].resample(rule='M').mean()
CONS_MON.head()

In [None]:
# Move every observation one period earlier and drop the row left empty at the end.
CONS_MON = CONS_MON.shift(-1).dropna(axis=0)
CONS_MON.head()

In [None]:
# Plot the monthly consumer-loans series, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(CONS_MON, 'b', label='CONS')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
# Title restored to "Consumer Loans" (it had been garbled to "CONSLOANS Loans").
ax.set(xlabel='time (Month)', ylabel='Billions of U.S. Dollars',
       title='Consumer Loans, All Commercial Banks')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences.
fig.savefig('Graphs/CONSLOANS_Loans.png', bbox_inches='tight')

In [None]:
# Natural logarithm of the monthly consumer-loans series.
CONS_log = CONS_MON.pipe(np.log)
CONS_log.head()

### Real Estate Loans: Residential Real Estate Loans: Revolving Home Equity Loans, All Commercial Banks

In [None]:
# Load the RHEACBW027SBOG series indexed by DATE and rename its column to RealEstLoans.
REL = pd.read_csv(
    'RHEACBW027SBOG.csv',
    index_col='DATE',
    parse_dates=['DATE'],
).rename(columns={'RHEACBW027SBOG': 'RealEstLoans'})
REL.head()

In [None]:
# Monthly mean of REL from 2000 onwards.
REL_MON = REL.loc['2000':].resample(rule='M').mean()
REL_MON.head()

In [None]:
# Move every observation one period earlier and drop the row left empty at the end.
REL_MON = REL_MON.shift(-1).dropna(axis=0)
REL_MON.head()

In [None]:
# Plot the monthly real-estate-loans series, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(REL_MON, 'b', label='REL')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (Month)', ylabel='Billions of U.S. Dollars',
       title='Real Estate Loans: Residential Real Estate Loans')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences.
fig.savefig('Graphs/Real_Estate_Loans.png', bbox_inches='tight')

In [None]:
# Natural logarithm of the monthly real-estate-loans series.
REL_log = REL_MON.pipe(np.log)
REL_log.head()

### Commercial and Industrial Loans, All Commercial Banks

In [None]:
# Load the BUSLOANS series, using the parsed DATE column as the index.
BUSL = pd.read_csv(
    'BUSLOANS.csv',
    index_col='DATE',
    parse_dates=['DATE'],
)
BUSL.head()

In [None]:
# Monthly mean of BUSL from 2000 onwards.
BUSL_MON = BUSL.loc['2000':].resample(rule='M').mean()
BUSL_MON.head()

In [None]:
# Move every observation one period earlier and drop the row left empty at the end.
BUSL_MON = BUSL_MON.shift(-1).dropna(axis=0)
BUSL_MON.head()

In [None]:
# Plot the monthly commercial-and-industrial-loans series, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(BUSL_MON, 'b', label='BUSL')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (Month)', ylabel='Billions of U.S. Dollars',
       title='Commercial and Industrial Loans, All Commercial Banks ')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences.
fig.savefig('Graphs/Comercial_Industrial_Loans.png', bbox_inches='tight')

In [None]:
# Natural logarithm of the monthly commercial-and-industrial-loans series.
BUSL_log = BUSL_MON.pipe(np.log)
BUSL_log.head()

### Delinquency Rate on Credit Card Loans, All Commercial Banks

In [None]:
# Load the DRCCLACBS series, using the parsed DATE column as the index.
DRC = pd.read_csv(
    'DRCCLACBS.csv',
    index_col='DATE',
    parse_dates=['DATE'],
)
DRC.head()

In [None]:
# Plot the credit-card delinquency rate, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
# Restricted to 2000 onwards like every other series in the notebook (it was plotted unsliced).
ax.plot(DRC['2000':], 'b', label='DRC')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (Quarters)', ylabel='Percent, Seasonally Adjusted',
       title='Delinquency Rate on Credit Card Loans, All Commercial Banks ')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences.
fig.savefig('Graphs/Delinquency_Rate_Credit_CardLoans.png', bbox_inches='tight')

### Delinquency Rate on All Loans, All Commercial Banks 

In [None]:
# Load the DRALACBS series, using the parsed DATE column as the index.
DRA = pd.read_csv(
    'DRALACBS.csv',
    index_col='DATE',
    parse_dates=['DATE'],
)
DRA.head()

In [None]:
# Plot the delinquency rate on all loans from 2000 onwards, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(DRA['2000':], 'b', label='DRA')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (Quarters)', ylabel='Percent, Seasonally Adjusted',
       title='Delinquency Rate on All Loans, All Commercial Banks ')
ax.grid()
# Saved under its own name: it used the credit-card plot's file name and overwrote that figure.
# Forward slash keeps the path portable and free of invalid escape sequences.
fig.savefig('Graphs/Delinquency_Rate_All_Loans.png', bbox_inches='tight')

## Stock Index

### S&P 500 Index

In [None]:
# Load the ^GSPC price file, using the parsed Date column as the index.
GSPC = pd.read_csv(
    '^GSPC.csv',
    index_col='Date',
    parse_dates=['Date'],
)
GSPC.head()

In [None]:
# Show the last rows to see where the series ends.
GSPC.tail()

In [None]:
# Monthly mean of the Close column between 2000 and 2020-02, with the series named SP500.
GSPC_MON = (
    GSPC['Close']
    .loc['2000':'2020-02']
    .resample(rule='M')
    .mean()
    .rename('SP500')
)
GSPC_MON.head()

In [None]:
# Plot the monthly average of the S&P 500 series, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(GSPC_MON, 'b', label='GSPC')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (Month)', ylabel='Index Value',
       title='S&P 500 Adj Close Monthly Average')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences.
fig.savefig('Graphs/SP_500_Mon_Avg.png', bbox_inches='tight')

In [None]:
# Natural logarithm of the monthly S&P 500 averages.
GSPC_log = GSPC_MON.pipe(np.log)
GSPC_log.head()

In [None]:
# Plot the log of the monthly S&P 500 averages, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(GSPC_log, 'b', label='GSPC')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (Month)', ylabel='Log Index Value',
       title='Log S&P 500 Adj Close Index')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences.
fig.savefig('Graphs/SP_500_Norm.png', bbox_inches='tight')

### VIX Index

In [None]:
# Load the ^VIX price file, using the parsed Date column as the index.
VIX = pd.read_csv(
    '^VIX.csv',
    index_col='Date',
    parse_dates=['Date'],
)
VIX.head()

In [None]:
# Monthly mean of the Close column between 2000 and 2020-02, divided by 100 and named VIX.
VIX_MON = (
    VIX['Close']
    .loc['2000':'2020-02']
    .resample(rule='M')
    .mean()
    .div(100)
    .rename('VIX')
)
VIX_MON.head()

In [None]:
# Plot the monthly VIX series, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(VIX_MON, 'b', label='VIX')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (Month)', ylabel='Percent', title='VIX Monthly Average')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences.
fig.savefig('Graphs/VIX_Mon_Avg.png', bbox_inches='tight')

## Macroeconomic Index

### US Policy Uncertainty Data

In [None]:
# Read the 'Main Index' sheet of the policy-uncertainty workbook.
USP = pd.read_excel(
    'US_Policy_Uncertainty_Data.xlsx',
    sheet_name='Main Index',
)
USP.head()

In [None]:
# Check the Python type of the Year entry labelled 1.
type(USP.Year[1])

In [None]:
# Assemble a Date column from the Year, Month and Day columns.
# NOTE(review): assumes the sheet provides all three columns with valid values in every row — confirm.
USP['Date']=pd.to_datetime(USP[['Year','Month','Day']])
USP.head()

In [None]:
# Use the assembled Date column as the index.
USP = USP.set_index('Date')
USP.head()

In [None]:
# The date parts are no longer needed once Date is the index.
USP = USP.drop(columns=['Year', 'Month', 'Day'])
USP.head()

In [None]:
# Monthly mean of Three_Component_Index from 2000 onwards, divided by 100 and named USPUI.
USP_MON = (
    USP['Three_Component_Index']
    .loc['2000':]
    .resample(rule='M')
    .mean()
    .div(100)
    .rename('USPUI')
)
USP_MON.head()

In [None]:
# Plot the monthly policy-uncertainty index, with the crisis window shaded grey.
f, ax = plt.subplots(figsize=(15, 5))
ax.plot(USP_MON, 'b', label='USP')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (Month)', ylabel='Index Value',
       title='US Uncertainty  Monthly Index')
ax.grid()
# Written into the Graphs folder like every other figure (it used to land in the working directory).
f.savefig('Graphs/USP_Mon.png', bbox_inches='tight')

In [None]:
# NOTE(review): despite the _log suffix no logarithm is taken here; USP_log is simply
# another name for USP_MON — confirm this is intended.
USP_log=(USP_MON)
USP_log.head()

In [None]:
# Plot USP_log, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(USP_log, 'b', label='USP')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (Month)', ylabel='Index Value',
       title='US Uncertainty  Monthly Index Normalised')
ax.grid()
# Written into the Graphs folder like every other figure (it used to land in the working directory).
fig.savefig('Graphs/USP_Mon_Norm.png', bbox_inches='tight')

### Industrial Production Index

In [None]:
# Load the INDPRO series indexed by DATE and divide every value by 100.
INDPRO = pd.read_csv(
    'INDPRO.csv',
    index_col='DATE',
    parse_dates=['DATE'],
).div(100)
INDPRO.head()

In [None]:
# Show the last rows to see where the series ends.
INDPRO.tail()

In [None]:
# Plot the industrial production series from 2000 onwards, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(INDPRO['2000':], 'b', label='INDPRO')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
# Title typo "Idustrial" corrected.
ax.set(xlabel='time (Month)', ylabel='INDEX 2012=100',
       title='Industrial Production Index')
ax.grid()

In [None]:
# Monthly mean between 2000 and 2020-02. INDPRO is already divided by 100 in the cell
# that loads it, so no second division is applied here (that scaled the index twice).
INDPRO_MON = INDPRO['2000':'2020-02'].resample('M').mean()
INDPRO_MON.head()

In [None]:
# Move every observation one period earlier and drop the row left empty at the end.
INDPRO_MON = INDPRO_MON.shift(-1).dropna(axis=0)
INDPRO_MON.head()

In [None]:
# Plot the monthly industrial production series, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(INDPRO_MON, 'b', label='INDPRO')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (Month)', ylabel='Index 2012=100',
       title='Industrial Production Index')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences.
fig.savefig('Graphs/IndPro.png', bbox_inches='tight')

In [None]:
# NOTE(review): despite the _log suffix no logarithm is taken here; INDPRO_log is simply
# another name for INDPRO_MON — confirm this is intended.
INDPRO_log=(INDPRO_MON)
INDPRO_log.head()

### Producer Price Index for All Commodities

In [None]:
# Load the PPIACO series, using the parsed DATE column as the index.
PPIACO = pd.read_csv(
    'PPIACO.csv',
    index_col='DATE',
    parse_dates=['DATE'],
)
PPIACO.head()

In [None]:
# Monthly mean of PPIACO from 2000 onwards, divided by 100.
PPIACO_MON = PPIACO.loc['2000':].resample(rule='M').mean().div(100)
PPIACO_MON.head()

In [None]:
# Move every observation one period earlier and drop the row left empty at the end.
PPIACO_MON = PPIACO_MON.shift(-1).dropna(axis=0)
PPIACO_MON.head()

In [None]:
# Plot the monthly producer price series, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(PPIACO_MON, 'b', label='PPIACO')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (Month)', ylabel='Index 1982=100',
       title='Producer Price Index for All Commodities')
ax.grid()
# Forward slash keeps the path portable and free of invalid escape sequences.
fig.savefig('Graphs/Producer_Price.png', bbox_inches='tight')

### Unemployment Rate

In [None]:
# Load the UNRATE series, using the parsed DATE column as the index.
UNRATE = pd.read_csv(
    'UNRATE.csv',
    index_col='DATE',
    parse_dates=['DATE'],
)
UNRATE.head()

In [None]:
# Monthly mean of UNRATE from 2000 onwards.
UNRATE_MON = UNRATE.loc['2000':].resample(rule='M').mean()
UNRATE_MON.head()

In [None]:
# Move every observation one period earlier and drop the row left empty at the end.
UNRATE_MON = UNRATE_MON.shift(-1).dropna(axis=0)
UNRATE_MON.head()

In [None]:
# Plot the monthly unemployment rate, with the crisis window shaded grey.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(UNRATE_MON, 'b', label='UNRATE')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
ax.set(xlabel='time (Month)', ylabel='Percent', title='Unemployment Rate')
ax.grid()
# Written into the Graphs folder like every other figure (it used to land in the working directory).
fig.savefig('Graphs/UnemploymentRate.png', bbox_inches='tight')

### Consumer Sentiment

In [None]:
# Load the UMCSENT file; the DATE column is parsed as dates and used as the
# index, and a bare '.' in the data is read as a missing value.
UMCSENT = pd.read_csv(
    'UMCSENT.csv',
    index_col='DATE',
    parse_dates=['DATE'],
    na_values='.',
)
UMCSENT.head()

In [None]:
# Plot the UMCSENT series from 2000 onwards with the excluded crisis window
# (2007-12 to 2009-06) shaded in grey.
fig, ax = plt.subplots(figsize = (15,5))
# The legend label was a copy-paste leftover ('UNRATE'); the plotted series is UMCSENT.
ax.plot(UMCSENT['2000':],'b',label='UMCSENT')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
# define the axes properties and the title of the graph
# The title read 'CONSLOANS Sentiment', which looks like a search-and-replace
# artefact; UMCSENT is the consumer sentiment index.
ax.set(xlabel='time (Month)', ylabel='index 1966:Q1=100',
       title='University of Michigan: Consumer Sentiment')
#introduce grid on the plot
ax.grid()
#Save the plot
# The file name is kept unchanged so existing references to the image still
# work; os.path.join only fixes the invalid '\C' escape / Windows-only separator.
fig.savefig(os.path.join('Graphs','CONSLOANSSentiment.png'),bbox_inches='tight')

In [None]:
# Shift the series one row up, keep 2000 onwards, then build the monthly mean
# rescaled by 1/100.
# NOTE: UMCSENT itself is overwritten, so re-running this cell shifts the data
# once more; re-run the read_csv cell first.
UMCSENT = UMCSENT.shift(-1).loc['2000':]
UMCSENT_MON = UMCSENT.resample('M').mean().div(100)
UMCSENT_MON.head()

## Other Indexes

### Crude Oil Prices: West Texas Intermediate (WTI) - Cushing, Oklahoma

In [None]:
# Load the WTI crude oil price file; the DATE column is parsed as dates and
# used as the index.
DCOIL = pd.read_csv(
    'DCOILWTICO.csv',
    index_col='DATE',
    parse_dates=['DATE'],
)
DCOIL.head()

In [None]:
# Inspect the column dtypes before the numeric conversion in the next cell.
DCOIL.dtypes

In [None]:
# Convert the price column to numbers; entries that cannot be parsed become
# NaN (errors='coerce').
# NOTE: DCOIL is rebound from the loaded frame to the single 'DCOILWTICO'
# Series, so this cell cannot be re-run without re-running the read_csv cell.
DCOIL=pd.to_numeric(DCOIL['DCOILWTICO'], errors='coerce')
DCOIL.dtypes

In [None]:
# Keep observations from 2000 onwards and average them per calendar month.
DCOIL_MON = DCOIL.loc['2000':].resample('M').mean()
DCOIL_MON.head()

In [None]:
# Move every observation one row up (row t now holds the value that was stored
# at row t+1); the last row becomes NaN and is removed.
DCOIL_MON = DCOIL_MON.shift(-1).dropna(axis=0)
DCOIL_MON.head()

In [None]:
# Plot the monthly WTI crude oil price with the excluded crisis window
# (2007-12 to 2009-06) shaded in grey.
fig, ax = plt.subplots(figsize = (15,5))
ax.plot(DCOIL_MON,'b',label='DCOIL')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
# define the axes properties and the title of the graph
ax.set(xlabel='time (Month)', ylabel='US $ per Barrel',
       title='Crude Oil Prices: West Texas Intermediate (WTI) - Cushing, Oklahoma')
#introduce grid on the plot
ax.grid()
#Save the plot
# os.path.join avoids the invalid '\C' escape sequence and puts the file inside
# the Graphs directory on every operating system, not only on Windows.
fig.savefig(os.path.join('Graphs','Crude_Oil_Prices.png'),bbox_inches='tight')

In [None]:
# Natural logarithm of the monthly oil price.
# NOTE(review): the combined `Money` frame built in the Cross Correlation
# section concatenates DCOIL_MON, not DCOIL_log -- confirm which one is intended.
DCOIL_log=np.log(DCOIL_MON)
DCOIL_log.head()

### U.S. / Euro Foreign Exchange Rate

In [None]:
# Load the U.S./Euro exchange rate file; the DATE column is parsed as dates and
# used as the index.
USEU = pd.read_csv(
    'DEXUSEU.csv',
    index_col='DATE',
    parse_dates=['DATE'],
)
USEU.head()

In [None]:
# Inspect the column dtypes before the numeric conversion in the next cell.
USEU.dtypes

In [None]:
# Convert the exchange-rate column to numbers; entries that cannot be parsed
# become NaN (errors='coerce').
# NOTE: USEU is rebound from the loaded frame to the single 'DEXUSEU' Series,
# so this cell cannot be re-run without re-running the read_csv cell.
USEU=pd.to_numeric(USEU['DEXUSEU'],errors='coerce')
USEU.dtypes

In [None]:
# Keep observations from 2000 onwards and average them per calendar month.
# NOTE(review): unlike the other monthly series in this part of the notebook,
# no shift(-1) follows for USEU_MON -- confirm that this is intended.
USEU_MON = USEU.loc['2000':].resample('M').mean()
USEU_MON.head()

In [None]:
# Plot the monthly U.S./Euro exchange rate with the excluded crisis window
# (2007-12 to 2009-06) shaded in grey.
fig, ax = plt.subplots(figsize = (15,5))
ax.plot(USEU_MON,'b',label='USEU')
ax.axvspan('2007-12', '2009-06', color='grey', alpha=0.5)
# define the axes properties and the title of the graph
ax.set(xlabel='time (Month)', ylabel='US $ to One Euro',
       title='U.S. / Euro Foreign Exchange Rate')
#introduce grid on the plot
ax.grid()
#Save the plot
# Every other figure is stored in the Graphs directory; this one was written to
# the working directory (a literal 'Graphs\U...' would be a unicode-escape
# SyntaxError). os.path.join gives the intended location on every OS.
fig.savefig(os.path.join('Graphs','US_Euro_Foreign_Exchange_Rate.png'),bbox_inches='tight')

## Cross Correlation

In [None]:
# Put all prepared series side by side (axis=1), aligned on their date index.
Money = pd.concat(
    [
        MULT_MON, MZM_log, CPIL_MON, CSCICP_MON, EXCSR_log,
        WSAV_log, FED_MON, TYM_MON, CONS_log, REL_log,
        BUSL_log, GSPC_log, VIX_MON, UMCSENT_MON, USP_MON,
        INDPRO_log, PPIACO_MON, UNRATE_MON, DCOIL_MON, USEU_MON,
    ],
    axis=1,
)
Money.head()

In [None]:
# Inspect the last rows of the combined frame (shows where the series end).
Money.tail()

The cross correlation of the selected variables from 2000 up to and including 2007

We will calculate this using the Spearman correlation method, as we do not know whether there is a linear relationship between all the variables.

In [None]:
# Last rows of the 2000 .. 2007-12 window used for the first correlation matrix.
Money['2000':'2007-12'].tail()

In [None]:
# Spearman rank correlation between all columns over 2000 .. 2007-12.
Money_Cor = Money.loc['2000':'2007-12'].corr(method='spearman')
Money_Cor

In [None]:
#define a mask to show only the values below the diagonal of the cross correlation matrix
# np.bool was removed in NumPy 1.24; the builtin bool is the same dtype and
# works on every NumPy version.
mask = np.zeros_like(Money_Cor, dtype=bool)
mask[np.triu_indices_from(mask)] = True
# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(20, 18))
# create the heatmap (drawn explicitly on the axes created above)
sns.heatmap(Money_Cor, cmap='cool', vmax=1,vmin=-1, center=0, annot=True,
            square=False, linewidths=.5, cbar_kws={"shrink": .5},mask=mask, ax=ax)
#define the place ofthe tickers in each axes
ax.set_xticklabels(
    ax.get_xticklabels(),
    rotation=45,
    horizontalalignment='right'
);
ax.set_yticklabels(
    ax.get_yticklabels(),
    rotation=0,
    horizontalalignment='right'
);
plt.title('Cross Correlation Matrix 2000-2007')
#set axis equal for better representation
plt.axis('equal')
# show
plt.show()
# os.path.join avoids the invalid '\S' escape sequence and puts the file inside
# the Graphs directory on every operating system, not only on Windows.
f.savefig(os.path.join('Graphs','Spearman_Corr_00-07.png'),bbox_inches='tight')

The cross correlation of the selected variables from 2009-06 up to the end of 2019

In [None]:
# Spearman rank correlation between all columns over 2009-06 .. 2019.
Money_Cor1 = Money.loc['2009-06':'2019'].corr(method='spearman')
Money_Cor1.head()

In [None]:
#define a mask to show only the values below the diagonal of the cross correlation matrix
# np.bool was removed in NumPy 1.24; the builtin bool is the same dtype and
# works on every NumPy version.
mask = np.zeros_like(Money_Cor1, dtype=bool)
mask[np.triu_indices_from(mask)] = True
# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(20, 18))
# create the heatmap (drawn explicitly on the axes created above)
sns.heatmap(Money_Cor1, cmap='cool', vmax=1,vmin=-1, center=0, annot=True,
            square=False, linewidths=.5, cbar_kws={"shrink": .5}, mask=mask, ax=ax)
#define the place ofthe tickers in each axes
ax.set_xticklabels(
    ax.get_xticklabels(),
    rotation=45,
    horizontalalignment='right'
);
ax.set_yticklabels(
    ax.get_yticklabels(),
    rotation=0,
    horizontalalignment='right'
);
plt.title('Cross Correlation Matrix 2009-2020')
#set axis equal for better representation
plt.axis('equal')
# show
plt.show()
# os.path.join avoids the invalid '\S' escape sequence and puts the file inside
# the Graphs directory on every operating system, not only on Windows.
f.savefig(os.path.join('Graphs','Spearman_Corr_10-now.png'),bbox_inches='tight')

## Clustering

In [None]:
# Cluster map of the 2000-2007 data: single linkage on the correlation
# distance, each column rescaled to [0, 1] (standard_scale=1).
fig=sns.clustermap(Money['2000':'2007'], metric="correlation", method="single", cmap="Reds", standard_scale=1,linewidth=.1)
plt.title('Clustering 2000-2007')
plt.show()
# os.path.join avoids the invalid '\C' escape sequence and puts the file inside
# the Graphs directory on every operating system, not only on Windows.
fig.savefig(os.path.join('Graphs','ClusterMap_00_07.png'),bbox_inches='tight')

In [None]:
from scipy.cluster.hierarchy import dendrogram
from sklearn.cluster import AgglomerativeClustering

def plot_dendrogram(model, **kwargs):
    """Draw a SciPy dendrogram for a fitted agglomerative clustering model.

    model    : fitted estimator exposing ``children_``, ``distances_`` and
               ``labels_``.
    **kwargs : forwarded unchanged to ``dendrogram``.
    """
    n_leaves = len(model.labels_)
    n_merges = model.children_.shape[0]

    # Number of original samples below every merge node. Child indices smaller
    # than n_leaves are leaves; larger ones point to an earlier merge.
    subtree_sizes = np.zeros(n_merges)
    for merge_idx in range(n_merges):
        size = 0
        for child in model.children_[merge_idx]:
            size += 1 if child < n_leaves else subtree_sizes[child - n_leaves]
        subtree_sizes[merge_idx] = size

    # Linkage matrix columns: child a, child b, merge distance, subtree size.
    columns = [model.children_, model.distances_, subtree_sizes]
    linkage_matrix = np.column_stack(columns).astype(float)

    dendrogram(linkage_matrix, **kwargs)
    
# normalize the variables (z-score of every column over 2000-2007) for the clustering analysis
Money_t=(Money['2000':'2007']-Money['2000':'2007'].mean())/Money['2000':'2007'].std()
# transpose so that every variable (not every date) is one sample to cluster
Money_t1=Money_t.transpose()
# setting distance_threshold=0 ensures we compute the full tree.
model = AgglomerativeClustering(distance_threshold=0, n_clusters=None)
fig=plt.figure(figsize=(20,10))
model = model.fit(Money_t1)
plt.title('Hierarchical Clustering Dendrogram from 2000 to 2007 method="Ward", metric="Euclidean"',)
# plot the dendrogram; truncate_mode='level' is used without p, so SciPy's
# default depth applies (not "the top three levels")
plot_dendrogram(model, truncate_mode='level',labels=list(Money['2000':'2007'].columns))
plt.xlabel("Variable Names")
plt.xticks(rotation=45)
# horizontal cut line drawn at distance 22
plt.axhline(linestyle='--', y=22,color='k',lw=5)
plt.show()
# os.path.join avoids the invalid '\H' escape sequence and puts the file inside
# the Graphs directory on every operating system, not only on Windows.
fig.savefig(os.path.join('Graphs','Hierarchical_00_07.png'),bbox_inches='tight')

In [None]:
from scipy.cluster.hierarchy import dendrogram, linkage

# Single-linkage clustering of the variables on the correlation distance.
# Money_t1 is the standardised, transposed frame left by the previous cell.
linked = linkage(Money_t1, 'single','correlation')

labelList = list(Money['2000':'2007'].columns)

fig=plt.figure(figsize=(20, 7))
dendrogram(linked,
            orientation='top',
            labels=labelList,
            distance_sort='descending')
plt.title('Hierarchical Clustering 00-07, method="Single", metric="Correlation"')
plt.xticks(rotation=45)
plt.show()
# os.path.join avoids the invalid '\H' escape sequence and puts the file inside
# the Graphs directory on every operating system, not only on Windows.
fig.savefig(os.path.join('Graphs','Hierarchical_Clustering_00_07_Corr.png'))

In [None]:
# Complete-linkage clustering of the variables on the correlation distance.
# Money_t1 is the standardised, transposed frame left by an earlier cell.
linked = linkage(Money_t1, 'complete','correlation')

labelList = list(Money['2000':'2007'].columns)

fig=plt.figure(figsize=(20, 7))
dendrogram(linked,
            orientation='top',
            labels=labelList,
            distance_sort='descending')
plt.title('Hierarchical Clustering 00-07, method="Complete", metric="Correlation"')
plt.xticks(rotation=45)
plt.show()
# os.path.join avoids the invalid '\H' escape sequence and puts the file inside
# the Graphs directory on every operating system, not only on Windows.
fig.savefig(os.path.join('Graphs','Hierarchical_Clustering_00_07_Corr_Comp.png'))

In [None]:
# Cluster map of the 2009-06 .. 2019-11 data: single linkage on the correlation
# distance, each column rescaled to [0, 1] (standard_scale=1).
fig=sns.clustermap(Money['2009-06':'2019-11'], metric="correlation", method="single", cmap="Reds", standard_scale=1,linewidth=.1)
plt.title('Clustering 2009-2020')
plt.show()
# os.path.join avoids the invalid '\C' escape sequence and puts the file inside
# the Graphs directory on every operating system, not only on Windows.
fig.savefig(os.path.join('Graphs','ClusterMap_10_19.png'),bbox_inches='tight')

In [None]:
# normalize the variables (z-score of every column over 2009-06 .. 2019-11) for the cluster analysis
# NOTE: this overwrites the Money_t / Money_t1 built for 2000-2007 above.
Money_t=(Money['2009-06':'2019-11']-Money['2009-06':'2019-11'].mean())/Money['2009-06':'2019-11'].std()
# transpose so that every variable (not every date) is one sample to cluster
Money_t1=Money_t.transpose()
# setting distance_threshold=0 ensures we compute the full tree.
model = AgglomerativeClustering(distance_threshold=0, n_clusters=None)
fig=plt.figure(figsize=(22,7))
model = model.fit(Money_t1)
plt.title('Hierarchical Clustering Dendrogram from 2009 to 2020')
# plot the dendrogram; truncate_mode='level' is used without p, so SciPy's
# default depth applies (not "the top three levels")
plot_dendrogram(model, truncate_mode='level',labels=list(Money['2009-06':'2019-11'].columns))
plt.xlabel("Number of points in node (or index of point if no parenthesis).")
# horizontal cut line drawn at distance 22
plt.axhline(linestyle='--', y=22,color='k',lw=5)
plt.xticks(rotation=45)
plt.show()
# os.path.join avoids the invalid '\H' escape sequence and puts the file inside
# the Graphs directory on every operating system, not only on Windows.
fig.savefig(os.path.join('Graphs','Hierarchical_09_19.png'),bbox_inches='tight')

In [None]:
# Single-linkage clustering of the variables on the correlation distance.
# Money_t1 is the standardised, transposed 2009-06 .. 2019-11 frame left by the
# previous cell.
linked = linkage(Money_t1, 'single','correlation')

labelList = list(Money['2009-06':'2019-11'].columns)

fig=plt.figure(figsize=(20, 7))
dendrogram(linked,
            orientation='top',
            labels=labelList,
            distance_sort='descending')
plt.title('Hierarchical Clustering 09-20, method="Single", metric="Correlation"')
plt.xticks(rotation=45)
plt.show()
# The keyword was misspelled 'bbbox_inches'; depending on the Matplotlib version
# that is either rejected or silently ignored, so the tight bounding box was
# never applied. os.path.join also replaces the Windows-only 'Graphs\...' path.
fig.savefig(os.path.join('Graphs','Hierarchical_Clustering_09_20_Corr.png'),bbox_inches='tight')

In [None]:
# Complete-linkage clustering of the variables on the correlation distance.
# Money_t1 is the standardised, transposed 2009-06 .. 2019-11 frame left by an
# earlier cell.
linked = linkage(Money_t1, 'complete','correlation')

labelList = list(Money['2009-06':'2019-11'].columns)

fig=plt.figure(figsize=(20, 7))
dendrogram(linked,
            orientation='top',
            labels=labelList,
            distance_sort='descending')
plt.title('Hierarchical Clustering 09-20, method="Complete", metric="Correlation"')
plt.xticks(rotation=45)
plt.show()
# The keyword was misspelled 'bbbox_inches'; depending on the Matplotlib version
# that is either rejected or silently ignored, so the tight bounding box was
# never applied. os.path.join also replaces the Windows-only 'Graphs\...' path.
fig.savefig(os.path.join('Graphs','Hierarchical_Clustering_09_20_Corr_Comp.png'),bbox_inches='tight')

## PCA Analysis

In order to go further in to our analysis with the VAR estimation model we must first redefine our variable set. We must exclude from our clustering analysis the standalone variables.

In [None]:
from sklearn.preprocessing import StandardScaler
print (Money['2009-06':'2019-11'].shape)
# Standardise every column of the 2009-06 .. 2019-11 window and wrap the
# resulting array back into a frame with the original dates and column names.
Money_St = pd.DataFrame(
    StandardScaler().fit_transform(Money['2009-06':'2019-11']),
    index=Money['2009-06':'2019-11'].index,
    columns=list(Money['2009-06':'2019-11'].columns),
)
# Variable set without the three columns excluded for the next steps.
Money_new = Money_St.drop(columns=['SP500','CPILFESL','PPIACO'])
Money_new.head()

In [None]:
# Select four columns from Money_t.
# NOTE: Money_t is whatever the last clustering cell left behind (the z-scored
# 2009-06 .. 2019-11 window when the notebook is run top to bottom), not the
# Money_St frame built in the previous cell.
Uncertainty=Money_t[['VIX','USPUI','FEDFUNDS','EXCSRESNS']]
print(Uncertainty.shape)
Uncertainty.head()

# Vector Autoregression (VAR)

Find the first differences of the time series

In [None]:
# Variables that enter the VAR, taken from the standardised frame.
Moneyfd1 = Money_St[[
    'SP500', 'CPILFESL', 'T10Y3M', 'VIX',
    'INDPRO', 'EXCSRESNS', 'MULT', 'DCOILWTICO',
]]
# First differences; rows containing NaN (at least the first one) are removed.
Moneyfd = Moneyfd1.diff().dropna(axis=0)
Moneyfd.head()

## Visualize the Time Series

In [None]:
#Import Statsmodels
from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import adfuller
from statsmodels.tools.eval_measures import rmse, aic

In [None]:
# One small panel per first-differenced series on a 4 x 2 grid.
fig, axes = plt.subplots(nrows=4, ncols=2, dpi=120, figsize=(10,6))
for i, ax in enumerate(axes.flatten()):
    # The old hard-coded `i != 7` check removed the eighth panel although
    # Moneyfd is built from eight columns, so the last series was never drawn.
    # Only panels without a matching column are removed now.
    if i < len(Moneyfd.columns):
        data = Moneyfd[Moneyfd.columns[i]]
        ax.plot(data, color='red', linewidth=1)
        # Decorations
        ax.set_title(Moneyfd.columns[i])
        ax.xaxis.set_ticks_position('none')
        ax.yaxis.set_ticks_position('none')
        ax.spines["top"].set_alpha(0)
        ax.tick_params(labelsize=6)
    else:
        fig.delaxes(ax)

plt.tight_layout();
# os.path.join avoids the invalid '\F' escape sequence and puts the file inside
# the Graphs directory on every operating system, not only on Windows.
fig.savefig(os.path.join('Graphs','First_diff_Subplot.png'),bbox_inches='tight')

## Testing Causation using Granger’s Causality Test

In [None]:
from statsmodels.tsa.stattools import grangercausalitytests
# Number of lags tested; read as a module-level name inside
# grangers_causation_matrix.
maxlag=1
# NOTE: the function below has its own `test` parameter with the same default,
# so this module-level value is not read by it.
test = 'ssr_chi2test'
def grangers_causation_matrix(data, variables, test='ssr_chi2test', verbose=False):
    """Build a table of Granger-causality p-values for every pair of series.

    Rows (suffix ``_y``) are the response series and columns (suffix ``_x``)
    the predictor series. Each entry is the smallest p-value of ``test`` over
    lags 1..maxlag, rounded to 4 decimals; ``maxlag`` is read from module scope.
    A p-value below the chosen significance level (e.g. 0.05) rejects the null
    hypothesis that X does not Granger-cause Y.

    data      : pandas dataframe containing the time series variables
    variables : list containing names of the time series variables.
    test      : key of the test whose p-value is taken from the results.
    verbose   : print the p-values of every pair when True.
    """
    size = len(variables)
    p_matrix = pd.DataFrame(np.zeros((size, size)), columns=variables, index=variables)
    for predictor in p_matrix.columns:
        for response in p_matrix.index:
            # Column order matters: the response comes first, the predictor second.
            lag_results = grangercausalitytests(data[[response, predictor]], maxlag=maxlag, verbose=False)
            p_values = []
            for lag in range(1, maxlag + 1):
                p_values.append(round(lag_results[lag][0][test][1], 4))
            if verbose:
                print(f'Y = {response}, X = {predictor}, P Values = {p_values}')
            p_matrix.loc[response, predictor] = np.min(p_values)
    p_matrix.columns = [var + '_x' for var in variables]
    p_matrix.index = [var + '_y' for var in variables]
    return p_matrix

# p-value table for every pair of first-differenced series.
MoneyGC1=grangers_causation_matrix(Moneyfd, variables = Moneyfd.columns) 
MoneyGC1

In [None]:
# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(20, 18))
# create the heatmap
cmap=sns.diverging_palette(10, 200, sep=20)
sns.heatmap(MoneyGC1, cmap=cmap, vmax=1,vmin=0, center=0.05, annot=True,
            square=False, linewidths=.5, cbar_kws={"shrink": .5})
#define the place ofthe tickers in each axes
ax.set_xticklabels(
    ax.get_xticklabels(),
    rotation=45,
    horizontalalignment='right'
);
ax.set_yticklabels(
    ax.get_yticklabels(),
    rotation=0,
    horizontalalignment='right'
);
plt.title('P-Value of Granger Causality with maxlag=1',)
#set axis equal for better representation
plt.axis('equal')
# show
plt.show()
f.savefig('Graphs\Granger_Causality_Matrix.png',bbox_inches='tight')

The rows are the response series (Y) and the columns are the predictor series (X).

For example, the value 0.0003 in (row 1, column 2) is the p-value of MULT_x causing GSPC_y, whereas the 0.000 in (row 2, column 1) is the p-value of GSPC_x causing MULT_y.

So, how to interpret the p-values?

If a given p-value is < significance level (0.05), then, the corresponding X series (column) causes the Y (row).

For example, the P-Value of 0.0003 at (row 1, column 2) is the p-value of the Granger causality test for MULT_x causing GSPC_y, which is less than the significance level of 0.05.

So, you can reject the null hypothesis and conclude MULT_x causes GSPC_y.

Looking at the P-Values in the above table, you can pretty much observe that all the variables (time series) in the system are interchangeably causing each other.

## Cointegration Test

Cointegration test helps to establish the presence of a statistically significant connection between two or more time series.

But, what does Cointegration mean?

To understand that, you first need to know what is ‘order of integration’ (d).

Order of integration(d) is nothing but the number of differencing required to make a non-stationary time series stationary.

Now, when you have two or more time series, and there exists a linear combination of them that has an order of integration (d) less than that of the individual series, then the collection of series is said to be cointegrated.

Ok?

When two or more time series are cointegrated, it means they have a long run, statistically significant relationship.

This is the basic premise on which Vector Autoregression (VAR) models are based. So, it’s fairly common to implement the cointegration test before starting to build VAR models.

Alright, So how to do this test?

Søren Johansen, in his 1991 paper, devised a procedure to implement the cointegration test.


In [None]:
from statsmodels.tsa.vector_ar.vecm import coint_johansen

def cointegration_test(df, alpha=0.05, det_order=1, k_ar_diff=1):
    """Perform Johansen's cointegration test and print a summary table.

    df        : pandas dataframe with one column per time series.
    alpha     : significance level; only 0.1, 0.05 and 0.01 have critical
                values (any other value raises KeyError, as before).
    det_order : deterministic term passed to ``coint_johansen``
                (-1 none, 0 constant, 1 linear trend). Default 1 keeps the
                previous behaviour.
    k_ar_diff : number of lagged differences passed to ``coint_johansen``.

    Prints one line per column with the trace statistic, the critical value
    for ``alpha`` and whether the statistic exceeds it. Returns None.
    """
    out = coint_johansen(df, det_order, k_ar_diff)
    # Column of the critical-value table for the requested confidence level;
    # rounding protects the lookup against float noise in 1 - alpha.
    crit_col = {0.9: 0, 0.95: 1, 0.99: 2}[round(1 - alpha, 2)]
    traces = out.lr1
    cvts = out.cvt[:, crit_col]
    def adjust(val, length= 10): return str(val).ljust(length)

    # Summary
    det_labels = {-1: 'no deterministic terms', 0: 'constant term', 1: 'linear trend'}
    print('Johansen Cointegratgion test for ' + det_labels.get(det_order, f'det_order={det_order}'))
    # The header reports the confidence level that was actually used.
    print(f'Name       ::  Test Stat > C({round((1 - alpha) * 100)}%)    =>   Signif  \n', '--'*20)
    for col, trace, cvt in zip(df.columns, traces, cvts):
        print(adjust(col), ':: ', adjust(round(trace,2), 9), ">", adjust(cvt, 8), ' =>  ' , trace > cvt)

# Run the Johansen test on the first-differenced series.
cointegration_test(Moneyfd)

In [None]:
 def cointegration_test(df, alpha=0.05): 
    """Perform Johansen's cointegration test (no deterministic terms) and print a summary.

    df    : pandas dataframe with one column per time series.
    alpha : significance level; only 0.1, 0.05 and 0.01 map to a critical-value column.
    """
    # NOTE(review): this redefinition replaces the cointegration_test defined in
    # the previous cell; the only differences are det_order=-1 here and the
    # printed heading. Consider a single function with a det_order parameter.
    # NOTE(review): the `def` line is indented by one space, which only runs
    # because the notebook strips leading cell indentation.
    out = coint_johansen(df,-1,1)
    # Maps the confidence level (as text) to a column of the critical-value table.
    d = {'0.9':0, '0.95':1, '0.99':2}
    traces = out.lr1
    cvts = out.cvt[:, d[str(1-alpha)]]
    def adjust(val, length= 10): return str(val).ljust(length)

    # Summary
    print('Johansen Cointegratgion test for no deterministic terms')
    # The heading always says C(95%), whatever alpha was passed.
    print('Name       ::  Test Stat > C(95%)    =>   Signif  \n', '--'*20)
    for col, trace, cvt in zip(df.columns, traces, cvts):
        print(adjust(col), ':: ', adjust(round(trace,2), 9), ">", adjust(cvt, 8), ' =>  ' , trace > cvt)

# Run the variant defined in this cell (no deterministic terms) on the
# first-differenced series.
cointegration_test(Moneyfd)

## Check for Stationarity and Make the Time Series Stationary

First, we implement a nice function (adfuller_test()) that writes out the results of the ADF test for any given time series and implement this function on each series one-by-one.

In [None]:
def adfuller_test(series, signif=0.05, name='', verbose=False):
    """Run the Augmented Dickey-Fuller test on one series and print a report.

    series  : the time series to test.
    signif  : significance level used for the stationarity decision.
    name    : label shown in the report heading.
    verbose : accepted for compatibility; not used.

    The null hypothesis is that the series has a unit root (non-stationary).
    Returns None.
    """
    r = adfuller(series, autolag='AIC')
    output = {'test_statistic':round(r[0], 4), 'pvalue':round(r[1], 4), 'n_lags':round(r[2], 4), 'n_obs':r[3]}
    # The rounded p-value is for display only.
    p_value = output['pvalue']
    def adjust(val, length= 6): return str(val).ljust(length)

    # Print Summary
    print(f'    Augmented Dickey-Fuller Test on "{name}"', "\n   ", '-'*47)
    print(f' Null Hypothesis: Data has unit root. Non-Stationary.')
    print(f' Significance Level    = {signif}')
    print(f' Test Statistic        = {output["test_statistic"]}')
    print(f' No. Lags Chosen       = {output["n_lags"]}')

    for key,val in r[4].items():
        print(f' Critical value {adjust(key)} = {round(val, 3)}')

    # Decide on the unrounded p-value: comparing the 4-decimal rounding would
    # treat e.g. 0.05004 as <= 0.05 and wrongly reject the null hypothesis.
    if r[1] <= signif:
        print(f" => P-Value = {p_value}. Rejecting Null Hypothesis.")
        print(f" => Series is Stationary.")
    else:
        print(f" => P-Value = {p_value}. Weak evidence to reject the Null Hypothesis.")
        print(f" => Series is Non-Stationary.")

In [None]:
# Run the ADF report for every first-differenced series.
# DataFrame.iteritems() was removed in pandas 2.0; items() yields the same
# (column name, Series) pairs on old and new pandas versions.
for name, column in Moneyfd.items():
    adfuller_test(column, name=column.name)
    print('\n')

## How to Select the Order (P) of VAR model

To select the right order of the VAR model, we iteratively fit increasing orders of VAR model and pick the order that gives a model with least AIC.

Though the usual practice is to look at the AIC, you can also check other best fit comparison estimates of BIC, FPE and HQIC.

In [None]:
# Fit VAR models with lag orders 1..9 and record the information criteria.
model = VAR(Moneyfd)
# The lists are indexed by lag order. Slot 0 is never fitted, so it holds NaN
# rather than 0: a 0 would later be plotted as if it were a real criterion
# value at lag 0, while matplotlib simply skips NaN.
AICC=[np.nan]*10
BICC=[np.nan]*10
FPEE=[np.nan]*10
HQICC=[np.nan]*10
for i in range(1, 10):
    result = model.fit(i)
    print('Lag Order =', i)
    AICC[i]=result.aic
    print('AIC : ', result.aic)
    BICC[i]=result.bic
    print('BIC : ', result.bic)
    FPEE[i]=result.fpe
    print('FPE : ', result.fpe)
    HQICC[i]=result.hqic
    print('HQIC: ', result.hqic, '\n')

In [None]:
# One panel per information criterion; the x position is the lag order.
fig, ax = plt.subplots(ncols=2,nrows=2,figsize = (20,10))
ax[0,0].plot(AICC)
ax[0,1].plot(BICC)
ax[1,0].plot(FPEE)
ax[1,1].plot(HQICC)
# define the axes properties and the title of the graph
ax[0,0].set(xlabel='time (Lag)', ylabel='AIC Value',
       title='AIC Criterion')
ax[0,1].set(xlabel='time (Lag)', ylabel='BIC Value',
       title='BIC Criterion')
ax[1,0].set(xlabel='time (Lag)', ylabel='FPE Value x 10^{-13}',
       title='FPE Criterion')
ax[1,1].set(xlabel='time (Lag)', ylabel='HQIC Value',
       title='HQIC Criterion')
#introduce grid on the plot
for panel in ax.flatten():
    panel.grid()
#Save the plot
# os.path.join avoids the invalid '\A' escape sequence and puts the file inside
# the Graphs directory on every operating system, not only on Windows.
fig.savefig(os.path.join('Graphs','AIC_BIC_Critiria.png'),bbox_inches='tight')

In the above output, the AIC drops to its lowest value at lag 2, then increases at lag 3 and then continuously drops further.

Let’s go with the lag 2 model, which is the order fitted in the next section.

## VAR Model of Selected Order(p)

In [None]:
# Fit the VAR with lag order 2 and show the estimation summary.
model_fitted = model.fit(2)
model_fitted.summary()

In [None]:
# Plot the series of the VAR model fitted above (it was fitted with lag order 2).
model_fitted.plot()
plt.show()

In [None]:
# Impulse response analysis over 20 periods.
irf=model_fitted.irf(20)

In [None]:
# Non-orthogonalised impulse responses for every impulse/response pair.
fig=irf.plot(orth=False,subplot_params={'fontsize':9})
plt.show()
# os.path.join avoids the invalid '\I' escape sequence and puts the file inside
# the Graphs directory on every operating system, not only on Windows.
fig.savefig(os.path.join('Graphs','ImpulseResponce.png'))

In [None]:
# Individual impulse-response plots, each saved to its own file.
# NOTE(review): 'MacroEco', 'Uncertainty', 'International' and 'UNRATE' are not
# among the columns selected into Moneyfd (SP500, CPILFESL, T10Y3M, VIX, INDPRO,
# EXCSRESNS, MULT, DCOILWTICO), on which the VAR is fitted -- these names look
# like leftovers from another variable set and need to be confirmed.
# NOTE(review): fig3 and fig4 use the same impulse/response pair, and the file
# names (Int-/Liq-/Unc-/Macro) do not match the pairs plotted.
# NOTE(review): the 'Graphs\...' paths rely on a Windows path separator.
fig1=irf.plot(orth=False,impulse='SP500',response='MacroEco',subplot_params={'fontsize':20})
fig1.savefig('Graphs\ImpulseResponse Int-SP500.png',bbox_inches='tight')

fig2=irf.plot(orth=False,impulse='UNRATE',response='Uncertainty',subplot_params={'fontsize':20})
fig2.savefig('Graphs\ImpulseResponse Int-Macro.png',bbox_inches='tight')

fig3=irf.plot(orth=False,impulse='International',response='SP500',subplot_params={'fontsize':20})
fig3.savefig('Graphs\ImpulseResponse Int-Unc.png',bbox_inches='tight')

fig4=irf.plot(orth=False,impulse='International',response='SP500',subplot_params={'fontsize':20})
fig4.savefig('Graphs\ImpulseResponse Liq-SP500.png',bbox_inches='tight')

fig5=irf.plot(orth=False,impulse='International',response='T10Y3M',subplot_params={'fontsize':20})
fig5.savefig('Graphs\ImpulseResponse Liq-Macro.png',bbox_inches='tight')

fig6=irf.plot(orth=False,impulse='International',response='Uncertainty',subplot_params={'fontsize':20})
fig6.savefig('Graphs\ImpulseResponse Liq-Int.png',bbox_inches='tight')

fig7=irf.plot(orth=False,impulse='International',response='MacroEco',subplot_params={'fontsize':20})
fig7.savefig('Graphs\ImpulseResponse Liq-Unc.png',bbox_inches='tight')


plt.show()

## Check for Serial Correlation of Residuals (Errors) using Durbin Watson Statistic

A common way of checking for serial correlation of errors can be measured using the Durbin Watson’s Statistic.

Durbin Watson Statistic - Formula

$DW=\frac{\sum_{t=2}^{T}(e_t-e_{t-1})^2}{\sum_{t=1}^{T}e_t^2}$

The value of this statistic can vary between 0 and 4. The closer it is to 2, the less evidence there is of serial correlation. A value close to 0 indicates positive serial correlation, and a value close to 4 indicates negative serial correlation.

In [None]:
from statsmodels.stats.stattools import durbin_watson
# Durbin-Watson statistic of the fitted VAR residuals.
out = durbin_watson(model_fitted.resid)

# Print each statistic next to its column name, rounded to 2 decimals; values
# are paired with Moneyfd's columns by position.
for col, val in zip(Moneyfd.columns, out):
    print(col, ':', round(val, 2))