In [4]:
import numpy as np
import pandas as pd
from pandas.tseries.offsets import MonthEnd

import matplotlib.pyplot as plt
import pickle

import bamboolib

import os

from fredapi import Fred
# The FRED API key is read from the FRED_API_KEY environment variable so the
# credential is never stored in the notebook.
# NOTE(review): the key that used to be hard-coded on this line has been exposed
# in the notebook history and should be revoked/rotated.
fred = Fred(api_key=os.environ.get('FRED_API_KEY'))

from tqdm import tqdm

import plotly
import plotly.offline
import cufflinks as cf
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from IPython.display import HTML
from IPython.core.display import display, HTML
import copy

# Plotly settings
# Put cufflinks into offline mode for this kernel session.
cf.go_offline()
# Set cufflinks' configuration options.
# NOTE(review): offline=False here looks at odds with the go_offline() call
# just above — confirm which mode is actually intended.
cf.set_config_file(offline=False, world_readable=True)

# 1. Load datasets

In [18]:
# Paths of the processed pickles, grouped by sampling frequency.

# Monthly ('M') files
df_filename = '../../data/processed/asset_prices_M.pkl'
bf_dataset_filename = '../../data/processed/base_assets_M.pkl'
mf_filename = '../../data/processed/macro_factors_M.pkl'
kf_asof_dataset_filename = '../../data/processed/kf_asof_dataset_M.pkl'
kkt_filename = '../../data/processed/KKT_index_M.pkl'

# Weekly ('W') files
df_W_filename = '../../data/processed/asset_prices_W.pkl'
bf_W_dataset_filename = '../../data/processed/base_assets_W.pkl'
mf_W_filename = '../../data/processed/macro_factors_W.pkl'
kf_asof_W_dataset_filename = '../../data/processed/kf_asof_dataset_W.pkl'
kkt_W_filename = '../../data/processed/KKT_index_W.pkl'

In [19]:
# Frequency selector used by the loading cell: 'M' reads the *_M.pkl files,
# 'W' reads the *_W.pkl files.
_freq = 'M'

In [173]:
# Load the dataset variant selected by `_freq`.
# df / bf / mf / kkt are read with pandas; kf_asof is unpickled directly.
# NOTE(review): unpickling can execute arbitrary code — only point these paths
# at files produced by this project.
if _freq == 'M':
    df = pd.read_pickle(df_filename)
    bf = pd.read_pickle(bf_dataset_filename)
    mf = pd.read_pickle(mf_filename)
    kkt = pd.read_pickle(kkt_filename)
    with open(kf_asof_dataset_filename, 'rb') as f:
        kf_asof = pickle.load(f)
elif _freq == 'W':
    df = pd.read_pickle(df_W_filename)
    bf = pd.read_pickle(bf_W_dataset_filename)
    mf = pd.read_pickle(mf_W_filename)
    kkt = pd.read_pickle(kkt_W_filename)
    with open(kf_asof_W_dataset_filename, 'rb') as f:
        kf_asof = pickle.load(f)
else:
    # Without this branch an unsupported value would skip loading silently and
    # later cells would hit a NameError or reuse stale kernel variables.
    raise ValueError(f"Unsupported _freq {_freq!r}; expected 'M' or 'W'")

# 2. Exploratory Data Analysis and Visualization

## 1) How base asset returns are distributed

In [174]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.decomposition import PCA

- We classify dates into three categories by recession probability (`recession_prob`): `Recession` when it is <= 0.25, `Robust growth` when it is >= 0.75, and `In-between` otherwise.

In [252]:
# Cut-offs applied to kkt.recession_prob: [lower, upper].
threshold = [0.25, 0.75]

# Re-index kkt by monthly period and name the index 'date'.
# NOTE(review): freq is fixed to 'M' here even though `_freq` may be 'W' —
# confirm this cell is only meant for the monthly dataset.
kkt.index = pd.PeriodIndex(kkt.index, freq='M', name='date')

# Label every date from its recession probability; rows whose probability is
# missing match none of the masks and keep a missing `target`.
# NOTE(review): a LOW recession_prob is labelled 'Recession' and a HIGH one
# 'Robust growth'; the df_ft preview further down shows 2020-03 / 2020-04 as
# 'Robust growth' — confirm the two labels are not swapped.
_prob = kkt.recession_prob
_lo, _hi = threshold
kkt.loc[_prob <= _lo, 'target'] = 'Recession'
kkt.loc[_prob >= _hi, 'target'] = 'Robust growth'
kkt.loc[(_prob > _lo) & (_prob < _hi), 'target'] = 'In-between'

In [253]:
# kkt = kkt.drop(kkt[kkt.target==1].index, axis=0)

In [254]:
# Interactive histogram of the `target` labels assigned to kkt.
kkt.target.iplot(kind='hist', labels=True)

In [255]:
# Feature matrix: the bf rows for exactly the dates in kkt, as an independent
# array copy.
X = bf.loc[kkt.index].values.copy()

# Class labels in the same row order, re-indexed 0..n-1 (the date index is dropped).
y = kkt.target.reset_index(drop=True)

In [260]:
# Reduce the base-asset matrix X to two principal components.
pca = PCA(n_components=2)
pc = pca.fit_transform(X)

# Wrap the component scores in a frame (default 0..n-1 index, one column per component).
df_pc = pd.DataFrame(pc, columns=['PC1', 'PC2'])

In [261]:
# Attach the class labels next to the two component columns.
# PC1/PC2 are selected explicitly so the cell is idempotent: re-running it no
# longer appends a second 'target' column to df_pc.
df_pc = pd.concat([df_pc[['PC1', 'PC2']], y], axis=1)

In [301]:
# Interactive PC1-vs-PC2 scatter, grouped by the `target` column (categories='target').
df_pc.iplot(kind='scatter', theme='white', x='PC1', y='PC2', categories='target')

## 2) Relationships between base asset returns and macro factor returns

In [318]:
import plotly.express as px

In [319]:
# Index-aligned joins: macro factors are left-joined onto the base assets, then
# the result is right-joined onto kkt.target so that every labelled date is kept.
df_ft = (
    bf.merge(mf, how='left', left_index=True, right_index=True)
      .merge(kkt.target, how='right', left_index=True, right_index=True)
)

In [330]:
# Move the 'date' index into a regular column.
# The guard keeps the cell idempotent: once 'date' is already a column, a second
# reset_index() would only add a spurious 'index' column.
if 'date' not in df_ft.columns:
    df_ft = df_ft.reset_index()

In [331]:
# Preview the merged frame (pandas truncates the rendered table).
df_ft

Unnamed: 0,date,DMEQ,UST,CRE,ILB,DXY,FXCS,GOLD,ENGY,REIT,GRTH,INFL,target
0,1955-12,-0.002819,-0.005825,-0.005996,0.008962,-0.001677,-0.004004,-0.002159,-0.002159,0.194855,-0.513672,0.0,Recession
1,1956-01,-0.039236,0.000410,-0.001700,0.004580,-0.003245,-0.004687,-0.003196,-0.002054,-0.002054,-0.513672,0.0,Recession
2,1956-02,0.032254,0.005678,-0.004097,-0.000994,-0.001839,-0.003634,-0.001845,-0.001845,-0.001845,-0.256836,0.0,In-between
3,1956-03,0.065048,-0.009764,-0.006215,0.015335,-0.001655,0.002156,-0.001914,-0.001914,-0.001914,-0.256836,0.0,In-between
4,1956-04,-0.004207,-0.018364,-0.011088,0.023817,-0.001763,-0.008434,-0.002142,-0.002142,-0.002142,-0.256836,0.3,In-between
...,...,...,...,...,...,...,...,...,...,...,...,...,...
769,2020-01,-0.007341,0.030448,0.005186,-0.010904,0.009096,0.117527,0.045057,-0.170346,0.010767,0.034700,0.2,In-between
770,2020-02,-0.089644,0.029440,-0.009011,-0.017026,0.006257,0.061670,-0.003519,-0.142764,-0.084784,-0.022400,-0.2,Robust growth
771,2020-03,-0.144029,0.033896,-0.116444,-0.053702,0.007222,0.215589,-0.007451,-0.783936,-0.249508,-1.575300,-0.8,Robust growth
772,2020-04,0.103631,0.005420,0.047135,0.021966,-0.000372,-0.085269,0.066968,-0.083516,0.079711,-3.888200,-1.2,Robust growth


In [366]:
# Reshape df_ft to long form. 'date' and 'target' identify each row in all
# three frames.
_id_cols = ['date', 'target']

# df1: one row per (date, base asset) with that asset's return.
df1 = df_ft.melt(id_vars=_id_cols, value_vars=bf.columns,
                 var_name='base_asset', value_name='base_asset_rt')
# df2 / df3: the GRTH and INFL macro factors, one row per date each.
df2 = df_ft.melt(id_vars=_id_cols, value_vars='GRTH',
                 var_name='mf1', value_name='mf_grth_rt')
df3 = df_ft.melt(id_vars=_id_cols, value_vars='INFL',
                 var_name='mf2', value_name='mf_infl_rt')

In [367]:
# Index the three long frames by (date, target), in place, so they can be
# merged on their index.
for _frame in (df1, df2, df3):
    _frame.set_index(['date', 'target'], inplace=True)

In [368]:
# Outer-join the base-asset rows with the GRTH and INFL rows on the shared
# (date, target) index.
df_tidy = (
    df1.merge(df2, how='outer', left_index=True, right_index=True)
       .merge(df3, how='outer', left_index=True, right_index=True)
)

In [369]:
# Turn the (date, target) index back into regular columns.
# The guard keeps the cell idempotent: once 'date' is already a column, a second
# reset_index() would only add a spurious 'index' column.
if 'date' not in df_tidy.columns:
    df_tidy = df_tidy.reset_index()

In [370]:
# Preview the long-form frame (pandas truncates the rendered table).
df_tidy

Unnamed: 0,date,target,base_asset,base_asset_rt,mf1,mf_grth_rt,mf2,mf_infl_rt
0,1955-12,Recession,DMEQ,-0.002819,GRTH,-0.513672,INFL,0.0
1,1955-12,Recession,UST,-0.005825,GRTH,-0.513672,INFL,0.0
2,1955-12,Recession,CRE,-0.005996,GRTH,-0.513672,INFL,0.0
3,1955-12,Recession,ILB,0.008962,GRTH,-0.513672,INFL,0.0
4,1955-12,Recession,DXY,-0.001677,GRTH,-0.513672,INFL,0.0
...,...,...,...,...,...,...,...,...
6961,2020-05,Robust growth,DXY,-0.006859,GRTH,0.948300,INFL,-0.2
6962,2020-05,Robust growth,FXCS,-0.089951,GRTH,0.948300,INFL,-0.2
6963,2020-05,Robust growth,GOLD,0.025573,GRTH,0.948300,INFL,-0.2
6964,2020-05,Robust growth,ENGY,0.633219,GRTH,0.948300,INFL,-0.2


### (1) Ordinary Least Squares (OLS) Regression

`GRTH` vs Base assets

In [409]:
# GRTH return (x) against each base-asset return (y): one facet row per target
# class and one facet column per base asset, with an OLS trendline per panel.
# `labels` only maps column names to display names. The `facet_col_wrap=5` entry
# that used to sit inside that dict was a misplaced px.scatter argument and had
# no effect, so it is removed; it is not passed as a real keyword either because
# plotly forces facet_col_wrap to 0 whenever facet_row or a marginal is set.
fig = px.scatter(df_tidy,
                 title='GRTH vs Base asset returns by target',
                 labels=dict(mf_grth_rt="GRTH", base_asset_rt="Base assets", base_asset="y"),
                 x='mf_grth_rt',
                 y='base_asset_rt',
                 facet_row='target',
                 facet_col='base_asset',
                 color='target',
                 trendline='ols',
                 trendline_color_override='black',
                 marginal_x='violin',
                 marginal_y='box',
                 template='plotly_white',
                 height=800)
# Keep the fitted trendline models so they can be inspected afterwards.
results = px.get_trendline_results(fig)
fig.show()

`GRTH` vs `DMEQ`

In [407]:
# GRTH vs DMEQ, coloured by target class, with an OLS trendline and marginal
# distributions on both axes.
fig = px.scatter(
    df_ft,
    x='GRTH',
    y='DMEQ',
    color='target',
    trendline='ols',
    marginal_x='box',
    marginal_y='histogram',
    template='plotly_white',
)
# Fitted trendline models; the summary cells below read from `results`.
results = px.get_trendline_results(fig)
fig.show()

- OLS regression results for `Recession`

In [287]:
# Fit summary of the first trendline model stored for the 'Recession' class.
# NOTE(review): `results` is whatever the most recently executed plotting cell
# produced — presumably the GRTH vs DMEQ fit; run the cells in order.
results.loc[results['target'] == 'Recession', 'px_fit_results'].iloc[0].summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.063
Model:,OLS,Adj. R-squared:,0.06
Method:,Least Squares,F-statistic:,20.68
Date:,"Wed, 24 Jun 2020",Prob (F-statistic):,7.83e-06
Time:,11:00:24,Log-Likelihood:,603.05
No. Observations:,308,AIC:,-1202.0
Df Residuals:,306,BIC:,-1195.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0055,0.002,2.756,0.006,0.002,0.009
x1,0.0583,0.013,4.547,0.000,0.033,0.083

0,1,2,3
Omnibus:,44.406,Durbin-Watson:,2.191
Prob(Omnibus):,0.0,Jarque-Bera (JB):,121.688
Skew:,-0.652,Prob(JB):,3.7600000000000004e-27
Kurtosis:,5.789,Cond. No.,6.57


- OLS regression results for `Robust growth`

In [288]:
# Fit summary of the first trendline model stored for the 'Robust growth' class.
# NOTE(review): `results` is whatever the most recently executed plotting cell
# produced — presumably the GRTH vs DMEQ fit; run the cells in order.
results.loc[results['target'] == 'Robust growth', 'px_fit_results'].iloc[0].summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.076
Model:,OLS,Adj. R-squared:,0.073
Method:,Least Squares,F-statistic:,20.7
Date:,"Wed, 24 Jun 2020",Prob (F-statistic):,8.37e-06
Time:,11:01:04,Log-Likelihood:,403.96
No. Observations:,252,AIC:,-803.9
Df Residuals:,250,BIC:,-796.9
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0021,0.003,-0.698,0.486,-0.008,0.004
x1,0.0344,0.008,4.550,0.000,0.020,0.049

0,1,2,3
Omnibus:,18.651,Durbin-Watson:,1.852
Prob(Omnibus):,0.0,Jarque-Bera (JB):,59.466
Skew:,0.109,Prob(JB):,1.22e-13
Kurtosis:,5.37,Cond. No.,2.46


`INFL` vs `DMEQ`

In [411]:
# INFL vs DMEQ, coloured by target class, with an OLS trendline and marginal
# distributions on both axes.
fig = px.scatter(
    df_ft,
    x='INFL',
    y='DMEQ',
    color='target',
    trendline='ols',
    marginal_x='box',
    marginal_y='histogram',
    template='plotly_white',
)
# Replaces the previous `results` with the fits of this figure.
results = px.get_trendline_results(fig)
fig.show()

#### What if we don't classify?

In [410]:
# Pooled fit: GRTH vs DMEQ with a single OLS trendline and no split by target.
fig = px.scatter(
    df_ft,
    x='GRTH',
    y='DMEQ',
    trendline='ols',
    template='plotly_white',
)
results = px.get_trendline_results(fig)
fig.show()
# Summary of the first stored fit, displayed as the cell output.
results.px_fit_results.iloc[0].summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.067
Model:,OLS,Adj. R-squared:,0.065
Method:,Least Squares,F-statistic:,55.18
Date:,"Wed, 24 Jun 2020",Prob (F-statistic):,2.92e-13
Time:,12:57:19,Log-Likelihood:,1392.8
No. Observations:,774,AIC:,-2782.0
Df Residuals:,772,BIC:,-2772.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0030,0.001,2.113,0.035,0.000,0.006
x1,0.0402,0.005,7.428,0.000,0.030,0.051

0,1,2,3
Omnibus:,68.437,Durbin-Watson:,1.848
Prob(Omnibus):,0.0,Jarque-Bera (JB):,359.99
Skew:,-0.143,Prob(JB):,6.750000000000001e-79
Kurtosis:,6.329,Cond. No.,3.76


### (2) Non-linear models

In [291]:
# GRTH vs DMEQ with a LOWESS trendline, one facet column (and colour) per target class.
fig = px.scatter(
    df_ft,
    x='GRTH',
    y='DMEQ',
    facet_col='target',
    color='target',
    trendline='lowess',
)
results = px.get_trendline_results(fig)
fig.show()