# Method 1 - Best Minus Rest Fresh Ideas

In [1]:
%matplotlib inline
import pandas as pd
import warnings
from statsmodels import api as sm

from eptooling import crsp, epd, fs, hfu, crsp, db, bi
from eptooling.bm import bmdb, iso
from epquant.engine import weights, portfolio
from epquant.gen import returns
from epquant.perf import relative_performance

warnings.filterwarnings("ignore")

## Fetch Holdings and Benchmark Data

In [2]:
# Get HFU Data

%store -r inst_data

# Count number of positions per manager

inst_data['num_pos'] = inst_data.groupby(['datadate', 'institution_id'])[
    'fsym_id'].transform(lambda s: s.nunique())

# Remove less than 20 holdings and less than $5mm per CSP

valid_data = inst_data[inst_data['num_pos'] >= 20]
valid_data = valid_data[valid_data['public_aum'] >= 5e6]

# Get Returns for Fresh Ideas

%store -r r

# Get Adjusted Quantities

qty_pivot = pd.pivot_table(inst_data, index='datadate', columns=['institution_id', 'fsym_id'], values='quantity',
                           aggfunc='sum')
qty_pivot = qty_pivot.mul(r.splits.reindex(qty_pivot.index), level=1).fillna(0)
qty_pivot = qty_pivot - qty_pivot.shift(1)
flat_qty = pd.melt(qty_pivot.reset_index(), id_vars='datadate', value_name='qty_change')
flat_qty = flat_qty[flat_qty['qty_change'] > 0]

# Get Pos Size Change

pos_size_pivot = pd.pivot_table(inst_data, index='datadate', columns=['institution_id', 'fsym_id'], values='target',
                                aggfunc='sum')
pos_size_pivot = pos_size_pivot - pos_size_pivot.shift(1)
flat_pos_size = pd.melt(pos_size_pivot.reset_index(), id_vars='datadate', value_name='size_change')
flat_pos_size = flat_pos_size[flat_pos_size['size_change'] > 0]

# Get Benchmark Weights

bm = crsp.get_crsp_market_portfolio()

# Merge Benchmark Weights onto Holdings

valid_data = valid_data.merge(bm, how='left', on=['datadate', 'fsym_id'])

# Calculate Market Tilt

valid_data['adj_target'] = valid_data['target'] - valid_data['weight'].fillna(0)

## Calculate Best Ideas

In [3]:
# Rank every position within its (report date, manager) group by market tilt,
# largest tilt first.
# NOTE(review): rank() is called with its default tie handling, so tied top
# tilts would share a non-integer rank and never equal 1 — confirm acceptable.
tilt_by_mgr_date = valid_data.groupby(['datadate', 'institution_id'])['adj_target']
valid_data['pos_rank'] = tilt_by_mgr_date.rank(ascending=False)

# Managers whose top-ranked position also appears in both the share-increase
# and weight-increase tables (the merges join on the columns the frames share).
fresh_candidates = valid_data.merge(flat_qty).merge(flat_pos_size)
is_top_fresh = fresh_candidates['pos_rank'] == 1
fresh_idea_mgrs = (
    fresh_candidates
    .loc[is_top_fresh, ['datadate', 'institution_id']]
    .drop_duplicates()
)
fresh_idea_mgrs['fresh'] = 1

# Long leg: the rank-1 position of each fresh-idea manager.
# Short leg: every other position of those same managers.
is_best = valid_data['pos_rank'] == 1
long_ideas = valid_data[is_best].copy().merge(
    fresh_idea_mgrs, on=['datadate', 'institution_id'], how='inner')
short_ideas = valid_data[~is_best].copy().merge(
    fresh_idea_mgrs, on=['datadate', 'institution_id'], how='inner')

# Long weights: equal weight across the long rows on each report date.
longs_per_date = long_ideas.groupby('datadate')['institution_id'].transform('count')
long_ideas['target'] = 1 / longs_per_date

# Short weights: within a manager, proportional to the original weight of each
# remaining position; across managers, an equal share per date; sign flipped.
rest_weight = short_ideas.groupby(['datadate', 'institution_id'])['target'].transform('sum')
mgrs_per_date = short_ideas.groupby('datadate')['institution_id'].transform('nunique')
short_ideas['target'] = - short_ideas['target'] / rest_weight * (1 / mgrs_per_date)

# Stack both legs into the single frame consumed by the backtest.
best_ideas = pd.concat([long_ideas, short_ideas])

## Backtest Portfolio

In [4]:
w = weights.Weights(best_ideas)

%store -r r

p = portfolio.DailyPortfolio(w, r, end_date='2018-12-31')

monthly_return = p.port_return.loc['1999-07-01':].resample('M').apply(epd.prod_red)
monthly_return.name = 'port_return'

## Plot Performance

In [5]:
# Turn the monthly returns into a NAV series via the `epd` helper and plot it.
nav_curve = epd.nav(monthly_return)
nav_curve.plot()

<matplotlib.axes._subplots.AxesSubplot at 0x7f0faeb1d6d8>

In [6]:
# Roll the monthly series up to calendar years with the same reducer that
# produced the monthly figures.
by_calendar_year = monthly_return.resample('A')
by_calendar_year.apply(epd.prod_red)

datadate
1999-12-31    0.008270
2000-12-31   -0.050350
2001-12-31   -0.127074
2002-12-31    0.009549
2003-12-31   -0.015989
2004-12-31    0.017958
2005-12-31    0.050226
2006-12-31    0.010879
2007-12-31    0.078795
2008-12-31    0.011768
2009-12-31    0.024003
2010-12-31    0.007325
2011-12-31   -0.000244
2012-12-31    0.002343
2013-12-31    0.055906
2014-12-31    0.010733
2015-12-31    0.025372
2016-12-31   -0.037019
2017-12-31   -0.002389
2018-12-31    0.025876
Freq: A-DEC, Name: port_return, dtype: float64

## Fetch Factor Models

In [7]:
# Load the three factor-return tables from the `bi` helper module.
# NOTE(review): the regression summaries rendered later in this notebook for
# `fama_french_five` list mktrf/smb/hml/umd and match the four-factor output
# exactly — confirm this loader returns the intended five-factor table.
q_factor_model = bi.get_q_factor_model()
fama_french_five_factor_model = bi.get_fama_french_five_factor_model()
four_factor_model = bi.get_four_factor_model()

# Build one frame per model: portfolio return in column 0, factors after it.
q_factor, fama_french_five, four_factor = (
    pd.concat([monthly_return, factor_table], axis=1)
    for factor_table in (q_factor_model, fama_french_five_factor_model, four_factor_model)
)

# Put the intercept column at position 1 so `.iloc[:, 1:]` is the full
# regressor matrix in every regression cell.
for design in (q_factor, fama_french_five, four_factor):
    design.insert(1, 'const', 1)

# Split each frame at the crisis boundary: rows up to 2009-06-30 are "pre",
# rows from 2009-07-31 onward are "post".
pre_end, post_start = '2009-06-30', '2009-07-31'
q_factor_pre, q_factor_post = q_factor.loc[:pre_end], q_factor.loc[post_start:]
fama_french_five_pre, fama_french_five_post = (
    fama_french_five.loc[:pre_end], fama_french_five.loc[post_start:])
four_factor_pre, four_factor_post = four_factor.loc[:pre_end], four_factor.loc[post_start:]

## Full Time Period Regressions

### Q Factor Model

In [8]:
# Full sample, Q-factor frame: regress column 0 (portfolio return) on the
# constant and factor columns with the default covariance estimator.
sm.OLS(
    endog=q_factor.iloc[:, [0]],
    exog=q_factor.iloc[:, 1:],
).fit().summary()

0,1,2,3
Dep. Variable:,port_return,R-squared:,0.197
Model:,OLS,Adj. R-squared:,0.183
Method:,Least Squares,F-statistic:,14.02
Date:,"Thu, 23 Apr 2020",Prob (F-statistic):,3e-10
Time:,12:06:27,Log-Likelihood:,709.4
No. Observations:,234,AIC:,-1409.0
Df Residuals:,229,BIC:,-1392.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0002,0.001,0.231,0.817,-0.001,0.002
mktrf,-0.0401,0.022,-1.824,0.070,-0.083,0.003
me,0.0152,0.026,0.595,0.553,-0.035,0.066
ia,-0.1130,0.039,-2.904,0.004,-0.190,-0.036
roe,0.1659,0.033,4.982,0.000,0.100,0.232

0,1,2,3
Omnibus:,19.086,Durbin-Watson:,2.056
Prob(Omnibus):,0.0,Jarque-Bera (JB):,41.015
Skew:,-0.376,Prob(JB):,1.24e-09
Kurtosis:,4.908,Cond. No.,53.0


### Q Factor Model - Newey West (9 Lag)

In [9]:
# Full sample, Q-factor frame: same regression with HAC (Newey-West)
# standard errors using up to 9 lags.
sm.OLS(
    endog=q_factor.iloc[:, [0]],
    exog=q_factor.iloc[:, 1:],
).fit(cov_type='HAC', cov_kwds=dict(maxlags=9)).summary()

0,1,2,3
Dep. Variable:,port_return,R-squared:,0.197
Model:,OLS,Adj. R-squared:,0.183
Method:,Least Squares,F-statistic:,3.695
Date:,"Thu, 23 Apr 2020",Prob (F-statistic):,0.00616
Time:,12:06:27,Log-Likelihood:,709.4
No. Observations:,234,AIC:,-1409.0
Df Residuals:,229,BIC:,-1392.0
Df Model:,4,,
Covariance Type:,HAC,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.0002,0.001,0.205,0.838,-0.002,0.002
mktrf,-0.0401,0.022,-1.857,0.063,-0.082,0.002
me,0.0152,0.045,0.338,0.735,-0.073,0.104
ia,-0.1130,0.082,-1.373,0.170,-0.274,0.048
roe,0.1659,0.061,2.734,0.006,0.047,0.285

0,1,2,3
Omnibus:,19.086,Durbin-Watson:,2.056
Prob(Omnibus):,0.0,Jarque-Bera (JB):,41.015
Skew:,-0.376,Prob(JB):,1.24e-09
Kurtosis:,4.908,Cond. No.,53.0


### Q Factor Model - Newey West (18 Lag)

In [10]:
# Full sample, Q-factor frame: same regression with HAC (Newey-West)
# standard errors using up to 18 lags.
sm.OLS(
    endog=q_factor.iloc[:, [0]],
    exog=q_factor.iloc[:, 1:],
).fit(cov_type='HAC', cov_kwds=dict(maxlags=18)).summary()

0,1,2,3
Dep. Variable:,port_return,R-squared:,0.197
Model:,OLS,Adj. R-squared:,0.183
Method:,Least Squares,F-statistic:,2.905
Date:,"Thu, 23 Apr 2020",Prob (F-statistic):,0.0226
Time:,12:06:27,Log-Likelihood:,709.4
No. Observations:,234,AIC:,-1409.0
Df Residuals:,229,BIC:,-1392.0
Df Model:,4,,
Covariance Type:,HAC,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.0002,0.001,0.174,0.862,-0.002,0.002
mktrf,-0.0401,0.022,-1.834,0.067,-0.083,0.003
me,0.0152,0.044,0.343,0.731,-0.072,0.102
ia,-0.1130,0.079,-1.429,0.153,-0.268,0.042
roe,0.1659,0.065,2.570,0.010,0.039,0.292

0,1,2,3
Omnibus:,19.086,Durbin-Watson:,2.056
Prob(Omnibus):,0.0,Jarque-Bera (JB):,41.015
Skew:,-0.376,Prob(JB):,1.24e-09
Kurtosis:,4.908,Cond. No.,53.0


### Fama French Five Factor Model

In [11]:
# Full sample, `fama_french_five` frame: regress column 0 (portfolio return)
# on the constant and factor columns with the default covariance estimator.
# NOTE(review): the summary rendered for this cell shows mktrf/smb/hml/umd as
# regressors — confirm the frame really carries the five-factor set.
sm.OLS(
    endog=fama_french_five.iloc[:, [0]],
    exog=fama_french_five.iloc[:, 1:],
).fit().summary()

0,1,2,3
Dep. Variable:,port_return,R-squared:,0.257
Model:,OLS,Adj. R-squared:,0.244
Method:,Least Squares,F-statistic:,19.85
Date:,"Thu, 23 Apr 2020",Prob (F-statistic):,4.82e-14
Time:,12:06:27,Log-Likelihood:,718.59
No. Observations:,234,AIC:,-1427.0
Df Residuals:,229,BIC:,-1410.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0003,0.001,0.381,0.703,-0.001,0.002
mktrf,-0.0307,0.019,-1.575,0.117,-0.069,0.008
smb,-0.0792,0.024,-3.258,0.001,-0.127,-0.031
hml,0.0060,0.024,0.250,0.803,-0.042,0.054
umd,0.1080,0.016,6.811,0.000,0.077,0.139

0,1,2,3
Omnibus:,21.183,Durbin-Watson:,1.933
Prob(Omnibus):,0.0,Jarque-Bera (JB):,82.922
Skew:,-0.062,Prob(JB):,9.86e-19
Kurtosis:,5.914,Cond. No.,36.8


### Fama French Five Factor Model - Newey West (9 Lag)

In [12]:
# Full sample, `fama_french_five` frame: same regression with HAC
# (Newey-West) standard errors using up to 9 lags.
sm.OLS(
    endog=fama_french_five.iloc[:, [0]],
    exog=fama_french_five.iloc[:, 1:],
).fit(cov_type='HAC', cov_kwds=dict(maxlags=9)).summary()

0,1,2,3
Dep. Variable:,port_return,R-squared:,0.257
Model:,OLS,Adj. R-squared:,0.244
Method:,Least Squares,F-statistic:,3.977
Date:,"Thu, 23 Apr 2020",Prob (F-statistic):,0.00386
Time:,12:06:27,Log-Likelihood:,718.59
No. Observations:,234,AIC:,-1427.0
Df Residuals:,229,BIC:,-1410.0
Df Model:,4,,
Covariance Type:,HAC,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.0003,0.001,0.322,0.747,-0.001,0.002
mktrf,-0.0307,0.018,-1.705,0.088,-0.066,0.005
smb,-0.0792,0.032,-2.506,0.012,-0.141,-0.017
hml,0.0060,0.020,0.295,0.768,-0.034,0.046
umd,0.1080,0.037,2.956,0.003,0.036,0.180

0,1,2,3
Omnibus:,21.183,Durbin-Watson:,1.933
Prob(Omnibus):,0.0,Jarque-Bera (JB):,82.922
Skew:,-0.062,Prob(JB):,9.86e-19
Kurtosis:,5.914,Cond. No.,36.8


### Fama French Five Factor Model - Newey West (18 Lag)

In [13]:
# Full sample, `fama_french_five` frame: same regression with HAC
# (Newey-West) standard errors using up to 18 lags.
sm.OLS(
    endog=fama_french_five.iloc[:, [0]],
    exog=fama_french_five.iloc[:, 1:],
).fit(cov_type='HAC', cov_kwds=dict(maxlags=18)).summary()

0,1,2,3
Dep. Variable:,port_return,R-squared:,0.257
Model:,OLS,Adj. R-squared:,0.244
Method:,Least Squares,F-statistic:,3.638
Date:,"Thu, 23 Apr 2020",Prob (F-statistic):,0.00677
Time:,12:06:27,Log-Likelihood:,718.59
No. Observations:,234,AIC:,-1427.0
Df Residuals:,229,BIC:,-1410.0
Df Model:,4,,
Covariance Type:,HAC,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.0003,0.001,0.281,0.779,-0.002,0.002
mktrf,-0.0307,0.018,-1.736,0.083,-0.065,0.004
smb,-0.0792,0.026,-3.088,0.002,-0.129,-0.029
hml,0.0060,0.017,0.361,0.718,-0.027,0.039
umd,0.1080,0.036,2.992,0.003,0.037,0.179

0,1,2,3
Omnibus:,21.183,Durbin-Watson:,1.933
Prob(Omnibus):,0.0,Jarque-Bera (JB):,82.922
Skew:,-0.062,Prob(JB):,9.86e-19
Kurtosis:,5.914,Cond. No.,36.8


### Carhart Four Factor Model

In [14]:
# Full sample, four-factor frame: regress column 0 (portfolio return) on the
# constant and factor columns with the default covariance estimator.
sm.OLS(
    endog=four_factor.iloc[:, [0]],
    exog=four_factor.iloc[:, 1:],
).fit().summary()

0,1,2,3
Dep. Variable:,port_return,R-squared:,0.257
Model:,OLS,Adj. R-squared:,0.244
Method:,Least Squares,F-statistic:,19.85
Date:,"Thu, 23 Apr 2020",Prob (F-statistic):,4.82e-14
Time:,12:06:27,Log-Likelihood:,718.59
No. Observations:,234,AIC:,-1427.0
Df Residuals:,229,BIC:,-1410.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0003,0.001,0.381,0.703,-0.001,0.002
mktrf,-0.0307,0.019,-1.575,0.117,-0.069,0.008
smb,-0.0792,0.024,-3.258,0.001,-0.127,-0.031
hml,0.0060,0.024,0.250,0.803,-0.042,0.054
umd,0.1080,0.016,6.811,0.000,0.077,0.139

0,1,2,3
Omnibus:,21.183,Durbin-Watson:,1.933
Prob(Omnibus):,0.0,Jarque-Bera (JB):,82.922
Skew:,-0.062,Prob(JB):,9.86e-19
Kurtosis:,5.914,Cond. No.,36.8


### Carhart Four Factor Model - Newey West (9 Lag)

In [15]:
# Full sample, four-factor frame: same regression with HAC (Newey-West)
# standard errors using up to 9 lags.
sm.OLS(
    endog=four_factor.iloc[:, [0]],
    exog=four_factor.iloc[:, 1:],
).fit(cov_type='HAC', cov_kwds=dict(maxlags=9)).summary()

0,1,2,3
Dep. Variable:,port_return,R-squared:,0.257
Model:,OLS,Adj. R-squared:,0.244
Method:,Least Squares,F-statistic:,3.977
Date:,"Thu, 23 Apr 2020",Prob (F-statistic):,0.00386
Time:,12:06:28,Log-Likelihood:,718.59
No. Observations:,234,AIC:,-1427.0
Df Residuals:,229,BIC:,-1410.0
Df Model:,4,,
Covariance Type:,HAC,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.0003,0.001,0.322,0.747,-0.001,0.002
mktrf,-0.0307,0.018,-1.705,0.088,-0.066,0.005
smb,-0.0792,0.032,-2.506,0.012,-0.141,-0.017
hml,0.0060,0.020,0.295,0.768,-0.034,0.046
umd,0.1080,0.037,2.956,0.003,0.036,0.180

0,1,2,3
Omnibus:,21.183,Durbin-Watson:,1.933
Prob(Omnibus):,0.0,Jarque-Bera (JB):,82.922
Skew:,-0.062,Prob(JB):,9.86e-19
Kurtosis:,5.914,Cond. No.,36.8


### Carhart Four Factor Model - Newey West (18 Lag)

In [16]:
# Full sample, four-factor frame: same regression with HAC (Newey-West)
# standard errors using up to 18 lags.
sm.OLS(
    endog=four_factor.iloc[:, [0]],
    exog=four_factor.iloc[:, 1:],
).fit(cov_type='HAC', cov_kwds=dict(maxlags=18)).summary()

0,1,2,3
Dep. Variable:,port_return,R-squared:,0.257
Model:,OLS,Adj. R-squared:,0.244
Method:,Least Squares,F-statistic:,3.638
Date:,"Thu, 23 Apr 2020",Prob (F-statistic):,0.00677
Time:,12:06:28,Log-Likelihood:,718.59
No. Observations:,234,AIC:,-1427.0
Df Residuals:,229,BIC:,-1410.0
Df Model:,4,,
Covariance Type:,HAC,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.0003,0.001,0.281,0.779,-0.002,0.002
mktrf,-0.0307,0.018,-1.736,0.083,-0.065,0.004
smb,-0.0792,0.026,-3.088,0.002,-0.129,-0.029
hml,0.0060,0.017,0.361,0.718,-0.027,0.039
umd,0.1080,0.036,2.992,0.003,0.037,0.179

0,1,2,3
Omnibus:,21.183,Durbin-Watson:,1.933
Prob(Omnibus):,0.0,Jarque-Bera (JB):,82.922
Skew:,-0.062,Prob(JB):,9.86e-19
Kurtosis:,5.914,Cond. No.,36.8


## Pre-Crisis Regressions

### Q Factor Model

In [17]:
# Pre-crisis slice, Q-factor frame: regress column 0 (portfolio return) on
# the constant and factor columns with the default covariance estimator.
sm.OLS(
    endog=q_factor_pre.iloc[:, [0]],
    exog=q_factor_pre.iloc[:, 1:],
).fit().summary()

0,1,2,3
Dep. Variable:,port_return,R-squared:,0.272
Model:,OLS,Adj. R-squared:,0.247
Method:,Least Squares,F-statistic:,10.76
Date:,"Thu, 23 Apr 2020",Prob (F-statistic):,1.92e-07
Time:,12:06:28,Log-Likelihood:,340.75
No. Observations:,120,AIC:,-671.5
Df Residuals:,115,BIC:,-657.6
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0011,0.001,-0.777,0.439,-0.004,0.002
mktrf,-0.0152,0.037,-0.409,0.683,-0.089,0.059
me,0.0570,0.037,1.549,0.124,-0.016,0.130
ia,-0.1677,0.058,-2.912,0.004,-0.282,-0.054
roe,0.2501,0.051,4.906,0.000,0.149,0.351

0,1,2,3
Omnibus:,4.267,Durbin-Watson:,1.99
Prob(Omnibus):,0.118,Jarque-Bera (JB):,4.915
Skew:,-0.155,Prob(JB):,0.0856
Kurtosis:,3.942,Cond. No.,47.0


### Q Factor Model - Newey West (9 Lag)

In [18]:
# Pre-crisis slice, Q-factor frame: same regression with HAC (Newey-West)
# standard errors using up to 9 lags.
sm.OLS(
    endog=q_factor_pre.iloc[:, [0]],
    exog=q_factor_pre.iloc[:, 1:],
).fit(cov_type='HAC', cov_kwds=dict(maxlags=9)).summary()

0,1,2,3
Dep. Variable:,port_return,R-squared:,0.272
Model:,OLS,Adj. R-squared:,0.247
Method:,Least Squares,F-statistic:,3.992
Date:,"Thu, 23 Apr 2020",Prob (F-statistic):,0.00454
Time:,12:06:28,Log-Likelihood:,340.75
No. Observations:,120,AIC:,-671.5
Df Residuals:,115,BIC:,-657.6
Df Model:,4,,
Covariance Type:,HAC,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-0.0011,0.002,-0.666,0.506,-0.004,0.002
mktrf,-0.0152,0.031,-0.494,0.621,-0.076,0.045
me,0.0570,0.051,1.118,0.264,-0.043,0.157
ia,-0.1677,0.106,-1.583,0.113,-0.375,0.040
roe,0.2501,0.071,3.500,0.000,0.110,0.390

0,1,2,3
Omnibus:,4.267,Durbin-Watson:,1.99
Prob(Omnibus):,0.118,Jarque-Bera (JB):,4.915
Skew:,-0.155,Prob(JB):,0.0856
Kurtosis:,3.942,Cond. No.,47.0


### Q Factor Model - Newey West (18 Lag)

In [19]:
# Pre-crisis slice, Q-factor frame: same regression with HAC (Newey-West)
# standard errors using up to 18 lags.
sm.OLS(
    endog=q_factor_pre.iloc[:, [0]],
    exog=q_factor_pre.iloc[:, 1:],
).fit(cov_type='HAC', cov_kwds=dict(maxlags=18)).summary()

0,1,2,3
Dep. Variable:,port_return,R-squared:,0.272
Model:,OLS,Adj. R-squared:,0.247
Method:,Least Squares,F-statistic:,3.955
Date:,"Thu, 23 Apr 2020",Prob (F-statistic):,0.00481
Time:,12:06:28,Log-Likelihood:,340.75
No. Observations:,120,AIC:,-671.5
Df Residuals:,115,BIC:,-657.6
Df Model:,4,,
Covariance Type:,HAC,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-0.0011,0.002,-0.560,0.575,-0.005,0.003
mktrf,-0.0152,0.029,-0.522,0.602,-0.072,0.042
me,0.0570,0.051,1.126,0.260,-0.042,0.156
ia,-0.1677,0.100,-1.673,0.094,-0.364,0.029
roe,0.2501,0.068,3.679,0.000,0.117,0.383

0,1,2,3
Omnibus:,4.267,Durbin-Watson:,1.99
Prob(Omnibus):,0.118,Jarque-Bera (JB):,4.915
Skew:,-0.155,Prob(JB):,0.0856
Kurtosis:,3.942,Cond. No.,47.0


### Fama French Five Factor Model

In [20]:
# Pre-crisis slice, `fama_french_five` frame: regress column 0 (portfolio
# return) on the constant and factor columns, default covariance estimator.
sm.OLS(
    endog=fama_french_five_pre.iloc[:, [0]],
    exog=fama_french_five_pre.iloc[:, 1:],
).fit().summary()

0,1,2,3
Dep. Variable:,port_return,R-squared:,0.267
Model:,OLS,Adj. R-squared:,0.242
Method:,Least Squares,F-statistic:,10.48
Date:,"Thu, 23 Apr 2020",Prob (F-statistic):,2.84e-07
Time:,12:06:28,Log-Likelihood:,340.33
No. Observations:,120,AIC:,-670.7
Df Residuals:,115,BIC:,-656.7
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0004,0.001,-0.259,0.796,-0.003,0.002
mktrf,-0.0289,0.033,-0.879,0.381,-0.094,0.036
smb,-0.0798,0.037,-2.128,0.035,-0.154,-0.006
hml,0.0016,0.038,0.043,0.966,-0.073,0.076
umd,0.1156,0.023,4.992,0.000,0.070,0.161

0,1,2,3
Omnibus:,6.546,Durbin-Watson:,1.864
Prob(Omnibus):,0.038,Jarque-Bera (JB):,10.811
Skew:,0.1,Prob(JB):,0.00449
Kurtosis:,4.457,Cond. No.,33.2


### Fama French Five Factor Model - Newey West (9 Lag)

In [21]:
# Pre-crisis slice, `fama_french_five` frame: same regression with HAC
# (Newey-West) standard errors using up to 9 lags.
sm.OLS(
    endog=fama_french_five_pre.iloc[:, [0]],
    exog=fama_french_five_pre.iloc[:, 1:],
).fit(cov_type='HAC', cov_kwds=dict(maxlags=9)).summary()

0,1,2,3
Dep. Variable:,port_return,R-squared:,0.267
Model:,OLS,Adj. R-squared:,0.242
Method:,Least Squares,F-statistic:,2.817
Date:,"Thu, 23 Apr 2020",Prob (F-statistic):,0.0284
Time:,12:06:28,Log-Likelihood:,340.33
No. Observations:,120,AIC:,-670.7
Df Residuals:,115,BIC:,-656.7
Df Model:,4,,
Covariance Type:,HAC,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-0.0004,0.002,-0.235,0.814,-0.003,0.003
mktrf,-0.0289,0.028,-1.040,0.298,-0.083,0.026
smb,-0.0798,0.043,-1.857,0.063,-0.164,0.004
hml,0.0016,0.028,0.057,0.954,-0.053,0.056
umd,0.1156,0.044,2.630,0.009,0.029,0.202

0,1,2,3
Omnibus:,6.546,Durbin-Watson:,1.864
Prob(Omnibus):,0.038,Jarque-Bera (JB):,10.811
Skew:,0.1,Prob(JB):,0.00449
Kurtosis:,4.457,Cond. No.,33.2


### Fama French Five Factor Model - Newey West (18 Lag)

In [22]:
# Pre-crisis slice, `fama_french_five` frame: same regression with HAC
# (Newey-West) standard errors using up to 18 lags.
sm.OLS(
    endog=fama_french_five_pre.iloc[:, [0]],
    exog=fama_french_five_pre.iloc[:, 1:],
).fit(cov_type='HAC', cov_kwds=dict(maxlags=18)).summary()

0,1,2,3
Dep. Variable:,port_return,R-squared:,0.267
Model:,OLS,Adj. R-squared:,0.242
Method:,Least Squares,F-statistic:,3.413
Date:,"Thu, 23 Apr 2020",Prob (F-statistic):,0.0112
Time:,12:06:28,Log-Likelihood:,340.33
No. Observations:,120,AIC:,-670.7
Df Residuals:,115,BIC:,-656.7
Df Model:,4,,
Covariance Type:,HAC,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-0.0004,0.002,-0.199,0.842,-0.004,0.003
mktrf,-0.0289,0.027,-1.053,0.292,-0.083,0.025
smb,-0.0798,0.035,-2.277,0.023,-0.148,-0.011
hml,0.0016,0.023,0.070,0.945,-0.043,0.047
umd,0.1156,0.041,2.810,0.005,0.035,0.196

0,1,2,3
Omnibus:,6.546,Durbin-Watson:,1.864
Prob(Omnibus):,0.038,Jarque-Bera (JB):,10.811
Skew:,0.1,Prob(JB):,0.00449
Kurtosis:,4.457,Cond. No.,33.2


### Carhart Four Factor Model

In [23]:
# Pre-crisis slice, four-factor frame: regress column 0 (portfolio return) on
# the constant and factor columns with the default covariance estimator.
sm.OLS(
    endog=four_factor_pre.iloc[:, [0]],
    exog=four_factor_pre.iloc[:, 1:],
).fit().summary()

0,1,2,3
Dep. Variable:,port_return,R-squared:,0.267
Model:,OLS,Adj. R-squared:,0.242
Method:,Least Squares,F-statistic:,10.48
Date:,"Thu, 23 Apr 2020",Prob (F-statistic):,2.84e-07
Time:,12:06:28,Log-Likelihood:,340.33
No. Observations:,120,AIC:,-670.7
Df Residuals:,115,BIC:,-656.7
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0004,0.001,-0.259,0.796,-0.003,0.002
mktrf,-0.0289,0.033,-0.879,0.381,-0.094,0.036
smb,-0.0798,0.037,-2.128,0.035,-0.154,-0.006
hml,0.0016,0.038,0.043,0.966,-0.073,0.076
umd,0.1156,0.023,4.992,0.000,0.070,0.161

0,1,2,3
Omnibus:,6.546,Durbin-Watson:,1.864
Prob(Omnibus):,0.038,Jarque-Bera (JB):,10.811
Skew:,0.1,Prob(JB):,0.00449
Kurtosis:,4.457,Cond. No.,33.2


### Carhart Four Factor Model - Newey West (9 Lag)

In [24]:
# Pre-crisis slice, four-factor frame: same regression with HAC (Newey-West)
# standard errors using up to 9 lags.
sm.OLS(
    endog=four_factor_pre.iloc[:, [0]],
    exog=four_factor_pre.iloc[:, 1:],
).fit(cov_type='HAC', cov_kwds=dict(maxlags=9)).summary()

0,1,2,3
Dep. Variable:,port_return,R-squared:,0.267
Model:,OLS,Adj. R-squared:,0.242
Method:,Least Squares,F-statistic:,2.817
Date:,"Thu, 23 Apr 2020",Prob (F-statistic):,0.0284
Time:,12:06:28,Log-Likelihood:,340.33
No. Observations:,120,AIC:,-670.7
Df Residuals:,115,BIC:,-656.7
Df Model:,4,,
Covariance Type:,HAC,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-0.0004,0.002,-0.235,0.814,-0.003,0.003
mktrf,-0.0289,0.028,-1.040,0.298,-0.083,0.026
smb,-0.0798,0.043,-1.857,0.063,-0.164,0.004
hml,0.0016,0.028,0.057,0.954,-0.053,0.056
umd,0.1156,0.044,2.630,0.009,0.029,0.202

0,1,2,3
Omnibus:,6.546,Durbin-Watson:,1.864
Prob(Omnibus):,0.038,Jarque-Bera (JB):,10.811
Skew:,0.1,Prob(JB):,0.00449
Kurtosis:,4.457,Cond. No.,33.2


### Carhart Four Factor Model - Newey West (18 Lag)

In [25]:
# Pre-crisis slice, four-factor frame: same regression with HAC (Newey-West)
# standard errors using up to 18 lags.
sm.OLS(
    endog=four_factor_pre.iloc[:, [0]],
    exog=four_factor_pre.iloc[:, 1:],
).fit(cov_type='HAC', cov_kwds=dict(maxlags=18)).summary()

0,1,2,3
Dep. Variable:,port_return,R-squared:,0.267
Model:,OLS,Adj. R-squared:,0.242
Method:,Least Squares,F-statistic:,3.413
Date:,"Thu, 23 Apr 2020",Prob (F-statistic):,0.0112
Time:,12:06:28,Log-Likelihood:,340.33
No. Observations:,120,AIC:,-670.7
Df Residuals:,115,BIC:,-656.7
Df Model:,4,,
Covariance Type:,HAC,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-0.0004,0.002,-0.199,0.842,-0.004,0.003
mktrf,-0.0289,0.027,-1.053,0.292,-0.083,0.025
smb,-0.0798,0.035,-2.277,0.023,-0.148,-0.011
hml,0.0016,0.023,0.070,0.945,-0.043,0.047
umd,0.1156,0.041,2.810,0.005,0.035,0.196

0,1,2,3
Omnibus:,6.546,Durbin-Watson:,1.864
Prob(Omnibus):,0.038,Jarque-Bera (JB):,10.811
Skew:,0.1,Prob(JB):,0.00449
Kurtosis:,4.457,Cond. No.,33.2


## Post-Crisis Regressions

### Q Factor Model

In [26]:
# Post-crisis slice, Q-factor frame: regress column 0 (portfolio return) on
# the constant and factor columns with the default covariance estimator.
sm.OLS(
    endog=q_factor_post.iloc[:, [0]],
    exog=q_factor_post.iloc[:, 1:],
).fit().summary()

0,1,2,3
Dep. Variable:,port_return,R-squared:,0.179
Model:,OLS,Adj. R-squared:,0.149
Method:,Least Squares,F-statistic:,5.933
Date:,"Thu, 23 Apr 2020",Prob (F-statistic):,0.000234
Time:,12:06:28,Log-Likelihood:,405.04
No. Observations:,114,AIC:,-800.1
Df Residuals:,109,BIC:,-786.4
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0015,0.001,2.061,0.042,5.59e-05,0.003
mktrf,-0.0483,0.021,-2.316,0.022,-0.090,-0.007
me,-0.0881,0.032,-2.738,0.007,-0.152,-0.024
ia,-0.0221,0.045,-0.494,0.622,-0.110,0.066
roe,-0.0146,0.039,-0.372,0.711,-0.092,0.063

0,1,2,3
Omnibus:,2.131,Durbin-Watson:,2.039
Prob(Omnibus):,0.345,Jarque-Bera (JB):,2.17
Skew:,-0.301,Prob(JB):,0.338
Kurtosis:,2.692,Cond. No.,67.7


### Q Factor Model - Newey West (9 Lag)

In [27]:
# Post-crisis slice, Q-factor frame: same regression with HAC (Newey-West)
# standard errors using up to 9 lags.
sm.OLS(
    endog=q_factor_post.iloc[:, [0]],
    exog=q_factor_post.iloc[:, 1:],
).fit(cov_type='HAC', cov_kwds=dict(maxlags=9)).summary()

0,1,2,3
Dep. Variable:,port_return,R-squared:,0.179
Model:,OLS,Adj. R-squared:,0.149
Method:,Least Squares,F-statistic:,19.34
Date:,"Thu, 23 Apr 2020",Prob (F-statistic):,4.79e-12
Time:,12:06:28,Log-Likelihood:,405.04
No. Observations:,114,AIC:,-800.1
Df Residuals:,109,BIC:,-786.4
Df Model:,4,,
Covariance Type:,HAC,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.0015,0.001,2.372,0.018,0.000,0.003
mktrf,-0.0483,0.019,-2.529,0.011,-0.086,-0.011
me,-0.0881,0.036,-2.474,0.013,-0.158,-0.018
ia,-0.0221,0.043,-0.508,0.611,-0.107,0.063
roe,-0.0146,0.041,-0.357,0.721,-0.095,0.066

0,1,2,3
Omnibus:,2.131,Durbin-Watson:,2.039
Prob(Omnibus):,0.345,Jarque-Bera (JB):,2.17
Skew:,-0.301,Prob(JB):,0.338
Kurtosis:,2.692,Cond. No.,67.7


### Q Factor Model - Newey West (18 Lag)

In [28]:
# Post-crisis slice, Q-factor frame: same regression with HAC (Newey-West)
# standard errors using up to 18 lags.
sm.OLS(
    endog=q_factor_post.iloc[:, [0]],
    exog=q_factor_post.iloc[:, 1:],
).fit(cov_type='HAC', cov_kwds=dict(maxlags=18)).summary()

0,1,2,3
Dep. Variable:,port_return,R-squared:,0.179
Model:,OLS,Adj. R-squared:,0.149
Method:,Least Squares,F-statistic:,30.65
Date:,"Thu, 23 Apr 2020",Prob (F-statistic):,4.33e-17
Time:,12:06:28,Log-Likelihood:,405.04
No. Observations:,114,AIC:,-800.1
Df Residuals:,109,BIC:,-786.4
Df Model:,4,,
Covariance Type:,HAC,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.0015,0.001,2.635,0.008,0.000,0.003
mktrf,-0.0483,0.018,-2.625,0.009,-0.084,-0.012
me,-0.0881,0.037,-2.384,0.017,-0.160,-0.016
ia,-0.0221,0.047,-0.467,0.640,-0.115,0.070
roe,-0.0146,0.042,-0.347,0.729,-0.097,0.068

0,1,2,3
Omnibus:,2.131,Durbin-Watson:,2.039
Prob(Omnibus):,0.345,Jarque-Bera (JB):,2.17
Skew:,-0.301,Prob(JB):,0.338
Kurtosis:,2.692,Cond. No.,67.7


### Fama French Five Factor Model

In [29]:
# Post-crisis slice, `fama_french_five` frame: regress column 0 (portfolio
# return) on the constant and factor columns, default covariance estimator.
sm.OLS(
    endog=fama_french_five_post.iloc[:, [0]],
    exog=fama_french_five_post.iloc[:, 1:],
).fit().summary()

0,1,2,3
Dep. Variable:,port_return,R-squared:,0.245
Model:,OLS,Adj. R-squared:,0.218
Method:,Least Squares,F-statistic:,8.862
Date:,"Thu, 23 Apr 2020",Prob (F-statistic):,3.11e-06
Time:,12:06:28,Log-Likelihood:,409.86
No. Observations:,114,AIC:,-809.7
Df Residuals:,109,BIC:,-796.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0011,0.001,1.673,0.097,-0.000,0.002
mktrf,-0.0390,0.019,-2.058,0.042,-0.076,-0.001
smb,-0.0848,0.030,-2.872,0.005,-0.143,-0.026
hml,0.0149,0.030,0.489,0.626,-0.045,0.075
umd,0.0747,0.023,3.259,0.001,0.029,0.120

0,1,2,3
Omnibus:,1.213,Durbin-Watson:,2.061
Prob(Omnibus):,0.545,Jarque-Bera (JB):,1.302
Skew:,-0.222,Prob(JB):,0.522
Kurtosis:,2.724,Cond. No.,52.2


### Fama French Five Factor Model - Newey West (9 Lag)

In [30]:
# Post-crisis slice, `fama_french_five` frame: same regression with HAC
# (Newey-West) standard errors using up to 9 lags.
sm.OLS(
    endog=fama_french_five_post.iloc[:, [0]],
    exog=fama_french_five_post.iloc[:, 1:],
).fit(cov_type='HAC', cov_kwds=dict(maxlags=9)).summary()

0,1,2,3
Dep. Variable:,port_return,R-squared:,0.245
Model:,OLS,Adj. R-squared:,0.218
Method:,Least Squares,F-statistic:,27.17
Date:,"Thu, 23 Apr 2020",Prob (F-statistic):,1.19e-15
Time:,12:06:28,Log-Likelihood:,409.86
No. Observations:,114,AIC:,-809.7
Df Residuals:,109,BIC:,-796.0
Df Model:,4,,
Covariance Type:,HAC,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.0011,0.001,1.833,0.067,-7.8e-05,0.002
mktrf,-0.0390,0.014,-2.822,0.005,-0.066,-0.012
smb,-0.0848,0.026,-3.248,0.001,-0.136,-0.034
hml,0.0149,0.028,0.522,0.602,-0.041,0.071
umd,0.0747,0.020,3.813,0.000,0.036,0.113

0,1,2,3
Omnibus:,1.213,Durbin-Watson:,2.061
Prob(Omnibus):,0.545,Jarque-Bera (JB):,1.302
Skew:,-0.222,Prob(JB):,0.522
Kurtosis:,2.724,Cond. No.,52.2


### Fama French Five Factor Model - Newey West (18 Lag)

In [31]:
# Post-crisis slice, `fama_french_five` frame: same regression with HAC
# (Newey-West) standard errors using up to 18 lags.
sm.OLS(
    endog=fama_french_five_post.iloc[:, [0]],
    exog=fama_french_five_post.iloc[:, 1:],
).fit(cov_type='HAC', cov_kwds=dict(maxlags=18)).summary()

0,1,2,3
Dep. Variable:,port_return,R-squared:,0.245
Model:,OLS,Adj. R-squared:,0.218
Method:,Least Squares,F-statistic:,29.44
Date:,"Thu, 23 Apr 2020",Prob (F-statistic):,1.35e-16
Time:,12:06:28,Log-Likelihood:,409.86
No. Observations:,114,AIC:,-809.7
Df Residuals:,109,BIC:,-796.0
Df Model:,4,,
Covariance Type:,HAC,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.0011,0.001,1.953,0.051,-3.95e-06,0.002
mktrf,-0.0390,0.013,-3.113,0.002,-0.063,-0.014
smb,-0.0848,0.030,-2.845,0.004,-0.143,-0.026
hml,0.0149,0.028,0.523,0.601,-0.041,0.071
umd,0.0747,0.020,3.673,0.000,0.035,0.115

0,1,2,3
Omnibus:,1.213,Durbin-Watson:,2.061
Prob(Omnibus):,0.545,Jarque-Bera (JB):,1.302
Skew:,-0.222,Prob(JB):,0.522
Kurtosis:,2.724,Cond. No.,52.2


### Carhart Four Factor Model

In [32]:
# Post-crisis slice, four-factor frame: regress column 0 (portfolio return)
# on the constant and factor columns with the default covariance estimator.
sm.OLS(
    endog=four_factor_post.iloc[:, [0]],
    exog=four_factor_post.iloc[:, 1:],
).fit().summary()

0,1,2,3
Dep. Variable:,port_return,R-squared:,0.245
Model:,OLS,Adj. R-squared:,0.218
Method:,Least Squares,F-statistic:,8.862
Date:,"Thu, 23 Apr 2020",Prob (F-statistic):,3.11e-06
Time:,12:06:28,Log-Likelihood:,409.86
No. Observations:,114,AIC:,-809.7
Df Residuals:,109,BIC:,-796.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0011,0.001,1.673,0.097,-0.000,0.002
mktrf,-0.0390,0.019,-2.058,0.042,-0.076,-0.001
smb,-0.0848,0.030,-2.872,0.005,-0.143,-0.026
hml,0.0149,0.030,0.489,0.626,-0.045,0.075
umd,0.0747,0.023,3.259,0.001,0.029,0.120

0,1,2,3
Omnibus:,1.213,Durbin-Watson:,2.061
Prob(Omnibus):,0.545,Jarque-Bera (JB):,1.302
Skew:,-0.222,Prob(JB):,0.522
Kurtosis:,2.724,Cond. No.,52.2


### Carhart Four Factor Model - Newey West (9 Lag)

In [33]:
# Post-crisis slice, four-factor frame: same regression with HAC
# (Newey-West) standard errors using up to 9 lags.
sm.OLS(
    endog=four_factor_post.iloc[:, [0]],
    exog=four_factor_post.iloc[:, 1:],
).fit(cov_type='HAC', cov_kwds=dict(maxlags=9)).summary()

0,1,2,3
Dep. Variable:,port_return,R-squared:,0.245
Model:,OLS,Adj. R-squared:,0.218
Method:,Least Squares,F-statistic:,27.17
Date:,"Thu, 23 Apr 2020",Prob (F-statistic):,1.19e-15
Time:,12:06:28,Log-Likelihood:,409.86
No. Observations:,114,AIC:,-809.7
Df Residuals:,109,BIC:,-796.0
Df Model:,4,,
Covariance Type:,HAC,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.0011,0.001,1.833,0.067,-7.8e-05,0.002
mktrf,-0.0390,0.014,-2.822,0.005,-0.066,-0.012
smb,-0.0848,0.026,-3.248,0.001,-0.136,-0.034
hml,0.0149,0.028,0.522,0.602,-0.041,0.071
umd,0.0747,0.020,3.813,0.000,0.036,0.113

0,1,2,3
Omnibus:,1.213,Durbin-Watson:,2.061
Prob(Omnibus):,0.545,Jarque-Bera (JB):,1.302
Skew:,-0.222,Prob(JB):,0.522
Kurtosis:,2.724,Cond. No.,52.2


### Carhart Four Factor Model - Newey West (18 Lag)

In [34]:
# Post-crisis slice, four-factor frame: same regression with HAC
# (Newey-West) standard errors using up to 18 lags.
sm.OLS(
    endog=four_factor_post.iloc[:, [0]],
    exog=four_factor_post.iloc[:, 1:],
).fit(cov_type='HAC', cov_kwds=dict(maxlags=18)).summary()

0,1,2,3
Dep. Variable:,port_return,R-squared:,0.245
Model:,OLS,Adj. R-squared:,0.218
Method:,Least Squares,F-statistic:,29.44
Date:,"Thu, 23 Apr 2020",Prob (F-statistic):,1.35e-16
Time:,12:06:28,Log-Likelihood:,409.86
No. Observations:,114,AIC:,-809.7
Df Residuals:,109,BIC:,-796.0
Df Model:,4,,
Covariance Type:,HAC,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.0011,0.001,1.953,0.051,-3.95e-06,0.002
mktrf,-0.0390,0.013,-3.113,0.002,-0.063,-0.014
smb,-0.0848,0.030,-2.845,0.004,-0.143,-0.026
hml,0.0149,0.028,0.523,0.601,-0.041,0.071
umd,0.0747,0.020,3.673,0.000,0.035,0.115

0,1,2,3
Omnibus:,1.213,Durbin-Watson:,2.061
Prob(Omnibus):,0.545,Jarque-Bera (JB):,1.302
Skew:,-0.222,Prob(JB):,0.522
Kurtosis:,2.724,Cond. No.,52.2
