In [188]:
# We created many factors in computstat.ipynb
# We will use them to create a model

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
from fama_data.FamaData import FamaData
from statsmodels.stats.outliers_influence import variance_inflation_factor

In [189]:
# test against the momentum value-weighted portfolio as always
url = 'http://diether.org/prephd/10-port_mom_ew.csv'
port = pd.read_csv(url,parse_dates=['caldt'])
port = port.query('caldt >= "1963-07-31" and caldt <= "2023-02-28"')
port['spread'] = port['p4'] - port['p0']
port.info()

<class 'pandas.core.frame.DataFrame'>
Index: 711 entries, 438 to 1148
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   caldt   711 non-null    datetime64[ns]
 1   p0      711 non-null    float64       
 2   p1      711 non-null    float64       
 3   p2      711 non-null    float64       
 4   p3      711 non-null    float64       
 5   p4      711 non-null    float64       
 6   spread  711 non-null    float64       
dtypes: datetime64[ns](1), float64(6)
memory usage: 44.4 KB


In [190]:
debt_ratio = pd.read_csv('exported/debt_ratio_vw.csv',parse_dates=['caldt'], usecols=['caldt','debt_HML'])
efficiency = pd.read_csv('exported/effc_vw.csv',parse_dates=['caldt'], usecols=['caldt','effc_HML'])
fixed_asset_turnover = pd.read_csv('exported/fat_vw.csv',parse_dates=['caldt'], usecols=['caldt','fat_HML'])
investment = pd.read_csv('exported/invst_vw.csv',parse_dates=['caldt'], usecols=['caldt','invst_HML'])
quick_ratio = pd.read_csv('exported/quick_vw.csv',parse_dates=['caldt'], usecols=['caldt','quick_HML'])
return_on_equity = pd.read_csv('exported/roe_vw.csv',parse_dates=['caldt'], usecols=['caldt','roe_HML'])

# And just for fun,
lasso_industry = pd.read_csv('exported/lasso_industry_factors.csv',parse_dates=['caldt'])

# And of course our boring Fama-French factors
alt_ff = pd.read_csv('exported/alt_ff_factors.csv',parse_dates=['caldt'])

In [191]:
built_factors = pd.merge(
                    pd.merge(quick_ratio, return_on_equity, on='caldt', how='inner'),
                    pd.merge(
                        pd.merge(efficiency, debt_ratio, on='caldt', how='inner'),
                        pd.merge(fixed_asset_turnover, investment, on='caldt', how='inner'),
                        on='caldt', how='inner'),
                    on='caldt', how='inner')

In [192]:
df = pd.merge(built_factors, port, on='caldt', how='inner')
df.head(3)

Unnamed: 0,caldt,quick_HML,roe_HML,effc_HML,debt_HML,fat_HML,invst_HML,p0,p1,p2,p3,p4,spread
0,1965-12-31,-0.009925,-0.126524,-0.001614,-0.022632,0.001718,-0.126524,4.326711,1.94664,3.072755,3.777498,4.059255,-0.267456
1,1968-12-31,0.020383,-0.022038,-0.021855,-0.009333,0.025394,0.099394,-1.046688,0.344543,1.146129,0.926564,0.036382,1.08307
2,1969-12-31,0.043829,-0.117758,0.03278,-0.033874,0.002409,0.014113,-11.979345,-9.101954,-6.334108,-4.159768,-2.02969,9.949655


In [193]:
smf.ols('spread ~ roe_HML + quick_HML + fat_HML + invst_HML + effc_HML + debt_HML', data=df).fit().summary()

0,1,2,3
Dep. Variable:,spread,R-squared:,0.024
Model:,OLS,Adj. R-squared:,0.007
Method:,Least Squares,F-statistic:,1.41
Date:,"Fri, 14 Apr 2023",Prob (F-statistic):,0.21
Time:,00:12:57,Log-Likelihood:,-1092.4
No. Observations:,358,AIC:,2199.0
Df Residuals:,351,BIC:,2226.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,1.8879,0.275,6.864,0.000,1.347,2.429
roe_HML,2.7173,2.056,1.321,0.187,-1.327,6.761
quick_HML,6.5422,3.910,1.673,0.095,-1.149,14.233
fat_HML,1.9059,4.597,0.415,0.679,-7.136,10.948
invst_HML,2.5602,2.444,1.048,0.296,-2.246,7.366
effc_HML,5.8841,4.421,1.331,0.184,-2.811,14.579
debt_HML,7.0654,4.293,1.646,0.101,-1.378,15.509

0,1,2,3
Omnibus:,74.417,Durbin-Watson:,2.16
Prob(Omnibus):,0.0,Jarque-Bera (JB):,598.609
Skew:,-0.594,Prob(JB):,1.03e-130
Kurtosis:,9.222,Cond. No.,20.9


In [194]:
# Ok, let's add in our FF factors!
fd = FamaData('data/', end_date = '2022-09-30')

In [195]:
# Classic Fama-French 3-factor model components
excess_returns = fd.get_excess_return_data()['exmt']
SMB_proxy = fd.get_size_data()['Hi 10'] - fd.get_size_data()['Lo 10']
HML_proxy = fd.get_book_to_market_data()['Hi 10'] - fd.get_book_to_market_data()['Lo 10']

# Fama-French 5-factor model components
RMW_proxy = fd.get_operating_profitability_data()['Hi 10'] - fd.get_operating_profitability_data()['Lo 10']
CMA_proxy = fd.get_investment_data()['Hi 10'] - fd.get_investment_data()['Lo 10']

# And momentum
MOM_proxy = fd.get_momentum_data()['Hi PRIOR'] - fd.get_momentum_data()['Lo PRIOR']

SMB_proxy.name = 'SMB'
HML_proxy.name = 'HML'
RMW_proxy.name = 'RMW'
CMA_proxy.name = 'CMA'
MOM_proxy.name = 'MOM'

In [196]:
# Critical for concat to work properly
for x in [excess_returns,  SMB_proxy, HML_proxy, RMW_proxy, CMA_proxy]:
    x.reset_index(drop=True, inplace=True)

ff = pd.concat([excess_returns, SMB_proxy, HML_proxy, RMW_proxy, CMA_proxy, port['caldt'].reset_index(drop = True)], axis=1)
ff.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 711 entries, 0 to 710
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   exmt    711 non-null    float64       
 1   SMB     711 non-null    float64       
 2   HML     711 non-null    float64       
 3   RMW     711 non-null    float64       
 4   CMA     711 non-null    float64       
 5   caldt   711 non-null    datetime64[ns]
dtypes: datetime64[ns](1), float64(5)
memory usage: 33.5 KB


In [197]:
ff

Unnamed: 0,exmt,SMB,HML,RMW,CMA,caldt
0,-0.0039,0.55,-2.66,2.42,3.07,1963-07-31
1,0.0507,2.67,1.67,0.54,2.32,1963-08-30
2,-0.0157,-0.73,-1.77,2.05,-1.02,1963-09-30
3,0.0253,2.87,-2.97,8.11,1.47,1963-10-31
4,-0.0085,1.45,0.82,-3.91,-2.49,1963-11-29
...,...,...,...,...,...,...
706,-0.0034,1.34,9.16,8.83,-13.96,2022-05-31
707,-0.0843,-1.56,-6.49,3.12,10.19,2022-06-30
708,0.0957,3.14,-8.06,0.52,8.70,2022-07-29
709,-0.0377,-5.05,3.84,-8.18,-2.85,2022-08-31


In [198]:
df = pd.merge(df, ff, on='caldt', how='inner')

In [199]:
df

Unnamed: 0,caldt,quick_HML,roe_HML,effc_HML,debt_HML,fat_HML,invst_HML,p0,p1,p2,p3,p4,spread,exmt,SMB,HML,RMW,CMA
0,1965-12-31,-0.009925,-0.126524,-0.001614,-0.022632,0.001718,-0.126524,4.326711,1.946640,3.072755,3.777498,4.059255,-0.267456,0.0101,-4.49,3.11,-7.77,-1.84
1,1968-12-31,0.020383,-0.022038,-0.021855,-0.009333,0.025394,0.099394,-1.046688,0.344543,1.146129,0.926564,0.036382,1.083070,-0.0394,-7.96,2.60,-3.21,-3.60
2,1969-12-31,0.043829,-0.117758,0.032780,-0.033874,0.002409,0.014113,-11.979345,-9.101954,-6.334108,-4.159768,-2.029690,9.949655,-0.0263,7.71,-6.32,4.50,1.30
3,1970-03-31,-0.035097,0.112822,-0.075870,0.028897,0.051640,-0.112822,-4.020982,-2.056002,-0.746098,-0.794346,-3.600642,0.420340,-0.0106,4.19,4.16,-0.43,-4.61
4,1970-06-30,-0.085851,0.068333,-0.066905,0.025560,-0.077714,0.068333,-12.163971,-10.862737,-7.003340,-6.064191,-2.507535,9.656436,-0.0579,3.13,-1.44,3.13,-3.93
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
353,2022-03-31,0.054065,0.038691,-0.099951,-0.030934,0.080489,-0.034053,-0.062720,-0.159941,0.989489,1.019774,2.040128,2.102849,0.0305,2.34,-7.09,-0.03,-3.51
354,2022-05-31,0.006511,0.083940,-0.006898,0.029709,-0.020769,0.077827,-5.392614,-2.553349,0.502323,-0.183222,1.199527,6.592141,-0.0034,1.34,9.16,8.83,-13.96
355,2022-06-30,-0.094965,-0.028065,-0.034286,-0.000199,-0.009501,0.020381,-5.344633,-8.743887,-8.291228,-4.778448,-9.131728,-3.787095,-0.0843,-1.56,-6.49,3.12,10.19
356,2022-08-31,-0.040840,-0.049699,0.000744,-0.009497,0.011343,-0.204109,0.317134,-2.100848,-2.140245,-2.222316,0.123339,-0.193795,-0.0377,-5.05,3.84,-8.18,-2.85


In [200]:
smf.ols('spread ~ exmt + roe_HML + quick_HML + fat_HML + invst_HML + effc_HML + debt_HML + SMB + HML + RMW + CMA', data=df).fit().summary()

0,1,2,3
Dep. Variable:,spread,R-squared:,0.112
Model:,OLS,Adj. R-squared:,0.084
Method:,Least Squares,F-statistic:,3.982
Date:,"Fri, 14 Apr 2023",Prob (F-statistic):,1.77e-05
Time:,00:12:59,Log-Likelihood:,-1075.4
No. Observations:,358,AIC:,2175.0
Df Residuals:,346,BIC:,2221.0
Df Model:,11,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,1.7898,0.273,6.560,0.000,1.253,2.326
exmt,-8.7035,6.281,-1.386,0.167,-21.058,3.651
roe_HML,1.2178,2.074,0.587,0.557,-2.861,5.296
quick_HML,7.6806,4.052,1.896,0.059,-0.288,15.649
fat_HML,3.4431,4.482,0.768,0.443,-5.373,12.259
invst_HML,1.7949,2.383,0.753,0.452,-2.893,6.483
effc_HML,6.8668,4.288,1.602,0.110,-1.566,15.300
debt_HML,6.9744,4.367,1.597,0.111,-1.616,15.564
SMB,-0.0758,0.065,-1.158,0.248,-0.204,0.053

0,1,2,3
Omnibus:,47.301,Durbin-Watson:,2.174
Prob(Omnibus):,0.0,Jarque-Bera (JB):,240.263
Skew:,-0.381,Prob(JB):,6.72e-53
Kurtosis:,6.94,Cond. No.,156.0


In [201]:
smf.ols('p4 ~ exmt + roe_HML + quick_HML + fat_HML + invst_HML + effc_HML + debt_HML + SMB + HML + RMW + CMA', data=df).fit().summary()

0,1,2,3
Dep. Variable:,p4,R-squared:,0.895
Model:,OLS,Adj. R-squared:,0.892
Method:,Least Squares,F-statistic:,267.8
Date:,"Fri, 14 Apr 2023",Prob (F-statistic):,7.97e-162
Time:,00:12:59,Log-Likelihood:,-768.37
No. Observations:,358,AIC:,1561.0
Df Residuals:,346,BIC:,1607.0
Df Model:,11,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,1.1310,0.116,9.772,0.000,0.903,1.359
exmt,105.7607,2.665,39.691,0.000,100.520,111.002
roe_HML,-0.2453,0.880,-0.279,0.781,-1.975,1.485
quick_HML,3.7605,1.719,2.188,0.029,0.380,7.141
fat_HML,2.5124,1.901,1.321,0.187,-1.227,6.252
invst_HML,0.5699,1.011,0.564,0.573,-1.419,2.558
effc_HML,1.8830,1.819,1.035,0.301,-1.694,5.460
debt_HML,2.6455,1.853,1.428,0.154,-0.998,6.289
SMB,-0.5156,0.028,-18.583,0.000,-0.570,-0.461

0,1,2,3
Omnibus:,17.447,Durbin-Watson:,2.267
Prob(Omnibus):,0.0,Jarque-Bera (JB):,46.232
Skew:,-0.038,Prob(JB):,9.14e-11
Kurtosis:,4.759,Cond. No.,156.0


In [202]:
share_issue_delta = fd.get_net_share_issue_data()['Hi 10'] - fd.get_net_share_issue_data()['Lo 10']

shortterm_rev_delta = fd.get_shortterm_reversal_data()['Hi PRIOR'] - fd.get_shortterm_reversal_data()['Lo PRIOR']
longterm_rev_delta = fd.get_longterm_reversal_data()['Hi PRIOR'] - fd.get_longterm_reversal_data()['Lo PRIOR']

innov_delta = fd.get_10_industry_data()['HiTec'] - fd.get_10_industry_data()['NoDur'] 
growth_delta = .5*fd.get_10_industry_data()['HiTec'] + .5*fd.get_10_industry_data()['Hlth '] \
             - .5*fd.get_10_industry_data()['Utils'] - .5*fd.get_10_industry_data()['Manuf']
durable_delta = fd.get_10_industry_data()['Durbl'] - fd.get_10_industry_data()['Manuf']

service_minus_goods = 2 * fd.get_49_industry_data()['PerSv'] + 2 * fd.get_49_industry_data()['BusSv'] \
                    + fd.get_49_industry_data()['Softw'] + fd.get_49_industry_data()['Insur'] \
                    + fd.get_49_industry_data()['Fins'] + fd.get_49_industry_data()['Banks'] \
                    - fd.get_49_industry_data()['Hshld'] - fd.get_49_industry_data()['BldMt'] \
                    - fd.get_49_industry_data()['ElcEq'] - fd.get_49_industry_data()['Aero'] \
                    - fd.get_49_industry_data()['Food '] - fd.get_49_industry_data()['Oil'] \
                    - 2 * fd.get_49_industry_data()['Whlsl']

needs_minus_wants = fd.get_49_industry_data()['Food '] + fd.get_49_industry_data()['Util'] \
                    + fd.get_49_industry_data()['Hlth '] + fd.get_49_industry_data()['MedEq'] \
                    + fd.get_49_industry_data()['Drugs'] + fd.get_49_industry_data()['Insur'] \
                    - fd.get_49_industry_data()['Guns'] - fd.get_49_industry_data()['Softw'] \
                    - fd.get_49_industry_data()['Autos'] - fd.get_49_industry_data()['Whlsl'] \
                    - 2*fd.get_49_industry_data()['Hshld']


accrual_delta = fd.get_accrual_data()['Hi 10'] - fd.get_accrual_data()['Lo 10']
cashflow_price_delta = fd.get_cashflow_price_data()['Hi 10'] - fd.get_cashflow_price_data()['Lo 10']
market_beta_delta = fd.get_market_beta_data()['Hi 10'] - fd.get_market_beta_data()['Lo 10']
dividend_yield_delta = fd.get_dividend_yield_data()['Hi 10'] - fd.get_dividend_yield_data()['Lo 10']
earnings_price_delta = fd.get_earnings_price_data()['Hi 10'] - fd.get_earnings_price_data()['Lo 10']
variance_delta = fd.get_variance_data()['Hi 10'] - fd.get_variance_data()['Lo 10']


share_issue_delta.name = 'share_issue_delta'
shortterm_rev_delta.name = 'shortterm_rev_delta'
longterm_rev_delta.name = 'longterm_rev_delta'
innov_delta.name = 'innov_delta'
growth_delta.name = 'growth_delta'
durable_delta.name = 'durable_delta'
service_minus_goods.name = 'service_minus_goods'
needs_minus_wants.name = 'needs_minus_wants'
accrual_delta.name = 'accrual_delta'
cashflow_price_delta.name = 'cashflow_price_delta'
market_beta_delta.name = 'market_beta_delta'
dividend_yield_delta.name = 'dividend_yield_delta'
earnings_price_delta.name = 'earnings_price_delta'
variance_delta.name = 'variance_delta'

# Let's put all the proxies into a dataframe
# Critical for concat to work properly
for x in [share_issue_delta, shortterm_rev_delta, longterm_rev_delta, 
                innov_delta, growth_delta, durable_delta, service_minus_goods, needs_minus_wants, 
                accrual_delta, cashflow_price_delta, market_beta_delta, dividend_yield_delta, 
                earnings_price_delta, variance_delta]:
    x.reset_index(drop=True, inplace=True)

df2 = pd.concat([share_issue_delta, shortterm_rev_delta, longterm_rev_delta, 
                innov_delta, growth_delta, durable_delta, service_minus_goods, needs_minus_wants, 
                accrual_delta, cashflow_price_delta, market_beta_delta, dividend_yield_delta, 
                earnings_price_delta, variance_delta,port['caldt'].reset_index(drop= True)], axis=1)
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 711 entries, 0 to 710
Data columns (total 15 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   share_issue_delta     711 non-null    float64       
 1   shortterm_rev_delta   711 non-null    float64       
 2   longterm_rev_delta    711 non-null    float64       
 3   innov_delta           711 non-null    float64       
 4   growth_delta          711 non-null    float64       
 5   durable_delta         711 non-null    float64       
 6   service_minus_goods   711 non-null    float64       
 7   needs_minus_wants     711 non-null    float64       
 8   accrual_delta         711 non-null    float64       
 9   cashflow_price_delta  711 non-null    float64       
 10  market_beta_delta     711 non-null    float64       
 11  dividend_yield_delta  711 non-null    float64       
 12  earnings_price_delta  711 non-null    float64       
 13  variance_delta      

In [203]:
df = pd.merge(df, df2, on='caldt', how='inner')
df.head(3)

Unnamed: 0,caldt,quick_HML,roe_HML,effc_HML,debt_HML,fat_HML,invst_HML,p0,p1,p2,...,growth_delta,durable_delta,service_minus_goods,needs_minus_wants,accrual_delta,cashflow_price_delta,market_beta_delta,dividend_yield_delta,earnings_price_delta,variance_delta
0,1965-12-31,-0.009925,-0.126524,-0.001614,-0.022632,0.001718,-0.126524,4.326711,1.94664,3.072755,...,-0.32,-2.01,57.71,-121.2,-0.73,0.34,4.37,0.44,1.59,4.67
1,1968-12-31,0.020383,-0.022038,-0.021855,-0.009333,0.025394,0.099394,-1.046688,0.344543,1.146129,...,-0.065,-1.04,-12.54,-75.8,-0.94,2.98,2.49,1.55,3.54,5.06
2,1969-12-31,0.043829,-0.117758,0.03278,-0.033874,0.002409,0.014113,-11.979345,-9.101954,-6.334108,...,2.815,-2.0,3.86,36.19,-1.62,-3.9,-2.7,-3.35,-4.21,-6.41


In [204]:
smf.ols('p4 ~ exmt + roe_HML + quick_HML + fat_HML + invst_HML + effc_HML + debt_HML + SMB + HML + RMW + CMA + share_issue_delta + shortterm_rev_delta + longterm_rev_delta + innov_delta + growth_delta + durable_delta + service_minus_goods + needs_minus_wants + accrual_delta + cashflow_price_delta + market_beta_delta + dividend_yield_delta + earnings_price_delta + variance_delta', data=df).fit().summary()

0,1,2,3
Dep. Variable:,p4,R-squared:,0.917
Model:,OLS,Adj. R-squared:,0.911
Method:,Least Squares,F-statistic:,147.6
Date:,"Fri, 14 Apr 2023",Prob (F-statistic):,2.1100000000000004e-163
Time:,00:13:07,Log-Likelihood:,-725.14
No. Observations:,358,AIC:,1502.0
Df Residuals:,332,BIC:,1603.0
Df Model:,25,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,1.0992,0.112,9.809,0.000,0.879,1.320
exmt,106.6384,3.425,31.139,0.000,99.902,113.375
roe_HML,-0.3826,0.829,-0.462,0.645,-2.012,1.247
quick_HML,3.1812,1.636,1.945,0.053,-0.036,6.398
fat_HML,2.2725,1.744,1.303,0.193,-1.158,5.703
invst_HML,-0.6060,0.935,-0.648,0.518,-2.446,1.234
effc_HML,0.3051,1.691,0.180,0.857,-3.022,3.632
debt_HML,3.8479,1.761,2.185,0.030,0.384,7.312
SMB,-0.5946,0.033,-18.104,0.000,-0.659,-0.530

0,1,2,3
Omnibus:,9.79,Durbin-Watson:,2.084
Prob(Omnibus):,0.007,Jarque-Bera (JB):,14.619
Skew:,0.186,Prob(JB):,0.000669
Kurtosis:,3.918,Cond. No.,799.0


In [205]:
dropped = df[['caldt', 'p0', 'p1', 'p2', 'p3', 'p4', 'spread']]
df = df.drop(['caldt', 'p0', 'p1', 'p2', 'p3', 'p4', 'spread'], axis=1)
vif_data = pd.DataFrame()
vif_data["feature"] = df.columns

vif_data["VIF"] = [variance_inflation_factor(df.values, i)
                          for i in range(len(df.columns))]
vif_data

Unnamed: 0,feature,VIF
0,quick_HML,2.124071
1,roe_HML,1.573579
2,effc_HML,1.588923
3,debt_HML,1.881982
4,fat_HML,1.456265
5,invst_HML,1.42608
6,exmt,2.3936
7,SMB,2.676958
8,HML,3.525621
9,RMW,2.971206


In [206]:
# TODO: Remove the features with high VIF
df = df.drop(['cashflow_price_delta', 'variance_delta', 'market_beta_delta'], axis=1)
vif_data = pd.DataFrame()
vif_data["feature"] = df.columns

vif_data["VIF"] = [variance_inflation_factor(df.values, i)
                          for i in range(len(df.columns))]
vif_data

Unnamed: 0,feature,VIF
0,quick_HML,2.11965
1,roe_HML,1.542634
2,effc_HML,1.585382
3,debt_HML,1.865959
4,fat_HML,1.440309
5,invst_HML,1.41644
6,exmt,1.886034
7,SMB,2.236933
8,HML,3.33897
9,RMW,2.616131


In [207]:
df[['caldt', 'p0', 'p1', 'p2', 'p3', 'p4', 'spread']] = dropped[['caldt', 'p0', 'p1', 'p2', 'p3', 'p4', 'spread']]
df

Unnamed: 0,quick_HML,roe_HML,effc_HML,debt_HML,fat_HML,invst_HML,exmt,SMB,HML,RMW,...,accrual_delta,dividend_yield_delta,earnings_price_delta,caldt,p0,p1,p2,p3,p4,spread
0,-0.009925,-0.126524,-0.001614,-0.022632,0.001718,-0.126524,0.0101,-4.49,3.11,-7.77,...,-0.73,0.44,1.59,1965-12-31,4.326711,1.946640,3.072755,3.777498,4.059255,-0.267456
1,0.020383,-0.022038,-0.021855,-0.009333,0.025394,0.099394,-0.0394,-7.96,2.60,-3.21,...,-0.94,1.55,3.54,1968-12-31,-1.046688,0.344543,1.146129,0.926564,0.036382,1.083070
2,0.043829,-0.117758,0.032780,-0.033874,0.002409,0.014113,-0.0263,7.71,-6.32,4.50,...,-1.62,-3.35,-4.21,1969-12-31,-11.979345,-9.101954,-6.334108,-4.159768,-2.029690,9.949655
3,-0.035097,0.112822,-0.075870,0.028897,0.051640,-0.112822,-0.0106,4.19,4.16,-0.43,...,0.42,6.42,5.20,1970-03-31,-4.020982,-2.056002,-0.746098,-0.794346,-3.600642,0.420340
4,-0.085851,0.068333,-0.066905,0.025560,-0.077714,0.068333,-0.0579,3.13,-1.44,3.13,...,0.28,4.43,6.30,1970-06-30,-12.163971,-10.862737,-7.003340,-6.064191,-2.507535,9.656436
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
353,0.054065,0.038691,-0.099951,-0.030934,0.080489,-0.034053,0.0305,2.34,-7.09,-0.03,...,-3.48,0.08,-5.98,2022-03-31,-0.062720,-0.159941,0.989489,1.019774,2.040128,2.102849
354,0.006511,0.083940,-0.006898,0.029709,-0.020769,0.077827,-0.0034,1.34,9.16,8.83,...,1.58,7.46,7.99,2022-05-31,-5.392614,-2.553349,0.502323,-0.183222,1.199527,6.592141
355,-0.094965,-0.028065,-0.034286,-0.000199,-0.009501,0.020381,-0.0843,-1.56,-6.49,3.12,...,-1.08,4.10,-0.46,2022-06-30,-5.344633,-8.743887,-8.291228,-4.778448,-9.131728,-3.787095
356,-0.040840,-0.049699,0.000744,-0.009497,0.011343,-0.204109,-0.0377,-5.05,3.84,-8.18,...,-1.06,6.00,4.23,2022-08-31,0.317134,-2.100848,-2.140245,-2.222316,0.123339,-0.193795


In [208]:
smf.ols('p4 ~ exmt + roe_HML + quick_HML + fat_HML + invst_HML + effc_HML + debt_HML + SMB + HML + RMW + CMA + share_issue_delta + shortterm_rev_delta + longterm_rev_delta + innov_delta + growth_delta + durable_delta + service_minus_goods + needs_minus_wants + accrual_delta + earnings_price_delta + dividend_yield_delta', data=df).fit().summary()

0,1,2,3
Dep. Variable:,p4,R-squared:,0.915
Model:,OLS,Adj. R-squared:,0.909
Method:,Least Squares,F-statistic:,164.0
Date:,"Fri, 14 Apr 2023",Prob (F-statistic):,1.21e-164
Time:,00:13:07,Log-Likelihood:,-730.31
No. Observations:,358,AIC:,1507.0
Df Residuals:,335,BIC:,1596.0
Df Model:,22,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,1.0773,0.110,9.816,0.000,0.861,1.293
exmt,103.9872,3.055,34.043,0.000,97.979,109.996
roe_HML,-0.5299,0.828,-0.640,0.522,-2.158,1.098
quick_HML,3.3263,1.649,2.017,0.044,0.083,6.570
fat_HML,1.8264,1.752,1.042,0.298,-1.620,5.273
invst_HML,-0.4006,0.942,-0.425,0.671,-2.253,1.452
effc_HML,0.0678,1.706,0.040,0.968,-3.289,3.424
debt_HML,3.5442,1.771,2.001,0.046,0.061,7.028
SMB,-0.5889,0.030,-19.630,0.000,-0.648,-0.530

0,1,2,3
Omnibus:,18.135,Durbin-Watson:,2.133
Prob(Omnibus):,0.0,Jarque-Bera (JB):,35.34
Skew:,0.271,Prob(JB):,2.12e-08
Kurtosis:,4.44,Cond. No.,676.0
