In [157]:
import pandas as pd
import json
from sklearn.linear_model import LinearRegression
import numpy as np
from scipy import stats
from tqdm import tqdm
import statsmodels.api as sm
import matplotlib.pyplot as plt

In [158]:
## Newey-West Adjustment For a Time-Series Data, e.g. Lambdas (factor returns)
def NW_ttest(ret,h0):
  T = len(ret)
  lag = int(4*(T/100)**(2/9))
  vv = np.var(ret)

  for l in range(0,lag):
      cc=np.cov(ret[0:(len(ret)-l)].T,ret[l:len(ret)].T)[0,1]
      vv=vv+2*(1-(l+1)/lag)*cc
      t=(np.mean(ret)-h0)/np.sqrt(vv)*np.sqrt(T)
  
  return t

## CAPM_OLS (Time-Varying Betas)

In [159]:
df_MR = pd.read_csv('df_MR.csv')

In [160]:
df_MR.Dates = df_MR.Dates.apply(lambda x:x[:10])

In [161]:
df_MR.set_index('Dates',inplace=True)

In [162]:
df_RF = pd.read_csv('df_RF.csv',index_col=0)
df_SR = pd.read_csv('df_SR.csv',index_col=0)
df_SR.index = df_SR.index.to_series().apply(lambda x:x[:10])

In [163]:
df_Ex_SR = df_SR-df_RF.values

In [164]:
df_Ex_SR = df_Ex_SR.loc[df_MR.index,:]

In [165]:
ols_model = LinearRegression()

In [None]:
n = 3

In [166]:
holding = 21
rollingW = holding * n *12
lens = len(df_MR)
periods = int((lens - rollingW) / holding) - 1

In [167]:
all_betas = []
# i is time in periods
for i in tqdm(range(periods)):

    df_MR_tmp = df_MR.iloc[i*holding:i*holding+rollingW,:]
    df_Ex_SR_tmp = df_Ex_SR.iloc[i*holding:i*holding+rollingW,:]
    
    betas = []
    # j is cross-sectional stock name
    for j in df_Ex_SR.columns:
        df_index = df_Ex_SR_tmp.loc[:,j].dropna().index
        
        if len(df_index) != 0:
            b = list(ols_model.fit(df_MR_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
            b.append(j)
            betas.append(b)
    df_betas = pd.DataFrame(np.array(betas)).rename(columns={0:'MKT',1:'stock'}).set_index('stock')
    
    all_betas.append(df_betas)

100%|██████████| 211/211 [01:45<00:00,  2.00it/s]


In [168]:
all_betas[0]

Unnamed: 0_level_0,MKT
stock,Unnamed: 1_level_1
ABC.SJ.Equity,0.06890337112683975
ABI.SJ.Equity,0.23352855971285957
ABR.SJ.Equity,-0.13818778126607012
ACL.SJ.Equity,0.957051285238076
ACP.SJ.Equity,-0.02694573125717277
...,...
WTC.SJ.Equity,0.360477310787068
YRK.SJ.Equity,-0.04329882197390028
ZPT.SJ.Equity,0.19615208275356721
ZRR.SJ.Equity,0.2535478802233808


In [169]:
all_lambdas = []

df_Ex_SR_tmp_T = df_Ex_SR.iloc[rollingW:rollingW+periods*holding,:].T

lambdas = []
index_times = list(df_Ex_SR_tmp_T.columns)

for k in tqdm(df_Ex_SR_tmp_T.columns):
    # k is cross-sectional stock name 
    i = int(index_times.index(k) / holding)
    # i is time in periods
    df_return = df_Ex_SR_tmp_T.loc[all_betas[i].index,k].dropna()
    df_index = df_return.index
    
    if len(df_index) != 0:
        ols = sm.OLS(df_return, sm.add_constant(all_betas[i].loc[df_index,:].astype(np.float64)))
        res = ols.fit()
        l = list(res.params)
        
        lambdas.append(l)
        
all_lambdas.append(pd.DataFrame(lambdas))

100%|██████████| 4431/4431 [00:08<00:00, 529.53it/s]


In [170]:
all_lambdas[0].rename(columns={0:'Intercept',1:'MKT'})

Unnamed: 0,Intercept,MKT
0,-0.000310,0.007612
1,0.000117,0.004575
2,0.000538,0.006294
3,-0.000497,0.008957
4,0.001682,-0.011479
...,...,...
4426,-0.000157,-0.001185
4427,0.000042,0.003854
4428,-0.000600,0.010929
4429,0.000744,0.003838


In [171]:
np.mean(all_lambdas[0])*21*100

0   -0.278127
1    0.175176
dtype: float64

In [172]:
stats.ttest_1samp(all_lambdas[0], 0.0)

Ttest_1sampResult(statistic=array([-6.84868671,  0.51443134]), pvalue=array([8.47511422e-12, 6.06976112e-01]))

In [173]:
## New-West Adjustment for CAPM lambda
all_lambdas[0].apply(lambda x: NW_ttest(ret = x, h0=0), axis = 0)

0   -3.784794
1    0.303559
dtype: float64

### Adding Firm Characteristics

In [174]:
df_ln_Size = pd.read_csv('df_ln_Size.csv')

df_PE = pd.read_csv('df_std_PE.csv')

df_ln_Size.Dates = df_ln_Size.Dates.apply(lambda x:x[:10])

df_ln_Size.set_index('Dates',inplace=True)

df_PE.Dates = df_PE.Dates.apply(lambda x:x[:10])

df_PE.set_index('Dates',inplace=True)

In [175]:
df_ln_Size = df_ln_Size.loc[df_Ex_SR.index,:]

In [176]:
df_PE = df_PE.loc[df_Ex_SR.index,:]

In [177]:
len(df_ln_Size)==len(df_PE)==len(df_Ex_SR)

True

In [178]:
# Stock returns are lagged for 21 days than firm characteristics 
Gap = 21

In [179]:
n = 3
holding = 21
rollingW = holding * n * 12
lens = len(df_MR)
periods = int((lens - rollingW) / holding) - 1
periods

211

In [180]:
all_lambdas = []
all_intercepts = []

df_Ex_SR_tmp_T = df_Ex_SR.iloc[rollingW:rollingW+periods*holding,:].T
df_ln_Size_tmp_T = df_ln_Size.iloc[rollingW:rollingW+periods*holding,:].T
df_PE_tmp_T = df_PE.iloc[rollingW:rollingW+periods*holding,:].T

lambdas = []
intercepts = []
index_times = list(df_Ex_SR_tmp_T.columns)


for k in tqdm(df_Ex_SR_tmp_T.columns[:-Gap]):
    # k is date t in len(df_Ex_SR_tmp) days
    i = int(index_times.index(k) / holding)
    # i is time T in 185 periods
    # df_ln_Size_tmp_T.loc[:,k].replace(-np.inf,np.nan), the -inf in ln_Size data dataframe would interrupt the regression
    df_index = (((all_betas[i].index).intersection(df_ln_Size_tmp_T.loc[:,k].replace(-np.inf,np.nan).dropna().index).\
                   intersection(df_PE_tmp_T.loc[:,k].dropna().index)).\
                    intersection(df_Ex_SR_tmp_T.loc[:,index_times[index_times.index(k)+Gap]].dropna().index))
    
    if len(df_index) != 0:
        
        l = list(ols_model.fit((pd.concat([all_betas[i].loc[df_index,:], \
                                           df_ln_Size_tmp_T.loc[df_index,k],\
                                           df_PE_tmp_T .loc[df_index,k]],axis = 1).values), \
                               (df_Ex_SR_tmp_T.loc[df_index,index_times[index_times.index(k)+Gap]].values)).coef_)
        
        h = ols_model.fit((pd.concat([all_betas[i].loc[df_index,:], \
                                      df_ln_Size_tmp_T.loc[df_index,k],\
                                      df_PE_tmp_T .loc[df_index,k]],axis = 1).values), \
                          (df_Ex_SR_tmp_T.loc[df_index,index_times[index_times.index(k)+Gap]].values)).intercept_
        
        lambdas.append(l)
        intercepts.append(h)
        
all_lambdas.append(pd.DataFrame(lambdas))
all_intercepts.append(pd.DataFrame(intercepts))

100%|██████████| 4410/4410 [00:22<00:00, 196.70it/s]


In [181]:
all_intercepts[0]

Unnamed: 0,0
0,0.004273
1,-0.006672
2,0.008334
3,-0.000321
4,-0.001663
...,...
4405,-0.002820
4406,-0.002112
4407,-0.002297
4408,0.001522


In [182]:
all_lambdas[0].rename(columns={0:'MKT',1:'Size',2:'PE'})

Unnamed: 0,MKT,Size,PE
0,0.005195,-0.000667,0.002943
1,-0.016951,0.001362,-0.004454
2,0.005514,-0.001419,0.000185
3,0.000000,0.000000,0.000000
4,-0.004689,0.000391,-0.001356
...,...,...,...
4405,-0.003670,0.000556,0.000838
4406,0.004859,0.000192,-0.002729
4407,0.010092,0.000185,-0.000602
4408,0.006236,-0.000022,-0.001065


In [183]:
np.mean(all_intercepts[0])*21*100

0   -0.792507
dtype: float64

In [184]:
np.mean(all_lambdas[0])*21*100

0   -0.401659
1    0.098494
2   -0.498192
dtype: float64

In [185]:
stats.ttest_1samp(all_intercepts[0], 0.0)

Ttest_1sampResult(statistic=array([-5.87125602]), pvalue=array([4.6427333e-09]))

In [186]:
stats.ttest_1samp(all_lambdas[0], 0.0)

Ttest_1sampResult(statistic=array([-1.05181009,  4.02944598, -8.45645117]), pvalue=array([2.92944331e-01, 5.68506357e-05, 3.70166343e-17]))

In [187]:
## Newey-West Adjustment 
all_intercepts[0].apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values

array([-3.71056021])

In [188]:
## Newey-West Adjustment 
all_lambdas[0].apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values

array([-0.63526881,  2.51466733, -5.32471764])

## FF3-Factor_OLS (Time-Varying Betas)

In [189]:
ols_model = LinearRegression()

In [190]:
df_FF3 = pd.read_csv('df_FF3.csv')

In [191]:
df_FF3.Dates = df_FF3.Dates.apply(lambda x: x[:10])

In [192]:
df_FF3.set_index('Dates',inplace=True)

In [193]:
df_FF3

Unnamed: 0_level_0,MR_RF,SMB,HML
Dates,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-12-20,-0.0083,0.001267,0.00510
2000-12-21,-0.0062,0.001933,-0.00240
2000-12-22,0.0163,0.002433,0.00670
2000-12-25,-0.0003,0.000000,0.00000
2000-12-26,-0.0003,0.000000,0.00000
...,...,...,...
2019-02-27,0.0008,-0.003700,-0.00155
2019-02-28,-0.0055,0.005000,-0.00145
2019-03-01,0.0034,-0.003433,0.00380
2019-03-04,0.0001,0.001267,0.00380


In [194]:
df_RF = pd.read_csv('df_RF.csv',index_col=0)
df_SR = pd.read_csv('df_SR.csv',index_col=0)
df_SR.index = df_SR.index.to_series().apply(lambda x:x[:10])

In [195]:
df_Ex_SR = df_SR-df_RF.values

In [196]:
df_Ex_SR = df_Ex_SR.loc[df_FF3.index,:]
df_SR.shape

(5215, 836)

In [197]:
n = 3
holding = 21
rollingW = holding * n * 12
lens = len(df_FF3)
periods = int((lens - rollingW) / holding) - 1

In [198]:
all_betas = []
# i is time
for i in tqdm(range(periods)):

    df_FF3_tmp = df_FF3.iloc[i*holding:i*holding+rollingW,:]
    df_Ex_SR_tmp = df_Ex_SR.iloc[i*holding:i*holding+rollingW,:]
    
    betas = []
    # j is stock
    for j in df_Ex_SR.columns:
        df_index = df_Ex_SR_tmp.loc[:,j].dropna().index
        
        if len(df_index) != 0:
            b = list(ols_model.fit(df_FF3_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
            b.append(j)
            betas.append(b)
    df_betas = pd.DataFrame(np.array(betas)).rename(columns={0:'MKT',1:'SMB',2:'HML',3:'stock'}).set_index('stock')
    
    all_betas.append(df_betas)



100%|██████████| 189/189 [01:47<00:00,  1.76it/s]


In [199]:
all_betas[0]

Unnamed: 0_level_0,MKT,SMB,HML
stock,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ABC.SJ.Equity,-0.11886881746759886,-0.7129746357980258,0.875250891422741
ABI.SJ.Equity,0.25783980152032315,-0.010324300961727153,-0.010932559606184007
ABR.SJ.Equity,-0.21197460481696337,0.778388720466541,-0.34772436567797277
ACL.SJ.Equity,0.9128206343490194,1.3235356352168206,-0.024148286154124676
ACP.SJ.Equity,-0.03951329859901125,-0.061075408168753176,0.14407439092975466
...,...,...,...
WTC.SJ.Equity,0.07080044688134128,-0.13632567541626667,-0.24204855792579677
YRK.SJ.Equity,-0.0700711207991542,-0.0839246623936035,0.09238358790580153
ZPT.SJ.Equity,0.07969652734074247,-0.9069146398011514,-0.3695302740365012
ZRR.SJ.Equity,0.1554158589313682,1.3017774937158952,-0.510526061162279


In [200]:
all_lambdas = []
all_intercepts = []
# i is time

df_Ex_SR_tmp_T = df_Ex_SR.iloc[rollingW:rollingW+periods*holding,:].T

lambdas = []
intercepts = []
index_times = list(df_Ex_SR_tmp_T.columns)

for k in tqdm(df_Ex_SR_tmp_T.columns):
    
    i = int(index_times.index(k) / holding)

    df_return = df_Ex_SR_tmp_T.loc[all_betas[i].index,k].dropna()
    df_index = df_return.index
    
    if len(df_index) != 0:
        l = list(ols_model.fit(all_betas[i].loc[df_index,:].values, df_return.values).coef_)
        h = ols_model.fit(all_betas[i].loc[df_index,:].values, df_return.values).intercept_
        lambdas.append(l)
        intercepts.append(h)
all_lambdas.append(pd.DataFrame(lambdas))
all_intercepts.append(pd.DataFrame(intercepts))

100%|██████████| 3969/3969 [00:09<00:00, 423.33it/s]


In [201]:
all_lambdas[0].rename(columns={0:'MKT',1:'SMB',2:'HML'})

Unnamed: 0,MKT,SMB,HML
0,0.003147,-0.001208,-0.004000
1,0.006961,-0.003008,0.004089
2,-0.010578,-0.003330,0.010384
3,0.001320,0.009910,0.003706
4,-0.003854,-0.008944,0.007524
...,...,...,...
3964,0.003179,0.003167,0.003078
3965,-0.004597,0.000107,-0.005402
3966,0.007581,-0.000167,-0.007906
3967,-0.006288,0.001515,0.004452


In [202]:
np.mean(all_intercepts[0])*21*100

0   -0.269627
dtype: float64

In [203]:
np.mean(all_lambdas[0])*21*100

0    0.172828
1   -0.143496
2   -0.196709
dtype: float64

In [204]:
stats.ttest_1samp(all_intercepts[0], 0.0)

Ttest_1sampResult(statistic=array([-6.3777705]), pvalue=array([2.00293675e-10]))

In [205]:
stats.ttest_1samp(all_lambdas[0], 0.0)

Ttest_1sampResult(statistic=array([ 0.47387914, -0.78616496, -0.89747543]), pvalue=array([0.63561216, 0.4318178 , 0.36951973]))

In [206]:
## Newey-West Adjustment For FF3_Lambdas
all_lambdas[0].apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values

array([ 0.2819852 , -0.51018802, -0.58278278])

### Adding firm characteristics as control variables

In [207]:
df_ln_Size = pd.read_csv('df_ln_Size.csv')

df_PE = pd.read_csv('df_std_PE.csv')

df_ln_Size.Dates = df_ln_Size.Dates.apply(lambda x:x[:10])

df_ln_Size.set_index('Dates',inplace=True)

df_PE.Dates = df_PE.Dates.apply(lambda x:x[:10])

df_PE.set_index('Dates',inplace=True)

In [208]:
df_ln_Size = df_ln_Size.loc[df_Ex_SR.index,:]

In [209]:
df_PE = df_PE.loc[df_Ex_SR.index,:]

In [210]:
len(df_ln_Size)==len(df_PE)==len(df_Ex_SR)

True

In [211]:
# Stock returns are lagged for 21 days than firm characteristics 
Gap = 21

In [212]:
n = 3
holding = 21
rollingW = holding * n * 12
lens = len(df_FF3)
periods = int((lens - rollingW) / holding) - 1

In [213]:
all_lambdas = []
all_intercepts = []

df_Ex_SR_tmp_T = df_Ex_SR.iloc[rollingW:rollingW+periods*holding,:].T
df_ln_Size_tmp_T = df_ln_Size.iloc[rollingW:rollingW+periods*holding,:].T
df_PE_tmp_T = df_PE.iloc[rollingW:rollingW+periods*holding,:].T

lambdas = []
intercepts = []
index_times = list(df_Ex_SR_tmp_T.columns)


for k in tqdm(df_Ex_SR_tmp_T.columns[:-Gap]):
    # k is date t in len(df_Ex_SR_tmp) days
    i = int(index_times.index(k) / holding)
    # i is time T in 185 periods
    # df_ln_Size_tmp_T.loc[:,k].replace(-np.inf,np.nan), the -inf in ln_Size data dataframe would interrupt the regression
    df_index = (((all_betas[i].index).intersection(df_ln_Size_tmp_T.loc[:,k].replace(-np.inf,np.nan).dropna().index).\
                   intersection(df_PE_tmp_T.loc[:,k].dropna().index)).\
                    intersection(df_Ex_SR_tmp_T.loc[:,index_times[index_times.index(k)+Gap]].dropna().index))
    
    if len(df_index) != 0:
        
        l = list(ols_model.fit((pd.concat([all_betas[i].loc[df_index,:], \
                                           df_ln_Size_tmp_T.loc[df_index,k],\
                                           df_PE_tmp_T .loc[df_index,k]],axis = 1).values), \
                               (df_Ex_SR_tmp_T.loc[df_index,index_times[index_times.index(k)+Gap]].values)).coef_)
        
        h = ols_model.fit((pd.concat([all_betas[i].loc[df_index,:], \
                                      df_ln_Size_tmp_T.loc[df_index,k],\
                                      df_PE_tmp_T .loc[df_index,k]],axis = 1).values), \
                          (df_Ex_SR_tmp_T.loc[df_index,index_times[index_times.index(k)+Gap]].values)).intercept_
        
        lambdas.append(l)
        intercepts.append(h)
        
all_lambdas.append(pd.DataFrame(lambdas))
all_intercepts.append(pd.DataFrame(intercepts))

100%|██████████| 3948/3948 [00:22<00:00, 176.55it/s]


In [214]:
all_intercepts[0]

Unnamed: 0,0
0,0.003638
1,0.001445
2,-0.000196
3,-0.004393
4,0.007515
...,...
3943,-0.000049
3944,-0.004663
3945,0.002255
3946,-0.002130


In [215]:
all_lambdas[0].rename(columns={0:'MKT',1:'SMB',2:'HML',3:'Size',4:'PE'})

Unnamed: 0,MKT,SMB,HML,Size,PE
0,0.000168,-0.000213,0.006705,-0.000286,-0.001851
1,0.007100,0.002367,-0.001340,-0.000091,-0.000104
2,0.000000,0.000000,0.000000,0.000000,0.000000
3,-0.005140,0.002783,0.009165,0.001104,-0.002950
4,0.002619,-0.003265,0.010755,-0.000854,-0.004725
...,...,...,...,...,...
3943,0.006987,0.002554,0.003818,-0.000205,-0.000183
3944,-0.009634,0.000750,-0.005399,0.000516,0.000519
3945,0.007189,0.000717,-0.005310,-0.000285,0.000428
3946,-0.005797,0.000869,0.000985,0.000117,-0.002621


In [216]:
np.mean(all_intercepts[0])*21*100

0   -0.797523
dtype: float64

In [217]:
np.mean(all_lambdas[0])*21*100

0   -0.448972
1   -0.112295
2   -0.022140
3    0.105490
4   -0.514822
dtype: float64

In [218]:
stats.ttest_1samp(all_intercepts[0], 0.0)

Ttest_1sampResult(statistic=array([-5.78279331]), pvalue=array([7.91608802e-09]))

In [219]:
stats.ttest_1samp(all_lambdas[0], 0.0)[0]

array([-1.10168689, -0.54855856, -0.08795274,  4.21503818, -8.25162009])

In [220]:
## Newey-West Adjustment 
all_lambdas[0].apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values

array([-0.66767493, -0.35627738, -0.05709502,  2.62899832, -5.20022322])

## Carhart4-Factor_OLS (Time-Varying Betas)

In [221]:
ols_model = LinearRegression()

In [222]:
df_FF4 = pd.read_csv('df_FF4.csv')

In [223]:
df_FF4.Dates = df_FF4.Dates.apply(lambda x:x[:10])

In [224]:
df_FF4.set_index('Dates',inplace=True)

In [225]:
df_FF4

Unnamed: 0_level_0,MR_RF,SMB,HML,WML
Dates,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-12-20,-0.0083,0.001267,0.00510,0.0034
2000-12-21,-0.0062,0.001933,-0.00240,-0.0036
2000-12-22,0.0163,0.002433,0.00670,-0.0084
2000-12-25,-0.0003,0.000000,0.00000,0.0000
2000-12-26,-0.0003,0.000000,0.00000,0.0000
...,...,...,...,...
2019-02-27,0.0008,-0.003700,-0.00155,-0.0038
2019-02-28,-0.0055,0.005000,-0.00145,-0.0136
2019-03-01,0.0034,-0.003433,0.00380,0.0025
2019-03-04,0.0001,0.001267,0.00380,-0.0028


In [226]:
df_RF = pd.read_csv('df_RF.csv',index_col=0)
df_SR = pd.read_csv('df_SR.csv',index_col=0)
df_SR.index = df_SR.index.to_series().apply(lambda x:x[:10])

In [227]:
df_Ex_SR = df_SR-df_RF.values

In [228]:
df_Ex_SR = df_Ex_SR.loc[df_FF4.index,:]

In [229]:
n = 3
holding = 21
rollingW = holding * n *12
lens = len(df_FF4)
periods = int((lens - rollingW) / holding) - 1

In [230]:
all_betas = []
# i is time
for i in tqdm(range(periods)):

    df_FF4_tmp = df_FF4.iloc[i*holding:i*holding+rollingW,:]
    df_Ex_SR_tmp = df_Ex_SR.iloc[i*holding:i*holding+rollingW,:]
    
    betas = []
    # j is stock
    for j in df_Ex_SR.columns:
        df_index = df_Ex_SR_tmp.loc[:,j].dropna().index
        
        if len(df_index) != 0:
            b = list(ols_model.fit(df_FF4_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
            b.append(j)
            betas.append(b)
    df_betas = pd.DataFrame(np.array(betas)).rename(columns={0:'MKT',1:'SMB',2:'HML',3:'WML',4:'stock'}).set_index('stock')
    
    all_betas.append(df_betas)



100%|██████████| 189/189 [01:50<00:00,  1.70it/s]


In [231]:
all_betas[0]

Unnamed: 0_level_0,MKT,SMB,HML,WML
stock,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ABC.SJ.Equity,-0.11912824594681116,-0.7158135966449568,0.8771803416462147,0.02946870859844191
ABI.SJ.Equity,0.2565406592736406,-0.02454098983150671,-0.0012704349782666977,0.14757070777909076
ABR.SJ.Equity,-0.21782838888249048,0.7143299717167659,-0.3041879543800899,0.6649364686134615
ACL.SJ.Equity,0.9163624169149315,1.3622938396494353,-0.050489623789941485,-0.4023141895245076
ACP.SJ.Equity,-0.03911198706166802,-0.05342373907668338,0.141827729168983,-0.03886119462193846
...,...,...,...,...
WTC.SJ.Equity,0.06880709440199435,-0.15813920051641667,-0.22722337605522291,0.22642665726679068
YRK.SJ.Equity,-0.07113888790500746,-0.09560938188299126,0.10032490370604473,0.12128860250395175
ZPT.SJ.Equity,0.07018070220958815,-1.0261899736829472,-0.2824764496101648,1.2379726804295152
ZRR.SJ.Equity,0.1534607993317428,1.2803830127645452,-0.4959856753386197,0.22207693545777155


In [232]:
all_lambdas = []
all_intercepts = []
# i is time

df_Ex_SR_tmp_T = df_Ex_SR.iloc[rollingW:rollingW+periods*holding,:].T

lambdas = []
intercepts = []
index_times = list(df_Ex_SR_tmp_T.columns)

for k in tqdm(df_Ex_SR_tmp_T.columns):
    
    i = int(index_times.index(k) / holding)

    df_return = df_Ex_SR_tmp_T.loc[all_betas[i].index,k].dropna()
    df_index = df_return.index
    
    if len(df_index) != 0:
        l = list(ols_model.fit(all_betas[i].loc[df_index,:].values, df_return.values).coef_)
        h = ols_model.fit(all_betas[i].loc[df_index,:].values, df_return.values).intercept_
        lambdas.append(l)
        intercepts.append(h)
all_lambdas.append(pd.DataFrame(lambdas))
all_intercepts.append(pd.DataFrame(intercepts))

100%|██████████| 3969/3969 [00:10<00:00, 380.21it/s]


In [233]:
all_lambdas[0].rename(columns={0:'MKT',1:'SMB',2:'HML',3:'WML'})

Unnamed: 0,MKT,SMB,HML,WML
0,0.000895,-0.000110,-0.004220,0.001597
1,0.006620,-0.000723,0.004330,0.008316
2,-0.003732,0.006177,0.002334,-0.014557
3,-0.004576,0.008492,0.004944,0.000501
4,-0.001328,0.001161,0.003132,0.008559
...,...,...,...,...
3964,0.002212,0.003199,0.004573,-0.015733
3965,-0.004973,0.000154,-0.004593,-0.006758
3966,0.008423,-0.000261,-0.010065,0.016061
3967,-0.005797,0.001535,0.004377,0.004697


In [234]:
np.mean(all_intercepts[0])*21*100

0   -0.269634
dtype: float64

In [235]:
np.mean(all_lambdas[0])*21*100

0    0.168697
1   -0.104591
2   -0.262250
3    0.399002
dtype: float64

In [236]:
stats.ttest_1samp(all_intercepts[0], 0.0)

Ttest_1sampResult(statistic=array([-6.41252278]), pvalue=array([1.59918663e-10]))

In [237]:
stats.ttest_1samp(all_lambdas[0], 0.0)

Ttest_1sampResult(statistic=array([ 0.46172348, -0.57528994, -1.1966937 ,  1.44537221]), pvalue=array([0.64430494, 0.56512784, 0.23149746, 0.14843233]))

In [81]:
## Newey-West Adjustment 
all_lambdas[0].apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values

array([ 0.3249548 , -0.38617328, -0.34223017,  0.86988668])

## FF5-Factor_OLS (Time-Varying Betas)

In [82]:
ols_model = LinearRegression()

In [83]:
df_FF5 = pd.read_csv('df_FF5.csv')

In [84]:
df_FF5.Dates = df_FF5.Dates.apply(lambda x:x[:10])

In [85]:
df_FF5.set_index('Dates',inplace=True)

In [86]:
df_RF = pd.read_csv('df_RF.csv',index_col=0)
df_SR = pd.read_csv('df_SR.csv',index_col=0)
df_SR.index = df_SR.index.to_series().apply(lambda x:x[:10])

In [87]:
df_Ex_SR = df_SR-df_RF.values

In [88]:
df_Ex_SR = df_Ex_SR.loc[df_FF5.index,:]

In [89]:
n = 3
holding = 21
rollingW = holding * n *12
lens = len(df_FF5)
periods = int((lens - rollingW) / holding) - 1
periods

189

In [90]:
all_betas = []
# i is time
for i in tqdm(range(periods)):

    df_FF5_tmp = df_FF5.iloc[i*holding:i*holding+rollingW,:]
    df_Ex_SR_tmp = df_Ex_SR.iloc[i*holding:i*holding+rollingW,:]
    
    betas = []
    # j is stock
    for j in df_Ex_SR.columns:
        df_index = df_Ex_SR_tmp.loc[:,j].dropna().index
        
        if len(df_index) != 0:
#             b = list(ols_model.fit(pd.DataFrame(df_FF5_tmp.loc[df_index,'RMW']).values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
            b = list(ols_model.fit(df_FF5_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
            b.append(j)
            betas.append(b)
    df_betas = pd.DataFrame(np.array(betas)).rename(columns={0:'MKT',1:'SMB',2:'HML',3:'RMW',4:'CMA',5:'stock'}).set_index('stock')
    
    all_betas.append(df_betas)



100%|██████████| 189/189 [02:02<00:00,  1.54it/s]


In [91]:
all_betas[0]

Unnamed: 0_level_0,MKT,SMB,HML,RMW,CMA
stock,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ABC.SJ.Equity,-0.12517957365259594,-0.2091896517993868,0.4227074165532479,0.3216544488317023,-0.020897113893007115
ABI.SJ.Equity,0.25475608094329316,-0.005704640473529608,-0.07188377839469011,-0.021825695093243627,-0.010657294131003286
ABR.SJ.Equity,-0.1722271205020849,0.654491059909995,0.1532440768833887,0.24924461619846072,-0.7858609424188141
ACL.SJ.Equity,0.8871324045072376,0.3900499220941924,-0.31435195665589866,0.029524922668776862,0.1236554423053057
ACP.SJ.Equity,-0.04623652159130903,-0.022110306217258055,0.009174940496090259,9.587545188934695e-05,0.06326041913667686
...,...,...,...,...,...
WTC.SJ.Equity,0.0789508451474771,-0.02985422178711064,-0.09987158726440265,-0.04919730658081146,-0.1533213262625501
YRK.SJ.Equity,-0.06894099514428187,-0.024121511915577702,0.07488725720547793,0.08125275916690569,-0.027013071461621806
ZPT.SJ.Equity,0.11092437919860249,-0.12174922618399239,-0.04611782914177426,-0.19699976183430615,-0.41066526548331667
ZRR.SJ.Equity,0.13781598903957332,0.2820699070958408,-0.4215211839414583,0.23866381140893322,0.05609288859677848


In [92]:
all_lambdas = []
all_intercepts = []
# i is time

df_Ex_SR_tmp_T = df_Ex_SR.iloc[rollingW:rollingW+periods*holding,:].T

lambdas = []
intercepts = []
index_times = list(df_Ex_SR_tmp_T.columns)

for k in tqdm(df_Ex_SR_tmp_T.columns):
    
    i = int(index_times.index(k) / holding)

    df_return = df_Ex_SR_tmp_T.loc[all_betas[i].index,k].dropna()
    df_index = df_return.index
    
    if len(df_index) != 0:
        l = list(ols_model.fit(all_betas[i].loc[df_index,:].values, df_return.values).coef_)
        h = ols_model.fit(all_betas[i].loc[df_index,:].values, df_return.values).intercept_
        lambdas.append(l)
        intercepts.append(h)
all_lambdas.append(pd.DataFrame(lambdas))
all_intercepts.append(pd.DataFrame(intercepts))

100%|██████████| 3969/3969 [00:12<00:00, 306.20it/s]


In [93]:
all_lambdas[0].rename(columns={0:'MKT',1:'SMB',2:'HML',3:'RMW',4:'CMA'})

Unnamed: 0,MKT,SMB,HML,RMW,CMA
0,0.005193,0.000292,0.003356,0.005321,0.010622
1,0.007094,-0.017923,0.001646,-0.002314,-0.008996
2,-0.013367,-0.035686,0.000662,-0.009443,-0.015719
3,-0.001758,0.032430,0.003745,-0.004116,0.003762
4,-0.004108,-0.027834,0.012470,-0.006053,-0.021529
...,...,...,...,...,...
3964,0.003520,0.006802,0.002977,-0.003534,-0.000691
3965,-0.005286,-0.008832,-0.003774,-0.001429,0.009736
3966,0.006937,-0.000962,-0.007273,0.021038,-0.004700
3967,-0.006199,0.002293,0.000579,0.004934,-0.001256


In [94]:
np.mean(all_intercepts[0])*21*100

0   -0.268412
dtype: float64

In [95]:
np.mean(all_lambdas[0])*21*100

0    0.242504
1   -0.385902
2   -0.216035
3   -0.210640
4    0.036737
dtype: float64

In [96]:
stats.ttest_1samp(all_intercepts[0], 0.0)

Ttest_1sampResult(statistic=array([-6.40613493]), pvalue=array([1.66689131e-10]))

In [97]:
stats.ttest_1samp(all_lambdas[0], 0.0)

Ttest_1sampResult(statistic=array([ 0.65588922, -0.63487161, -0.79057093, -0.73867379,  0.13302764]), pvalue=array([0.51193343, 0.52554876, 0.42924166, 0.46014879, 0.89417827]))

In [98]:
all_intercepts[0].apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values

array([-3.50992885])

In [99]:
## Newey-West Adjustment 
all_lambdas[0].apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values

array([ 0.3912641 , -0.41508605, -0.51186748, -0.48023976,  0.08631757])

### Adding Firm Characteristic as Control Variables

In [103]:
ols_model = LinearRegression()

In [104]:
df_RF = pd.read_csv('df_RF.csv',index_col=0)
df_SR = pd.read_csv('df_SR.csv',index_col=0)
df_SR.index = df_SR.index.to_series().apply(lambda x:x[:10])

In [105]:
df_Ex_SR = df_SR-df_RF.values

In [106]:
df_Ex_SR = df_Ex_SR.loc[df_FF5.index,:]

In [107]:
df_OP = pd.read_csv('df_std_OP.csv')

df_INV = pd.read_csv('df_std_INV.csv')

df_OP.Dates = df_OP.Dates.apply(lambda x:x[:10])

df_OP.set_index('Dates',inplace=True)

df_INV.Dates = df_INV.Dates.apply(lambda x:x[:10])

df_INV.set_index('Dates',inplace=True)

In [108]:
df_ln_Size = pd.read_csv('df_ln_Size.csv')

df_PE = pd.read_csv('df_std_PE.csv')

df_ln_Size.Dates = df_ln_Size.Dates.apply(lambda x:x[:10])

df_ln_Size.set_index('Dates',inplace=True)

df_PE.Dates = df_PE.Dates.apply(lambda x:x[:10])

df_PE.set_index('Dates',inplace=True)

In [109]:
df_ln_Size = df_ln_Size.loc[df_Ex_SR.index,:]

In [110]:
df_PE = df_PE.loc[df_Ex_SR.index,:]

In [111]:
df_OP = df_OP.loc[df_Ex_SR.index,:]

In [112]:
df_INV = df_INV.loc[df_Ex_SR.index,:]

In [113]:
len(df_ln_Size)==len(df_PE)==len(df_Ex_SR)==len(df_OP)==len(df_INV)==len(df_FF5)

True

In [114]:
## Firm characteristics are lagged for 1 month, to show its predicting ability towards stock returns 
Gap = 21

In [115]:
n = 3
holding = 21
rollingW = holding * n *12
lens = len(df_FF5)
periods = int((lens - rollingW) / holding) - 1
periods

189

In [116]:
all_lambdas = []
all_intercepts = []


df_Ex_SR_tmp_T = df_Ex_SR.iloc[rollingW:rollingW+periods*holding,:].T
df_ln_Size_tmp_T = df_ln_Size.iloc[rollingW:rollingW+periods*holding,:].T
df_PE_tmp_T = df_PE.iloc[rollingW:rollingW+periods*holding,:].T
df_OP_tmp_T = df_OP.iloc[rollingW:rollingW+periods*holding,:].T
df_INV_tmp_T = df_INV.iloc[rollingW:rollingW+periods*holding,:].T

lambdas = []
intercepts = []
index_times = list(df_Ex_SR_tmp_T.columns)

# If Gap >0, the following should be 'for k in tqdm(df_Ex_SR_tmp_T.columns[:-Gap]):''
for k in tqdm(df_Ex_SR_tmp_T.columns[:-Gap]):
    # k is time t in 3885 days
    i = int(index_times.index(k) / holding)
    # i is time T in 185 periods
    
    df_index = ((((all_betas[i].index).intersection(df_ln_Size_tmp_T.loc[:,k].replace(-np.inf,np.nan).dropna().index).\
                 intersection(df_PE_tmp_T.loc[:,k].replace(-np.inf,np.nan).dropna().index)).\
                  intersection(df_Ex_SR_tmp_T.loc[:,index_times[index_times.index(k)+Gap]].dropna().index)).\
                   intersection(df_OP_tmp_T.loc[:,k].dropna().index)).\
                    intersection(df_INV_tmp_T.loc[:,k].dropna().index)
    
    if len(df_index) != 0:
        
        l = list(ols_model.fit((pd.concat([all_betas[i].loc[df_index,:], df_ln_Size_tmp_T.loc[df_index,k],\
                                           df_PE_tmp_T.loc[df_index,k], df_OP_tmp_T.loc[df_index,k],\
                                           df_INV_tmp_T.loc[df_index,k]],axis = 1).values), \
                               (df_Ex_SR_tmp_T.loc[df_index,index_times[index_times.index(k)+Gap]].values)).coef_)
        
        h = ols_model.fit((pd.concat([all_betas[i].loc[df_index,:], df_ln_Size_tmp_T.loc[df_index,k],\
                                           df_PE_tmp_T.loc[df_index,k], df_OP_tmp_T.loc[df_index,k],\
                                           df_INV_tmp_T.loc[df_index,k]],axis = 1).values), \
                               (df_Ex_SR_tmp_T.loc[df_index,index_times[index_times.index(k)+Gap]].values)).intercept_
        
        lambdas.append(l)
        intercepts.append(h)
        
all_lambdas.append(pd.DataFrame(lambdas))
all_intercepts.append(pd.DataFrame(intercepts))

100%|██████████| 3948/3948 [00:30<00:00, 127.63it/s]


In [117]:
all_lambdas[0].rename(columns={0:'MKT',1:'SMB',2:'HML',3:'RMW',4:'CMA',5:'Size',6:'PE',7:'OP',8:'INV'})

Unnamed: 0,MKT,SMB,HML,RMW,CMA,Size,PE,OP,INV
0,3.353712e-03,-1.850862e-02,8.069027e-03,8.226317e-04,1.529664e-02,-9.940063e-04,-5.582979e-04,3.859379e-03,-9.548101e-04
1,9.393125e-03,-6.437106e-03,-3.749715e-03,4.586330e-03,1.208667e-02,-1.039790e-03,-1.776867e-03,1.870298e-03,-4.107961e-03
2,-7.911225e-36,6.312659e-36,2.539349e-36,2.439881e-36,-1.545482e-37,1.463923e-36,8.337787e-37,-4.477608e-37,7.830958e-37
3,-9.412972e-03,1.208981e-02,1.100504e-02,-5.015921e-03,4.548124e-03,1.947659e-03,-3.159388e-03,1.133867e-03,1.985641e-03
4,1.163031e-02,-6.670103e-02,-1.479305e-03,-3.318556e-02,-1.133088e-02,-2.066450e-03,-4.185547e-03,7.105066e-04,7.805213e-04
...,...,...,...,...,...,...,...,...,...
3943,8.036107e-03,5.857297e-03,2.788841e-03,-5.008953e-03,-2.077834e-04,-1.462929e-04,-5.835598e-04,1.892520e-04,-8.195344e-04
3944,-9.942570e-03,-6.155193e-03,-3.943893e-03,-1.759396e-04,7.086565e-03,5.355417e-04,5.545640e-04,-3.204636e-04,5.193936e-05
3945,5.740099e-03,-1.823528e-03,-6.276866e-03,2.070689e-02,-4.189734e-03,-2.721839e-04,7.995379e-04,1.491664e-04,-7.803699e-04
3946,-5.799246e-03,1.327930e-03,-3.349282e-03,3.220647e-03,2.329238e-03,3.096578e-04,-2.909989e-03,-1.086650e-03,-3.253052e-03


In [118]:
np.mean(all_intercepts[0])*21*100

0   -0.762744
dtype: float64

In [119]:
np.mean(all_lambdas[0])*21*100

0   -0.546939
1   -0.422126
2    0.146458
3    0.053019
4   -0.081039
5    0.111833
6   -0.512875
7    0.208504
8    0.107925
dtype: float64

In [120]:
stats.ttest_1samp(all_intercepts[0], 0.0)

Ttest_1sampResult(statistic=array([-4.23719814]), pvalue=array([2.31495189e-05]))

In [121]:
stats.ttest_1samp(all_lambdas[0], 0.0)

Ttest_1sampResult(statistic=array([-1.31118033, -0.60253958,  0.46951034,  0.16363205, -0.25372711,
        3.57203709, -7.67480892,  2.90458788,  1.72330968]), pvalue=array([1.89873120e-01, 5.46849639e-01, 6.38730828e-01, 8.70029189e-01,
       7.99719607e-01, 3.58443118e-04, 2.07455340e-14, 3.69779318e-03,
       8.49108554e-02]))

In [122]:
## Newey-West Adjustment 
all_lambdas[0].apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values 

array([-0.80222949, -0.39184733,  0.30455396,  0.10580226, -0.16495247,
        2.23883478, -4.8215541 ,  1.83631185,  1.08606619])

In [123]:
all_intercepts[0].apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values

array([-2.66983938])