In [23]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from statsmodels.sandbox.regression.gmm import IV2SLS
import numpy as np
from scipy import stats
from tqdm import tqdm
import statsmodels.api as sm

In [25]:
## Newey-West Adjustment For a Time-Series Data, e.g. Lambdas (factor returns)
def NW_ttest(ret,h0):
  T = len(ret)
  lag = int(4*(T/100)**(2/9))
  vv = np.var(ret)

  for l in range(0,lag):
      cc=np.cov(ret[0:(len(ret)-l)].T,ret[l:len(ret)].T)[0,1]
      vv=vv+2*(1-(l+1)/lag)*cc
      t=(np.mean(ret)-h0)/np.sqrt(vv)*np.sqrt(T)
  
  return t

## CAPM_IV

In [26]:
df_MR = pd.read_csv('df_MR.csv')

In [27]:
df_MR.Dates = df_MR.Dates.apply(lambda x:x[:10])

In [28]:
df_MR.set_index('Dates',inplace=True)

In [29]:
ols_model = LinearRegression()

In [30]:
df_RF = pd.read_csv('df_RF.csv',index_col=0)
df_SR = pd.read_csv('df_SR.csv',index_col=0)
df_SR.index = df_SR.index.to_series().apply(lambda x:x[:10])

In [31]:
df_SR = df_SR.loc[:, (df_SR != 0).any(axis=0)]
# df_SR = df_SR.apply(replacer)

In [32]:
df_Ex_SR = df_SR - df_RF.values

In [33]:
df_Ex_SR = df_Ex_SR.loc[df_MR.index,:]
df_Ex_SR.shape

(5215, 740)

In [34]:
n = 3
holding = 21
rollingW = holding * n *12
lens = len(df_MR)
periods = int((lens - rollingW) / holding) - 1

In [35]:
all_betas_ev = []
all_betas_iv = []

# i is time in periods
for i in tqdm(range(periods)):

    df_MR_tmp = df_MR.iloc[i*holding:i*holding+rollingW,:]
    df_Ex_SR_tmp = df_Ex_SR.iloc[i*holding:i*holding+rollingW,:]
    

    if (i + 1) % 2 == 1:
        rw_odd = pd.DataFrame()
        for k in range(0,n*12,2):
            rw_odd = pd.concat([rw_odd,df_MR_tmp.iloc[21*k:21*(k+1),:]])

        betas_ev = []
        # j is cross-sectional stock label
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_odd.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_MR_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_ev.append(b)
        df_betas_ev = pd.DataFrame(np.array(betas_ev)).rename(columns={0:'beta_1',1:'stock'}).set_index('stock')
        all_betas_ev.append(df_betas_ev)


        rw_even = pd.DataFrame()   
        for k in range(1,n*12,2):
            rw_even = pd.concat([rw_even,df_MR_tmp.iloc[21*k:21*(k+1),:]])
        
        betas_iv = []
        # j is stock
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_even.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_MR_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_iv.append(b)
        df_betas_iv = pd.DataFrame(np.array(betas_iv)).rename(columns={0:'beta_1',1:'stock'}).set_index('stock')
        all_betas_iv.append(df_betas_iv)


    if (i + 1) % 2 == 0:
        rw_odd = pd.DataFrame()
        for k in range(0,n*12,2):
            rw_odd = pd.concat([rw_odd,df_MR_tmp.iloc[21*k:21*(k+1),:]])

        betas_iv = []
        # j is stock
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_odd.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_MR_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_iv.append(b)
        df_betas_iv = pd.DataFrame(np.array(betas_iv)).rename(columns={0:'beta_1',1:'stock'}).set_index('stock')
        all_betas_iv.append(df_betas_iv)
        

        rw_even = pd.DataFrame()   
        for k in range(1,n*12,2):
            rw_even = pd.concat([rw_even,df_MR_tmp.iloc[21*k:21*(k+1),:]])

        betas_ev = []
        # j is stock
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_even.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_MR_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_ev.append(b)
        df_betas_ev = pd.DataFrame(np.array(betas_ev)).rename(columns={0:'beta_1',1:'stock'}).set_index('stock')
        all_betas_ev.append(df_betas_ev)


100%|██████████| 211/211 [03:20<00:00,  1.05it/s]


In [38]:
all_betas_ev[0]

Unnamed: 0_level_0,beta_1
stock,Unnamed: 1_level_1
ABC.SJ.Equity,0.020752036332654343
ABI.SJ.Equity,0.1960923076113216
ABR.SJ.Equity,0.25699255481558286
ACL.SJ.Equity,1.097261797813182
ACP.SJ.Equity,-0.0360787349375643
...,...
WTC.SJ.Equity,0.4319673313768428
YRK.SJ.Equity,-0.0019644100899250823
ZPT.SJ.Equity,-0.019458893419979854
ZRR.SJ.Equity,0.1641413659846572


In [39]:
all_lambdas = []
# i is time

df_Ex_SR_tmp_T = df_Ex_SR.iloc[rollingW:rollingW+periods*holding,:].T

lambdas = []
index_times = list(df_Ex_SR_tmp_T.columns)

for k in tqdm(df_Ex_SR_tmp_T.columns):
    
    i = int(index_times.index(k) / holding)

    df_index = ((all_betas_iv[i].index).intersection(all_betas_ev[i].index)).intersection(df_Ex_SR.loc[k,:].dropna().index)
    
    if len(df_index) != 0:
        iv = IV2SLS(endog = df_Ex_SR_tmp_T.loc[df_index,k].values,exog = sm.add_constant(all_betas_ev[i].loc[df_index,:].values.astype(np.float64)),instrument = sm.add_constant(all_betas_iv[i].loc[df_index,:].values.astype(np.float64)))
        res = iv.fit()
        l = list(res.params)
        lambdas.append(l)
all_lambdas.append(pd.DataFrame(lambdas))

100%|██████████| 4431/4431 [00:14<00:00, 298.37it/s]


In [40]:
all_lambdas[0].rename(columns={0:'Intercept',1:'MKT'})

Unnamed: 0,Intercept,MKT
0,-0.004259,0.023874
1,0.004750,-0.010384
2,-0.000105,0.012696
3,-0.001016,0.012151
4,0.006704,-0.026393
...,...,...
4426,-0.000695,0.000513
4427,0.001149,0.001897
4428,-0.001743,0.012511
4429,0.004971,-0.003894


In [41]:
np.nanmean(all_lambdas[0], axis = 0)*100*21

array([-0.12126831, -0.21433428])

In [42]:
stats.ttest_1samp(all_lambdas[0].dropna(), 0.0)

Ttest_1sampResult(statistic=array([-0.76588406, -0.37784617]), pvalue=array([0.44378609, 0.70556297]))

In [43]:
## New-West Adjustment for CAPM lambda
all_lambdas[0].dropna().apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values

array([-0.44484331, -0.23306974])

### Adding Firm Characteristics

In [44]:
df_ln_Size = pd.read_csv('df_ln_Size.csv')

df_PE = pd.read_csv('df_std_PE.csv')

df_ln_Size.Dates = df_ln_Size.Dates.apply(lambda x:x[:10])

df_ln_Size.set_index('Dates',inplace=True)

df_PE.Dates = df_PE.Dates.apply(lambda x:x[:10])

df_PE.set_index('Dates',inplace=True)

In [45]:
df_ln_Size = df_ln_Size.loc[df_Ex_SR.index,:]

In [46]:
df_PE = df_PE.loc[df_Ex_SR.index,:]

In [47]:
len(df_ln_Size)==len(df_PE)==len(df_Ex_SR)

True

In [48]:
n = 3
holding = 21
rollingW = holding * n * 12
lens = len(df_MR)
periods = int((lens - rollingW) / holding) - 1

In [54]:
all_lambdas = []

df_Ex_SR_tmp_T = df_Ex_SR.iloc[rollingW:rollingW+periods*holding,:].T
df_ln_Size_tmp_T = df_ln_Size.iloc[rollingW:rollingW+periods*holding,:].T
df_PE_tmp_T = df_PE.iloc[rollingW:rollingW+periods*holding,:].T

lambdas = []

index_times = list(df_Ex_SR_tmp_T.columns)

for k in tqdm(df_Ex_SR_tmp_T.columns):
    # k is time t in 3948 days
    i = int(index_times.index(k) / holding)
    # i is time T in 185 periods
    # df_ln_Size_tmp_T.loc[:,k].replace(-np.inf,np.nan), the -inf in ln_Size data dataframe would interrupt the regression
    df_index =  (((((all_betas_ev[i].index).intersection(all_betas_iv[i].index)).\
                 intersection(df_ln_Size_tmp_T.loc[:,k].replace(-np.inf,np.nan).dropna().index)).\
                 intersection(df_PE_tmp_T.loc[:,k].dropna().index)).\
                 intersection(df_Ex_SR_tmp_T.loc[:,index_times[index_times.index(k)]].dropna().index))
    
    if len(df_index) != 0:
        iv = IV2SLS(endog = df_Ex_SR_tmp_T.loc[df_index,index_times[index_times.index(k)]].values,\
                    exog = sm.add_constant(pd.concat([all_betas_ev[i].loc[df_index,:],\
                                                      df_ln_Size_tmp_T.loc[df_index,k],\
                                                      df_PE_tmp_T.loc[df_index,k]],axis = 1).values.astype(np.float64)),\
                    instrument = sm.add_constant(pd.concat([all_betas_iv[i].loc[df_index,:],\
                                                           df_ln_Size_tmp_T.loc[df_index,k],\
                                                           df_PE_tmp_T.loc[df_index,k]],axis = 1).values.astype(np.float64)))
        res = iv.fit()
        l = list(res.params)     
        lambdas.append(l)

all_lambdas.append(pd.DataFrame(lambdas))

100%|██████████| 4431/4431 [00:21<00:00, 210.62it/s]


In [55]:
all_lambdas[0].rename(columns={0:'Intercept',1:'MKT',2:'Size',3:'PE'})

Unnamed: 0,Intercept,MKT,Size,PE
0,0.001411,0.029158,-0.000714,0.004820
1,0.010663,0.003985,-0.002076,-0.003550
2,0.012263,0.019755,-0.001836,0.000288
3,0.025302,0.021619,-0.003934,0.003830
4,-0.022587,-0.056710,0.004742,-0.001219
...,...,...,...,...
4426,-0.015723,-0.015075,0.002723,0.001520
4427,-0.017773,-0.001042,0.002171,-0.001693
4428,-0.021147,0.001717,0.002643,0.000090
4429,0.011261,-0.007113,-0.000308,0.000678


In [56]:
np.mean(all_lambdas[0])*21*100

0   -0.802520
1   -0.149548
2    0.083902
3    1.168367
dtype: float64

In [57]:
stats.ttest_1samp(all_lambdas[0].dropna(), 0.0)[0]

array([-0.61944457, -0.04312719,  0.27545061, 13.32747049])

In [58]:
## New-West Adjustment 
all_lambdas[0].dropna().apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values

array([-0.4066301 , -0.02855098,  0.18229397,  8.56600948])

## FF3-Factor_IV

In [130]:
ols_model = LinearRegression()

In [131]:
df_FF3 = pd.read_csv('df_FF3.csv')

In [132]:
df_FF3.Dates = df_FF3.Dates.apply(lambda x:x[:10])

In [133]:
df_FF3.set_index('Dates',inplace=True)

In [134]:
df_RF = pd.read_csv('df_RF.csv',index_col=0)
df_SR = pd.read_csv('df_SR.csv',index_col=0)
df_SR.index = df_SR.index.to_series().apply(lambda x:x[:10])

In [136]:
df_Ex_SR = df_SR-df_RF.values

In [137]:
df_Ex_SR = df_Ex_SR.loc[df_FF3.index,:]

In [138]:
n = 3
holding = 21
rollingW = holding * n * 12
lens = len(df_FF3)
periods = int((lens - rollingW) / holding) - 1

In [139]:
all_betas_ev = []
all_betas_iv = []

# i is time
for i in tqdm(range(periods)):

    df_FF3_tmp = df_FF3.iloc[i*holding:i*holding+rollingW,:]
    df_Ex_SR_tmp = df_Ex_SR.iloc[i*holding:i*holding+rollingW,:]
    

    if (i + 1) % 2 == 1:
        rw_odd = pd.DataFrame()
        for k in range(0,n*12,2):
            rw_odd = pd.concat([rw_odd,df_FF3_tmp.iloc[21*k:21*(k+1),:]])

        betas_ev = []
        # j is stock
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_odd.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_FF3_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_ev.append(b)
        df_betas_ev = pd.DataFrame(np.array(betas_ev)).rename(columns={0:'beta_1',1:'beta_2',2:'beta_3',3:'stock'}).set_index('stock')
        all_betas_ev.append(df_betas_ev)


        rw_even = pd.DataFrame()   
        for k in range(1,n*12,2):
            rw_even = pd.concat([rw_even,df_FF3_tmp.iloc[21*k:21*(k+1),:]])
        
        betas_iv = []
        # j is stock
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_even.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_FF3_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_iv.append(b)
        df_betas_iv = pd.DataFrame(np.array(betas_iv)).rename(columns={0:'beta_1',1:'beta_2',2:'beta_3',3:'stock'}).set_index('stock')
        all_betas_iv.append(df_betas_iv)


    if (i + 1) % 2 == 0:
        rw_odd = pd.DataFrame()
        for k in range(0,n*12,2):
            rw_odd = pd.concat([rw_odd,df_FF3_tmp.iloc[21*k:21*(k+1),:]])

        betas_iv = []
        # j is stock
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_odd.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_FF3_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_iv.append(b)
        df_betas_iv = pd.DataFrame(np.array(betas_iv)).rename(columns={0:'beta_1',1:'beta_2',2:'beta_3',3:'stock'}).set_index('stock')
        all_betas_iv.append(df_betas_iv)
        

        rw_even = pd.DataFrame()   
        for k in range(1,n*12,2):
            rw_even = pd.concat([rw_even,df_FF3_tmp.iloc[21*k:21*(k+1),:]])

        betas_ev = []
        # j is stock
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_even.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_FF3_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_ev.append(b)
        df_betas_ev = pd.DataFrame(np.array(betas_ev)).rename(columns={0:'beta_1',1:'beta_2',2:'beta_3',3:'stock'}).set_index('stock')
        all_betas_ev.append(df_betas_ev)



100%|██████████| 189/189 [04:18<00:00,  1.37s/it]


In [39]:
all_betas_ev[0]

Unnamed: 0_level_0,beta_1,beta_2,beta_3
stock,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ABC.SJ.Equity,-0.3450479103402,-0.9013937856205195,0.07037902558738829
ABI.SJ.Equity,0.24442555773531263,0.015567121061726346,-0.01682438515002521
ABR.SJ.Equity,0.12202032625933008,0.09239379104161108,0.10318923571716765
ACL.SJ.Equity,1.068714488205904,1.2858159343408646,0.15535498914098245
ACP.SJ.Equity,-0.056898803620166936,0.03514691634035487,0.10734478653399498
...,...,...,...
WTC.SJ.Equity,0.08731394006476963,-0.22690505911023556,-0.04665498693566708
YRK.SJ.Equity,-0.0640646926233111,-0.028294456627727872,0.06842958205121522
ZPT.SJ.Equity,0.15755587268066448,-0.3008526663092845,0.25989708660932814
ZRR.SJ.Equity,-0.03180208366329865,1.1580198922741012,-0.8364768035242083


In [147]:
all_lambdas = []
# i is time in periods

df_Ex_SR_tmp_T = df_Ex_SR.iloc[rollingW:rollingW+periods*holding,:].T

lambdas = []
index_times = list(df_Ex_SR_tmp_T.columns)

for k in tqdm(df_Ex_SR_tmp_T.columns):
  #k is cross-sectional stock label  
    i = int(index_times.index(k) / holding)

    df_index = ((all_betas_iv[i].index).intersection(all_betas_ev[i].index)).intersection(df_Ex_SR.loc[k,:].dropna().index)
    
    if len(df_index) != 0:
        iv = IV2SLS(endog = df_Ex_SR_tmp_T.loc[df_index,k].values,exog = sm.add_constant(all_betas_ev[i].loc[df_index,:].values.astype(np.float64)),instrument = sm.add_constant(all_betas_iv[i].loc[df_index,:].values.astype(np.float64)))
        res = iv.fit()
        l = list(res.params)
        lambdas.append(l)
        
all_lambdas.append(pd.DataFrame(lambdas))

100%|██████████| 3969/3969 [00:15<00:00, 260.09it/s]


In [148]:
# One of the issues that often arise with IV estimators is that for any finite stock amount N, 
# there is a very small change that the cross-products of the matrixes of (B_IV ) and (B_EV ) 
# might be singular, which will cause the parameter estimates too large to be trusted. 
# To avoid this potential problem in finite samples, the monthly risk premium estimates that deviate 
# six or more standard deviations of the corresponding factor realizations from their sample average as missing values. 
# For each column, it first computes the Z-score of each value in the column, relative to the column mean and standard deviation.
# It then takes the absolute Z-score because the direction does not matter, only if it is below the threshold.
# all(axis=1) ensures that for each row, all column satisfy the constraint.
# Finally, the result of this condition is used to index the dataframe.
all_lambdas[0] = all_lambdas[0][(np.abs((all_lambdas[0]-all_lambdas[0].mean())/all_lambdas[0].std()) <= 6).all(axis=1)]
all_lambdas[0].rename(columns={0:'Intercept',1:'MKT',2:'SMB',3:'HML'})

Unnamed: 0,Intercept,MKT,SMB,HML
0,0.001171,0.011758,-0.026516,0.002814
1,-0.000303,0.008859,-0.017378,0.008799
2,-0.001638,-0.002327,-0.036472,0.059885
3,0.002607,-0.026474,0.048481,0.002946
4,-0.000422,-0.008639,-0.007423,0.012311
...,...,...,...,...
3964,-0.001128,0.004781,0.001644,0.011840
3965,-0.001849,-0.006416,0.000613,-0.002754
3966,0.000375,0.006903,0.006874,-0.019319
3967,-0.000252,-0.008535,-0.002335,-0.002201


In [149]:
np.mean(all_lambdas[0])*21*100

0   -0.374266
1    0.428978
2   -2.642724
3    1.622151
dtype: float64

In [150]:
stats.ttest_1samp(all_lambdas[0].dropna(), 0.0)[0]

array([-3.42008208,  0.6228337 , -0.96780872,  0.84194774])

In [151]:
all_lambdas[0].dropna().apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values

array([-2.15280821,  0.39371979, -0.61724742,  0.54224493])

### Adding firm characteristics as control variables

In [45]:
ols_model = LinearRegression()

In [46]:
df_FF3 = pd.read_csv('df_FF3.csv')

In [47]:
df_FF3.Dates = df_FF3.Dates.apply(lambda x:x[:10])

In [48]:
df_FF3.set_index('Dates',inplace=True)

In [49]:
df_RF = pd.read_csv('df_RF.csv',index_col=0)
df_SR = pd.read_csv('df_SR.csv',index_col=0)
df_SR.index = df_SR.index.to_series().apply(lambda x:x[:10])

In [50]:
# df_SR = df_SR.replace(0,np.nan)

In [51]:
df_Ex_SR = df_SR-df_RF.values

In [52]:
df_Ex_SR = df_Ex_SR.loc[df_FF3.index,:]

In [53]:
df_ln_Size = pd.read_csv('df_ln_Size.csv')

df_PE = pd.read_csv('df_std_PE.csv')

df_ln_Size.Dates = df_ln_Size.Dates.apply(lambda x:x[:10])

df_ln_Size.set_index('Dates',inplace=True)

df_PE.Dates = df_PE.Dates.apply(lambda x:x[:10])

df_PE.set_index('Dates',inplace=True)

In [54]:
df_ln_Size = df_ln_Size.loc[df_Ex_SR.index,:]

In [55]:
df_PE = df_PE.loc[df_Ex_SR.index,:]

In [56]:
len(df_ln_Size)==len(df_PE)==len(df_Ex_SR)==len(df_FF3)

True

In [57]:
n = 3
holding = 21
rollingW = holding * n * 12
lens = len(df_FF3)
periods = int((lens - rollingW) / holding) - 1

In [58]:
all_lambdas = []

df_Ex_SR_tmp_T = df_Ex_SR.iloc[rollingW:rollingW+periods*holding,:].T
df_ln_Size_tmp_T = df_ln_Size.iloc[rollingW:rollingW+periods*holding,:].T
df_PE_tmp_T = df_PE.iloc[rollingW:rollingW+periods*holding,:].T

lambdas = []

index_times = list(df_Ex_SR_tmp_T.columns)

for k in tqdm(df_Ex_SR_tmp_T.columns):
    # k is time t in 3948 days
    i = int(index_times.index(k) / holding)
    # i is time T in 185 periods
    # df_ln_Size_tmp_T.loc[:,k].replace(-np.inf,np.nan), the -inf in ln_Size data dataframe would interrupt the regression
    df_index =  (((((all_betas_ev[i].index).intersection(all_betas_iv[i].index)).\
                 intersection(df_ln_Size_tmp_T.loc[:,k].replace(-np.inf,np.nan).dropna().index)).\
                 intersection(df_PE_tmp_T.loc[:,k].dropna().index)).\
                 intersection(df_Ex_SR_tmp_T.loc[:,index_times[index_times.index(k)]].dropna().index))
    
    if len(df_index) != 0:
        iv = IV2SLS(endog = df_Ex_SR_tmp_T.loc[df_index,index_times[index_times.index(k)]].values,\
                    exog = sm.add_constant(pd.concat([all_betas_ev[i].loc[df_index,:],\
                                                      df_ln_Size_tmp_T.loc[df_index,k],\
                                                      df_PE_tmp_T.loc[df_index,k]],axis = 1).values.astype(np.float64)),\
                    instrument = sm.add_constant(pd.concat([all_betas_iv[i].loc[df_index,:],\
                                                           df_ln_Size_tmp_T.loc[df_index,k],\
                                                           df_PE_tmp_T.loc[df_index,k]],axis = 1).values.astype(np.float64)))
        res = iv.fit()
        l = list(res.params)     
        lambdas.append(l)

all_lambdas.append(pd.DataFrame(lambdas))

100%|██████████| 3969/3969 [00:22<00:00, 178.49it/s]


In [59]:
all_lambdas[0].rename(columns={0:'Intercept',1:'MKT',2:'SMB',3:'HML',4:'Size',5:'PE'})

Unnamed: 0,Intercept,MKT,SMB,HML,Size,PE
0,0.010727,0.040954,-0.031282,-0.001213,-0.002458,-0.004101
1,0.007573,0.024132,-0.015205,0.008035,-0.001671,-0.001707
2,0.014516,-0.020141,0.025564,0.098764,-0.002241,-0.001682
3,-0.003868,-0.013269,0.013271,-0.017983,0.000987,0.000624
4,-0.012248,-0.042732,0.007130,-0.006637,0.002691,0.003154
...,...,...,...,...,...,...
3964,0.000740,0.009143,0.001142,0.008860,-0.000383,0.000552
3965,-0.003504,-0.009925,0.000281,-0.003759,0.000322,0.000756
3966,-0.000196,0.004987,0.005892,-0.010893,0.000189,0.001550
3967,-0.003676,-0.013189,-0.001745,-0.003972,0.000517,-0.001334


In [60]:
np.mean(all_lambdas[0].dropna())*21*100

0   -0.731730
1   -0.818486
2   -0.161365
3   -0.114729
4    0.095019
5    0.983848
dtype: float64

In [61]:
stats.ttest_1samp(all_lambdas[0].dropna(), 0.0)

Ttest_1sampResult(statistic=array([-3.98043314, -1.28125845, -0.2580833 , -0.12731967,  2.61797765,
       13.86692867]), pvalue=array([7.00199727e-05, 2.00177773e-01, 7.96356004e-01, 8.98693848e-01,
       8.87886588e-03, 9.83045453e-43]))

In [62]:
all_lambdas[0].dropna().apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values

array([-2.54141592, -0.79435453, -0.16660847, -0.08317149,  1.67050677,
        8.78646286])

## Carhart4-Factor_IV

In [63]:
ols_model = LinearRegression()

In [64]:
df_FF4 = pd.read_csv('df_FF4.csv')

In [65]:
df_FF4.Dates = df_FF4.Dates.apply(lambda x:x[:10])

In [66]:
df_FF4.set_index('Dates',inplace=True)

In [67]:
df_RF = pd.read_csv('df_RF.csv',index_col=0)
df_SR = pd.read_csv('df_SR.csv',index_col=0)
df_SR.index = df_SR.index.to_series().apply(lambda x:x[:10])

In [69]:
df_Ex_SR = df_SR-df_RF.values

In [70]:
df_Ex_SR = df_Ex_SR.loc[df_FF4.index,:]

In [71]:
n = 3
holding = 21
rollingW = holding * n *12
lens = len(df_FF4)
periods = int((lens - rollingW) / holding) - 1

In [72]:
periods

189

In [73]:
all_betas_ev = []
all_betas_iv = []

# i is time
for i in tqdm(range(periods)):

    df_FF4_tmp = df_FF4.iloc[i*holding:i*holding+rollingW,:]
    df_Ex_SR_tmp = df_Ex_SR.iloc[i*holding:i*holding+rollingW,:]
    

    if (i + 1) % 2 == 1:
        rw_odd = pd.DataFrame()
        for k in range(0,n*12,2):
            rw_odd = pd.concat([rw_odd,df_FF4_tmp.iloc[21*k:21*(k+1),:]])

        betas_ev = []
        # j is stock
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_odd.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_FF4_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_ev.append(b)
        df_betas_ev = pd.DataFrame(np.array(betas_ev)).rename(columns={0:'beta_1',1:'beta_2',2:'beta_3',3:'beta_4',4:'stock'}).set_index('stock')
        all_betas_ev.append(df_betas_ev)


        rw_even = pd.DataFrame()   
        for k in range(1,n*12,2):
            rw_even = pd.concat([rw_even,df_FF4_tmp.iloc[21*k:21*(k+1),:]])
        
        betas_iv = []
        # j is stock
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_even.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_FF4_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_iv.append(b)
        df_betas_iv = pd.DataFrame(np.array(betas_iv)).rename(columns={0:'beta_1',1:'beta_2',2:'beta_3',3:'beta_4',4:'stock'}).set_index('stock')
        all_betas_iv.append(df_betas_iv)


    if (i + 1) % 2 == 0:
        rw_odd = pd.DataFrame()
        for k in range(0,n*12,2):
            rw_odd = pd.concat([rw_odd,df_FF4_tmp.iloc[21*k:21*(k+1),:]])

        betas_iv = []
        # j is stock
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_odd.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_FF4_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_iv.append(b)
        df_betas_iv = pd.DataFrame(np.array(betas_iv)).rename(columns={0:'beta_1',1:'beta_2',2:'beta_3',3:'beta_4',4:'stock'}).set_index('stock')
        all_betas_iv.append(df_betas_iv)
        

        rw_even = pd.DataFrame()   
        for k in range(1,n*12,2):
            rw_even = pd.concat([rw_even,df_FF4_tmp.iloc[21*k:21*(k+1),:]])

        betas_ev = []
        # j is stock
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_even.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_FF4_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_ev.append(b)
        df_betas_ev = pd.DataFrame(np.array(betas_ev)).rename(columns={0:'beta_1',1:'beta_2',2:'beta_3',3:'beta_4',4:'stock'}).set_index('stock')
        all_betas_ev.append(df_betas_ev)



100%|██████████| 189/189 [03:41<00:00,  1.17s/it]


In [74]:
all_betas_iv[0]

Unnamed: 0_level_0,beta_1,beta_2,beta_3,beta_4
stock,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ABC.SJ.Equity,0.05868531684868637,-0.9794182171356192,0.6322351671160076,0.2131583507134486
ABI.SJ.Equity,0.2594330207856468,-0.09513051220878505,-0.13329957808567186,-0.07879442371422818
ABR.SJ.Equity,-0.46145631538139054,1.769854847974078,0.08008330605892389,0.4934823218392971
ACL.SJ.Equity,0.7466575180412792,0.7470779795059452,-0.7067616892813978,-0.4438821207605245
ACP.SJ.Equity,-0.0344091139964603,-0.18888016893266268,-0.05595481296994584,-0.012026843941453702
...,...,...,...,...
WTC.SJ.Equity,0.05737016567555949,0.002846787451553978,-0.15170457139584625,0.12426244800620771
YRK.SJ.Equity,-0.07044194191912104,-0.12614662409516172,0.08553511175958478,0.12890574927943896
ZPT.SJ.Equity,0.02742378335248406,-0.8615319983806928,-0.3123535190855845,0.49215443679103693
ZRR.SJ.Equity,0.32366582591252707,0.5438415081136593,-0.007375256193542956,-0.1285547255324735


In [75]:
all_lambdas = []
# i is time

df_Ex_SR_tmp_T = df_Ex_SR.iloc[rollingW:rollingW+periods*holding,:].T

lambdas = []
index_times = list(df_Ex_SR_tmp_T.columns)

for k in tqdm(df_Ex_SR_tmp_T.columns):
    
    i = int(index_times.index(k) / holding)

    df_index = ((all_betas_iv[i].index).intersection(all_betas_ev[i].index)).intersection(df_Ex_SR.loc[k,:].dropna().index)
    
    if len(df_index) != 0:
        
        iv = IV2SLS(endog = df_Ex_SR_tmp_T.loc[df_index,k].values,exog = sm.add_constant(all_betas_ev[i].loc[df_index,:].values.astype(np.float64)),instrument = sm.add_constant(all_betas_iv[i].loc[df_index,:].values.astype(np.float64)))
        res = iv.fit()
        l = list(res.params)
        lambdas.append(l)
        
all_lambdas.append(pd.DataFrame(lambdas))

100%|██████████| 3969/3969 [00:13<00:00, 288.69it/s]


In [76]:
# all_lambdas[0] = all_lambdas[0][(np.abs((all_lambdas[0]-all_lambdas[0].mean())/all_lambdas[0].std()) <= 6).all(axis=1)]
all_lambdas[0].rename(columns={0:'Intercept',1:'MKT',2:'SMB',3:'HML',4:'WML'})

Unnamed: 0,Intercept,MKT,SMB,HML,WML
0,0.001308,0.010749,-0.022658,0.005319,0.004712
1,-0.000250,0.008397,-0.014954,0.009557,0.003223
2,-0.002580,0.004596,-0.071310,0.053617,-0.063046
3,0.003181,-0.031128,0.062694,0.010016,0.014090
4,0.000222,-0.013489,0.008114,0.020869,0.015983
...,...,...,...,...,...
3964,-0.001320,-0.001233,0.003331,-0.003058,-0.037847
3965,-0.001730,-0.009182,0.000746,-0.014848,-0.015197
3966,0.000998,0.003557,0.005668,-0.043176,-0.010409
3967,-0.000374,-0.005383,-0.002599,0.008939,0.014915


In [77]:
np.mean(all_lambdas[0].dropna())

0   -0.000079
1   -0.001564
2   -0.004346
3   -0.006590
4   -0.004581
dtype: float64

In [78]:
stats.ttest_1samp(all_lambdas[0].dropna(), 0.0)

Ttest_1sampResult(statistic=array([-0.43440934, -0.92116073, -0.82863604, -1.08348736, -0.82170634]), pvalue=array([0.66401483, 0.35702252, 0.40736022, 0.27865799, 0.41129342]))

In [79]:
## New-West Adjustment 
all_lambdas[0].dropna().apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values

array([-0.28970946, -0.60074729, -0.51907038, -0.67852529, -0.51778038])

## FF5-Factor_IV

In [80]:
ols_model = LinearRegression()

In [81]:
df_FF5 = pd.read_csv('df_FF5.csv')

In [82]:
df_FF5.Dates = df_FF5.Dates.apply(lambda x:x[:10])

In [83]:
df_FF5.set_index('Dates',inplace=True)

In [84]:
df_RF = pd.read_csv('df_RF.csv',index_col=0)
df_SR = pd.read_csv('df_SR.csv',index_col=0)
df_SR.index = df_SR.index.to_series().apply(lambda x:x[:10])

In [86]:
df_Ex_SR = df_SR-df_RF.values

In [87]:
df_Ex_SR = df_Ex_SR.loc[df_FF5.index,:]

In [88]:
n = 5
holding = 21
rollingW = holding * n *12
lens = len(df_FF5)
periods = int((lens - rollingW) / holding) - 1

In [89]:
periods

165

In [90]:
all_betas_ev = []
all_betas_iv = []

# i is time
for i in tqdm(range(periods)):

    df_FF5_tmp = df_FF5.iloc[i*holding:i*holding+rollingW,:]
    df_Ex_SR_tmp = df_Ex_SR.iloc[i*holding:i*holding+rollingW,:]
    

    if (i + 1) % 2 == 1:
        rw_odd = pd.DataFrame()
        for k in range(0,n*12,2):
            rw_odd = pd.concat([rw_odd,df_FF5_tmp.iloc[21*k:21*(k+1),:]])

        betas_ev = []
        # j is stock
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_odd.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_FF5_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_ev.append(b)
        df_betas_ev = pd.DataFrame(np.array(betas_ev)).rename(columns={0:'beta_1',1:'beta_2',2:'beta_3',3:'beta_4',4:'beta_5',5:'stock'}).set_index('stock')
        all_betas_ev.append(df_betas_ev)


        rw_even = pd.DataFrame()   
        for k in range(1,n*12,2):
            rw_even = pd.concat([rw_even,df_FF5_tmp.iloc[21*k:21*(k+1),:]])
        
        betas_iv = []
        # j is stock
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_even.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_FF5_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_iv.append(b)
        df_betas_iv = pd.DataFrame(np.array(betas_iv)).rename(columns={0:'beta_1',1:'beta_2',2:'beta_3',3:'beta_4',4:'beta_5',5:'stock'}).set_index('stock')
        all_betas_iv.append(df_betas_iv)


    if (i + 1) % 2 == 0:
        rw_odd = pd.DataFrame()
        for k in range(0,n*12,2):
            rw_odd = pd.concat([rw_odd,df_FF5_tmp.iloc[21*k:21*(k+1),:]])

        betas_iv = []
        # j is stock
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_odd.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_FF5_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_iv.append(b)
        df_betas_iv = pd.DataFrame(np.array(betas_iv)).rename(columns={0:'beta_1',1:'beta_2',2:'beta_3',3:'beta_4',4:'beta_5',5:'stock'}).set_index('stock')
        all_betas_iv.append(df_betas_iv)
        

        rw_even = pd.DataFrame()   
        for k in range(1,n*12,2):
            rw_even = pd.concat([rw_even,df_FF5_tmp.iloc[21*k:21*(k+1),:]])

        betas_ev = []
        # j is stock
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_even.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_FF5_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_ev.append(b)
        df_betas_ev = pd.DataFrame(np.array(betas_ev)).rename(columns={0:'beta_1',1:'beta_2',2:'beta_3',3:'beta_4',4:'beta_5',5:'stock'}).set_index('stock')
        all_betas_ev.append(df_betas_ev)



100%|██████████| 165/165 [04:45<00:00,  1.73s/it]


In [91]:
all_betas_iv[0]

Unnamed: 0_level_0,beta_1,beta_2,beta_3,beta_4,beta_5
stock,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ABC.SJ.Equity,0.027739161068604256,-0.20864088588926621,0.45301550021405157,0.2619034718731777,-0.039434944472493905
ABI.SJ.Equity,0.23574838065910345,-0.0070256172165493735,-0.06894415920407701,-0.09845853340856141,-0.0950092181703325
ABR.SJ.Equity,-0.35483965264963613,0.717706391937484,-0.012830150917756189,0.19629746264209214,-0.4725413050290982
ACL.SJ.Equity,0.8125232512525501,0.29456009431204255,-0.4777124284101453,0.3104093303780326,-0.03430134775210375
ACP.SJ.Equity,0.00018233015912555502,-0.03349833542803239,0.019448973928432617,0.025793653470835384,0.029027983305394238
...,...,...,...,...,...
WTC.SJ.Equity,0.061377006609874586,-0.0017920022385793932,-0.10630295750267238,-0.13649233657218968,-0.12949964475755377
YRK.SJ.Equity,-0.09685868344641943,0.1315932064250195,-0.18125147762369698,-0.0948984019406076,-0.2466106135879061
ZPT.SJ.Equity,0.03656268256101802,0.0799384532543406,0.04518269456787212,0.22624294124386934,-0.2763675833139778
ZRR.SJ.Equity,0.28103034997074855,0.13223014105682518,-0.0573943108686875,0.06122993320405817,-0.14468011731210154


In [92]:
all_lambdas = []
# i is time

df_Ex_SR_tmp_T = df_Ex_SR.iloc[rollingW:rollingW+periods*holding,:].T

lambdas = []
index_times = list(df_Ex_SR_tmp_T.columns)

for k in tqdm(df_Ex_SR_tmp_T.columns):
    
    i = int(index_times.index(k) / holding)

    df_index = ((all_betas_iv[i].index).intersection(all_betas_ev[i].index)).intersection(df_Ex_SR.loc[k,:].dropna().index)
    
    if len(df_index) != 0:
        
        iv = IV2SLS(endog = df_Ex_SR_tmp_T.loc[df_index,k].values,exog = sm.add_constant(all_betas_ev[i].loc[df_index,:].values.astype(np.float64)),instrument = sm.add_constant(all_betas_iv[i].loc[df_index,:].values.astype(np.float64)))
        res = iv.fit()
        l = list(res.params)
        lambdas.append(l)
        
all_lambdas.append(pd.DataFrame(lambdas))

100%|██████████| 3465/3465 [00:17<00:00, 195.46it/s]


In [93]:
# all_lambdas[0] = all_lambdas[0][(np.abs((all_lambdas[0]-all_lambdas[0].mean())/all_lambdas[0].std()) <= 3).all(axis=1)]
all_lambdas[0].rename(columns={0:'Intercept',1:'MKT',2:'SMB',3:'HML',4:'RMW',5:'CMA'})

Unnamed: 0,Intercept,MKT,SMB,HML,RMW,CMA
0,-0.002595,-0.036408,0.101865,-0.027268,0.064488,0.033975
1,0.000204,0.017395,0.059345,-0.021122,0.017340,0.015095
2,0.001960,-0.024289,0.005689,-0.004852,0.025110,0.040461
3,0.002006,0.001961,0.154594,-0.089907,0.011504,0.065018
4,0.001426,0.011220,0.000422,0.029456,-0.024108,-0.006367
...,...,...,...,...,...,...
3460,-0.000814,0.001696,0.025266,0.008378,-0.014354,0.008213
3461,-0.001063,-0.011310,-0.014821,-0.011359,0.001926,0.030011
3462,-0.000279,0.010754,-0.000681,0.001823,0.037409,-0.003742
3463,0.001085,-0.020361,0.027573,0.002954,-0.034154,0.059204


In [94]:
np.mean(all_lambdas[0])*21*100

0   -0.504765
1    1.081535
2    0.763762
3    2.495129
4    5.763902
5    6.336348
dtype: float64

In [95]:
stats.ttest_1samp(all_lambdas[0].dropna(), 0.0)[0]

array([-2.27610266,  0.69014178,  0.03207976,  0.54911619,  0.83474209,
        0.49343067])

In [96]:
## New-West Adjustment 
all_lambdas[0].apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values

array([-1.47705265,  0.44104491,  0.02053001,  0.35688192,  0.55395337,
        0.3209101 ])

### Adding Firm Characteristics as Control Variables

In [97]:
df_OP = pd.read_csv('df_std_OP.csv')

df_INV = pd.read_csv('df_std_INV.csv')

df_OP.Dates = df_OP.Dates.apply(lambda x:x[:10])

df_OP.set_index('Dates',inplace=True)

df_INV.Dates = df_INV.Dates.apply(lambda x:x[:10])

df_INV.set_index('Dates',inplace=True)

In [98]:
df_ln_Size = pd.read_csv('df_ln_Size.csv')

df_PE = pd.read_csv('df_std_PE.csv')

df_ln_Size.Dates = df_ln_Size.Dates.apply(lambda x:x[:10])

df_ln_Size.set_index('Dates',inplace=True)

df_PE.Dates = df_PE.Dates.apply(lambda x:x[:10])

df_PE.set_index('Dates',inplace=True)

In [99]:
df_ln_Size = df_ln_Size.loc[df_Ex_SR.index,:]

In [100]:
df_PE = df_PE.loc[df_Ex_SR.index,:]

In [101]:
df_OP = df_OP.loc[df_Ex_SR.index,:]

In [102]:
df_INV = df_INV.loc[df_Ex_SR.index,:]

In [103]:
len(df_ln_Size)==len(df_PE)==len(df_Ex_SR)==len(df_OP)==len(df_INV)

True

In [104]:
Gap = 21

In [105]:
n = 5
holding = 21
rollingW = holding * n *12
lens = len(df_FF5)
periods = int((lens - rollingW) / holding) - 1
periods

165

In [106]:
all_lambdas = []

df_Ex_SR_tmp_T = df_Ex_SR.iloc[rollingW:rollingW+periods*holding,:].T
df_ln_Size_tmp_T = df_ln_Size.iloc[rollingW:rollingW+periods*holding,:].T
df_PE_tmp_T = df_PE.iloc[rollingW:rollingW+periods*holding,:].T
df_OP_tmp_T = df_OP.iloc[rollingW:rollingW+periods*holding,:].T
df_INV_tmp_T = df_INV.iloc[rollingW:rollingW+periods*holding,:].T

lambdas = []
index_times = list(df_Ex_SR_tmp_T.columns)
# If Gap >0, the following should be 'for k in tqdm(df_Ex_SR_tmp_T.columns[:-Gap]):''
for k in tqdm(df_Ex_SR_tmp_T.columns[:-Gap]):
    # k is time t in 3948 days
    i = int(index_times.index(k) / holding)
    # i is time T in 185 periods
    # df_ln_Size_tmp_T.loc[:,k].replace(-np.inf,np.nan), the -inf in ln_Size data dataframe would interrupt the regression
    df_index = ((((((all_betas_ev[i].index).intersection(df_ln_Size_tmp_T.loc[:,k].replace(-np.inf,np.nan).dropna().index).\
                 intersection(df_PE_tmp_T.loc[:,k].replace(-np.inf,np.nan).dropna().index)).\
                  intersection(df_Ex_SR_tmp_T.loc[:,index_times[index_times.index(k)+Gap]].dropna().index)).\
                   intersection(df_OP_tmp_T.loc[:,k].dropna().index)).\
                    intersection(df_INV_tmp_T.loc[:,k].dropna().index)).\
                     intersection(all_betas_iv[i].index))
    
    if len(df_index) != 0:
        iv = IV2SLS(endog = df_Ex_SR_tmp_T.loc[df_index,index_times[index_times.index(k)+Gap]].values,\
                    exog = sm.add_constant(pd.concat([all_betas_ev[i].loc[df_index,:],\
                                                      df_ln_Size_tmp_T.loc[df_index,k],\
                                                      df_PE_tmp_T.loc[df_index,k],\
                                                      df_OP_tmp_T.loc[df_index,k],\
                                                      df_INV_tmp_T.loc[df_index,k]],axis = 1).values.astype(np.float64)),\
                    instrument = sm.add_constant(pd.concat([all_betas_iv[i].loc[df_index,:],\
                                                           df_ln_Size_tmp_T.loc[df_index,k],\
                                                           df_PE_tmp_T.loc[df_index,k],\
                                                           df_OP_tmp_T.loc[df_index,k],\
                                                           df_INV_tmp_T.loc[df_index,k]],axis = 1).values.astype(np.float64)))
        res = iv.fit()
        l = list(res.params)           
        lambdas.append(l)

all_lambdas.append(pd.DataFrame(lambdas))

100%|██████████| 3444/3444 [00:27<00:00, 125.14it/s]


In [126]:
all_lambdas[0] = all_lambdas[0][(np.abs((all_lambdas[0]-all_lambdas[0].mean())/all_lambdas[0].std()) <= 3).all(axis=1)]
all_lambdas[0].rename(columns={0:'Intercept',1:'MKT',2:'SMB',3:'HML',4:'RMW',5:'CMA',6:'Size',7:'PE',8:'OP',9:'INV'})

Unnamed: 0,Intercept,MKT,SMB,HML,RMW,CMA,Size,PE,OP,INV
0,-0.011827,-0.023069,0.085299,-0.063094,0.056884,0.031306,0.002571,0.004230,0.003220,0.003572
1,-0.012564,-0.002128,0.078102,-0.021120,0.045023,0.008844,0.002164,0.000689,-0.001394,0.002813
2,0.007764,0.008337,0.050220,-0.012628,-0.003301,0.007465,-0.001487,-0.000040,0.004447,-0.001319
3,-0.002531,-0.020339,-0.032863,-0.021327,0.026876,-0.005788,0.000354,0.000505,0.002267,0.002344
4,0.009193,-0.005720,-0.228515,0.087180,-0.131290,-0.034139,-0.000849,-0.005842,-0.002895,-0.005884
...,...,...,...,...,...,...,...,...,...,...
3439,0.000557,0.011755,-0.000309,0.006006,-0.002675,-0.003219,-0.000446,-0.000491,0.000159,-0.000790
3440,-0.007412,-0.018653,-0.032161,-0.018848,0.017171,0.014285,0.001078,0.000187,-0.000657,0.000189
3441,0.002147,0.008373,0.032773,0.003041,0.012251,-0.002043,-0.000266,0.001326,0.000358,-0.001430
3442,-0.005234,-0.017837,-0.034687,0.010752,0.018850,0.032638,0.000653,-0.003792,-0.003111,-0.002934


In [127]:
np.mean(all_lambdas[0])*21*100

0   -0.660636
1   -0.817933
2   -1.540293
3   -1.889474
4   -1.263579
5   -0.373080
6    0.089673
7   -0.475708
8    0.254486
9    0.098073
dtype: float64

In [128]:
stats.ttest_1samp(all_lambdas[0].dropna(), 0.0)[0]

array([-2.31911573, -1.09315226, -0.59007815, -1.59403424, -0.97493671,
       -0.3075382 ,  1.72866065, -5.27346186,  2.72530314,  1.15230241])

In [129]:
## New-West Adjustment 
all_lambdas[0].apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values

array([-1.45962092, -0.67714827, -0.37939123, -1.0070646 , -0.61542253,
       -0.20058756,  1.08646018, -3.3634224 ,  1.73553362,  0.72927884])