In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from statsmodels.sandbox.regression.gmm import IV2SLS
import numpy as np
from scipy import stats
from tqdm import tqdm
import statsmodels.api as sm

In [2]:
## Newey-West Adjustment For a Time-Series Data, e.g. Lambdas (factor returns)
def NW_ttest(ret,h0):
  T = len(ret)
  lag = int(4*(T/100)**(2/9))
  vv = np.var(ret)

  for l in range(0,lag):
      cc=np.cov(ret[0:(len(ret)-l)].T,ret[l:len(ret)].T)[0,1]
      vv=vv+2*(1-(l+1)/lag)*cc
      t=(np.mean(ret)-h0)/np.sqrt(vv)*np.sqrt(T)
  
  return t

## CAPM_IV

In [3]:
df_MR = pd.read_csv('df_MR.csv')

In [4]:
df_MR.Dates = df_MR.Dates.apply(lambda x:x[:10])

In [5]:
df_MR.set_index('Dates',inplace=True)

In [6]:
ols_model = LinearRegression()

In [7]:
df_RF = pd.read_csv('df_RF.csv',index_col=0)
df_SR = pd.read_csv('df_SR.csv',index_col=0)
df_SR.index = df_SR.index.to_series().apply(lambda x:x[:10])

In [8]:
df_SR = df_SR.loc[:, (df_SR != 0).any(axis=0)]
# df_SR = df_SR.apply(replacer)

In [9]:
df_Ex_SR = df_SR - df_RF.values

In [10]:
df_Ex_SR = df_Ex_SR.loc[df_MR.index,:]
df_Ex_SR.shape

(5215, 740)

In [11]:
n = 3
holding = 21
rollingW = holding * n *12
lens = len(df_MR)
periods = int((lens - rollingW) / holding) - 1

In [12]:
all_betas_ev = []
all_betas_iv = []

# i is time in periods
for i in tqdm(range(periods)):

    df_MR_tmp = df_MR.iloc[i*holding:i*holding+rollingW,:]
    df_Ex_SR_tmp = df_Ex_SR.iloc[i*holding:i*holding+rollingW,:]
    

    if (i + 1) % 2 == 1:
        rw_odd = pd.DataFrame()
        for k in range(0,n*12,2):
            rw_odd = pd.concat([rw_odd,df_MR_tmp.iloc[21*k:21*(k+1),:]])

        betas_ev = []
        # j is cross-sectional stock label
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_odd.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_MR_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_ev.append(b)
        df_betas_ev = pd.DataFrame(np.array(betas_ev)).rename(columns={0:'beta_1',1:'stock'}).set_index('stock')
        all_betas_ev.append(df_betas_ev)


        rw_even = pd.DataFrame()   
        for k in range(1,n*12,2):
            rw_even = pd.concat([rw_even,df_MR_tmp.iloc[21*k:21*(k+1),:]])
        
        betas_iv = []
        # j is stock
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_even.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_MR_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_iv.append(b)
        df_betas_iv = pd.DataFrame(np.array(betas_iv)).rename(columns={0:'beta_1',1:'stock'}).set_index('stock')
        all_betas_iv.append(df_betas_iv)


    if (i + 1) % 2 == 0:
        rw_odd = pd.DataFrame()
        for k in range(0,n*12,2):
            rw_odd = pd.concat([rw_odd,df_MR_tmp.iloc[21*k:21*(k+1),:]])

        betas_iv = []
        # j is stock
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_odd.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_MR_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_iv.append(b)
        df_betas_iv = pd.DataFrame(np.array(betas_iv)).rename(columns={0:'beta_1',1:'stock'}).set_index('stock')
        all_betas_iv.append(df_betas_iv)
        

        rw_even = pd.DataFrame()   
        for k in range(1,n*12,2):
            rw_even = pd.concat([rw_even,df_MR_tmp.iloc[21*k:21*(k+1),:]])

        betas_ev = []
        # j is stock
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_even.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_MR_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_ev.append(b)
        df_betas_ev = pd.DataFrame(np.array(betas_ev)).rename(columns={0:'beta_1',1:'stock'}).set_index('stock')
        all_betas_ev.append(df_betas_ev)


100%|██████████| 211/211 [04:57<00:00,  1.41s/it]


In [13]:
all_betas_ev[0]

Unnamed: 0_level_0,beta_1
stock,Unnamed: 1_level_1
ABC.SJ.Equity,0.009525758651982777
ABI.SJ.Equity,0.1960923076113216
ABR.SJ.Equity,0.186521230022673
ACL.SJ.Equity,1.097261797813182
ACP.SJ.Equity,-0.032080565428087175
...,...
WTC.SJ.Equity,0.14110604960768647
YRK.SJ.Equity,-0.0007643014234958393
ZPT.SJ.Equity,-0.006818859254406023
ZRR.SJ.Equity,0.1498549385898545


In [14]:
all_lambdas = []
# i is time

df_Ex_SR_tmp_T = df_Ex_SR.iloc[rollingW:rollingW+periods*holding,:].T

lambdas = []
index_times = list(df_Ex_SR_tmp_T.columns)

for k in tqdm(df_Ex_SR_tmp_T.columns):
    
    i = int(index_times.index(k) / holding)

    df_index = ((all_betas_iv[i].index).intersection(all_betas_ev[i].index)).intersection(df_Ex_SR.loc[k,:].dropna().index)
    
    if len(df_index) != 0:
        iv = IV2SLS(endog = df_Ex_SR_tmp_T.loc[df_index,k].values,exog = sm.add_constant(all_betas_ev[i].loc[df_index,:].values.astype(np.float64)),instrument = sm.add_constant(all_betas_iv[i].loc[df_index,:].values.astype(np.float64)))
        res = iv.fit()
        l = list(res.params)
        lambdas.append(l)
all_lambdas.append(pd.DataFrame(lambdas))

100%|██████████| 4431/4431 [00:11<00:00, 381.60it/s]


In [15]:
all_lambdas[0].rename(columns={0:'Intercept',1:'MKT'})

Unnamed: 0,Intercept,MKT
0,-0.001979,0.016466
1,0.001600,-0.002094
2,-0.000184,0.011280
3,-0.001818,0.015955
4,0.004119,-0.022074
...,...,...
4426,-0.000319,-0.000034
4427,0.000149,0.003264
4428,-0.000715,0.011156
4429,0.001260,0.001068


In [16]:
np.nanmean(all_lambdas[0], axis = 0)*100*21

array([-0.23449305,  0.13260874])

In [17]:
stats.ttest_1samp(all_lambdas[0].dropna(), 0.0)

Ttest_1sampResult(statistic=array([-4.29253676,  0.29508812]), pvalue=array([1.80417867e-05, 7.67940391e-01]))

In [18]:
## New-West Adjustment for CAPM lambda
all_lambdas[0].dropna().apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values

array([-2.4635258,  0.1754759])

### Adding Firm Characteristics

In [19]:
df_ln_Size = pd.read_csv('df_ln_Size.csv')

df_PE = pd.read_csv('df_std_PE.csv')

df_ln_Size.Dates = df_ln_Size.Dates.apply(lambda x:x[:10])

df_ln_Size.set_index('Dates',inplace=True)

df_PE.Dates = df_PE.Dates.apply(lambda x:x[:10])

df_PE.set_index('Dates',inplace=True)

In [20]:
df_ln_Size = df_ln_Size.loc[df_Ex_SR.index,:]

In [21]:
df_PE = df_PE.loc[df_Ex_SR.index,:]

In [22]:
len(df_ln_Size)==len(df_PE)==len(df_Ex_SR)

True

In [23]:
n = 3
holding = 21
rollingW = holding * n * 12
lens = len(df_MR)
periods = int((lens - rollingW) / holding) - 1

In [24]:
all_lambdas = []

df_Ex_SR_tmp_T = df_Ex_SR.iloc[rollingW:rollingW+periods*holding,:].T
df_ln_Size_tmp_T = df_ln_Size.iloc[rollingW:rollingW+periods*holding,:].T
df_PE_tmp_T = df_PE.iloc[rollingW:rollingW+periods*holding,:].T

lambdas = []

index_times = list(df_Ex_SR_tmp_T.columns)

for k in tqdm(df_Ex_SR_tmp_T.columns):
    # k is time t in 3948 days
    i = int(index_times.index(k) / holding)
    # i is time T in 185 periods
    # df_ln_Size_tmp_T.loc[:,k].replace(-np.inf,np.nan), the -inf in ln_Size data dataframe would interrupt the regression
    df_index =  (((((all_betas_ev[i].index).intersection(all_betas_iv[i].index)).\
                 intersection(df_ln_Size_tmp_T.loc[:,k].replace(-np.inf,np.nan).dropna().index)).\
                 intersection(df_PE_tmp_T.loc[:,k].dropna().index)).\
                 intersection(df_Ex_SR_tmp_T.loc[:,index_times[index_times.index(k)]].dropna().index))
    
    if len(df_index) != 0:
        iv = IV2SLS(endog = df_Ex_SR_tmp_T.loc[df_index,index_times[index_times.index(k)]].values,\
                    exog = sm.add_constant(pd.concat([all_betas_ev[i].loc[df_index,:],\
                                                      df_ln_Size_tmp_T.loc[df_index,k],\
                                                      df_PE_tmp_T.loc[df_index,k]],axis = 1).values.astype(np.float64)),\
                    instrument = sm.add_constant(pd.concat([all_betas_iv[i].loc[df_index,:],\
                                                           df_ln_Size_tmp_T.loc[df_index,k],\
                                                           df_PE_tmp_T.loc[df_index,k]],axis = 1).values.astype(np.float64)))
        res = iv.fit()
        l = list(res.params)     
        lambdas.append(l)

all_lambdas.append(pd.DataFrame(lambdas))

100%|██████████| 4431/4431 [00:26<00:00, 165.65it/s]


In [25]:
all_lambdas[0].rename(columns={0:'Intercept',1:'MKT',2:'Size',3:'PE'})

Unnamed: 0,Intercept,MKT,Size,PE
0,-0.000899,0.024234,-0.000201,0.004694
1,0.006810,-0.000371,-0.001295,-0.003227
2,0.005971,0.020013,-0.001070,0.000147
3,0.014526,0.023382,-0.002621,0.003577
4,-0.011938,-0.049785,0.002896,-0.001443
...,...,...,...,...
4426,-0.003034,-0.003449,0.000579,0.000979
4427,-0.003265,0.001809,0.000419,-0.001005
4428,-0.003432,0.006911,0.000410,0.000321
4429,0.000863,0.002092,0.000185,-0.000092


In [26]:
np.mean(all_lambdas[0])*21*100

  return mean(axis=axis, dtype=dtype, out=out, **kwargs)


0   -0.695162
1   -0.481642
2    0.084818
3    0.973146
dtype: float64

In [27]:
stats.ttest_1samp(all_lambdas[0].dropna(), 0.0)[0]

array([-3.68948571, -0.85975906,  2.43448067, 16.08246606])

In [28]:
## New-West Adjustment 
all_lambdas[0].dropna().apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values

array([-2.33636942, -0.5257334 ,  1.53924127, 10.06383403])

## FF3-Factor_IV

In [29]:
ols_model = LinearRegression()

In [30]:
df_FF3 = pd.read_csv('df_FF3.csv')

In [31]:
df_FF3.Dates = df_FF3.Dates.apply(lambda x:x[:10])

In [32]:
df_FF3.set_index('Dates',inplace=True)

In [33]:
df_RF = pd.read_csv('df_RF.csv',index_col=0)
df_SR = pd.read_csv('df_SR.csv',index_col=0)
df_SR.index = df_SR.index.to_series().apply(lambda x:x[:10])

In [34]:
df_Ex_SR = df_SR-df_RF.values

In [35]:
df_Ex_SR = df_Ex_SR.loc[df_FF3.index,:]

In [36]:
n = 3
holding = 21
rollingW = holding * n * 12
lens = len(df_FF3)
periods = int((lens - rollingW) / holding) - 1

In [37]:
all_betas_ev = []
all_betas_iv = []

# i is time
for i in tqdm(range(periods)):

    df_FF3_tmp = df_FF3.iloc[i*holding:i*holding+rollingW,:]
    df_Ex_SR_tmp = df_Ex_SR.iloc[i*holding:i*holding+rollingW,:]
    

    if (i + 1) % 2 == 1:
        rw_odd = pd.DataFrame()
        for k in range(0,n*12,2):
            rw_odd = pd.concat([rw_odd,df_FF3_tmp.iloc[21*k:21*(k+1),:]])

        betas_ev = []
        # j is stock
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_odd.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_FF3_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_ev.append(b)
        df_betas_ev = pd.DataFrame(np.array(betas_ev)).rename(columns={0:'beta_1',1:'beta_2',2:'beta_3',3:'stock'}).set_index('stock')
        all_betas_ev.append(df_betas_ev)


        rw_even = pd.DataFrame()   
        for k in range(1,n*12,2):
            rw_even = pd.concat([rw_even,df_FF3_tmp.iloc[21*k:21*(k+1),:]])
        
        betas_iv = []
        # j is stock
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_even.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_FF3_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_iv.append(b)
        df_betas_iv = pd.DataFrame(np.array(betas_iv)).rename(columns={0:'beta_1',1:'beta_2',2:'beta_3',3:'stock'}).set_index('stock')
        all_betas_iv.append(df_betas_iv)


    if (i + 1) % 2 == 0:
        rw_odd = pd.DataFrame()
        for k in range(0,n*12,2):
            rw_odd = pd.concat([rw_odd,df_FF3_tmp.iloc[21*k:21*(k+1),:]])

        betas_iv = []
        # j is stock
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_odd.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_FF3_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_iv.append(b)
        df_betas_iv = pd.DataFrame(np.array(betas_iv)).rename(columns={0:'beta_1',1:'beta_2',2:'beta_3',3:'stock'}).set_index('stock')
        all_betas_iv.append(df_betas_iv)
        

        rw_even = pd.DataFrame()   
        for k in range(1,n*12,2):
            rw_even = pd.concat([rw_even,df_FF3_tmp.iloc[21*k:21*(k+1),:]])

        betas_ev = []
        # j is stock
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_even.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_FF3_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_ev.append(b)
        df_betas_ev = pd.DataFrame(np.array(betas_ev)).rename(columns={0:'beta_1',1:'beta_2',2:'beta_3',3:'stock'}).set_index('stock')
        all_betas_ev.append(df_betas_ev)



100%|██████████| 189/189 [04:40<00:00,  1.48s/it]


In [38]:
all_betas_ev[0]

Unnamed: 0_level_0,beta_1,beta_2,beta_3
stock,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ABC.SJ.Equity,-0.3268531755110442,-0.6124245989848942,0.5184193635149579
ABI.SJ.Equity,0.24617730235525837,0.05425134648046361,0.0422217378258441
ABR.SJ.Equity,0.1010109731125653,-0.34507237181482997,-0.5898301921449686
ACL.SJ.Equity,1.0668768540336409,1.5465407429547147,0.5948677602591734
ACP.SJ.Equity,-0.06303301722658639,-0.10388120490467742,0.1043280940176547
...,...,...,...
WTC.SJ.Equity,0.08829694059751701,-0.23470768343869003,-0.1276217050250745
YRK.SJ.Equity,-0.06685682467539814,-0.07086800152369618,-0.0005126169970859351
ZPT.SJ.Equity,0.1284458132594861,-0.8697247063803566,-0.668607414250201
ZRR.SJ.Equity,-0.024711260687379596,1.861073057068256,-1.0941560241410766


In [39]:
all_lambdas = []
# i is time in periods

df_Ex_SR_tmp_T = df_Ex_SR.iloc[rollingW:rollingW+periods*holding,:].T

lambdas = []
index_times = list(df_Ex_SR_tmp_T.columns)

for k in tqdm(df_Ex_SR_tmp_T.columns):
  #k is cross-sectional stock label  
    i = int(index_times.index(k) / holding)

    df_index = ((all_betas_iv[i].index).intersection(all_betas_ev[i].index)).intersection(df_Ex_SR.loc[k,:].dropna().index)
    
    if len(df_index) != 0:
        iv = IV2SLS(endog = df_Ex_SR_tmp_T.loc[df_index,k].values,exog = sm.add_constant(all_betas_ev[i].loc[df_index,:].values.astype(np.float64)),instrument = sm.add_constant(all_betas_iv[i].loc[df_index,:].values.astype(np.float64)))
        res = iv.fit()
        l = list(res.params)
        lambdas.append(l)
        
all_lambdas.append(pd.DataFrame(lambdas))

100%|██████████| 3969/3969 [00:15<00:00, 258.87it/s]


In [40]:
# One of the issues that often arise with IV estimators is that for any finite stock amount N, 
# there is a very small change that the cross-products of the matrixes of (B_IV ) and (B_EV ) 
# might be singular, which will cause the parameter estimates too large to be trusted. 
# To avoid this potential problem in finite samples, the monthly risk premium estimates that deviate 
# six or more standard deviations of the corresponding factor realizations from their sample average as missing values. 
# For each column, it first computes the Z-score of each value in the column, relative to the column mean and standard deviation.
# It then takes the absolute Z-score because the direction does not matter, only if it is below the threshold.
# all(axis=1) ensures that for each row, all column satisfy the constraint.
# Finally, the result of this condition is used to index the dataframe.
all_lambdas[0] = all_lambdas[0][(np.abs((all_lambdas[0]-all_lambdas[0].mean())/all_lambdas[0].std()) <= 6).all(axis=1)]
all_lambdas[0].rename(columns={0:'Intercept',1:'MKT',2:'SMB',3:'HML'})

Unnamed: 0,Intercept,MKT,SMB,HML
0,0.001815,0.003035,-0.008225,-0.016139
1,0.000483,0.015281,-0.026548,0.119394
2,-0.005488,0.050964,-0.157700,0.258583
3,0.004780,-0.034746,0.108313,0.054221
4,-0.000398,-0.003620,-0.015763,0.067161
...,...,...,...,...
3964,-0.001137,0.003157,0.005594,0.007004
3965,-0.001701,-0.005845,-0.002336,-0.005901
3966,0.000210,0.007632,0.005109,-0.008238
3967,-0.000515,-0.008449,-0.001604,0.008289


In [41]:
np.mean(all_lambdas[0])*21*100

  return mean(axis=axis, dtype=dtype, out=out, **kwargs)


0   -0.230296
1   -0.443876
2   -4.068414
3   -1.191729
dtype: float64

In [42]:
stats.ttest_1samp(all_lambdas[0].dropna(), 0.0)[0]

array([-2.12808159, -0.5469318 , -1.53500743, -0.40440495])

In [43]:
all_lambdas[0].dropna().apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values

array([-1.37001976, -0.34998833, -0.99754777, -0.24744054])

### Adding firm characteristics as control variables

In [44]:
ols_model = LinearRegression()

In [45]:
df_FF3 = pd.read_csv('df_FF3.csv')

In [46]:
df_FF3.Dates = df_FF3.Dates.apply(lambda x:x[:10])

In [47]:
df_FF3.set_index('Dates',inplace=True)

In [48]:
df_RF = pd.read_csv('df_RF.csv',index_col=0)
df_SR = pd.read_csv('df_SR.csv',index_col=0)
df_SR.index = df_SR.index.to_series().apply(lambda x:x[:10])

In [49]:
# df_SR = df_SR.replace(0,np.nan)

In [50]:
df_Ex_SR = df_SR-df_RF.values

In [51]:
df_Ex_SR = df_Ex_SR.loc[df_FF3.index,:]

In [52]:
df_ln_Size = pd.read_csv('df_ln_Size.csv')

df_PE = pd.read_csv('df_std_PE.csv')

df_ln_Size.Dates = df_ln_Size.Dates.apply(lambda x:x[:10])

df_ln_Size.set_index('Dates',inplace=True)

df_PE.Dates = df_PE.Dates.apply(lambda x:x[:10])

df_PE.set_index('Dates',inplace=True)

In [53]:
df_ln_Size = df_ln_Size.loc[df_Ex_SR.index,:]

In [54]:
df_PE = df_PE.loc[df_Ex_SR.index,:]

In [55]:
len(df_ln_Size)==len(df_PE)==len(df_Ex_SR)==len(df_FF3)

True

In [56]:
n = 3
holding = 21
rollingW = holding * n * 12
lens = len(df_FF3)
periods = int((lens - rollingW) / holding) - 1

In [57]:
all_lambdas = []

df_Ex_SR_tmp_T = df_Ex_SR.iloc[rollingW:rollingW+periods*holding,:].T
df_ln_Size_tmp_T = df_ln_Size.iloc[rollingW:rollingW+periods*holding,:].T
df_PE_tmp_T = df_PE.iloc[rollingW:rollingW+periods*holding,:].T

lambdas = []

index_times = list(df_Ex_SR_tmp_T.columns)

for k in tqdm(df_Ex_SR_tmp_T.columns):
    # k is time t in 3948 days
    i = int(index_times.index(k) / holding)
    # i is time T in 185 periods
    # df_ln_Size_tmp_T.loc[:,k].replace(-np.inf,np.nan), the -inf in ln_Size data dataframe would interrupt the regression
    df_index =  (((((all_betas_ev[i].index).intersection(all_betas_iv[i].index)).\
                 intersection(df_ln_Size_tmp_T.loc[:,k].replace(-np.inf,np.nan).dropna().index)).\
                 intersection(df_PE_tmp_T.loc[:,k].dropna().index)).\
                 intersection(df_Ex_SR_tmp_T.loc[:,index_times[index_times.index(k)]].dropna().index))
    
    if len(df_index) != 0:
        iv = IV2SLS(endog = df_Ex_SR_tmp_T.loc[df_index,index_times[index_times.index(k)]].values,\
                    exog = sm.add_constant(pd.concat([all_betas_ev[i].loc[df_index,:],\
                                                      df_ln_Size_tmp_T.loc[df_index,k],\
                                                      df_PE_tmp_T.loc[df_index,k]],axis = 1).values.astype(np.float64)),\
                    instrument = sm.add_constant(pd.concat([all_betas_iv[i].loc[df_index,:],\
                                                           df_ln_Size_tmp_T.loc[df_index,k],\
                                                           df_PE_tmp_T.loc[df_index,k]],axis = 1).values.astype(np.float64)))
        res = iv.fit()
        l = list(res.params)     
        lambdas.append(l)

all_lambdas.append(pd.DataFrame(lambdas))

100%|██████████| 3969/3969 [00:25<00:00, 158.10it/s]


In [58]:
all_lambdas[0].rename(columns={0:'Intercept',1:'MKT',2:'SMB',3:'HML',4:'Size',5:'PE'})

Unnamed: 0,Intercept,MKT,SMB,HML,Size,PE
0,0.019847,0.061514,-0.065955,-0.022792,-0.004659,-0.006877
1,0.008911,0.015144,0.004663,0.032104,-0.001553,-0.000520
2,0.053855,0.070376,-0.061480,0.123890,-0.010556,-0.004021
3,-0.016999,-0.039322,0.040828,-0.024967,0.003740,0.001863
4,-0.016434,-0.064259,0.043714,0.026512,0.004117,0.005366
...,...,...,...,...,...,...
3964,-0.000147,0.007495,0.004358,0.007326,-0.000226,0.000743
3965,-0.003085,-0.009041,-0.002363,-0.007532,0.000269,0.000465
3966,0.000450,0.005478,0.004398,-0.007938,0.000064,0.001609
3967,-0.003340,-0.013284,-0.001377,0.007162,0.000416,-0.000977


In [59]:
np.mean(all_lambdas[0].dropna())*21*100

  return mean(axis=axis, dtype=dtype, out=out, **kwargs)


0   -0.752861
1   -0.900076
2   -0.019621
3   -0.362426
4    0.099754
5    0.972124
dtype: float64

In [60]:
stats.ttest_1samp(all_lambdas[0].dropna(), 0.0)

Ttest_1sampResult(statistic=array([-2.97595833, -1.28929888, -0.03503747, -0.44421432,  2.04290027,
       13.45099292]), pvalue=array([2.93837380e-03, 1.97369348e-01, 9.72051624e-01, 6.56911867e-01,
       4.11282509e-02, 2.29922023e-40]))

In [61]:
all_lambdas[0].dropna().apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values

array([-1.93242562, -0.80032237, -0.02269382, -0.292432  ,  1.32293421,
        8.55800316])

## Carhart4-Factor_IV

In [62]:
ols_model = LinearRegression()

In [63]:
df_FF4 = pd.read_csv('df_FF4.csv')

In [64]:
df_FF4.Dates = df_FF4.Dates.apply(lambda x:x[:10])

In [65]:
df_FF4.set_index('Dates',inplace=True)

In [66]:
df_RF = pd.read_csv('df_RF.csv',index_col=0)
df_SR = pd.read_csv('df_SR.csv',index_col=0)
df_SR.index = df_SR.index.to_series().apply(lambda x:x[:10])

In [67]:
df_Ex_SR = df_SR-df_RF.values

In [68]:
df_Ex_SR = df_Ex_SR.loc[df_FF4.index,:]

In [69]:
n = 3
holding = 21
rollingW = holding * n *12
lens = len(df_FF4)
periods = int((lens - rollingW) / holding) - 1

In [70]:
periods

189

In [71]:
all_betas_ev = []
all_betas_iv = []

# i is time
for i in tqdm(range(periods)):

    df_FF4_tmp = df_FF4.iloc[i*holding:i*holding+rollingW,:]
    df_Ex_SR_tmp = df_Ex_SR.iloc[i*holding:i*holding+rollingW,:]
    

    if (i + 1) % 2 == 1:
        rw_odd = pd.DataFrame()
        for k in range(0,n*12,2):
            rw_odd = pd.concat([rw_odd,df_FF4_tmp.iloc[21*k:21*(k+1),:]])

        betas_ev = []
        # j is stock
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_odd.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_FF4_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_ev.append(b)
        df_betas_ev = pd.DataFrame(np.array(betas_ev)).rename(columns={0:'beta_1',1:'beta_2',2:'beta_3',3:'beta_4',4:'stock'}).set_index('stock')
        all_betas_ev.append(df_betas_ev)


        rw_even = pd.DataFrame()   
        for k in range(1,n*12,2):
            rw_even = pd.concat([rw_even,df_FF4_tmp.iloc[21*k:21*(k+1),:]])
        
        betas_iv = []
        # j is stock
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_even.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_FF4_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_iv.append(b)
        df_betas_iv = pd.DataFrame(np.array(betas_iv)).rename(columns={0:'beta_1',1:'beta_2',2:'beta_3',3:'beta_4',4:'stock'}).set_index('stock')
        all_betas_iv.append(df_betas_iv)


    if (i + 1) % 2 == 0:
        rw_odd = pd.DataFrame()
        for k in range(0,n*12,2):
            rw_odd = pd.concat([rw_odd,df_FF4_tmp.iloc[21*k:21*(k+1),:]])

        betas_iv = []
        # j is stock
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_odd.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_FF4_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_iv.append(b)
        df_betas_iv = pd.DataFrame(np.array(betas_iv)).rename(columns={0:'beta_1',1:'beta_2',2:'beta_3',3:'beta_4',4:'stock'}).set_index('stock')
        all_betas_iv.append(df_betas_iv)
        

        rw_even = pd.DataFrame()   
        for k in range(1,n*12,2):
            rw_even = pd.concat([rw_even,df_FF4_tmp.iloc[21*k:21*(k+1),:]])

        betas_ev = []
        # j is stock
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_even.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_FF4_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_ev.append(b)
        df_betas_ev = pd.DataFrame(np.array(betas_ev)).rename(columns={0:'beta_1',1:'beta_2',2:'beta_3',3:'beta_4',4:'stock'}).set_index('stock')
        all_betas_ev.append(df_betas_ev)



100%|██████████| 189/189 [04:36<00:00,  1.47s/it]


In [72]:
all_betas_iv[0]

Unnamed: 0_level_0,beta_1,beta_2,beta_3,beta_4
stock,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ABC.SJ.Equity,0.0656845569626169,-0.8699058368890427,1.22047780623036,0.11164566633025275
ABI.SJ.Equity,0.26707948127248565,-0.07241045551827295,-0.06176587041601211,-0.06614527486980788
ABR.SJ.Equity,-0.4795801044035704,1.9057894646316478,-0.15162748653778113,0.5509700968751177
ACL.SJ.Equity,0.7706463652726757,1.1732282918786088,-0.6077852702344433,-0.3580881554110593
ACP.SJ.Equity,-0.016920386656687835,-0.01049026612220319,0.19099885306394182,-0.012355055461858083
...,...,...,...,...
WTC.SJ.Equity,0.054390082964423464,-0.055516906448164205,-0.341291631075831,0.1364131330049344
YRK.SJ.Equity,-0.06899731520465617,-0.1317826150230018,0.1757901993569013,0.11825865851026425
ZPT.SJ.Equity,0.04773796186747457,-1.0492644000104407,-0.08757487010402643,0.5184032714137832
ZRR.SJ.Equity,0.3234916219347627,0.701062183911547,0.03077876038596028,-0.11212116151580748


In [73]:
all_lambdas = []
# i is time

df_Ex_SR_tmp_T = df_Ex_SR.iloc[rollingW:rollingW+periods*holding,:].T

lambdas = []
index_times = list(df_Ex_SR_tmp_T.columns)

for k in tqdm(df_Ex_SR_tmp_T.columns):
    
    i = int(index_times.index(k) / holding)

    df_index = ((all_betas_iv[i].index).intersection(all_betas_ev[i].index)).intersection(df_Ex_SR.loc[k,:].dropna().index)
    
    if len(df_index) != 0:
        
        iv = IV2SLS(endog = df_Ex_SR_tmp_T.loc[df_index,k].values,exog = sm.add_constant(all_betas_ev[i].loc[df_index,:].values.astype(np.float64)),instrument = sm.add_constant(all_betas_iv[i].loc[df_index,:].values.astype(np.float64)))
        res = iv.fit()
        l = list(res.params)
        lambdas.append(l)
        
all_lambdas.append(pd.DataFrame(lambdas))

100%|██████████| 3969/3969 [00:15<00:00, 251.53it/s]


In [74]:
# all_lambdas[0] = all_lambdas[0][(np.abs((all_lambdas[0]-all_lambdas[0].mean())/all_lambdas[0].std()) <= 6).all(axis=1)]
all_lambdas[0].rename(columns={0:'Intercept',1:'MKT',2:'SMB',3:'HML',4:'WML'})

Unnamed: 0,Intercept,MKT,SMB,HML,WML
0,0.001769,0.004210,-0.007220,-0.010617,0.006389
1,0.001086,0.011596,-0.016017,0.125695,-0.009031
2,-0.004272,0.032881,-0.152278,0.227423,-0.101148
3,0.005374,-0.035881,0.127116,0.080037,0.016442
4,0.000040,-0.003897,-0.004719,0.084834,0.009408
...,...,...,...,...,...
3964,-0.001524,-0.000176,0.006805,0.005394,-0.032816
3965,-0.001723,-0.005855,-0.002345,-0.005938,-0.002446
3966,0.000485,0.008426,0.004559,-0.009585,0.010196
3967,-0.000599,-0.008249,-0.001527,0.009448,0.001439


In [75]:
np.mean(all_lambdas[0].dropna())

  return mean(axis=axis, dtype=dtype, out=out, **kwargs)


0    0.000180
1   -0.000603
2   -0.003755
3    0.008917
4    0.001380
dtype: float64

In [76]:
stats.ttest_1samp(all_lambdas[0].dropna(), 0.0)

Ttest_1sampResult(statistic=array([ 0.67349064, -0.80701114, -0.89357476,  1.4262298 ,  0.75484449]), pvalue=array([0.50067443, 0.41970843, 0.37160364, 0.15388069, 0.45038709]))

In [77]:
## New-West Adjustment 
all_lambdas[0].dropna().apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values

array([ 0.40553951, -0.50745667, -0.54277449,  0.85025391,  0.4644976 ])

## FF5-Factor_IV

In [78]:
ols_model = LinearRegression()

In [79]:
df_FF5 = pd.read_csv('df_FF5.csv')

In [80]:
df_FF5.Dates = df_FF5.Dates.apply(lambda x:x[:10])

In [81]:
df_FF5.set_index('Dates',inplace=True)

In [82]:
df_RF = pd.read_csv('df_RF.csv',index_col=0)
df_SR = pd.read_csv('df_SR.csv',index_col=0)
df_SR.index = df_SR.index.to_series().apply(lambda x:x[:10])

In [83]:
df_Ex_SR = df_SR-df_RF.values

In [84]:
df_Ex_SR = df_Ex_SR.loc[df_FF5.index,:]

In [85]:
n = 5
holding = 21
rollingW = holding * n *12
lens = len(df_FF5)
periods = int((lens - rollingW) / holding) - 1

In [86]:
periods

165

In [87]:
all_betas_ev = []
all_betas_iv = []

# i is time
for i in tqdm(range(periods)):

    df_FF5_tmp = df_FF5.iloc[i*holding:i*holding+rollingW,:]
    df_Ex_SR_tmp = df_Ex_SR.iloc[i*holding:i*holding+rollingW,:]
    

    if (i + 1) % 2 == 1:
        rw_odd = pd.DataFrame()
        for k in range(0,n*12,2):
            rw_odd = pd.concat([rw_odd,df_FF5_tmp.iloc[21*k:21*(k+1),:]])

        betas_ev = []
        # j is stock
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_odd.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_FF5_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_ev.append(b)
        df_betas_ev = pd.DataFrame(np.array(betas_ev)).rename(columns={0:'beta_1',1:'beta_2',2:'beta_3',3:'beta_4',4:'beta_5',5:'stock'}).set_index('stock')
        all_betas_ev.append(df_betas_ev)


        rw_even = pd.DataFrame()   
        for k in range(1,n*12,2):
            rw_even = pd.concat([rw_even,df_FF5_tmp.iloc[21*k:21*(k+1),:]])
        
        betas_iv = []
        # j is stock
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_even.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_FF5_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_iv.append(b)
        df_betas_iv = pd.DataFrame(np.array(betas_iv)).rename(columns={0:'beta_1',1:'beta_2',2:'beta_3',3:'beta_4',4:'beta_5',5:'stock'}).set_index('stock')
        all_betas_iv.append(df_betas_iv)


    if (i + 1) % 2 == 0:
        rw_odd = pd.DataFrame()
        for k in range(0,n*12,2):
            rw_odd = pd.concat([rw_odd,df_FF5_tmp.iloc[21*k:21*(k+1),:]])

        betas_iv = []
        # j is stock
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_odd.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_FF5_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_iv.append(b)
        df_betas_iv = pd.DataFrame(np.array(betas_iv)).rename(columns={0:'beta_1',1:'beta_2',2:'beta_3',3:'beta_4',4:'beta_5',5:'stock'}).set_index('stock')
        all_betas_iv.append(df_betas_iv)
        

        rw_even = pd.DataFrame()   
        for k in range(1,n*12,2):
            rw_even = pd.concat([rw_even,df_FF5_tmp.iloc[21*k:21*(k+1),:]])

        betas_ev = []
        # j is stock
        for j in df_Ex_SR.columns:
            df_index = df_Ex_SR_tmp.loc[rw_even.index,j].dropna().index
            
            if len(df_index) != 0:
                b = list(ols_model.fit(df_FF5_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
                b.append(j)
                betas_ev.append(b)
        df_betas_ev = pd.DataFrame(np.array(betas_ev)).rename(columns={0:'beta_1',1:'beta_2',2:'beta_3',3:'beta_4',4:'beta_5',5:'stock'}).set_index('stock')
        all_betas_ev.append(df_betas_ev)



100%|██████████| 165/165 [04:53<00:00,  1.78s/it]


In [88]:
all_betas_iv[0]

Unnamed: 0_level_0,beta_1,beta_2,beta_3,beta_4,beta_5
stock,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ABC.SJ.Equity,0.017404141603707235,-0.4626161165521546,0.853584653363094,0.6988655148597678,0.154658988919582
ABI.SJ.Equity,0.2320568403644699,-0.026005865333026786,-0.054336445225803394,-0.13543128231582643,-0.029930768809007222
ABR.SJ.Equity,-0.37147172854153504,1.7593474981577522,-0.209837387567265,0.322072323212365,-0.540373057692644
ACL.SJ.Equity,0.8533750560875047,1.0469468630206493,-0.2308102657012565,0.21834308742320813,-0.04145786268668872
ACP.SJ.Equity,0.010382974998620053,0.005489499298303696,0.1855320192534109,-0.0964911443882201,0.0805835709149977
...,...,...,...,...,...
WTC.SJ.Equity,0.04871845219807283,-0.08086673236325148,-0.2471779881248271,-0.1131414368806107,-0.09837551445820594
YRK.SJ.Equity,-0.09877375717702369,0.3717418279557105,-0.2852236683658058,-0.2902212515169485,-0.3799401091160878
ZPT.SJ.Equity,0.03625530757073325,-0.2868064536499078,0.2728226538497176,0.19146474723565043,-0.1649424006158563
ZRR.SJ.Equity,0.29033540002897734,0.689789959911161,-0.09396190749776545,0.03604483507044709,-0.8511628047310142


In [89]:
all_lambdas = []
# i is time

df_Ex_SR_tmp_T = df_Ex_SR.iloc[rollingW:rollingW+periods*holding,:].T

lambdas = []
index_times = list(df_Ex_SR_tmp_T.columns)

for k in tqdm(df_Ex_SR_tmp_T.columns):
    
    i = int(index_times.index(k) / holding)

    df_index = ((all_betas_iv[i].index).intersection(all_betas_ev[i].index)).intersection(df_Ex_SR.loc[k,:].dropna().index)
    
    if len(df_index) != 0:
        
        iv = IV2SLS(endog = df_Ex_SR_tmp_T.loc[df_index,k].values,exog = sm.add_constant(all_betas_ev[i].loc[df_index,:].values.astype(np.float64)),instrument = sm.add_constant(all_betas_iv[i].loc[df_index,:].values.astype(np.float64)))
        res = iv.fit()
        l = list(res.params)
        lambdas.append(l)
        
all_lambdas.append(pd.DataFrame(lambdas))

100%|██████████| 3465/3465 [00:16<00:00, 213.05it/s]


In [90]:
# all_lambdas[0] = all_lambdas[0][(np.abs((all_lambdas[0]-all_lambdas[0].mean())/all_lambdas[0].std()) <= 3).all(axis=1)]
all_lambdas[0].rename(columns={0:'Intercept',1:'MKT',2:'SMB',3:'HML',4:'RMW',5:'CMA'})

Unnamed: 0,Intercept,MKT,SMB,HML,RMW,CMA
0,-5.119667e-03,-0.002914,-0.058345,0.098615,-0.042906,-0.116153
1,2.274167e-03,0.013777,0.012770,-0.024770,-0.015602,0.029963
2,6.105835e-04,-0.016718,-0.062342,0.064100,-0.038322,-0.056332
3,-1.865059e-04,0.031024,-0.042026,0.052700,-0.063119,-0.047117
4,1.799984e-03,0.000689,0.012385,0.004852,0.008375,0.013486
...,...,...,...,...,...,...
3460,-9.726035e-04,0.002260,0.008120,0.005880,-0.008660,0.007021
3461,-1.755660e-03,-0.002835,-0.011068,-0.009876,0.015862,-0.008484
3462,3.583297e-04,0.009450,0.002169,-0.011189,0.014089,0.004549
3463,-4.830718e-04,-0.005767,-0.010769,0.011664,0.017846,-0.002649


In [91]:
np.mean(all_lambdas[0])*21*100

  return mean(axis=axis, dtype=dtype, out=out, **kwargs)


0    -1.260813
1     6.828044
2    15.216093
3     8.546427
4    14.657665
5    -5.379176
dtype: float64

In [92]:
stats.ttest_1samp(all_lambdas[0].dropna(), 0.0)[0]

array([-2.28002313,  1.34062661,  0.51874535,  0.36181064,  1.59685602,
       -0.82349007])

In [93]:
## New-West Adjustment 
all_lambdas[0].apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values

array([-1.35205516,  0.79125107,  0.3056829 ,  0.21368124,  0.95460126,
       -0.51555878])

### Adding Firm Characteristics as Control Variables

In [94]:
df_OP = pd.read_csv('df_std_OP.csv')

df_INV = pd.read_csv('df_std_INV.csv')

df_OP.Dates = df_OP.Dates.apply(lambda x:x[:10])

df_OP.set_index('Dates',inplace=True)

df_INV.Dates = df_INV.Dates.apply(lambda x:x[:10])

df_INV.set_index('Dates',inplace=True)

In [95]:
df_ln_Size = pd.read_csv('df_ln_Size.csv')

df_PE = pd.read_csv('df_std_PE.csv')

df_ln_Size.Dates = df_ln_Size.Dates.apply(lambda x:x[:10])

df_ln_Size.set_index('Dates',inplace=True)

df_PE.Dates = df_PE.Dates.apply(lambda x:x[:10])

df_PE.set_index('Dates',inplace=True)

In [96]:
df_ln_Size = df_ln_Size.loc[df_Ex_SR.index,:]

In [97]:
df_PE = df_PE.loc[df_Ex_SR.index,:]

In [98]:
df_OP = df_OP.loc[df_Ex_SR.index,:]

In [99]:
df_INV = df_INV.loc[df_Ex_SR.index,:]

In [100]:
len(df_ln_Size)==len(df_PE)==len(df_Ex_SR)==len(df_OP)==len(df_INV)

True

In [101]:
Gap = 21

In [102]:
n = 5
holding = 21
rollingW = holding * n *12
lens = len(df_FF5)
periods = int((lens - rollingW) / holding) - 1
periods

165

In [103]:
all_lambdas = []

df_Ex_SR_tmp_T = df_Ex_SR.iloc[rollingW:rollingW+periods*holding,:].T
df_ln_Size_tmp_T = df_ln_Size.iloc[rollingW:rollingW+periods*holding,:].T
df_PE_tmp_T = df_PE.iloc[rollingW:rollingW+periods*holding,:].T
df_OP_tmp_T = df_OP.iloc[rollingW:rollingW+periods*holding,:].T
df_INV_tmp_T = df_INV.iloc[rollingW:rollingW+periods*holding,:].T

lambdas = []
index_times = list(df_Ex_SR_tmp_T.columns)
# If Gap >0, the following should be 'for k in tqdm(df_Ex_SR_tmp_T.columns[:-Gap]):''
for k in tqdm(df_Ex_SR_tmp_T.columns[:-Gap]):
    # k is time t in 3948 days
    i = int(index_times.index(k) / holding)
    # i is time T in 185 periods
    # df_ln_Size_tmp_T.loc[:,k].replace(-np.inf,np.nan), the -inf in ln_Size data dataframe would interrupt the regression
    df_index = ((((((all_betas_ev[i].index).intersection(df_ln_Size_tmp_T.loc[:,k].replace(-np.inf,np.nan).dropna().index).\
                 intersection(df_PE_tmp_T.loc[:,k].replace(-np.inf,np.nan).dropna().index)).\
                  intersection(df_Ex_SR_tmp_T.loc[:,index_times[index_times.index(k)+Gap]].dropna().index)).\
                   intersection(df_OP_tmp_T.loc[:,k].dropna().index)).\
                    intersection(df_INV_tmp_T.loc[:,k].dropna().index)).\
                     intersection(all_betas_iv[i].index))
    
    if len(df_index) != 0:
        iv = IV2SLS(endog = df_Ex_SR_tmp_T.loc[df_index,index_times[index_times.index(k)+Gap]].values,\
                    exog = sm.add_constant(pd.concat([all_betas_ev[i].loc[df_index,:],\
                                                      df_ln_Size_tmp_T.loc[df_index,k],\
                                                      df_PE_tmp_T.loc[df_index,k],\
                                                      df_OP_tmp_T.loc[df_index,k],\
                                                      df_INV_tmp_T.loc[df_index,k]],axis = 1).values.astype(np.float64)),\
                    instrument = sm.add_constant(pd.concat([all_betas_iv[i].loc[df_index,:],\
                                                           df_ln_Size_tmp_T.loc[df_index,k],\
                                                           df_PE_tmp_T.loc[df_index,k],\
                                                           df_OP_tmp_T.loc[df_index,k],\
                                                           df_INV_tmp_T.loc[df_index,k]],axis = 1).values.astype(np.float64)))
        res = iv.fit()
        l = list(res.params)           
        lambdas.append(l)

all_lambdas.append(pd.DataFrame(lambdas))

100%|██████████| 3444/3444 [00:29<00:00, 117.66it/s]


In [104]:
all_lambdas[0] = all_lambdas[0][(np.abs((all_lambdas[0]-all_lambdas[0].mean())/all_lambdas[0].std()) <= 3).all(axis=1)]
all_lambdas[0].rename(columns={0:'Intercept',1:'MKT',2:'SMB',3:'HML',4:'RMW',5:'CMA',6:'Size',7:'PE',8:'OP',9:'INV'})

Unnamed: 0,Intercept,MKT,SMB,HML,RMW,CMA,Size,PE,OP,INV
0,-0.014265,-0.033442,0.042430,-0.032455,0.028544,0.022501,0.003424,0.004547,0.002098,0.002360
1,-0.012558,0.001157,0.024291,-0.022793,0.013241,0.000934,0.002270,0.000770,-0.001425,0.002610
2,-0.011171,-0.021922,0.087880,-0.024756,0.022592,-0.003378,0.002283,0.001883,0.003955,-0.000616
3,-0.001963,-0.018922,-0.014410,-0.015132,0.011513,-0.006048,0.000250,0.000380,0.002214,0.002090
4,0.019792,0.014930,-0.118494,0.055010,-0.077545,0.011532,-0.003540,-0.006187,-0.001727,-0.004463
...,...,...,...,...,...,...,...,...,...,...
3439,0.000097,0.010990,0.003844,0.003936,-0.009277,-0.002942,-0.000411,-0.000388,0.000175,-0.000937
3440,-0.006557,-0.013967,-0.007583,-0.009475,0.018604,0.002551,0.000973,0.000510,-0.000300,0.000471
3441,0.003080,0.008685,0.003964,-0.007389,0.009525,0.006448,-0.000348,0.000904,0.000572,-0.000995
3442,-0.004678,-0.014342,-0.006751,0.010294,0.008361,0.015774,0.000642,-0.003461,-0.001627,-0.003397


In [105]:
np.mean(all_lambdas[0])*21*100

  return mean(axis=axis, dtype=dtype, out=out, **kwargs)


0   -0.952103
1   -0.879151
2    0.051132
3    0.102356
4    0.940577
5   -0.373992
6    0.127982
7   -0.432520
8    0.217028
9    0.058879
dtype: float64

In [106]:
stats.ttest_1samp(all_lambdas[0].dropna(), 0.0)[0]

array([-3.59166434, -1.24138456,  0.04551549,  0.10634246,  0.98898087,
       -0.41901242,  2.58220985, -3.9527402 ,  2.26070912,  0.71389521])

In [107]:
## New-West Adjustment 
all_lambdas[0].apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values

array([-2.30661461, -0.7712936 ,  0.02921575,  0.06879817,  0.63880906,
       -0.26613885,  1.64595196, -2.52984091,  1.44316208,  0.45350563])