In [1]:
%matplotlib inline
import pandas as pd
import json
from sklearn.linear_model import LinearRegression
import numpy as np
from scipy import stats
from tqdm import tqdm
import statsmodels.api as sm
import matplotlib.pyplot as plt

In [2]:
ols_model = LinearRegression()

In [3]:
## Newey-West Adjustment For a Time-Series Data, e.g. Lambdas (factor returns)
def NW_ttest(ret,h0):
  T = len(ret)
  lag = int(4*(T/100)**(2/9))
  vv = np.var(ret)

  for l in range(0,lag):
      cc=np.cov(ret[0:(len(ret)-l)].T,ret[l:len(ret)].T)[0,1]
      vv=vv+2*(1-(l+1)/lag)*cc
      t=(np.mean(ret)-h0)/np.sqrt(vv)*np.sqrt(T)
  
  return t

Import Data

In [4]:
# Import excessive market return (J203T - Rf)
df_MR = pd.read_csv('df_MR.csv')
df_MR.Dates = df_MR.Dates.apply(lambda x:x[:10])
df_MR.set_index('Dates',inplace=True)

Import risk-free rate (91-day T-Bill)

In [5]:
df_RF = pd.read_csv('df_RF.csv',index_col=0)

Import listed stock returns on the JSE

In [6]:
df_SR = pd.read_csv('df_SR.csv',index_col=0)
df_SR.index = df_SR.index.to_series().apply(lambda x:x[:10])

Drop the columns of stock returns that full of 0s or NAs and replace consecutive 0s with NAs

In [7]:
df_SR = df_SR.loc[:, (df_SR != 0).any(axis=0)].astype(np.float64)
df_SR = df_SR.dropna(axis = 0, how = 'all')
# df_SR = df_SR.apply(replacer)

Calculate the Excess Stock Returns 

In [8]:
df_Ex_SR = df_SR-df_RF.values

Import the Factor Realizations (Mimicking portfolio returns)

In [9]:
df_FF3 = pd.read_csv('df_FF3.csv')
df_FF3.Dates = df_FF3.Dates.apply(lambda x: x[:10])
df_FF3.set_index('Dates',inplace=True)

In [10]:
df_FF4 = pd.read_csv('df_FF4.csv')
df_FF4.Dates = df_FF4.Dates.apply(lambda x:x[:10])
df_FF4.set_index('Dates',inplace=True)

In [11]:
df_FF5 = pd.read_csv('df_FF5.csv')
df_FF5.Dates = df_FF5.Dates.apply(lambda x:x[:10])
df_FF5.set_index('Dates',inplace=True)

Import Firm Characteristics 

In [12]:
df_ln_Size = pd.read_csv('df_ln_Size.csv')
df_ln_Size.Dates = df_ln_Size.Dates.apply(lambda x:x[:10])
df_ln_Size.set_index('Dates',inplace=True)

In [13]:
df_PE = pd.read_csv('df_std_PE.csv')
df_PE.Dates = df_PE.Dates.apply(lambda x:x[:10])
df_PE.set_index('Dates',inplace=True)

In [14]:
df_OP = pd.read_csv('df_std_OP.csv')
df_OP.Dates = df_OP.Dates.apply(lambda x:x[:10])
df_OP.set_index('Dates',inplace=True)

In [15]:
df_INV = pd.read_csv('df_std_INV.csv')
df_INV.Dates = df_INV.Dates.apply(lambda x:x[:10])
df_INV.set_index('Dates',inplace=True)

Import 18 double-sorted portfolios returns

In [16]:
df_18_PR = pd.read_csv('df_18_PR.csv')
df_18_PR.Dates = df_18_PR.Dates.apply(lambda x:x[:10])
df_18_PR.set_index('Dates',inplace=True)

Import Firm Characteristics of 18 double-sorted portfolios

In [17]:
df_18_std_Size = pd.read_csv('df_18_std_size.csv')
df_18_std_Size.Dates = df_18_std_Size.Dates.apply(lambda x:x[:10])
df_18_std_Size.set_index('Dates',inplace=True)

In [18]:
df_18_std_PE = pd.read_csv('df_18_std_PE.csv')
df_18_std_PE.Dates = df_18_std_PE.Dates.apply(lambda x:x[:10])
df_18_std_PE.set_index('Dates',inplace=True)

In [19]:
df_18_std_OP = pd.read_csv('df_18_std_OP.csv')
df_18_std_OP.Dates = df_18_std_OP.Dates.apply(lambda x:x[:10])
df_18_std_OP.set_index('Dates',inplace=True)

In [20]:
df_18_std_INV = pd.read_csv('df_18_std_INV.csv')
df_18_std_INV.Dates = df_18_std_INV.Dates.apply(lambda x:x[:10])
df_18_std_INV.set_index('Dates',inplace=True)

# Empirical Results

This section employs the OLS and IV methods to estimate the premiums for risk factors proposed by prominent asset pricing models.

## FM-OLS Regression (Time-Varying Betas)

This Section first tests whether the risk premiums under the CAPM and the FF3- and FF5-Factor Models are significantly different from zero using the OLS method with individual stocks. For the OLS tests, the regression is fitted with daily return data from month t-36 to month t-1 to estimate beta for month t. 

### CAPM Model

How many years in the rolling estimation window?

In [21]:
n = 3

Define the length of rolling window and rebalance frequency

In [22]:
holding = 21
rollingW = holding * n *12
lens = len(df_MR)
periods = int((lens - rollingW) / holding) - 1
print(periods, rollingW)

211 756


This cell is the first stage of FM-OLS regression, which is used to estimate market betas by regressing stock returns on market factor returns

In [23]:
# Keep the stock returns has the same length as market returns
df_Ex_SR_trnct = df_Ex_SR.loc[df_MR.index,:]

all_betas = []

for i in tqdm(range(periods)):

    df_MR_tmp = df_MR.iloc[i*holding:i*holding+rollingW,:]
    df_Ex_SR_tmp = df_Ex_SR_trnct.iloc[i*holding:i*holding+rollingW,:]
    
    betas = []
    for j in df_Ex_SR.columns:
        df_index = df_Ex_SR_tmp.loc[:,j].dropna().index
        
        if len(df_index) > 10:
            b = list(ols_model.fit(df_MR_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
            b.append(j)
            betas.append(b)
    df_betas = pd.DataFrame(np.array(betas)).rename(columns={0:'MKT',1:'stock'}).set_index('stock')
    
    all_betas.append(df_betas)

100%|██████████| 211/211 [00:37<00:00,  5.63it/s]


In [24]:
# Print market betas for one period
all_betas[0]

Unnamed: 0_level_0,MKT
stock,Unnamed: 1_level_1
FDS.SJ.Equity,0.5550883022640546
MOZ.SJ.Equity,0.5653617233888201
MTN.SJ.Equity,1.2658780526386577
TRT.SJ.Equity,0.48320607848989805
AEG.SJ.Equity,0.4687760759603885
...,...
CHE.SJ.Equity,0.14705915711382728
JCG.SJ.Equity,0.13670183676315076
CRM.SJ.Equity,0.18922501182144139
CLE.SJ.Equity,0.17071804056283496


This cell is the second stage of FM-OLS regression, which aims to estimate market risk premiums

In [25]:
all_lambdas = []

df_Ex_SR_tmp_T = df_Ex_SR_trnct.iloc[rollingW:rollingW+periods*holding,:].T

lambdas = []
index_times = list(df_Ex_SR_tmp_T.columns)

for k in tqdm(df_Ex_SR_tmp_T.columns):
    # k is stock on cross-section
    i = int(index_times.index(k) / holding)
    # i is time in periods
    df_return = df_Ex_SR_tmp_T.loc[all_betas[i].index,k].dropna()
    df_index = df_return.index
    
    if len(df_index) != 0:
        ols = sm.OLS(df_return, sm.add_constant(all_betas[i].loc[df_index,:].astype(np.float64)))
        res = ols.fit()
        l = list(res.params)
        
        lambdas.append(l)
        
all_lambdas.append(pd.DataFrame(lambdas))

100%|██████████| 4431/4431 [00:06<00:00, 675.36it/s]


In [26]:
# Print market risk premiums and intercepts
all_lambdas[0].rename(columns={0:'Intercept',1:'MKT'})

Unnamed: 0,Intercept,MKT
0,-0.006096,0.010204
1,0.007551,-0.004164
2,0.004306,0.004218
3,-0.002946,0.011662
4,0.003390,-0.002552
...,...,...
4426,0.000890,-0.001593
4427,-0.001875,0.007188
4428,-0.000105,0.011617
4429,0.004768,0.001182


In [27]:
# Print the time-series mean of market risk premiums and intercepts
print(np.mean(all_lambdas[0], axis = 0)*21*100)

0    0.077655
1   -0.516471
dtype: float64


In [28]:
stats.ttest_1samp(all_lambdas[0], 0)

Ttest_1sampResult(statistic=array([ 0.60689726, -1.18705423]), pvalue=array([0.5439502 , 0.23526988]))

In [29]:
# Use the Newey-West t-test to examine market risk premiums and intercepts
all_lambdas[0].apply(lambda x: NW_ttest(ret = x, h0=0), axis = 0)

0    0.352552
1   -0.730572
dtype: float64

### —Adding Control Variables

The significance of factor risk premium estimates may due to an omitted variable bias because the second-stage cross-sectional regressions do not control for corresponding firm charactersitics. For mitigating this potential bias, we add characteristics as control variables in the second stage. 

In [30]:
# Keep the size and PE have the same length as stock returns
df_ln_Size_trnct = df_ln_Size.loc[df_Ex_SR_trnct.index,:]
df_PE_trnct = df_PE.loc[df_Ex_SR_trnct.index,:]

In [31]:
len(df_ln_Size_trnct)==len(df_PE_trnct)==len(df_Ex_SR_trnct)

True

In [32]:
# Stock returns are forward for 21 days compared to firm characteristics 
Gap = 21

In [33]:
# This cell represents the second stage of FM-OLS regression, which estimates the market risk premiums 
# by controlling for Size and PE characteristics.
all_lambdas = []
all_intercepts = []

df_Ex_SR_tmp_T = df_Ex_SR_trnct.iloc[rollingW:rollingW+periods*holding,:].T
df_ln_Size_tmp_T = df_ln_Size_trnct.iloc[rollingW:rollingW+periods*holding,:].T
df_PE_tmp_T = df_PE_trnct.iloc[rollingW:rollingW+periods*holding,:].T

lambdas = []
intercepts = []
index_times = list(df_Ex_SR_tmp_T.columns)


for k in tqdm(df_Ex_SR_tmp_T.columns[:-Gap]):
    # k is date t in len(df_Ex_SR_tmp) days
    i = int(index_times.index(k) / holding)
    # i is time T in 185 periods
    # df_ln_Size_tmp_T.loc[:,k].replace(-np.inf,np.nan), the -inf in ln_Size data dataframe would interrupt the regression
    df_index = (((all_betas[i].index).intersection(df_ln_Size_tmp_T.loc[:,k].replace(-np.inf,np.nan).dropna().index).\
                   intersection(df_PE_tmp_T.loc[:,k].dropna().index)).\
                    intersection(df_Ex_SR_tmp_T.loc[:,index_times[index_times.index(k)+Gap]].dropna().index))
    
    if len(df_index) != 0:
        
        l = list(ols_model.fit((pd.concat([all_betas[i].loc[df_index,:], \
                                           df_ln_Size_tmp_T.loc[df_index,k],\
                                           df_PE_tmp_T .loc[df_index,k]],axis = 1).values), \
                               (df_Ex_SR_tmp_T.loc[df_index,index_times[index_times.index(k)+Gap]].values)).coef_)
        
        h = ols_model.fit((pd.concat([all_betas[i].loc[df_index,:], \
                                      df_ln_Size_tmp_T.loc[df_index,k],\
                                      df_PE_tmp_T .loc[df_index,k]],axis = 1).values), \
                          (df_Ex_SR_tmp_T.loc[df_index,index_times[index_times.index(k)+Gap]].values)).intercept_
        
        lambdas.append(l)
        intercepts.append(h)
        
all_lambdas.append(pd.DataFrame(lambdas))
all_intercepts.append(pd.DataFrame(intercepts))

100%|██████████| 4410/4410 [00:16<00:00, 265.28it/s]


In [34]:
# Print regression intercepts 
all_intercepts[0]

Unnamed: 0,0
0,0.001900
1,0.001460
2,0.003781
3,-0.000321
4,0.002247
...,...
4405,0.002044
4406,-0.002589
4407,-0.000752
4408,0.003381


In [35]:
# Print market risk premiums and the slope coefficients of characteristics
all_lambdas[0].rename(columns={0:'MKT',1:'Size',2:'PE'})

Unnamed: 0,MKT,Size,PE
0,-6.810638e-03,-3.207939e-04,2.007940e-03
1,-7.077851e-03,5.350757e-03,-2.551309e-03
2,-1.077175e-04,-6.693577e-03,-3.258047e-03
3,-2.911853e-35,1.349768e-37,-1.025882e-36
4,-2.256026e-03,-6.975234e-05,-5.719866e-04
...,...,...,...
4405,-5.679331e-03,2.265864e-03,-7.835070e-04
4406,9.384853e-03,-1.224385e-03,-5.415693e-05
4407,1.386840e-02,-6.102249e-04,3.978481e-04
4408,6.848897e-03,-2.837869e-03,-9.578326e-04


In [36]:
# Print the mean of intercepts
np.mean(all_intercepts[0], axis = 0)*21*100

0    0.079734
dtype: float64

In [37]:
# Print the mean of market risk premiums
np.mean(all_lambdas[0])*21*100

  return mean(axis=axis, dtype=dtype, out=out, **kwargs)


0   -0.470247
1    0.205496
2   -0.101521
dtype: float64

In [38]:
# Print the result of ordinary t-test
stats.ttest_1samp(all_intercepts[0], 0.0)[0]

array([0.67716809])

In [39]:
# Print the result of ordinary t-test
stats.ttest_1samp(all_lambdas[0], 0.0)[0]

array([-1.00938464,  1.34086112, -1.67691108])

In [40]:
## Newey-West t-test
all_intercepts[0].apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values

array([0.39663275])

In [41]:
## Newey-West t-test
all_lambdas[0].apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values

array([-0.62661646,  0.87951969, -1.04503796])

### FF3-Factor Model

In [42]:
# How many years in the rolling window
n = 3

In [43]:
holding = 21
rollingW = holding * n * 12
lens = len(df_FF3)
periods = int((lens - rollingW) / holding) - 1
print(periods, rollingW)

189 756


This cell is the first stage of FM OLS regression under the FF3-Factor Model, which aims to estiamte intercepts and the betas of FF3-Factors

In [44]:
# Keep the excess stock returns and factor returns have the same length
df_Ex_SR_trnct = df_Ex_SR.loc[df_FF3.index,:]

all_betas = []

for i in tqdm(range(periods)):

    df_FF3_tmp = df_FF3.iloc[i*holding:i*holding+rollingW,:]
    df_Ex_SR_tmp = df_Ex_SR_trnct.iloc[i*holding:i*holding+rollingW,:]
    
    betas = []

    for j in df_Ex_SR.columns:
        df_index = df_Ex_SR_tmp.loc[:,j].dropna().index
        
        if len(df_index) != 0:
            b = list(ols_model.fit(df_FF3_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
            b.append(j)
            betas.append(b)
    df_betas = pd.DataFrame(np.array(betas)).rename(columns={0:'MKT',1:'SMB',2:'HML',3:'stock'}).set_index('stock')
    
    all_betas.append(df_betas)


100%|██████████| 189/189 [00:35<00:00,  5.34it/s]


In [45]:
# Print the estimated betas for one period
all_betas[0]

Unnamed: 0_level_0,MKT,SMB,HML
stock,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
FDS.SJ.Equity,0.10715398963217436,-0.734816533049782,-0.36073237024722876
MOZ.SJ.Equity,0.3496847200863343,-1.1690744181025954,-0.3854473564628611
MTN.SJ.Equity,0.5257922000848395,-1.222008284490231,-0.14892697477848102
TRT.SJ.Equity,0.3407005946277602,0.5596396456391646,0.43335432663787793
AEG.SJ.Equity,0.24082250157618124,-0.28012508846578166,-0.08667815822722032
...,...,...,...
CHE.SJ.Equity,0.10055786665632858,0.20035876671951153,0.07053816239795793
JCG.SJ.Equity,0.13232817752399748,0.2136036410356354,-0.26574930268644376
CRM.SJ.Equity,0.051854963428057725,0.04424585096443055,-0.07010278346250398
CLE.SJ.Equity,0.047354069133957115,-0.0019346488690393793,-0.20556129419676167


This cell is the second stage of FM OLS regression, which aims to
estimate intercepts and the risk premiums of the FF3-Factor. It should be noted the regression is performed on a time-series scale

In [46]:
all_lambdas = []
all_intercepts = []

df_Ex_SR_tmp_T = df_Ex_SR_trnct.iloc[rollingW:rollingW+periods*holding,:].T

lambdas = []
intercepts = []
index_times = list(df_Ex_SR_tmp_T.columns)

for k in tqdm(df_Ex_SR_tmp_T.columns):
    
    i = int(index_times.index(k) / holding)

    df_return = df_Ex_SR_tmp_T.loc[all_betas[i].index,k].dropna()
    df_index = df_return.index
    
    if len(df_index) != 0:
        l = list(ols_model.fit(all_betas[i].loc[df_index,:].values, df_return.values).coef_)
        h = ols_model.fit(all_betas[i].loc[df_index,:].values, df_return.values).intercept_
        lambdas.append(l)
        intercepts.append(h)
        
all_lambdas.append(pd.DataFrame(lambdas))
all_intercepts.append(pd.DataFrame(intercepts))

100%|██████████| 3969/3969 [00:06<00:00, 589.05it/s]


In [47]:
# Print all the risk premiums of FF3-Factor
all_lambdas[0].rename(columns={0:'MKT',1:'SMB',2:'HML'})

Unnamed: 0,MKT,SMB,HML
0,-0.000178,0.021014,-0.002192
1,0.011544,-0.012654,-0.005232
2,0.000691,-0.001598,-0.001184
3,-0.012045,-0.006196,0.016956
4,0.014316,-0.014997,-0.007062
...,...,...,...
3964,0.011811,-0.019187,-0.013076
3965,-0.011026,-0.004180,0.011909
3966,0.002040,0.015480,0.002550
3967,-0.008616,-0.018792,0.007132


In [48]:
# Print the time-series average of intercepts
np.mean(all_intercepts[0])*21*100

  return mean(axis=axis, dtype=dtype, out=out, **kwargs)


0    0.107307
dtype: float64

In [49]:
# Print the time-series average of risk premiums
np.mean(all_lambdas[0], axis = 0)*21*100

0   -0.470702
1   -0.148356
2    0.284496
dtype: float64

In [50]:
# Print the ordinary t-statistics of intercepts
stats.ttest_1samp(all_intercepts[0], 0.0)

Ttest_1sampResult(statistic=array([0.88743472]), pvalue=array([0.37489868]))

In [51]:
# Print the ordinary t-statistics of factor premiums 
stats.ttest_1samp(all_lambdas[0], 0.0)

Ttest_1sampResult(statistic=array([-1.11113726, -0.59864657,  0.89824256]), pvalue=array([0.26657659, 0.54944277, 0.36911075]))

In [52]:
## Newey-West t-test For FF3-Factor
all_lambdas[0].apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values

array([-0.67703242, -0.38548791,  0.59345951])

### —Adding Control Variables

In [53]:
# Keep the size and PE have the same length as stock returns
df_ln_Size_trnct = df_ln_Size.loc[df_Ex_SR_trnct.index,:]
df_PE_trnct = df_PE.loc[df_Ex_SR_trnct.index,:]

In [54]:
len(df_ln_Size_trnct)==len(df_PE_trnct)==len(df_Ex_SR_trnct)

True

In [55]:
# Stock returns are forward for 21 days compared to firm characteristics 
Gap = 21

This cell is the second stage of FM OLS regression under the FF3-Factor model and combined with the firm characteristics as control variables. This aim of this cell is to estimate factor premiums while controlling for omitted-variables bias

In [56]:
all_lambdas = []
all_intercepts = []

df_Ex_SR_tmp_T = df_Ex_SR_trnct.iloc[rollingW:rollingW+periods*holding,:].T
df_ln_Size_tmp_T = df_ln_Size_trnct.iloc[rollingW:rollingW+periods*holding,:].T
df_PE_tmp_T = df_PE_trnct.iloc[rollingW:rollingW+periods*holding,:].T

lambdas = []
intercepts = []
index_times = list(df_Ex_SR_tmp_T.columns)


for k in tqdm(df_Ex_SR_tmp_T.columns[:-Gap]):
    i = int(index_times.index(k) / holding)
    df_index = (((all_betas[i].index).intersection(df_ln_Size_tmp_T.loc[:,k].replace(-np.inf,np.nan).dropna().index).\
                   intersection(df_PE_tmp_T.loc[:,k].dropna().index)).\
                    intersection(df_Ex_SR_tmp_T.loc[:,index_times[index_times.index(k)+Gap]].dropna().index))
    
    if len(df_index) != 0:
        
        l = list(ols_model.fit((pd.concat([all_betas[i].loc[df_index,:], \
                                           df_ln_Size_tmp_T.loc[df_index,k],\
                                           df_PE_tmp_T .loc[df_index,k]],axis = 1).values), \
                               (df_Ex_SR_tmp_T.loc[df_index,index_times[index_times.index(k)+Gap]].values)).coef_)
        
        h = ols_model.fit((pd.concat([all_betas[i].loc[df_index,:], \
                                      df_ln_Size_tmp_T.loc[df_index,k],\
                                      df_PE_tmp_T .loc[df_index,k]],axis = 1).values), \
                          (df_Ex_SR_tmp_T.loc[df_index,index_times[index_times.index(k)+Gap]].values)).intercept_
        
        lambdas.append(l)
        intercepts.append(h)
        
all_lambdas.append(pd.DataFrame(lambdas))
all_intercepts.append(pd.DataFrame(intercepts))

100%|██████████| 3948/3948 [00:17<00:00, 225.83it/s]


In [57]:
# Print all the intercepts
all_intercepts[0]

Unnamed: 0,0
0,0.006178
1,-0.002231
2,-0.000196
3,0.005051
4,0.009729
...,...
3943,-0.000427
3944,0.001235
3945,-0.003506
3946,0.002570


In [58]:
# Print all the risk premiums of FF3-Factor and the slope coefficients of firm characteristics
all_lambdas[0].rename(columns={0:'MKT',1:'SMB',2:'HML',3:'Size',4:'PE'})

Unnamed: 0,MKT,SMB,HML,Size,PE
0,-1.064738e-02,1.840459e-03,-1.835874e-03,-3.196612e-04,3.474267e-03
1,7.467738e-03,-6.077850e-03,1.433193e-03,2.478058e-03,-3.463930e-04
2,-9.726217e-36,5.979824e-36,-7.790773e-37,3.027692e-36,-1.013072e-36
3,8.835954e-04,3.755131e-03,3.443500e-04,1.257490e-04,-6.546777e-03
4,-1.659113e-02,-2.392120e-03,8.430940e-03,5.206268e-03,3.628873e-03
...,...,...,...,...,...
3943,-6.750868e-03,-1.555005e-02,2.356817e-03,4.352075e-03,-1.086785e-03
3944,-2.215524e-02,7.484788e-03,1.175397e-02,7.432452e-03,3.079888e-03
3945,3.239354e-02,-3.845284e-03,-1.326750e-02,-1.478204e-02,-2.565785e-03
3946,-2.942957e-02,-1.049631e-02,2.390797e-02,6.810575e-03,3.358731e-05


In [59]:
# Print the time-series average of intercepts (pricing-errors)
np.mean(all_intercepts[0], axis = 0)*21*100

0    0.067208
dtype: float64

In [60]:
# Print he time-series average of factor premiums 
np.mean(all_lambdas[0])*21*100

  return mean(axis=axis, dtype=dtype, out=out, **kwargs)


0   -0.445835
1    0.028950
2    0.307128
3    0.188822
4   -0.065372
dtype: float64

In [61]:
# Print the ordinary t-statistics of intercepts
stats.ttest_1samp(all_intercepts[0], 0.0)[0]

array([0.567425])

In [62]:
# Print the ordinary t-statistics of risk premiums
stats.ttest_1samp(all_lambdas[0], 0.0)[0]

array([-0.9177775 ,  0.09683119,  1.01346342,  1.12457412, -1.16113502])

In [63]:
## Newey-West t-statistics of factor premiums 
all_lambdas[0].apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values

array([-0.56648578,  0.06263045,  0.66214541,  0.74030678, -0.72724455])

### Carhart4-Factor Model

In [64]:
n = 3
holding = 21
rollingW = holding * n *12
lens = len(df_FF4)
periods = int((lens - rollingW) / holding) - 1

This cell is the first stage of FM OLS regression under the Carhart4-Factor Model, which aims to estiamte intercepts and the betas of Carhart4-Factors

In [65]:
# Keep the excess stock returns and factor returns have the same length
df_Ex_SR_trnct = df_Ex_SR.loc[df_FF4.index,:]

all_betas = []

for i in tqdm(range(periods)):

    df_FF4_tmp = df_FF4.iloc[i*holding:i*holding+rollingW,:]
    df_Ex_SR_tmp = df_Ex_SR_trnct.iloc[i*holding:i*holding+rollingW,:]
    
    betas = []

    for j in df_Ex_SR.columns:
        df_index = df_Ex_SR_tmp.loc[:,j].dropna().index
        
        if len(df_index) != 0:
            b = list(ols_model.fit(df_FF4_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
            b.append(j)
            betas.append(b)
    df_betas = pd.DataFrame(np.array(betas)).rename(columns={0:'MKT',1:'SMB',2:'HML',3:'WML',4:'stock'}).set_index('stock')
    
    all_betas.append(df_betas)

100%|██████████| 189/189 [00:45<00:00,  4.17it/s]


In [66]:
# Print the estimated betas for one period
all_betas[0]

Unnamed: 0_level_0,MKT,SMB,HML,WML
stock,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
FDS.SJ.Equity,0.24177300440579327,-0.4094764065208389,-0.3005724328521078,-0.4919795333836393
MOZ.SJ.Equity,0.4056615847821329,-1.0492150826286548,-0.27889957930875264,-0.07557311035311315
MTN.SJ.Equity,0.547062339296451,-1.2987631211074795,-0.08232264670969627,-0.10404017631632623
TRT.SJ.Equity,0.3348852423272915,0.5910440280061804,0.39633358774372124,0.00609271304615
AEG.SJ.Equity,0.2876566748824977,-0.11515048714272419,-0.0409813329970857,-0.07519192645081765
...,...,...,...,...
CHE.SJ.Equity,0.09089840855436024,0.14076073555101876,0.03526811085986954,-0.05545390193506684
JCG.SJ.Equity,0.14811832628462557,0.3324361243852805,-0.25411183754894456,0.030558795972424446
CRM.SJ.Equity,0.06315120987879569,0.08076189444438221,-0.0717431296682374,-0.04418162886207781
CLE.SJ.Equity,0.06704533036279861,0.05534200328585487,-0.20350931977348796,-0.07392660958028069


This cell is the second stage of FM OLS regression, which aims to
estimate intercepts and the risk premiums of the Carhart4-Factor. It should be noted that the regression is performed on a time-series scale

In [67]:
all_lambdas = []
all_intercepts = []

lambdas = []
intercepts = []
index_times = list(df_Ex_SR_tmp_T.columns)

for k in tqdm(df_Ex_SR_tmp_T.columns):
    
    i = int(index_times.index(k) / holding)

    df_return = df_Ex_SR_tmp_T.loc[all_betas[i].index,k].dropna()
    df_index = df_return.index
    
    if len(df_index) != 0:
        l = list(ols_model.fit(all_betas[i].loc[df_index,:].values, df_return.values).coef_)
        h = ols_model.fit(all_betas[i].loc[df_index,:].values, df_return.values).intercept_
        lambdas.append(l)
        intercepts.append(h)
        
all_lambdas.append(pd.DataFrame(lambdas))
all_intercepts.append(pd.DataFrame(intercepts))

100%|██████████| 3969/3969 [00:08<00:00, 471.87it/s]


In [68]:
# Print all the risk premiums of Carhart4-Factor
all_lambdas[0].rename(columns={0:'MKT',1:'SMB',2:'HML',3:'WML'})

Unnamed: 0,MKT,SMB,HML,WML
0,0.004545,0.019473,-0.009060,-0.031456
1,0.012638,-0.012311,-0.005911,-0.004416
2,-0.002235,-0.001809,0.002407,0.019264
3,-0.012260,-0.005589,0.017893,0.000400
4,0.010182,-0.012116,-0.001880,0.025864
...,...,...,...,...
3964,0.010946,-0.018895,-0.014050,-0.002631
3965,-0.007452,-0.003173,0.011285,0.010397
3966,-0.007037,0.013551,0.004458,-0.023200
3967,-0.003844,-0.013612,0.004714,0.008081


In [69]:
# Print the time-series average of intercepts
np.mean(all_intercepts[0], axis = 0)*21*100

0    0.070273
dtype: float64

In [70]:
# Print the time-series average of the risk premiums of Carhart4-Factor
np.mean(all_lambdas[0])*21*100

  return mean(axis=axis, dtype=dtype, out=out, **kwargs)


0   -0.336100
1   -0.091794
2    0.264773
3    0.059924
dtype: float64

In [71]:
stats.ttest_1samp(all_intercepts[0], 0.0)[0]

array([0.56599423])

In [72]:
stats.ttest_1samp(all_lambdas[0], 0.0)[0]

array([-0.79049803, -0.35975897,  0.97466268,  0.12580304])

In [73]:
## Newey-West t-test
all_lambdas[0].apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values

array([-0.48397576, -0.23070276,  0.62619986,  0.08305253])

### FF5-Factor Model

In [74]:
# How many years in the rolling window
n = 3

In [75]:
holding = 21
rollingW = holding * n *12
lens = len(df_FF5)
periods = int((lens - rollingW) / holding) - 1
periods

189

In [76]:
df_Ex_SR_trnct = df_Ex_SR.loc[df_FF5.index,:]

all_betas = []

for i in tqdm(range(periods)):

    df_FF5_tmp = df_FF5.iloc[i*holding:i*holding+rollingW,:]
    df_Ex_SR_tmp = df_Ex_SR_trnct.iloc[i*holding:i*holding+rollingW,:]
    
    betas = []

    for j in df_Ex_SR.columns:
        df_index = df_Ex_SR_tmp.loc[:,j].dropna().index
        
        if len(df_index) != 0:
            b = list(ols_model.fit(df_FF5_tmp.loc[df_index,:].values, df_Ex_SR_tmp.loc[df_index,j].values).coef_)
            b.append(j)
            betas.append(b)
            
    df_betas = pd.DataFrame(np.array(betas)).rename(columns={0:'MKT',1:'SMB',2:'HML',3:'RMW',4:'CMA',5:'stock'}).set_index('stock')    
    all_betas.append(df_betas)


100%|██████████| 189/189 [00:39<00:00,  4.78it/s]


In [77]:
all_betas[0]

Unnamed: 0_level_0,MKT,SMB,HML,RMW,CMA
stock,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
FDS.SJ.Equity,0.21594264102098834,-0.26365301195099544,-0.15474269947945266,0.06449239226592982,-0.733818752778601
MOZ.SJ.Equity,0.3567148744111223,-1.1407355303866715,-0.26624992335694897,-0.46973897280632726,0.13455746395747728
MTN.SJ.Equity,0.5385334266557813,-1.2596571699238706,-0.03867049865215673,-0.07861584975846897,-0.21640578514747963
TRT.SJ.Equity,0.39773876018490356,0.8228095450977866,0.47128387465798915,0.38679539906786753,-0.7008336739851152
AEG.SJ.Equity,0.2664837466811016,-0.093139172897612,0.014010569670607449,-0.3348346930264554,-0.22855322146509105
...,...,...,...,...,...
CHE.SJ.Equity,0.09277949691567697,0.19514050786355222,0.07443424189179781,-0.03746204340522412,-0.23471649085733015
JCG.SJ.Equity,0.10013197038212897,0.12032503174862093,-0.33902235920377743,-0.24517513025441123,0.6953659671618272
CRM.SJ.Equity,0.06397657938958827,0.10095118120072953,-0.05869801093783745,0.042567506120066184,-0.07894489270300882
CLE.SJ.Equity,0.07273293650832194,0.1268421246515941,-0.15821236404628308,0.01757411369442965,-0.2863258300137765


In [78]:
all_lambdas = []
all_intercepts = []

df_Ex_SR_tmp_T = df_Ex_SR_trnct.iloc[rollingW:rollingW+periods*holding,:].T

lambdas = []
intercepts = []
index_times = list(df_Ex_SR_tmp_T.columns)

for k in tqdm(df_Ex_SR_tmp_T.columns):
    
    i = int(index_times.index(k) / holding)

    df_return = df_Ex_SR_tmp_T.loc[all_betas[i].index,k].dropna()
    df_index = df_return.index
    
    if len(df_index) != 0:
        l = list(ols_model.fit(all_betas[i].loc[df_index,:].values, df_return.values).coef_)
        h = ols_model.fit(all_betas[i].loc[df_index,:].values, df_return.values).intercept_
        lambdas.append(l)
        intercepts.append(h)
        
all_lambdas.append(pd.DataFrame(lambdas))
all_intercepts.append(pd.DataFrame(intercepts))

100%|██████████| 3969/3969 [00:08<00:00, 489.26it/s]


In [79]:
all_lambdas[0].rename(columns={0:'MKT',1:'SMB',2:'HML',3:'RMW',4:'CMA'})

Unnamed: 0,MKT,SMB,HML,RMW,CMA
0,0.004841,0.019025,0.003580,-0.013258,0.027418
1,0.007902,-0.009573,-0.010646,0.020359,-0.019369
2,-0.002017,-0.002292,-0.003517,-0.000169,-0.011087
3,-0.010529,-0.007551,0.019799,-0.013404,0.005410
4,0.009562,-0.008857,-0.014598,0.033373,-0.023970
...,...,...,...,...,...
3964,0.010772,-0.019225,-0.013925,-0.001462,0.000805
3965,-0.013836,-0.004854,0.011781,0.004009,0.011121
3966,-0.000024,0.014058,0.003850,-0.005597,0.009667
3967,-0.003915,-0.011756,0.002375,0.015650,0.000378


In [80]:
np.mean(all_intercepts[0], axis = 0)*21*100

0    0.099596
dtype: float64

In [81]:
np.mean(all_lambdas[0])*21*100

  return mean(axis=axis, dtype=dtype, out=out, **kwargs)


0   -0.415490
1   -0.053654
2    0.290036
3    0.333198
4    0.154152
dtype: float64

In [82]:
stats.ttest_1samp(all_intercepts[0], 0.0)[0]

array([0.84058529])

In [83]:
stats.ttest_1samp(all_lambdas[0], 0.0)[0]

array([-0.9580704 , -0.19496849,  0.96641119,  0.94372652,  0.39466475])

In [84]:
all_intercepts[0].apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values

array([0.48710959])

In [85]:
## Newey-West t-test 
all_lambdas[0].apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values

array([-0.58446029, -0.12611289,  0.62517794,  0.60582779,  0.25863726])

### —Adding Control Variables

In [86]:
df_OP_trnct = df_OP.loc[df_FF5.index,:]
df_INV_trnct = df_INV.loc[df_FF5.index,:]

In [87]:
len(df_ln_Size_trnct)==len(df_PE_trnct)==len(df_Ex_SR_trnct)==len(df_OP_trnct)==len(df_INV_trnct)==len(df_FF5)

True

In [88]:
Gap = 21

In [89]:
all_lambdas = []
all_intercepts = []


df_Ex_SR_tmp_T = df_Ex_SR_trnct.iloc[rollingW:rollingW+periods*holding,:].T
df_ln_Size_tmp_T = df_ln_Size_trnct.iloc[rollingW:rollingW+periods*holding,:].T
df_PE_tmp_T = df_PE_trnct.iloc[rollingW:rollingW+periods*holding,:].T
df_OP_tmp_T = df_OP_trnct.iloc[rollingW:rollingW+periods*holding,:].T
df_INV_tmp_T = df_INV_trnct.iloc[rollingW:rollingW+periods*holding,:].T

lambdas = []
intercepts = []
index_times = list(df_Ex_SR_tmp_T.columns)

# If Gap >0, the following should be 'for k in tqdm(df_Ex_SR_tmp_T.columns[:-Gap]):''
for k in tqdm(df_Ex_SR_tmp_T.columns[:-Gap]):
    # k is time t in 3885 days
    i = int(index_times.index(k) / holding)
    # i is time T in 185 periods
    
    df_index = ((((all_betas[i].index).intersection(df_ln_Size_tmp_T.loc[:,k].replace(-np.inf,np.nan).dropna().index).\
                 intersection(df_PE_tmp_T.loc[:,k].replace(-np.inf,np.nan).dropna().index)).\
                  intersection(df_Ex_SR_tmp_T.loc[:,index_times[index_times.index(k)+Gap]].dropna().index)).\
                   intersection(df_OP_tmp_T.loc[:,k].dropna().index)).\
                    intersection(df_INV_tmp_T.loc[:,k].dropna().index)
    
    if len(df_index) != 0:
        
        l = list(ols_model.fit((pd.concat([all_betas[i].loc[df_index,:], df_ln_Size_tmp_T.loc[df_index,k],\
                                           df_PE_tmp_T.loc[df_index,k], df_OP_tmp_T.loc[df_index,k],\
                                           df_INV_tmp_T.loc[df_index,k]],axis = 1).values), \
                               (df_Ex_SR_tmp_T.loc[df_index,index_times[index_times.index(k)+Gap]].values)).coef_)
        
        h = ols_model.fit((pd.concat([all_betas[i].loc[df_index,:], df_ln_Size_tmp_T.loc[df_index,k],\
                                           df_PE_tmp_T.loc[df_index,k], df_OP_tmp_T.loc[df_index,k],\
                                           df_INV_tmp_T.loc[df_index,k]],axis = 1).values), \
                               (df_Ex_SR_tmp_T.loc[df_index,index_times[index_times.index(k)+Gap]].values)).intercept_
        
        lambdas.append(l)
        intercepts.append(h)
        
all_lambdas.append(pd.DataFrame(lambdas))
all_intercepts.append(pd.DataFrame(intercepts))

100%|██████████| 3948/3948 [00:23<00:00, 168.50it/s]


In [90]:
all_lambdas[0].rename(columns={0:'MKT',1:'SMB',2:'HML',3:'RMW',4:'CMA',5:'Size',6:'PE',7:'OP',8:'INV'})

Unnamed: 0,MKT,SMB,HML,RMW,CMA,Size,PE,OP,INV
0,-0.006739,0.004048,-0.000273,0.003712,0.001711,0.000309,0.003726,0.000033,-0.000115
1,0.005525,-0.004915,-0.005742,-0.003951,-0.003126,0.001862,-0.000911,0.004431,-0.007176
2,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,-0.013207,0.016723,0.012691,0.023139,-0.007734,0.009477,-0.005230,-0.017224,-0.004898
4,-0.000602,-0.013702,-0.002164,-0.019258,0.012822,-0.004541,0.002362,0.017438,0.006125
...,...,...,...,...,...,...,...,...,...
3943,-0.012237,-0.014547,0.003047,-0.004095,0.006937,0.006192,-0.001636,0.000692,0.000298
3944,-0.028670,0.008107,0.012182,0.002217,0.011556,0.009252,0.002554,-0.001434,-0.003836
3945,0.035655,-0.004751,-0.014623,-0.001076,0.002495,-0.015915,-0.002500,0.001175,-0.000417
3946,-0.034110,-0.000918,0.022731,0.009771,0.005393,0.011302,-0.000467,-0.000601,-0.007610


In [91]:
np.mean(all_intercepts[0], axis = 0)*21*100

0    0.045375
dtype: float64

In [92]:
np.mean(all_lambdas[0])*21*100

  return mean(axis=axis, dtype=dtype, out=out, **kwargs)


0   -0.342707
1    0.105880
2    0.428785
3    0.107426
4    0.295921
5    0.176873
6   -0.083288
7    0.270894
8    0.277168
dtype: float64

In [93]:
stats.ttest_1samp(all_intercepts[0], 0.0)[0]

array([0.36271981])

In [94]:
stats.ttest_1samp(all_lambdas[0], 0.0)[0]

array([-0.70104045,  0.28243647,  0.84589056,  0.30144464,  0.58326256,
        1.08479107, -1.38725778,  2.34160821,  2.15841627])

In [95]:
all_lambdas[0].apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values 

array([-0.43273271,  0.18666049,  0.5683471 ,  0.19296423,  0.38977885,
        0.70888499, -0.87606561,  1.54399394,  1.38705367])

In [96]:
all_intercepts[0].apply(lambda x: NW_ttest(ret = x, h0=0), axis=0).values

array([0.21081816])