In [1]:
import numpy as np
import pandas as pd
import statsmodels.regression.linear_model as rg
import statsmodels.tools.tools as ct


In [2]:
# 5. Efficient Market Hypothesis (EMH)

# Fama-French-Carhart Factors: U.S. Total Stock Market Premium (Mkt-Rf), U.S. Total Stock Market Size Premium (SMB),
#                              U.S. Total Stock Market Investment Style Premium (HML), Risk Free Rate (Rf)

# Data Query and Range Delimiting

# Modify Path to Data File
aemhall = pd.read_csv('C:\\Users\\nisha\\Documents\\Data Science\\Portfolio Analytics\\Efficient-Market-Hypothesis-Data.txt', index_col='Date',
                           parse_dates=True)


In [3]:
# Monthly and Annual Returns Calculation
aemhall.loc[:, 'amomentumret'] = aemhall.loc[:, 'amomentum'] / 100
aemhall.loc[:, 'aspremiumret'] = aemhall.loc[:, 'aspremium'] / 100
aemhall.loc[:, 'asizeret'] = aemhall.loc[:, 'asize'] / 100
aemhall.loc[:, 'astyleret'] = aemhall.loc[:, 'astyle'] / 100
aemhall.loc[:, 'ariskret'] = aemhall.loc[:, 'arisk'] / 100


In [4]:
# 5.1. Weak From Efficient Market Hypothesis

# 5.1.1. Test Technical Factor Statistical Significance
# Add Constant Column as OLS regression doesn't include it
aemhall.loc[:, 'aconstant'] = ct.add_constant(aemhall)
# Calculate OLS Regression
aregfactorsweak = ['aconstant', 'amomentumret']
aregressionweak = rg.OLS(aemhall.loc[:, 'aspremiumret'], aemhall.loc[:, aregfactorsweak],
                        hasconst=bool).fit()
# Coefficients Statistical Significance p-value < 0.05 (95% of confidence)
print(aregressionweak.summary())


                            OLS Regression Results                            
Dep. Variable:           aspremiumret   R-squared:                       0.007
Model:                            OLS   Adj. R-squared:                 -0.004
Method:                 Least Squares   F-statistic:                    0.6507
Date:                Mon, 06 Nov 2017   Prob (F-statistic):              0.422
Time:                        12:24:20   Log-Likelihood:                 15.219
No. Observations:                  89   AIC:                            -26.44
Df Residuals:                      87   BIC:                            -21.46
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
aconstant        0.0939      0.026      3.657   

In [5]:
# 5.1.2. Test Regression Residuals White Noise
# Calculate Residuals
aemhall.loc[:, 'aemhweakres'] = aregressionweak.resid
aemhall.loc[:, 'aemhweakres1'] = aemhall.loc[:, 'aemhweakres'].shift(1)
# Convert all NaN value to 0 as OLS regression function doesn't have exception
aemhall[np.isnan(aemhall)] = 0
# Calculate OLS Regression
aregfactweakres = ['aconstant', 'aemhweakres1']
aregweakres = rg.OLS(aemhall.loc[:, 'aemhweakres'], aemhall.loc[:, aregfactweakres],
                        hasconst=bool).fit()
# Coefficients Statistical Significance p-value < 0.05 (95% of confidence)
print(aregweakres.summary())


                            OLS Regression Results                            
Dep. Variable:            aemhweakres   R-squared:                       0.001
Model:                            OLS   Adj. R-squared:                 -0.011
Method:                 Least Squares   F-statistic:                   0.07151
Date:                Mon, 06 Nov 2017   Prob (F-statistic):              0.790
Time:                        12:24:32   Log-Likelihood:                 15.256
No. Observations:                  89   AIC:                            -26.51
Df Residuals:                      87   BIC:                            -21.53
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
aconstant    -2.262e-05      0.022     -0.001   

In [6]:
# 5.2. Semi-Strong From Efficient Market Hypothesis

# 5.2.1. Test Technical Factor Statistical Significance
# Calculate OLS Regression
aregfactorssemi = ['aconstant', 'amomentumret', 'asizeret', 'astyleret']
aregressionsemi = rg.OLS(aemhall.loc[:, 'aspremiumret'], aemhall.loc[:, aregfactorssemi],
                        hasconst=bool).fit()
# Coefficients Statistical Significance p-value < 0.05 (95% of confidence)
print(aregressionsemi.summary())


                            OLS Regression Results                            
Dep. Variable:           aspremiumret   R-squared:                       0.171
Model:                            OLS   Adj. R-squared:                  0.142
Method:                 Least Squares   F-statistic:                     5.843
Date:                Mon, 06 Nov 2017   Prob (F-statistic):            0.00112
Time:                        12:25:06   Log-Likelihood:                 23.231
No. Observations:                  89   AIC:                            -38.46
Df Residuals:                      85   BIC:                            -28.51
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
aconstant        0.0635      0.026      2.455   

In [7]:
# 5.1.2. Test Regression Residuals White Noise
# Calculate Residuals
aemhall.loc[:, 'aemhsemires'] = aregressionsemi.resid
aemhall.loc[:, 'aemhsemires1'] = aemhall.loc[:, 'aemhsemires'].shift(1)
# Convert all NaN value to 0 as OLS regression function doesn't have exception
aemhall[np.isnan(aemhall)] = 0
# Calculate OLS Regression
aregfactsemires = ['aconstant', 'aemhsemires1']
aregsemires = rg.OLS(aemhall.loc[:, 'aemhsemires'], aemhall.loc[:, aregfactsemires],
                        hasconst=bool).fit()
# Coefficients Statistical Significance p-value < 0.05 (95% of confidence)
print(aregsemires.summary())

                            OLS Regression Results                            
Dep. Variable:            aemhsemires   R-squared:                       0.023
Model:                            OLS   Adj. R-squared:                  0.012
Method:                 Least Squares   F-statistic:                     2.028
Date:                Mon, 06 Nov 2017   Prob (F-statistic):              0.158
Time:                        12:25:23   Log-Likelihood:                 24.256
No. Observations:                  89   AIC:                            -44.51
Df Residuals:                      87   BIC:                            -39.54
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
aconstant    -3.306e-05      0.020     -0.002   