In [42]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib.finance as finance
import datetime
import pandas as pd
import statsmodels.api as sm
from scipy.stats import f

In [2]:
# Define data path 
#ddir = 'C:/Users/mphan21/Google Drive/Academics/Spring 2018/BehavioralFinance/hmwk2/'
ddir = 'C:/Users/mphan/Documents/Behavioral-Finance/hmwk2/'

# Monthly equal weight
me_mon_ew = pd.read_csv(ddir+'me_mon_ew.csv')
me_mon_ew['date']=pd.to_datetime(me_mon_ew['date'], \
                                 format="%Y%m").dt.strftime('%Y%m')
me_mon_ew.iloc[:,1:me_mon_ew.shape[1]]=me_mon_ew.iloc[:,1:me_mon_ew.shape[1]]/10

# Monthly value weight
me_mon_vw = pd.read_csv(ddir+'me_mon_vw.csv')
me_mon_vw['date']=pd.to_datetime(me_mon_vw['date'], \
                                 format="%Y%m").dt.strftime('%Y%m')
me_mon_vw.iloc[:,1:me_mon_vw.shape[1]]=me_mon_vw.iloc[:,1:me_mon_vw.shape[1]]/10

# Jan 1963 - Dec 2017
aqr_ff_data = pd.read_csv(ddir + 'aqr_ff_data.csv')
aqr_ff_data['date'] = pd.to_datetime(aqr_ff_data['date'], \
                                     format="%Y%m").dt.strftime('%Y%m')

# Fama French Monthly
fama_french = pd.read_csv(ddir+'fama_french.csv')
fama_french.columns = ['date', 'mktrf', 'smb', 'hml', 'rf', 'umd']
fama_french['date'] = pd.to_datetime(fama_french['date'], \
                                     format='%Y%m%d').dt.strftime('%Y%m')

# Problem 1

## a) average monthly returns and z-statistics for each portfolio.

In [51]:
me_mon_ew.head()

Unnamed: 0,date,Lo 10,2-Dec,3-Dec,4-Dec,5-Dec,6-Dec,7-Dec,8-Dec,9-Dec,Hi 10
0,192607,-0.145,0.029,-0.015,0.133,0.124,0.198,0.155,0.138,0.338,0.329
1,192608,0.512,0.259,0.403,0.315,0.272,0.472,0.16,0.163,0.098,0.37
2,192609,0.093,-0.187,-0.227,-0.053,0.007,-0.007,-0.164,0.064,-0.086,0.067
3,192610,-0.484,-0.177,-0.336,-0.483,-0.298,-0.28,-0.345,-0.327,-0.347,-0.243
4,192611,-0.078,-0.032,-0.029,0.465,0.324,0.357,0.382,0.295,0.361,0.27


Average Returns

In [6]:
avg_mon_ew=me_mon_ew[me_mon_ew.columns[1:me_mon_ew.shape[1]]].mean()
avg_mon_ew

Lo 10    0.180088
2-Dec    0.136317
3-Dec    0.129316
4-Dec    0.122953
5-Dec    0.116696
6-Dec    0.119068
7-Dec    0.110369
8-Dec    0.107329
9-Dec    0.101801
Hi 10    0.089945
dtype: float64

Standard Deviation

In [7]:
std_mon_ew=me_mon_ew[me_mon_ew.columns[1:me_mon_ew.shape[1]]].std()
std_mon_ew

Lo 10    1.066394
2-Dec    0.914241
3-Dec    0.821322
4-Dec    0.773996
5-Dec    0.722719
6-Dec    0.694519
7-Dec    0.660380
8-Dec    0.627549
9-Dec    0.596683
Hi 10    0.540110
dtype: float64

Z Score

In [8]:
z_mon_ew=avg_mon_ew/std_mon_ew
z_mon_ew

Lo 10    0.168876
2-Dec    0.149104
3-Dec    0.157449
4-Dec    0.158854
5-Dec    0.161468
6-Dec    0.171440
7-Dec    0.167129
8-Dec    0.171028
9-Dec    0.170611
Hi 10    0.166532
dtype: float64

## b) CAPM alphas and t-statistics

In [9]:
mon_ew_combine=pd.merge(me_mon_ew,fama_french,on='date')
mon_ew_combine.head()

Unnamed: 0,date,Lo 10,2-Dec,3-Dec,4-Dec,5-Dec,6-Dec,7-Dec,8-Dec,9-Dec,Hi 10,mktrf,smb,hml,rf,umd
0,192607,-0.145,0.029,-0.015,0.133,0.124,0.198,0.155,0.138,0.338,0.329,0.0296,-0.023,-0.0287,0.0022,
1,192608,0.512,0.259,0.403,0.315,0.272,0.472,0.16,0.163,0.098,0.37,0.0264,-0.014,0.0419,0.0025,
2,192609,0.093,-0.187,-0.227,-0.053,0.007,-0.007,-0.164,0.064,-0.086,0.067,0.0036,-0.0132,0.0001,0.0023,
3,192610,-0.484,-0.177,-0.336,-0.483,-0.298,-0.28,-0.345,-0.327,-0.347,-0.243,-0.0324,0.0004,0.0051,0.0032,
4,192611,-0.078,-0.032,-0.029,0.465,0.324,0.357,0.382,0.295,0.361,0.27,0.0253,-0.002,-0.0035,0.0031,


In [11]:
alphas = [0] * 10
tstats = [0] * 10
mon_ew_resid = me_mon_ew
for portfolio in range(1,11):
    print("Portfolio: "+mon_ew_combine.columns[portfolio])
    cur_mod=sm.OLS(mon_ew_combine[mon_ew_combine.columns[portfolio]]-mon_ew_combine['rf'], 
                   sm.add_constant(mon_ew_combine['mktrf']), missing='drop').fit()
    alphas[portfolio-1] = cur_mod.params[0]
    tstats[portfolio-1] = cur_mod.tvalues[0]
    mon_ew_resid[mon_ew_combine.columns[portfolio]]=cur_mod.resid
    print(cur_mod.summary())
    print(portfolio)

Portfolio: Lo 10
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.512
Model:                            OLS   Adj. R-squared:                  0.511
Method:                 Least Squares   F-statistic:                     1148.
Date:                Tue, 20 Feb 2018   Prob (F-statistic):          9.33e-173
Time:                        19:50:37   Log-Likelihood:                -1234.8
No. Observations:                1098   AIC:                             2474.
Df Residuals:                    1096   BIC:                             2484.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0826      0.023   

CAPM alphas and their t-stats

In [18]:
reg_out_ew = pd.DataFrame(alphas).transpose().append(pd.DataFrame(tstats).transpose(), ignore_index=True)
reg_out_ew.index = ['alphas', 'tstat']
reg_out_ew.columns = me_mon_ew.columns[1:me_mon_ew.shape[1]]
reg_out_ew

Unnamed: 0,Lo 10,2-Dec,3-Dec,4-Dec,5-Dec,6-Dec,7-Dec,8-Dec,9-Dec,Hi 10
alphas,0.082626,0.038446,0.036191,0.033829,0.030595,0.034682,0.029262,0.029067,0.026805,0.021251
tstat,3.643518,2.531968,3.12532,3.275718,3.755011,5.091765,4.942877,6.170298,6.665939,7.118812


**GRS Test**

$$ F_{N, T-N-K} \sim \frac{T-N-K}{N}\frac{\hat{\alpha}'\hat{\Sigma}^{-1}\hat{\alpha}}{1+\hat{\mu}'\hat{\Omega}^{-1}\hat{\mu}}$$

In [21]:
sigma_resid=mon_ew_resid.iloc[:,1:mon_ew_resid.shape[1]].cov()
sigma_resid

Unnamed: 0,Lo 10,2-Dec,3-Dec,4-Dec,5-Dec,6-Dec,7-Dec,8-Dec,9-Dec,Hi 10
Lo 10,0.555582,0.326793,0.227802,0.190871,0.133383,0.099689,0.08294,0.036931,0.017338,-0.01405
2-Dec,0.326793,0.249085,0.174076,0.150726,0.10876,0.082173,0.066739,0.033915,0.014537,-0.009497
3-Dec,0.227802,0.174076,0.144868,0.118576,0.088386,0.06847,0.05434,0.031933,0.014609,-0.007458
4-Dec,0.190871,0.150726,0.118576,0.115219,0.080624,0.063981,0.051339,0.030721,0.014533,-0.005579
5-Dec,0.133383,0.10876,0.088386,0.080624,0.071721,0.051384,0.04195,0.027465,0.014373,-0.003227
6-Dec,0.099689,0.082173,0.06847,0.063981,0.051384,0.050121,0.034838,0.024598,0.014686,-0.001463
7-Dec,0.08294,0.066739,0.05434,0.051339,0.04195,0.034838,0.037862,0.02178,0.013188,0.000148
8-Dec,0.036931,0.033915,0.031933,0.030721,0.027465,0.024598,0.02178,0.023974,0.012835,0.001789
9-Dec,0.017338,0.014537,0.014609,0.014533,0.014373,0.014686,0.013188,0.012835,0.017469,0.004161
Hi 10,-0.01405,-0.009497,-0.007458,-0.005579,-0.003227,-0.001463,0.000148,0.001789,0.004161,0.009627


In [37]:
#alpha*sigma^-1*alpha
resid_quad = np.dot(alphas,pd.DataFrame(np.linalg.inv(sigma_resid.values), 
                           sigma_resid.columns, sigma_resid.index).dot(alphas))
resid_quad

0.095660321064772985

In [38]:
#(1+E(F)Cov(F)^-1E(F))
mu_factors = mon_ew_combine['mktrf'].mean()
sigma_factors = mon_ew_combine['mktrf'].var()
factors_quad=(1+mu_factors*1/sigma_factors*mu_factors)
factors_quad

1.0154114057149388

In [43]:
N=10
T=mon_ew_combine.shape[0] # (1098)
K=1

F_stat = (T-N-K)/N*resid_quad/factors_quad
F_stat

10.240457061263285

In [53]:
pvalue=1-f.cdf(F_stat, N, T-N-K)
pvalue

0.00015662121727111344

**Since the p< 0.01, we reject the hypothesis that the sized ranked deciles are priced by the market portfolio at the 1% significance level**

# Problem 2 - Value Weighted 

In [28]:
mon_vw_combine=pd.merge(me_mon_vw,fama_french,on='date')
mon_vw_combine.head()

Unnamed: 0,date,Lo 10,2-Dec,3-Dec,4-Dec,5-Dec,6-Dec,7-Dec,8-Dec,9-Dec,Hi 10,mktrf,smb,hml,rf,umd
0,192607,-0.012,0.052,-0.005,0.131,0.121,0.204,0.158,0.129,0.353,0.371,0.0296,-0.023,-0.0287,0.0022,
1,192608,0.133,0.255,0.4,0.32,0.281,0.445,0.161,0.149,0.061,0.379,0.0264,-0.014,0.0419,0.0025,
2,192609,0.059,-0.2,-0.201,-0.046,-0.006,0.014,-0.202,0.074,-0.077,0.125,0.0036,-0.0132,0.0001,0.0023,
3,192610,-0.433,-0.201,-0.325,-0.435,-0.293,-0.248,-0.36,-0.326,-0.336,-0.256,-0.0324,0.0004,0.0051,0.0032,
4,192611,-0.33,-0.023,0.008,0.474,0.364,0.344,0.363,0.305,0.386,0.24,0.0253,-0.002,-0.0035,0.0031,


In [29]:
alphas_vw = [0] * 10
tstats_vw = [0] * 10
mon_vw_resid = me_mon_vw
for portfolio in range(1,11):
    print("Portfolio: "+mon_vw_combine.columns[portfolio])
    cur_mod=sm.OLS(mon_vw_combine[mon_vw_combine.columns[portfolio]]-mon_vw_combine['rf'], 
                   sm.add_constant(mon_vw_combine['mktrf']), missing='drop').fit()
    alphas_vw[portfolio-1] = cur_mod.params[0]
    tstats_vw[portfolio-1] = cur_mod.tvalues[0]
    mon_vw_resid[mon_vw_combine.columns[portfolio]]=cur_mod.resid
    print(cur_mod.summary())
    print(portfolio)

Portfolio: Lo 10
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.582
Model:                            OLS   Adj. R-squared:                  0.582
Method:                 Least Squares   F-statistic:                     1526.
Date:                Tue, 20 Feb 2018   Prob (F-statistic):          7.96e-210
Time:                        20:28:17   Log-Likelihood:                -1069.9
No. Observations:                1098   AIC:                             2144.
Df Residuals:                    1096   BIC:                             2154.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0432      0.020   

In [31]:
reg_out_vw = pd.DataFrame(alphas_vw).transpose().append(pd.DataFrame(tstats_vw).transpose(), ignore_index=True)
reg_out_vw.index = ['alphas', 'tstat']
reg_out_vw.columns = me_mon_vw.columns[1:me_mon_vw.shape[1]]
reg_out_vw

Unnamed: 0,Lo 10,2-Dec,3-Dec,4-Dec,5-Dec,6-Dec,7-Dec,8-Dec,9-Dec,Hi 10
alphas,0.043212,0.031944,0.034774,0.035488,0.032321,0.035906,0.031788,0.031393,0.027948,0.024527
tstat,2.214365,2.284532,3.199725,3.690703,4.193416,5.511876,5.721362,7.024332,8.130324,9.351996


**GRS Test**

$$ F_{N, T-N-K} \sim \frac{T-N-K}{N}\frac{\hat{\alpha}'\hat{\Sigma}^{-1}\hat{\alpha}}{1+\hat{\mu}'\hat{\Omega}^{-1}\hat{\mu}}$$

In [32]:
sigma_resid_vw=mon_vw_resid.iloc[:,1:mon_vw_resid.shape[1]].cov()
sigma_resid_vw

Unnamed: 0,Lo 10,2-Dec,3-Dec,4-Dec,5-Dec,6-Dec,7-Dec,8-Dec,9-Dec,Hi 10
Lo 10,0.4114,0.26536,0.193061,0.159189,0.115651,0.087675,0.069109,0.035973,0.017414,-0.030193
2-Dec,0.26536,0.211224,0.148704,0.125569,0.095023,0.071254,0.055192,0.031034,0.013024,-0.024972
3-Dec,0.193061,0.148704,0.127596,0.10223,0.077829,0.060646,0.045316,0.029328,0.012572,-0.021202
4-Dec,0.159189,0.125569,0.10223,0.099888,0.070288,0.056794,0.043292,0.027591,0.01254,-0.019581
5-Dec,0.115651,0.095023,0.077829,0.070288,0.06418,0.045254,0.034669,0.023273,0.010735,-0.015936
6-Dec,0.087675,0.071254,0.060646,0.056794,0.045254,0.045846,0.02952,0.021296,0.011136,-0.013482
7-Dec,0.069109,0.055192,0.045316,0.043292,0.034669,0.02952,0.033349,0.018324,0.00972,-0.010964
8-Dec,0.035973,0.031034,0.029328,0.027591,0.023273,0.021296,0.018324,0.021579,0.009083,-0.008354
9-Dec,0.017414,0.013024,0.012572,0.01254,0.010735,0.011136,0.00972,0.009083,0.012766,-0.004374
Hi 10,-0.030193,-0.024972,-0.021202,-0.019581,-0.015936,-0.013482,-0.010964,-0.008354,-0.004374,0.007431


In [48]:
resid_quad_vw = np.dot(alphas_vw,pd.DataFrame(np.linalg.inv(sigma_resid_vw.values), 
                           sigma_resid_vw.columns, sigma_resid_vw.index).dot(alphas_vw))
resid_quad_vw

0.60328583578021266

In [49]:
#(1+E(F)Cov(F)^-1E(F))
mu_factors_vw = mon_vw_combine['mktrf'].mean()
sigma_factors_vw = mon_vw_combine['mktrf'].var()
factors_quad_vw=(1+mu_factors_vw*1/sigma_factors_vw*mu_factors_vw)
factors_quad_vw

1.0154114057149388

In [50]:
N=10
T=mon_vw_combine.shape[0]
K=1

F_stat_vw = (T-N-K)/N*resid_quad_vw/factors_quad_vw
F_stat_vw

64.581872904152604

In [54]:
pvalue_vw=1-f.cdf(F_stat_vw, T-N-K, N)
pvalue_vw

2.2122544018898793e-08

# Problem 3- Daily Data

In [18]:
day_ew_combine=pd.merge(me_mon_vw,fama_french,on='date')
day_ew_combine.head()

Unnamed: 0,date,Lo 10,2-Dec,3-Dec,4-Dec,5-Dec,6-Dec,7-Dec,8-Dec,9-Dec,Hi 10,mktrf,smb,hml,rf,umd
0,192607,-0.476772,-0.391411,-0.434708,-0.278124,-0.276329,-0.189567,-0.215986,-0.233658,0.008907,0.06958,0.0296,-0.023,-0.0287,0.0022,
1,192608,-0.286735,-0.144466,0.01245,-0.049268,-0.077407,0.089561,-0.176529,-0.178383,-0.249453,0.106976,0.0264,-0.014,0.0419,0.0025,
2,192609,-0.037515,-0.28402,-0.285839,-0.128963,-0.084747,-0.067438,-0.277439,0.000285,-0.145431,0.064764,0.0036,-0.0132,0.0001,0.0023,
3,192610,-0.020383,0.211837,0.066911,-0.067118,0.068606,0.101979,-0.022829,-0.000401,-0.023506,0.016951,-0.0324,0.0004,0.0051,0.0032,
4,192611,-0.734751,-0.407856,-0.365555,0.117935,0.018476,0.001171,0.037506,-0.010754,0.086614,-0.022415,0.0253,-0.002,-0.0035,0.0031,


# Problem 4 - Cumulative Return

In [64]:
me_mon_ew_sample = me_mon_ew[me_mon_ew['date']>='1927-01'].set_index('date')
me_mon_ew_sample.head()

Unnamed: 0_level_0,Lo 10,2-Dec,3-Dec,4-Dec,5-Dec,6-Dec,7-Dec,8-Dec,9-Dec,Hi 10
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
192701,0.376443,0.044659,0.003485,0.189485,-0.089555,0.185204,0.136327,0.058264,0.004231,-0.060785
192702,0.002844,0.07845,-0.077383,0.0242,0.004488,0.254204,0.149295,-0.113578,0.006272,-0.074459
192703,-0.695191,-0.225091,-0.235906,-0.177759,-0.363933,-0.392683,-0.045621,-0.265868,-0.089966,0.047823
192704,0.316183,0.39408,-0.033373,-0.158236,-0.047905,-0.116803,-0.106108,-0.09594,-0.140412,0.020512
192705,-0.331492,0.074338,-0.201478,0.025707,-0.110261,0.003711,-0.035967,-0.046434,-0.052378,-0.040138


In [None]:
plt.figure(figsize=(8, 7))
#plt.locator_params(nbins=12, axis='x')
plt.plot(me_mon_ew.cumsum())
plt.autoscale(tight=True)
plt.legend(['Lo 10', '2-Dec', '3-Dec', '4-Dec', '5-Dec', '6-Dec', '7-Dec', '8-Dec', '9-Dec', 'Hi 10'])
plt.xlabel('Date')
plt.ylabel('Cumulative returns (in log dollars)')
plt.show()

# Question 6: EW Methology

The VW index is overly represented by a number of very large companies. The index will have a few stocks with market caps that are considerably higher than the average, and most of the stocks will be below the average weight of the index. By contrast, the EW index underweights a few large stocks but overweight a large number of smaller stocks. By definition, the VW index will have a lower stocks concentration. 

Advantages of EW index:
* The index is highly diversified with all stocks in the universe equally weighted.
* As opposed to market cap weighting, the index does not overweight overpriced stocks and underweight underpriced stocks. Pricing errors are random.
* Easy to construct relatively tax efficient ETFs and mutual funds.
* Usually adds 1-2 percent in annual return over long periods after expenses vs. market cap weighted indexes.

Disadvantages of EW index:
* No distinction is made between the relative or absolute valuation of stocks within the universe.
* Difficult to keep the stocks in the index equally weighted due to constant price fluctuations.
* Difficult for this type of index to manage substantial amounts of money due to the need to invest equal amounts in both the largest and smallest stocks.


Advantages of VW index:
* The total return of the index roughly mirrors the change in the total market value of all stocks.
* Rebalancing this type of index is simple.
* Since the index automatically adjusts to changes in stock prices, it is easy to create a tax efficient mutual fund or ETF to track this type of index.

Disadvantages of VW index:
* If stock prices reflect emotions over the short term, then the index will systematically own too much of overpriced stocks and too little of bargain priced stocks.
* The index is heavily influenced by the few companies with the largest market capitalizations. For instance, the top 20 stocks in the S&P 500 index can account for one-third of the total index.