<a href="https://colab.research.google.com/github/letianzj/QuantResearch/blob/master/notebooks/fama_macbeth.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%matplotlib inline
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import statsmodels.api as sm
import matplotlib.pyplot as plt
import pandas_datareader as pdr
import yfinance as yf

In [None]:
# Fama-French research factors, fetched from Ken French's data library:
# https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html
# (pdr.famafrench.get_available_datasets() lists the other dataset names.)
# The reader's result is indexed with 0 to pick the table previewed below,
# which has one row per month and columns Mkt-RF, SMB, HML and RF.
df_ff = pdr.data.DataReader(
    name='F-F_Research_Data_Factors',
    data_source='famafrench',
)[0]
df_ff.head()

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015-06,-1.53,2.83,-0.79,0.0
2015-07,1.54,-4.15,-4.12,0.0
2015-08,-6.04,0.49,2.66,0.0
2015-09,-3.08,-2.64,0.53,0.0
2015-10,7.75,-1.97,-0.07,0.0


In [None]:
# Sample window and the sector tickers used as test assets.
start_date = datetime(2015, 1, 1)
end_date = datetime.today()  # open-ended: the sample grows each time this cell is re-run
sectors = ['XLB', 'XLC', 'XLF', 'XLI', 'XLK', 'XLP', 'XLRE', 'XLU', 'XLV', 'XLY', 'XLE']

# Download one adjusted-close series per ticker and collect them in a list so
# the combined frame is built with a single concat rather than being re-copied
# on every iteration.
price_frames = []
for sym in sectors:
    print(sym)
    # auto_adjust=False is passed explicitly so that the 'Adj Close' column is
    # present regardless of the default used by the installed yfinance version.
    df = yf.download(sym, start=start_date, end=end_date, auto_adjust=False)
    df = df[['Adj Close']]
    df.columns = [sym]
    price_frames.append(df)

# Outer join on the date index: tickers with a shorter history keep NaN for
# the dates they are missing instead of truncating the other tickers.
df_sectors = pd.concat(price_frames, axis=1, join='outer')

XLB
[*********************100%***********************]  1 of 1 completed
XLC
[*********************100%***********************]  1 of 1 completed
XLF
[*********************100%***********************]  1 of 1 completed
XLI
[*********************100%***********************]  1 of 1 completed
XLK
[*********************100%***********************]  1 of 1 completed
XLP
[*********************100%***********************]  1 of 1 completed
XLRE
[*********************100%***********************]  1 of 1 completed
XLU
[*********************100%***********************]  1 of 1 completed
XLV
[*********************100%***********************]  1 of 1 completed
XLY
[*********************100%***********************]  1 of 1 completed
XLE
[*********************100%***********************]  1 of 1 completed


In [None]:
# Month-end price per ticker -> simple month-over-month returns.
# .iloc[-1] takes the last row of each monthly bucket explicitly by position;
# a bare x[-1] on a date-indexed Series relies on the deprecated positional
# fallback for integer keys.
# NOTE(review): newer pandas releases prefer the 'ME' alias over 'M' for
# month-end resampling; 'M' is kept here for compatibility with older versions.
df_sec_ret = df_sectors.resample('M').agg(lambda x: x.iloc[-1])
# Switch from month-end timestamps to monthly periods so the index uses the
# same year-month labels as the factor table.
df_sec_ret.index = df_sec_ret.index.to_period()
# The first row is NaN by construction (no prior month to compare against).
df_sec_ret = df_sec_ret.pct_change()
df_sec_ret.head()

Unnamed: 0_level_0,XLB,XLC,XLF,XLI,XLK,XLP,XLRE,XLU,XLV,XLY,XLE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2015-01,,,,,,,,,,,
2015-02,0.079681,,0.058236,0.053509,0.07995,0.041441,,-0.063948,0.042876,0.085441,0.04593
2015-03,-0.049048,,-0.00616,-0.025461,-0.034356,-0.019427,,-0.009953,0.006406,-0.00483,-0.011552
2015-04,0.03362,,0.00083,-0.00251,0.02751,-0.007591,,-0.004726,-0.010897,-0.000531,0.065739
2015-05,0.003768,,0.019478,0.003236,0.018553,0.008683,,0.006332,0.045043,0.013146,-0.051887


In [None]:
# Excess returns: subtract the risk-free rate (divided by 100 to match the
# decimal returns) from every ticker, aligned on the monthly index, then keep
# only the months in which every ticker and the risk-free rate are available.
df_sec_ret = df_sec_ret.sub(df_ff['RF'] / 100.0, axis=0).dropna(axis=0)
df_Y = df_sec_ret

# Factor returns rescaled the same way and restricted to the months kept in df_Y.
df_X = (df_ff[['Mkt-RF', 'SMB', 'HML']] / 100.0).loc[df_Y.index]
print(f'{df_Y.shape[1]} stocks, {df_X.shape[1]} factors, {df_Y.shape[0]} time steps')

# Intercept column goes last (prepend=False), after the factor columns.
df_X = sm.add_constant(df_X, prepend=False)

11 stocks, 3 factors, 23 time steps


In [None]:
# fama_macbeth step one: time-series regression ==> factor exposures
# Each asset's excess returns are regressed on the factor returns plus an
# intercept; only the factor slopes are kept as that asset's exposures.
# The factors are selected by column name rather than by position, so the
# result does not depend on how many factors df_X holds or where the
# intercept column sits.
factor_cols = [col for col in df_X.columns if col != 'const']

loadings = []                     # one Series of factor slopes per asset
for sym in df_Y.columns:
    model = sm.OLS(df_Y[sym], df_X)
    results = model.fit()
    loadings.append(results.params[factor_cols])

# Build the exposure table once instead of concatenating inside the loop.
beta = pd.DataFrame(loadings, index=list(df_Y.columns))   # factor exposures

In [None]:
# Show the estimated factor exposures: one row per asset, one column per factor.
beta

Unnamed: 0,Mkt-RF,SMB,HML
XLB,0.963386,0.179835,0.134132
XLC,0.890566,0.14935,-0.031066
XLF,1.015531,-0.140469,0.584023
XLI,1.075992,0.038404,0.175437
XLK,1.096879,-0.231479,-0.314548
XLP,0.75999,-0.916659,0.055349
XLRE,0.64325,-0.040101,0.176523
XLU,0.589555,-0.709289,0.168439
XLV,0.76306,-0.010717,-0.242951
XLY,1.150076,-0.029591,-0.090732


In [None]:
# fama_macbeth step two: cross-sectional regression ==> factor risk premia
# For every period, the cross-section of asset excess returns is regressed on
# the step-one exposures plus an intercept; the fitted slopes are that
# period's factor risk premia.
# The regressor matrix is identical for every period, so it is built once
# outside the loop.
X = beta.copy()
X['const'] = 1.0

period_premia = []                # one Series of coefficients per period
for dt in df_Y.index:
    model = sm.OLS(df_Y.loc[dt], X)
    results = model.fit()
    period_premia.append(results.params)

# Build the table once instead of concatenating inside the loop.
rp = pd.DataFrame(period_premia, index=list(df_Y.index))

In [None]:
# Show the per-period cross-sectional estimates: one row per period, one
# column per factor plus the intercept.
rp

Unnamed: 0,Mkt-RF,SMB,HML,const
2018-07,0.010842,-0.013377,0.002844,0.0134
2018-08,0.007729,-0.0045,-0.073922,0.015024
2018-09,0.045209,-0.009583,-0.021963,-0.043057
2018-10,-0.108396,-0.069259,0.048361,0.026377
2018-11,-0.094186,0.025573,0.010437,0.111888
2018-12,-0.051775,-0.002154,-0.015799,-0.039265
2019-01,0.015931,0.042001,0.016713,0.065447
2019-02,0.045286,-0.025212,-0.015308,-0.020743
2019-03,0.013578,-0.017494,-0.032417,0.005187
2019-04,0.057736,-0.021578,-0.010887,-0.027886


In [None]:
# APT
# Fama-MacBeth estimates: the risk premium of each factor is the time-series
# mean of its per-period cross-sectional coefficients, and the t-statistic is
#     t = mean / (std / sqrt(T))
# where T is the number of periods. The standard error of a mean scales with
# sqrt(T), not T.
risk_premia = rp.agg(['mean', 'std']).T
n_periods = rp.shape[0]
risk_premia['t-stats'] = risk_premia['mean'] / (risk_premia['std'] / np.sqrt(n_periods))
risk_premia

Unnamed: 0,mean,std,t-stats
Mkt-RF,-0.006066,0.075774,-1.841092
SMB,-0.005482,0.029661,-4.250841
HML,-0.022718,0.045078,-11.59112
const,0.009914,0.042297,5.390842
