In [31]:
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import numpy.linalg as la

fama_french = pd.read_csv('fama_french_5factor.csv',index_col=0)
fama_french.index = pd.date_range('2010-01-01', periods=fama_french.shape[0], freq='M')

R = pd.read_csv('returns_out_of_sample.csv',index_col=0)
R.index = pd.date_range('2010-01-01', periods=R.shape[0], freq='M')

T, N = R.shape

R.head()

Unnamed: 0,Agric,Food,Soda,Beer,Smoke,Toys,Fun,Books,Hshld,Clths,...,Boxes,Trans,Whlsl,Rtail,Meals,Banks,Insur,RlEst,Fin,Other
2010-01-31,-6.64,0.6,-2.42,-3.7,-3.35,-2.32,2.34,3.55,0.85,-3.11,...,-8.64,-4.04,-2.55,-1.92,-0.98,0.44,0.73,-5.02,-7.99,3.54
2010-02-28,-6.6,3.26,13.7,0.25,4.43,14.52,3.15,-3.01,3.69,6.07,...,7.9,7.25,5.89,4.32,4.5,1.81,3.51,7.32,3.91,1.06
2010-03-31,2.04,4.46,8.2,6.11,6.12,6.57,14.13,5.47,2.13,11.39,...,6.06,8.11,5.99,6.14,8.27,8.46,8.1,14.73,6.96,9.82
2010-04-30,-10.17,0.18,-1.73,-1.76,-2.51,-0.9,15.13,2.31,1.21,4.23,...,1.84,3.82,3.2,1.85,7.31,3.05,-1.67,6.05,-1.45,3.81
2010-05-31,-18.19,-3.86,-0.22,-3.88,-7.69,-5.69,-7.47,-10.35,-4.35,-5.67,...,-8.49,-5.98,-4.01,-5.83,-4.76,-11.0,-5.88,-11.24,-8.15,-10.95


In [51]:
R_m = fama_french['Mkt-RF']
StockList = R.columns

# This isn't in the notebook, but the regression equation he gives doesn't make sense to me if I don't do it
for i in StockList:
    R[i] = R[i] - fama_french['RF']

Coefs = pd.DataFrame([la.lstsq(np.vstack((np.ones(T), R_m)).T, R[Stock], rcond=None)[0] for Stock in StockList], 
                     index=StockList, columns=['$\\alpha$','$\\beta$'])

display(Coefs)

Unnamed: 0,$\alpha$,$\beta$
Agric,-0.847564,1.111934
Food,0.127198,0.548272
Soda,0.538681,0.46169
Beer,0.396821,0.476605
Smoke,0.270367,0.66633
Toys,-0.202771,1.070275
Fun,0.155283,1.380038
Books,-0.72459,1.17282
Hshld,0.093934,0.574096
Clths,0.189957,0.980414


In [57]:
import statsmodels.formula.api as smf

df = pd.concat([R,fama_french],axis=1)

coefs_3fact = pd.DataFrame(index=StockList,columns=['$\\alpha$','$\\beta_{Mkt-RF}$','$\\beta_{SBM}$','$\\beta_{HML}$'])

for i in StockList:
    res = smf.ols("{} ~ Q('Mkt-RF') + SMB + HML".format(i),df).fit()

    for j in range(len(coefs_3fact.columns)):
        coefs_3fact.loc[i,coefs_3fact.columns[j]] = res.params[j]

coefs_3fact

Unnamed: 0,$\alpha$,$\beta_{Mkt-RF}$,$\beta_{SBM}$,$\beta_{HML}$
Agric,-0.876085,1.13275,-0.080573,-0.0148731
Food,0.0383621,0.620565,-0.29491,-0.0021256
Soda,0.329305,0.612293,-0.578581,-0.122168
Beer,0.217049,0.596961,-0.444056,-0.157919
Smoke,0.00815736,0.87711,-0.855142,-0.0216827
Toys,-0.245601,1.05524,0.151567,-0.296466
Fun,0.131445,1.3083,0.4575,-0.540299
Books,-0.539114,1.055,0.420755,0.200534
Hshld,-0.0102829,0.653689,-0.315254,-0.033389
Clths,0.237911,0.927782,0.239319,-0.0794394


In [59]:
coefs_5fact = pd.DataFrame(index=StockList,columns=['$\\alpha$','$\\beta_{Mkt-RF}$','$\\beta_{SBM}$','$\\beta_{HML}$','$\\beta_{RMW}$','$\\beta_{CMA}$'])

for i in StockList:
    res = smf.ols("{} ~ Q('Mkt-RF') + SMB + HML + RMW + CMA".format(i),df).fit()

    for j in range(len(coefs_5fact.columns)):
        coefs_5fact.loc[i,coefs_5fact.columns[j]] = res.params[j]

coefs_5fact

Unnamed: 0,$\alpha$,$\beta_{Mkt-RF}$,$\beta_{SBM}$,$\beta_{HML}$,$\beta_{RMW}$,$\beta_{CMA}$
Agric,-0.79247,1.1077,-0.181001,-0.020934,-0.440279,0.00671299
Food,-0.0918091,0.6612,-0.233267,-0.189932,0.280565,0.462495
Soda,0.101459,0.683244,-0.460599,-0.429895,0.534198,0.759172
Beer,0.041033,0.651397,-0.331224,-0.350472,0.505399,0.478162
Smoke,-0.290117,0.969318,-0.661717,-0.343352,0.865942,0.79919
Toys,-0.404548,1.10357,0.301584,-0.370109,0.662136,0.191444
Fun,0.272149,1.26366,0.432056,-0.251514,-0.127192,-0.705606
Books,-0.622079,1.0809,0.459564,0.0798359,0.176769,0.297168
Hshld,-0.169151,0.703362,-0.244648,-0.272235,0.322634,0.587562
Clths,0.0986984,0.969625,0.398583,-0.0858862,0.699084,0.0284772
