# Fama-MacBeth Characteristic Regressions
(Based on Lewellen (2015), "The Cross-Section of Expected Stock Returns," *Critical Finance Review* 4(1).)

In [1]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import statsmodels.api as sm
from pandas_datareader import DataReader as pdr
from scipy import stats
from scipy.stats import zscore
# Render floats in displayed tables with thousands separators and 4 decimals.
pd.set_option('display.float_format', '{:,.4f}'.format)

### Pull data from Dropbox

In [2]:
# Stock panel: one row per ticker and date, with the return and characteristics.
STOCKS_URL = 'https://www.dropbox.com/s/w2vb22u6kh7ysvn/stocks.csv?dl=1'
df = pd.read_csv(STOCKS_URL)

# `date` is stored as YYYYMMDD; derive a monthly period to join monthly data on.
parsed_dates = pd.to_datetime(df['date'], format='%Y%m%d')
df['mdate'] = parsed_dates.dt.to_period('M')
df.head()

Unnamed: 0,ticker,date,ret,beta,mve,bm,mom12m,illiq,idiovol,mdate
0,GFGC,19880229,0.08,0.0519,8.7323,0.9222,-0.068,0.0,0.0337,1988-02
1,GFGC,19880331,-0.0763,0.0508,8.8093,0.9222,0.0708,0.0,0.0339,1988-03
2,GFGC,19880429,0.0306,0.047,8.7121,0.9222,0.1154,0.0,0.0336,1988-04
3,GFGC,19880531,0.0198,0.0363,8.7423,0.9222,0.0724,0.0,0.0332,1988-05
4,GFGC,19880630,-0.012,0.0243,8.7619,0.9222,0.1902,0.0,0.0334,1988-06


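Remove outliers: drop any row in which the return or one of the characteristics lies five or more standard deviations from its full-sample mean.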

In [3]:
cols_to_trim_on = ['ret', 'beta', 'mve', 'bm', 'mom12m', 'illiq','idiovol']

# Rows are kept only if every listed column has an absolute z-score below this.
MAX_ABS_Z = 5

# zscore is called through its own imported name rather than as `stats.zscore`:
# a later cell rebinds the global name `stats` to a results DataFrame, so the
# module-qualified call fails if this cell is re-run after that one.
within_bounds = (np.abs(zscore(df[cols_to_trim_on])) < MAX_ABS_Z).all(axis=1)
df = df[within_bounds]
df.ret.describe()

count   2,007,750.0000
mean            0.0069
std             0.1533
min            -0.9354
25%            -0.0657
50%             0.0000
75%             0.0712
max             0.9584
Name: ret, dtype: float64

Add the risk-free rate from the Fama-French factor data and compute excess returns



In [4]:
# Fama-French research factors; the first table of the download is scaled by
# 1/100 (presumably monthly data quoted in percent -- confirm against the source).
ff3 = pdr("F-F_Research_Data_Factors", "famafrench", start=1926)[0]/100

# Drop RF/xret left over from an earlier run of this cell before merging.
# Otherwise a second merge yields suffixed RF_x/RF_y columns and `df.RF` below
# fails. On a first run the drop is a no-op.
df = (
    df.drop(columns=['RF', 'xret'], errors='ignore')
      .merge(ff3.RF, how='inner', left_on='mdate', right_index=True)
)

# Excess return = raw return minus the risk-free rate matched on month.
df['xret'] = df.ret - df.RF

### Run cross-sectional regressions and take time-series averages

In [5]:
CHARS = ['beta','mve', 'bm','mom12m']


def _cross_section_params(cross_section):
    """Regress excess returns on CHARS (plus an intercept) within one date.

    Returns the fitted OLS coefficients as a Series: the intercept followed
    by one slope per characteristic.
    """
    regressors = sm.add_constant(cross_section[CHARS])
    fitted = sm.OLS(cross_section.xret, regressors).fit()
    return fitted.params


# One row of coefficients per value of `date`.
coefs = df.groupby('date').apply(_cross_section_params)

In [6]:
T = len(coefs)
intercept_only = np.ones(T)

# NOTE: the result is stored under the name `stats`, which the import cell
# also binds to scipy.stats; after this cell runs, `stats` is the DataFrame.
summary_rows = []
for coef in coefs.columns:
    # Regressing a coefficient series on a column of ones: the fitted constant
    # is the sample mean, and its t-stat / p-value test whether that mean is zero.
    tsreg = sm.OLS(coefs[coef], intercept_only).fit()
    summary_rows.append({
        'mean': coefs[coef].mean(),
        'tstat': tsreg.tvalues['const'],
        'pval': tsreg.pvalues['const'],
    })

stats = pd.DataFrame(
    summary_rows,
    index=coefs.columns,
    columns=['mean', 'tstat', 'pval'],
    dtype=float,
)
stats.round(4)

Unnamed: 0,mean,tstat,pval
const,-0.0151,-3.2798,0.0011
beta,-0.0028,-1.9697,0.0494
mve,0.0014,4.2606,0.0
bm,0.0046,6.7982,0.0
mom12m,0.0108,8.8704,0.0


### Multiply current characteristics by the average coefficients to get an expected return estimate

In [7]:
TICKER='IBM'
ds = df[df.ticker==TICKER]

# Fail here with a clear message instead of letting an empty selection surface
# later as an IndexError when the estimate is computed.
if ds.empty:
    raise ValueError(f'No observations found for ticker {TICKER!r}')

# Row with the largest `date` value for this ticker (kept as a one-row frame).
last = ds.sort_values(by='date').tail(1)
last

Unnamed: 0,ticker,date,ret,beta,mve,bm,mom12m,illiq,idiovol,mdate,RF,xret
181626,IBM,20221230,-0.0538,0.7553,18.718,0.1575,0.2265,0.0,0.0313,2022-12,0.0033,-0.0571


In [8]:
# Characteristics from the selected row, and the averaged regression coefficients.
lag_char = last[CHARS]
ts_avgs = stats['mean']

# Intercept plus characteristics times their average slopes. `lag_char` is a
# one-row frame, so the product is a length-1 Series; take its only element.
per_period_excess = ts_avgs['const'] + (lag_char @ ts_avgs[CHARS]).iloc[0]

# Scale by 12 to report the estimate as an annual figure.
excess_return = 12*per_period_excess
print(f'The estimated annual excess return for {TICKER} is:\t {excess_return: .1%}')

The estimated annual excess return for IBM is:	  15.5%


In [9]:
# Most recent risk-free rate (in decimal notation)
# Select the series by its column label rather than by integer position
# (positional `[0]` on a label-indexed Series is deprecated in pandas 2.x),
# and skip missing observations so a trailing NaN row is not reported.
dgs3mo = pdr("DGS3MO", "fred", start=1920)["DGS3MO"]
rf = dgs3mo.dropna().iloc[-1]/100
print(f'The current risk-free rate is:\t {rf: .1%}')

The current risk-free rate is:	  4.7%


In [10]:
# Total expected return: the estimated excess return on top of the risk-free rate.
expret = excess_return + rf

report_line = f'The estimated expected return for {TICKER} is:\t {expret: .1%}'
print(report_line)

The estimated expected return for IBM is:	  20.2%
