# Equity asset pricing models

In [40]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from pandas_datareader import DataReader as pdr
import plotly.graph_objects as go
import plotly.express as px


In [66]:
# Pull data
ff3 = pdr("F-F_Research_Data_Factors", "famafrench", start=1926)[0]/100
ff5 = pdr("F-F_Research_Data_5_Factors_2x3", "famafrench", start=1964)[0]/100

In [38]:
# Cumulative returns
cum_rets = ff3 +1 
vars = ['Mkt-RF', 'SMB', 'HML']
for v in vars:
    cum_rets[v] = cum_rets[v].cumprod()


In [57]:
# Plot cumulative returns
fig = go.Figure()
cum_rets.index = cum_rets.index.astype(str)
for v in vars:
    trace  = go.Scatter(x=cum_rets.index, y=cum_rets[v], mode="lines", name=v,
    hovertemplate= v + "<br>Date: %{x}<br>Accumulation: %{y:.0f}<extra></extra>")
    fig.add_trace(trace)
fig.update_layout(
    xaxis_title="Date",
    yaxis_title="Accumulation from $1",
    yaxis_tickformat=",.0f",
    width=1000,
    height=460,
    legend=dict(
        yanchor="top", 
        y=0.99, 
        xanchor="left", 
        x=0.01
    ),
    yaxis_type="log",
  )
fig.show()


In [60]:
results = sm.OLS(ff3.HML, sm.add_constant(ff3['Mkt-RF'])).fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                    HML   R-squared:                       0.054
Model:                            OLS   Adj. R-squared:                  0.053
Method:                 Least Squares   F-statistic:                     65.27
Date:                Thu, 15 Dec 2022   Prob (F-statistic):           1.63e-15
Time:                        13:35:35   Log-Likelihood:                -3077.6
No. Observations:                1156   AIC:                             6159.
Df Residuals:                    1154   BIC:                             6169.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.2557      0.103      2.486      0.0

In [61]:
results = sm.OLS(ff3.SMB, sm.add_constant(ff3['Mkt-RF'])).fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                    SMB   R-squared:                       0.100
Model:                            OLS   Adj. R-squared:                  0.099
Method:                 Least Squares   F-statistic:                     128.0
Date:                Thu, 15 Dec 2022   Prob (F-statistic):           3.17e-28
Time:                        13:35:38   Log-Likelihood:                -2912.8
No. Observations:                1156   AIC:                             5830.
Df Residuals:                    1154   BIC:                             5840.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0676      0.089      0.758      0.4

## Momentum

In [80]:
umd = pdr('F-F_Momentum_Factor','famafrench', start=1900)[0]/100
ffc = ff3.merge(umd,how='inner', left_index=True,right_index=True)
ffc.columns = ['Mkt-RF', 'SMB', 'HML', 'RF', 'WML']

In [81]:
# Cumulative returns
cum_rets = ffc +1 
vars = ['Mkt-RF', 'SMB', 'HML','WML']
for v in vars:
    cum_rets[v] = cum_rets[v].cumprod()

In [82]:
# Plot cumulative returns
fig = go.Figure()
cum_rets.index = cum_rets.index.astype(str)
for v in vars:
    trace  = go.Scatter(x=cum_rets.index, y=cum_rets[v], mode="lines", name=v,
    hovertemplate= v + "<br>Date: %{x}<br>Accumulation: %{y:.0f}<extra></extra>")
    fig.add_trace(trace)
fig.update_layout(
    xaxis_title="Date",
    yaxis_title="Accumulation from $1",
    yaxis_tickformat=",.0f",
    width=1000,
    height=460,
    legend=dict(
        yanchor="top", 
        y=0.99, 
        xanchor="left", 
        x=0.01
    ),
    yaxis_type="log",
  )
fig.show()

In [83]:
# Estimate CAPM alpha
results = sm.OLS(ffc.WML, sm.add_constant(ffc['Mkt-RF'])).fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                    WML   R-squared:                       0.118
Model:                            OLS   Adj. R-squared:                  0.117
Method:                 Least Squares   F-statistic:                     153.7
Date:                Thu, 15 Dec 2022   Prob (F-statistic):           3.21e-33
Time:                        15:11:30   Log-Likelihood:                 1958.6
No. Observations:                1150   AIC:                            -3913.
Df Residuals:                    1148   BIC:                            -3903.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0085      0.001      6.466      0.0

In [85]:
# Estimate FF3 alpha
results = sm.OLS(ffc.WML, sm.add_constant(ffc[['Mkt-RF','SMB','HML']])).fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                    WML   R-squared:                       0.238
Model:                            OLS   Adj. R-squared:                  0.236
Method:                 Least Squares   F-statistic:                     119.4
Date:                Thu, 15 Dec 2022   Prob (F-statistic):           2.75e-67
Time:                        15:11:59   Log-Likelihood:                 2042.8
No. Observations:                1150   AIC:                            -4078.
Df Residuals:                    1146   BIC:                            -4057.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0097      0.001      7.933      0.0

## Fama-French 5-factor model

In [86]:
# Cumulative returns
cum_rets = ff5 +1 
vars = ['Mkt-RF', 'SMB', 'HML','RMW','CMA']
for v in vars:
    cum_rets[v] = cum_rets[v].cumprod()

In [89]:
# Plot cumulative returns
fig = go.Figure()
cum_rets.index = cum_rets.index.astype(str)
for v in vars:
    trace  = go.Scatter(x=cum_rets.index, y=cum_rets[v], mode="lines", name=v,
    hovertemplate= v + "<br>Date: %{x}<br>Accumulation: %{y:.0f}<extra></extra>")
    fig.add_trace(trace)
fig.update_layout(
    xaxis_title="Date",
    yaxis_title="Accumulation from $1",
    yaxis_tickformat=",.0f",
    width=1000,
    height=460,
    legend=dict(
        yanchor="top", 
        y=0.99, 
        xanchor="left", 
        x=0.01
    ),
    yaxis_type="log",
  )
fig.show()

In [93]:
# Estimate RMW alphas
results = sm.OLS(ff5.RMW, sm.add_constant(ff5['Mkt-RF'])).fit()
print(results.summary())

results = sm.OLS(ff5.RMW, sm.add_constant(ff5[['Mkt-RF','SMB','HML']])).fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                    RMW   R-squared:                       0.035
Model:                            OLS   Adj. R-squared:                  0.033
Method:                 Least Squares   F-statistic:                     25.39
Date:                Thu, 15 Dec 2022   Prob (F-statistic):           5.97e-07
Time:                        15:25:15   Log-Likelihood:                 1698.7
No. Observations:                 706   AIC:                            -3393.
Df Residuals:                     704   BIC:                            -3384.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0032      0.001      3.898      0.0

In [94]:
# Estimate FF3 alpha
results = sm.OLS(ff5.CMA, sm.add_constant(ff5['Mkt-RF'])).fit()
print(results.summary())

results = sm.OLS(ff5.CMA, sm.add_constant(ff5[['Mkt-RF','SMB','HML']])).fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                    CMA   R-squared:                       0.133
Model:                            OLS   Adj. R-squared:                  0.132
Method:                 Least Squares   F-statistic:                     108.4
Date:                Thu, 15 Dec 2022   Prob (F-statistic):           1.03e-23
Time:                        15:25:21   Log-Likelihood:                 1791.8
No. Observations:                 706   AIC:                            -3580.
Df Residuals:                     704   BIC:                            -3570.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0039      0.001      5.330      0.0

# Factor models and expected returns
Estimate factor risk premia

In [9]:
# Pull data
ff3 = pdr("F-F_Research_Data_Factors", "famafrench", start=1926)
ff5 = pdr("F-F_Research_Data_5_Factors_2x3", "famafrench", start=1964)
dgs = pdr("DGS10", "fred", start=1920)

# Estimate factor risk premia as time-series average returns
# annual 3 factors from 1926
ff3 = ff3[1]
fprem = ff3[["Mkt-RF", "SMB", "HML"]].mean()

# add annual 5 factors from 1964
fprem = pd.concat((fprem, ff5[1][["RMW", "CMA"]].mean()))
fprem = fprem.round(2)
factors = fprem.index.to_list()
fprem

Mkt-RF    8.87
SMB       3.02
HML       4.22
RMW       3.49
CMA       3.44
dtype: float64

Estimate factor loadings

In [12]:
# Pull stock return data
TICKER = 'IBM'
ret = pdr(TICKER, "yahoo", start=2016)
ret = ret["Adj Close"].resample("M").last()
ret = ret.pct_change()
ret.index = ret.index.to_period("M")
ret.name = "ret"
ret

Date
2016-01         NaN
2016-02    0.060729
2016-03    0.155842
2016-04   -0.036382
2016-05    0.063613
             ...   
2022-08   -0.005517
2022-09   -0.075049
2022-10    0.163959
2022-11    0.089560
2022-12   -0.000201
Freq: M, Name: ret, Length: 84, dtype: float64

In [None]:
# Pull factor returns and risk-free rate for last 60 months
ff = ff5[0].iloc[-60:] / 100
# rf = dgs.iloc[-1].item()
# rf = round(rf, 2)
ff

In [None]:
# Combine with stock returns
df = ff.join(ret, how="left")
# Calculate excess returns
df["ret"] = df.ret - df.RF
df = df[factors + ["ret"]].reset_index()
df.columns = ["date"] + factors + ["ret"]
df["date"] = df.date.astype(str)
df

In [15]:
# Estimate factor loadings
result = sm.OLS(df.ret, sm.add_constant(df[factors])).fit()
betas = result.params[1:]
betas = np.round(betas, 2)
betas

Mkt-RF    0.84
SMB       0.14
HML       0.05
RMW       0.41
CMA       0.63
dtype: float64

In [16]:
fprem

Mkt-RF    8.87
SMB       3.02
HML       4.22
RMW       3.49
CMA       3.44
dtype: float64

In [19]:
# Risk-free rate (in %)
rf = ff.RF[-1]*100
rf

0.22999999999999998

In [20]:
# Expected return
expret = rf + betas @ fprem
expret

11.9127

# Fama-MacBeth Characteristic Regressions
(Based on Lewellen's Critical Finance Review article)

In [95]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import urllib.parse
from sqlalchemy import create_engine
!pip install pymssql




[notice] A new release of pip available: 22.2.1 -> 22.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [96]:
chars = [
    "bm",
    "ep",
    "cashpr",
    "dy",
    "lev",
    "sp",
    "roic",
    "rd_sale",
    "rd_mve",
    "agr",
    "gma",
    "chcsho",
    "lgr",
    "acc",
    "pctacc",
    "cfp",
    "absacc",
    "age",
    "chinv",
    "hire",
    "sgr",
    "pchsale_pchinvt",
    "pchsale_pchrect",
    "pchgm_pchsale",
    "pchsale_pchxsga",
    "depr",
    "pchdepr",
    "invest",
    "egr",
    "grcapx",
    "tang",
    "sin",
    "currat",
    "pchcurrat",
    "quick",
    "pchquick",
    "salecash",
    "salerec",
    "saleinv",
    "pchsaleinv",
    "cashdebt",
    "realestate",
    "divi",
    "divo",
    "securedind",
    "secured",
    "convind",
    "grltnoa",
    "rd",
    "operprof",
    "ps",
    "chpmia",
    "chatoia",
    "chempia",
    "bm_ia",
    "pchcapx_ia",
    "tb",
    "cfp_ia",
    "mve_ia",
    "herf",
    "orgcap",
    "mve",
    "chtx",
    "roaq",
    "roeq",
    "rsup",
    "stdacc",
    "roavol",
    "stdcf",
    "cash",
    "cinvest",
    "nincr",
    "sue",
    "aeavol",
    "ear",
    "ms",
    "disp",
    "chfeps",
    "fgr5yr",
    "nanalyst",
    "sfe",
    "chnanalyst",
    "mom6m",
    "mom12m",
    "mom36m",
    "mom1m",
    "dolvol",
    "chmom",
    "turn",
    "ipo",
    "indmom",
    "maxret",
    "retvol",
    "baspread",
    "std_dolvol",
    "std_turn",
    "ill",
    "zerotrade",
    "beta",
    "betasq",
    "pricedelay",
    "idiovol",
]

intchars = [
    "age",
    "sin",
    "divi",
    "divo",
    "securedind",
    "convind",
    "rd",
    "ps",
    "nincr",
    "ms",
    "ipo",
]

chars = [x for x in chars if x not in intchars]
chars = np.sort(chars)
labels = ["Lo 20", "Qnt 2", "Qnt 3", "Qnt 4", "Hi 20"]

server = "eu-az-sql-serv1.database.windows.net:1433"
database = "dgn022k6348dcyh"
username = "uhgrque4d8p77hf"
password = "FfWrgFcK$Vnk@9BAgKH4nbEDF"
password = urllib.parse.quote_plus(password)

string = "mssql+pymssql://" + username + ":" + password + "@" + server + "/" + database
conn = create_engine(string).connect()


Pull the data from SQL server

In [97]:
char = 'mve, beta, mom12m, bm'        # example characteristics

df = pd.read_sql(
    " select ticker, date, ret, " + char + " from ghz where date>='2000-01-01' ", conn
)
df = df.dropna()

In [99]:
len(df.index)
df.date.min()
df.head()

Unnamed: 0,ticker,date,ret,mve,beta,mom12m,bm
0,CELG,2000-04-28,0.418079,14.573799,1.292682,9.34375,-0.014615
1,APW,2000-04-28,0.004386,13.922989,1.242077,-0.110433,0.350028
2,ACO,2000-04-28,0.065041,12.835614,0.943564,0.604415,0.651691
3,XRAY,2000-04-28,0.024229,14.204507,0.612569,0.109957,0.305512
4,DCN,2000-04-28,0.077605,15.342823,1.174181,-0.415371,0.433959


### Run cross-sectional regressions and take time-series average

In [104]:
chars = ['beta','mve', 'bm','mom12m']
coefs = df.groupby('date').apply(lambda d: sm.OLS(d.ret,sm.add_constant(d[chars])).fit().params) 

In [105]:
T = len(coefs)
stats = pd.DataFrame(dtype=float,index=coefs.columns,columns=['mean','tstat','pval'])
for coef in coefs.columns :
    tsreg = sm.OLS(coefs[coef],np.ones(T)).fit()

    stats.loc[coef,'mean'] = coefs[coef].mean()
    stats.loc[coef,'tstat'] = tsreg.tvalues['const']
    stats.loc[coef,'pval'] = tsreg.pvalues['const']
stats.round(4)

Unnamed: 0,mean,tstat,pval
const,0.0269,3.3762,0.0008
beta,-0.0016,-0.5857,0.5586
mve,-0.0011,-2.2309,0.0265
bm,-0.0009,-0.5383,0.5909
mom12m,-0.0004,-0.1621,0.8713


### Multiply current characteristics to get expected return estimate

In [119]:
TICKER='IBM'
ds = df[df.ticker==TICKER]
last = ds.sort_values(by='date').tail(1)
last

Unnamed: 0,ticker,date,ret,mve,beta,mom12m,bm
445475,IBM,2021-12-31,0.141418,18.469616,0.847302,0.050521,0.183301


In [152]:
lag_char = last[chars]
ts_avgs = stats['mean']
excess_return = ts_avgs['const'] + last[chars] @ ts_avgs[chars]
excess_return = excess_return*12*100
excess_return

445475    5.452153
dtype: float64

In [153]:
exp_ret = rf + excess_return
exp_ret.values

array([5.68215321])