# Market Model Regression

In [9]:
import pandas as pd
import numpy as np
from pandas_datareader import DataReader as pdr
import statsmodels.api as sm
import plotly.graph_objects as go

# Value the data source uses to flag a missing observation
MISSING_CODE = -99.99

# Read the industry portfolios. [0] selects the first table returned by the
# reader (presumably the monthly return series -- confirm against the source).
ff48 = pdr("48_Industry_Portfolios", "famafrench", start=1900)[0]

# Recode missing values as NaN in a single vectorised pass (instead of looping
# over columns), then divide by 100 (values are presumably quoted in percent).
ff48 = ff48.replace(MISSING_CODE, np.nan) / 100

# Pull the factor table and merge the 'Mkt-RF' and 'RF' columns onto the
# industry returns (index-aligned left join).
ff3 = pdr('F-F_Research_Data_Factors','famafrench', start=1900)[0]/100
df = ff48.join(ff3[['Mkt-RF','RF']])
df = df.loc['1970-01':].copy()  # There is missing data prior to 1970

In [11]:
# Inspect the column labels. Several industry names are padded with trailing
# spaces (e.g. 'Util ', 'Food '), so they must be typed exactly when selecting.
df.columns

Index(['Agric', 'Food ', 'Soda ', 'Beer ', 'Smoke', 'Toys ', 'Fun  ', 'Books',
       'Hshld', 'Clths', 'Hlth ', 'MedEq', 'Drugs', 'Chems', 'Rubbr', 'Txtls',
       'BldMt', 'Cnstr', 'Steel', 'FabPr', 'Mach ', 'ElcEq', 'Autos', 'Aero ',
       'Ships', 'Guns ', 'Gold ', 'Mines', 'Coal ', 'Oil  ', 'Util ', 'Telcm',
       'PerSv', 'BusSv', 'Comps', 'Chips', 'LabEq', 'Paper', 'Boxes', 'Trans',
       'Whlsl', 'Rtail', 'Meals', 'Banks', 'Insur', 'RlEst', 'Fin  ', 'Other',
       'Mkt-RF', 'RF'],
      dtype='object')

In [10]:
## Market model: industry return in excess of RF regressed on Mkt-RF plus a constant
varname = 'Util '  # the label carries the trailing space used in the data
y = df[varname].sub(df['RF'])
X = sm.add_constant(df['Mkt-RF'])
# missing='drop' discards observations where y or X is NaN before fitting
mm = sm.OLS(endog=y, exog=X, missing='drop').fit()
print(mm.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.332
Model:                            OLS   Adj. R-squared:                  0.331
Method:                 Least Squares   F-statistic:                     313.2
Date:                Tue, 15 Nov 2022   Prob (F-statistic):           3.26e-57
Time:                        10:22:41   Log-Likelihood:                 1246.8
No. Observations:                 633   AIC:                            -2490.
Df Residuals:                     631   BIC:                            -2481.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0025      0.001      1.849      0.0

In [17]:
## Market model: industry return in excess of RF regressed on Mkt-RF plus a constant
varname = 'Autos'
y = df[varname].sub(df['RF'])
X = sm.add_constant(df['Mkt-RF'])
# missing='drop' discards observations where y or X is NaN before fitting
mm = sm.OLS(endog=y, exog=X, missing='drop').fit()
print(mm.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.535
Model:                            OLS   Adj. R-squared:                  0.534
Method:                 Least Squares   F-statistic:                     725.4
Date:                Tue, 15 Nov 2022   Prob (F-statistic):          5.93e-107
Time:                        10:25:03   Log-Likelihood:                 957.86
No. Observations:                 633   AIC:                            -1912.
Df Residuals:                     631   BIC:                            -1903.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const       8.597e-05      0.002      0.040      0.9

In [29]:
def params(varname):
    """Fit the market model for one column of the notebook-level ``df``.

    Regresses ``df[varname] - df['RF']`` on a constant and ``df['Mkt-RF']``
    by OLS, dropping observations with missing values.

    Parameters
    ----------
    varname : str
        Column label in ``df`` (exact match, including any padding spaces).

    Returns
    -------
    numpy.ndarray
        The fitted coefficients as a plain array: constant first, then the
        coefficient on ``Mkt-RF``.
    """
    excess_ret = df[varname] - df['RF']
    regressors = sm.add_constant(df['Mkt-RF'])
    fit = sm.OLS(excess_ret, regressors, missing='drop').fit()
    return fit.params.values
# One row of [alpha, beta] per industry, built in a single pass over the columns
df_beta = pd.DataFrame(
    [params(c) for c in ff48.columns],
    index=ff48.columns,
    columns=['alpha', 'beta'],
    dtype=float,
)

In [30]:
# Full-sample market-model estimates: one row per industry column of ff48,
# with the regression constant in 'alpha' and the Mkt-RF coefficient in 'beta'.
df_beta

Unnamed: 0,alpha,beta
Agric,0.001613,0.844771
Food,0.003292,0.65051
Soda,0.002821,0.799203
Beer,0.003371,0.722205
Smoke,0.006245,0.635587
Toys,-0.003034,1.178129
Fun,0.001189,1.338614
Books,-0.00128,1.081942
Hshld,0.000399,0.773439
Clths,0.000556,1.114118


## Let's split the sample into two halves and estimate alphas and betas on each half

In [63]:
# Same market-model helper, but the data frame is now passed in explicitly.
# NOTE: this redefinition replaces the earlier one-argument params(varname).
def params(frame, varname):
    """Fit the market model for one column of ``frame``.

    Regresses ``frame[varname] - frame['RF']`` on a constant and
    ``frame['Mkt-RF']`` by OLS, dropping observations with missing values.

    Parameters
    ----------
    frame : pandas.DataFrame
        Data containing ``varname``, ``'RF'`` and ``'Mkt-RF'`` columns.
    varname : str
        Column label of the dependent series (exact match).

    Returns
    -------
    numpy.ndarray
        The fitted coefficients as a plain array: constant first, then the
        coefficient on ``Mkt-RF``.
    """
    excess_ret = frame[varname] - frame['RF']
    regressors = sm.add_constant(frame['Mkt-RF'])
    fit = sm.OLS(excess_ret, regressors, missing='drop').fit()
    return fit.params.values

# Find the halfway mark: its position and the corresponding index label
T = len(df.index)
mid = T // 2
halfway = df.index[mid]

# Split sample by position. The first half runs through `halfway` inclusive and
# the second half starts at the next observation. Positional slicing selects
# the same rows as label slicing on a sorted, unique index, without relying on
# `halfway + 1` being defined for the index type.
df_first_half = df.iloc[:mid + 1]
df_second_half = df.iloc[mid + 1:]

# Output dataframe will have two sets of parameters:
# suffix _0 = first-half estimates, suffix _1 = second-half estimates
df_beta = pd.DataFrame(index=ff48.columns, columns = ['alpha_0','beta_0','alpha_1','beta_1'],dtype=float)
for c in ff48.columns:
    # First half regression
    df_beta.loc[c,['alpha_0','beta_0']] = params(df_first_half,c)

    # Second half regression
    df_beta.loc[c,['alpha_1','beta_1']] = params(df_second_half,c)

### Compare the persistence of alpha to the persistence of beta

In [108]:
# Alpha regression: second-half alpha on first-half alpha.
# The regressor is used without a constant, so the fit is forced through the
# origin and statsmodels reports the uncentered R-squared. (The previous
# add_constant assignment was immediately overwritten and had no effect.)
y = df_beta['alpha_1']
X = df_beta['alpha_0']
results = sm.OLS(y, X, missing='drop').fit()
print(results.summary())

                                 OLS Regression Results                                
Dep. Variable:                alpha_1   R-squared (uncentered):                   0.151
Model:                            OLS   Adj. R-squared (uncentered):              0.133
Method:                 Least Squares   F-statistic:                              8.389
Date:                Tue, 15 Nov 2022   Prob (F-statistic):                     0.00571
Time:                        12:45:08   Log-Likelihood:                          217.50
No. Observations:                  48   AIC:                                     -433.0
Df Residuals:                      47   BIC:                                     -431.1
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [109]:
# Scatter plot: second-half alpha against first-half alpha, with a 45-degree
# reference line and the fitted line from the alpha regression in `results`.

# Name the marker trace so the legend does not fall back to a default label
trace  = go.Scatter(x=df_beta['alpha_0'], y=df_beta['alpha_1'], mode="markers", name='Alphas')

# Common bounds so both axes and both reference lines cover the same span
minval = np.min([df_beta['alpha_1'].min(), df_beta['alpha_0'].min()])
maxval = np.max([df_beta['alpha_1'].max(), df_beta['alpha_0'].max()])
grid = np.linspace(minval, maxval, 20)

trace_45 = go.Scatter(x=grid, y=grid, mode='lines', name='45-degree line')
# The alpha regression is fit without a constant, so the prediction is slope * x
ols_fit = results.params['alpha_0'] * grid
trace_ols = go.Scatter(x=grid, y=ols_fit, mode='lines', name='Predicted')

fig = go.Figure()
fig.add_trace(trace)
fig.add_trace(trace_45)
fig.add_trace(trace_ols)
# The 1.1 factor pads the range outward (assumes minval < 0 < maxval)
fig.update_xaxes(title='1st Half Alpha',tickformat=".2%", range=[1.1*minval, 1.1*maxval])
fig.update_yaxes(title='2nd Half Alpha',tickformat=".2%", range=[1.1*minval, 1.1*maxval])
fig.update_layout(title='Alpha Persistence')
fig.update_layout(legend=dict(yanchor="top", y =0.99, xanchor="left", x=0.01))
fig.show()

In [103]:
# Beta regression: second-half beta on a constant and first-half beta
y = df_beta['beta_1']
X = sm.add_constant(df_beta['beta_0'])
# missing='drop' discards rows where either beta is NaN before fitting
results = sm.OLS(endog=y, exog=X, missing='drop').fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                 beta_1   R-squared:                       0.249
Model:                            OLS   Adj. R-squared:                  0.233
Method:                 Least Squares   F-statistic:                     15.29
Date:                Tue, 15 Nov 2022   Prob (F-statistic):           0.000302
Time:                        12:44:10   Log-Likelihood:                -3.0123
No. Observations:                  48   AIC:                             10.02
Df Residuals:                      46   BIC:                             13.77
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0906      0.231      0.393      0.6

In [104]:
# Scatter plot: second-half beta against first-half beta, with a 45-degree
# reference line and the fitted line from the beta regression in `results`.

# The markers are betas, so label them as such in the legend
trace  = go.Scatter(x=df_beta['beta_0'], y=df_beta['beta_1'], mode="markers", name = 'Betas')

# Common bounds so both axes and both reference lines cover the same span
minval = np.min([df_beta['beta_1'].min(), df_beta['beta_0'].min()])
maxval = np.max([df_beta['beta_1'].max(), df_beta['beta_0'].max()])
grid = np.linspace(minval, maxval, 20)

trace_45 = go.Scatter(x=grid, y=grid, mode='lines', name='45-degree line')
# Look coefficients up by label: integer keys on a label-indexed Series are
# treated as positions only through a deprecated fallback.
ols_fit = results.params['const'] + results.params['beta_0'] * grid
trace_ols = go.Scatter(x=grid, y=ols_fit, mode='lines', name='Predicted')

fig = go.Figure()
fig.add_trace(trace)
fig.add_trace(trace_45)
fig.add_trace(trace_ols)
# The 0.9 / 1.1 factors pad the range outward (assumes minval and maxval are positive)
fig.update_xaxes(title='1st Half Beta',tickformat=".2f", range=[0.9*minval, 1.1*maxval])
fig.update_yaxes(title='2nd Half Beta',tickformat=".2f", range=[0.9*minval, 1.1*maxval])
fig.update_layout(title='Beta Persistence')
fig.update_layout(legend=dict(yanchor="top", y =0.99, xanchor="left", x=0.01))
fig.show()

In [94]:
# Shrunk beta vs. the raw first-half estimate: a weighted average of the
# first-half beta (weight 0.67) and 1 (weight 0.33), which pulls every
# estimate toward 1.
df_beta['beta_adj'] = 0.33 + 0.67 * df_beta['beta_0']

In [98]:
# Mean squared error when the raw first-half beta is used to predict the
# second-half beta
df_beta['beta_1'].sub(df_beta['beta_0']).pow(2).mean()

0.07968999708606787

In [100]:
# Mean squared error when the shrunk (adjusted) first-half beta is used to
# predict the second-half beta
df_beta['beta_1'].sub(df_beta['beta_adj']).pow(2).mean()

0.07355069002791492