In [1]:
##### Parameters

# Characteristic for the second sort (the first sort is always market equity).
# Valid choices:
#   "Book to market ratio", "Investment rate", "Momentum", "Short term reversal",
#   "Long term reversal", "Accruals", "Beta", "Net equity issuance", "Variance",
#   "Residual variance"
char = "Book to market ratio"

# Date range, given as calendar years. The sample runs from January of
# start_yr through December of stop_yr.
start_yr = 1980
stop_yr  = 2023

# Fail fast on an inverted range instead of silently selecting an empty sample.
if start_yr > stop_yr:
    raise ValueError(
        f"start_yr ({start_yr}) must not be later than stop_yr ({stop_yr})"
    )

In [2]:
from pandas_datareader import DataReader as pdr
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import statsmodels.api as sm
import plotly.express as px

In [3]:
# Maps each second-sort characteristic label to the name of the matching
# portfolio dataset that is requested from the "famafrench" data source.
# Label and dataset name are kept side by side in one literal so the pairing
# cannot be broken by reordering or sorting one of two parallel lists.
charsDict = {
    "Book to market ratio": '25_Portfolios_5x5',
    "Investment rate": '25_Portfolios_ME_INV_5x5',
    "Momentum": '25_Portfolios_ME_Prior_12_2',
    "Short term reversal": '25_Portfolios_ME_Prior_1_0',
    "Long term reversal": '25_Portfolios_ME_Prior_60_13',
    "Accruals": '25_Portfolios_ME_AC_5x5',
    "Beta": '25_Portfolios_ME_BETA_5x5',
    "Net equity issuance": '25_Portfolios_ME_NI_5x5',
    "Variance": '25_Portfolios_ME_VAR_5x5',
    "Residual variance": '25_Portfolios_ME_RESVAR_5x5',
}

# Dataset names, in the same order as the mapping above.
files = list(charsDict.values())

# Characteristic labels in alphabetical order.
chars = sorted(charsDict)

In [4]:
# Monthly market excess return and risk-free rate from French's data library.
# The first table of the dataset is used and divided by 100.
ff = pdr('F-F_Research_Data_Factors', 'famafrench', start=1900)[0] / 100

# Portfolio returns for the chosen characteristic, converted to excess returns
# by subtracting the risk-free rate date by date.
RETS = pdr(charsDict[char], "famafrench", start=1926)[0] / 100
RETS = RETS.sub(ff.RF, axis="index")

# For the net-equity-issuance sort, discard every column whose second name
# token starts with "Z" or "Ne".
if char == "Net equity issuance":
    unwanted = [
        col for col in RETS.columns
        if col.split(" ")[1].startswith(("Z", "Ne"))
    ]
    RETS = RETS.drop(columns=unwanted)

# Restrict the sample to January of start_yr through December of stop_yr.
start = f"{start_yr}-01"
stop = f"{stop_yr}-12"
df = RETS.loc[start:stop].copy()

# Read the two sort-variable prefixes off the second column's name by
# stripping the last character of each token.
s = df.columns[1].split(" ")
s1, s2 = s[0][:-1], s[1][:-1]   # market equity, other characteristic

def splitName(x):
    """Turn a portfolio column name into a uniform (size, characteristic) pair.

    The name is split on single spaces and only the first two tokens are used.

    * Size token: kept if it starts with "M"; otherwise it becomes "ME1"
      when it starts with "S" and "ME5" in every other case.
    * Characteristic token: kept if it starts with the same letter as the
      module-level prefix ``s2``; otherwise it becomes ``s2 + "1"`` when it
      starts with "L" and ``s2 + "5"`` in every other case.

    Returns the two resulting labels as a tuple.
    """
    tokens = x.split(" ")
    size_label, char_label = tokens[0], tokens[1]

    if size_label[0] != "M":
        size_label = "ME1" if size_label[0] == "S" else "ME5"

    if char_label[0] != s2[0]:
        char_label = s2 + ("1" if char_label[0] == "L" else "5")

    return size_label, char_label

# Normalized (size, characteristic) label pair for every portfolio column,
# plus a flat "size-characteristic" string label for each.
splits = [splitName(x) for x in df.columns]
port_labels = [a + '-' + b for a, b in splits]

# Annualized mean and standard deviation of the monthly excess returns.
mns = 12 * df.mean()
mns.index = port_labels
sds = np.sqrt(12) * df.std()
sds.index = port_labels

# 5x5 table calculations

df.columns = pd.MultiIndex.from_tuples(splits)

# One row per portfolio. The row index is the two-level column index of df,
# which is what lets the results be unstacked into tables below.
regr = pd.DataFrame(dtype=float, index=df.columns, columns=['alpha', 'beta', 'tstat', 'pval', 'empirical', 'theoretical'])
df['Mkt-RF'] = ff['Mkt-RF']

# The regressors (constant + market excess return) and the market mean are
# identical for every portfolio, so they are computed once outside the loop.
X_mkt = sm.add_constant(df['Mkt-RF'])
mkt_mean = df['Mkt-RF'].mean()

# Regress each portfolio's excess return on the market excess return.
for port in regr.index:
    result = sm.OLS(df[port], X_mkt).fit()
    regr.loc[port, 'alpha'] = result.params['const']
    regr.loc[port, 'beta'] = result.params['Mkt-RF']
    regr.loc[port, 'tstat'] = result.tvalues['const']
    regr.loc[port, 'pval'] = result.pvalues['const']
    # annualized sample mean of the portfolio's excess return
    regr.loc[port, 'empirical'] = 12 * df[port].mean()
    # annualized mean implied by beta times the mean market excess return
    regr.loc[port, 'theoretical'] = 12 * result.params['Mkt-RF'] * mkt_mean

# 5 x 5 tables: first index level on the rows, second on the columns

alphas = 12*regr.alpha.unstack()   # annualized
tstats = regr.tstat.unstack()


Alpha Heat Map

In [5]:
# Heat map of the annualized alphas: the index of `alphas` (market-equity
# groups) is on the y-axis and its columns (groups of the second-sort
# characteristic) are on the x-axis. Cell text shows the value as a percentage.
trace = go.Heatmap(
    x=alphas.columns.to_list(),
    y=alphas.index.to_list(),
    z=alphas,
    colorscale='Viridis',
    texttemplate="%{z:.1%}"
)
alpha_tbl = go.Figure(trace)
# Title and axis labels so the figure can be read on its own.
alpha_tbl.update_layout(
    title=f"Annualized alpha: market equity x {char}",
    xaxis_title=char,
    yaxis_title="Market equity",
)
alpha_tbl.show()

t-statistic Heat Map

In [6]:
# Heat map of the t-statistics of the regression intercepts: the index of
# `tstats` (market-equity groups) is on the y-axis and its columns (groups of
# the second-sort characteristic) are on the x-axis.
trace = go.Heatmap(
    x=tstats.columns.to_list(),
    y=tstats.index.to_list(),
    z=tstats,
    colorscale='Viridis',
    texttemplate="%{z:.2f}"
)
tstat_tbl = go.Figure(trace)
# Title and axis labels so the figure can be read on its own.
tstat_tbl.update_layout(
    title=f"t-statistic of alpha: market equity x {char}",
    xaxis_title=char,
    yaxis_title="Market equity",
)
tstat_tbl.show()

Plot of Betas and Average Excess Returns

In [7]:
# Attach the flat string labels held in mns.index so that each point can be
# identified on hover.
regr['port'] = mns.index

# Empirical mean excess return against beta, with a fitted OLS trendline.
fig1 = px.scatter(
    regr,
    x="beta",
    y="empirical",
    hover_name="port",
    trendline="ols",
)
fig1.update_traces(
    name="Empirical",
    showlegend=True,
    marker=dict(size=12, line=dict(width=2, color="DarkSlateGrey")),
    selector=dict(mode="markers"),
)
# Name the trendline as well so the legend distinguishes it from the points.
fig1.update_traces(
    name="Empirical (OLS fit)",
    showlegend=True,
    selector=dict(mode="lines"),
)

# Theoretical mean excess return (beta times the mean market excess return)
# against beta, drawn in green.
fig2 = px.scatter(regr, x="beta", y="theoretical", hover_name="port")
fig2.update_traces(
    name="Theoretical",
    showlegend=True,
    marker=dict(size=12, color='green', line=dict(width=2, color="DarkSlateGrey")),
    selector=dict(mode="markers"),
)

# Overlay both sets of traces in a single figure.
fig = go.Figure(data=fig1.data + fig2.data)

fig.layout.xaxis["title"] = "Beta"
fig.layout.yaxis["title"] = "Mean Excess Return (Annualized)"
fig.update_yaxes(tickformat=".1%")
fig.update_xaxes(tickformat=".2")
fig.show()