# EXAMPLE DATA

In [1]:
# Sample period, given as calendar years
start_yr, stop_yr = 1980, 2023

# Second sorting characteristic (the first sort is always market equity).
# Supported values:
#   "Book to market ratio", "Investment rate", "Momentum", "Short term reversal",
#   "Long term reversal", "Accruals", "Beta", "Net equity issuance", "Variance",
#   "Residual variance"
char = "Book to market ratio"

# GET DATA

In [2]:
from pandas_datareader import DataReader as pdr
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import statsmodels.api as sm
import plotly.express as px
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Build a dictionary from two parallel lists: files[i] is the data-library
# file name that holds the two-way sort on market equity and chars[i].
files = [
    '25_Portfolios_5x5',
    '25_Portfolios_ME_INV_5x5',
    '25_Portfolios_ME_Prior_12_2',
    '25_Portfolios_ME_Prior_1_0',
    '25_Portfolios_ME_Prior_60_13',
    '25_Portfolios_ME_AC_5x5',
    '25_Portfolios_ME_BETA_5x5',
    '25_Portfolios_ME_NI_5x5',
    '25_Portfolios_ME_VAR_5x5',
    '25_Portfolios_ME_RESVAR_5x5',
]

chars = [
    "Book to market ratio",
    "Investment rate",
    "Momentum",
    "Short term reversal",
    "Long term reversal",
    "Accruals",
    "Beta",
    "Net equity issuance",
    "Variance",
    "Residual variance",
]

# Pair names with files while the two lists are still in matching order.
charsDict = dict(zip(chars, files))

# Alphabetize the names only AFTER the mapping exists; sorting first would
# pair each characteristic with the wrong file.
chars.sort()

# Monthly factors and risk-free rate from French's data library.
# Only the first table of the download is used, and values are divided by 100.
ff = pdr('F-F_Research_Data_5_Factors_2x3', 'famafrench', start=1900)[0]
ff = ff / 100

# Portfolio returns for the chosen characteristic, same source and scaling,
# converted to excess returns by subtracting the risk-free rate date by date.
RETS = pdr(charsDict[char], "famafrench", start=1926)[0]
RETS = (RETS / 100).subtract(ff.RF, axis="index")

if char == "Net equity issuance":
    # This file carries extra columns whose second name token starts with
    # "Z" or "Ne" (presumably the zero / negative issuance groups -- confirm);
    # remove them so only the regular sorted portfolios remain.
    unwanted = [
        x
        for x in RETS.columns
        if x.split(" ")[1][0] == "Z" or x.split(" ")[1][0:2] == "Ne"
    ]
    RETS = RETS.drop(columns=unwanted)

# Trim the returns to the requested window: the "-01" month of start_yr
# through the "-12" month of stop_yr, selected by label.
start, stop = f"{start_yr}-01", f"{stop_yr}-12"
df = RETS.loc[start:stop].copy()  # copy so later relabeling leaves RETS untouched

# Work out the labels of the two sorting variables from the name of the
# second column: split it on the space and strip the final character of
# each token (presumably the group digit -- confirm against the column names).
s = df.columns[1].split(" ")
s1, s2 = s[0][:-1], s[1][:-1]  # s1: market equity, s2: other characteristic

def splitName(x):
    """Turn a raw portfolio column name into a (size, characteristic) pair.

    The name is split on spaces and its first two tokens are normalized:

    * size token: kept if it starts with "M"; replaced by "ME1" if it starts
      with "S"; replaced by "ME5" otherwise.
    * characteristic token: kept if it starts with the same letter as the
      module-level label ``s2``; replaced by ``s2 + "1"`` if it starts with
      "L"; replaced by ``s2 + "5"`` otherwise.

    Returns the two normalized tokens as a tuple.
    """
    tokens = x.split(" ")
    size, other = tokens[0], tokens[1]

    # Extreme size groups are spelled out as words rather than numbered.
    if size[0] == "S":
        size = "ME1"
    elif size[0] != "M":
        size = "ME5"

    # Same idea for the second sort: anything not already in the numbered
    # form is mapped to the lowest ("L...") or highest group.
    if other[0] != s2[0]:
        other = s2 + ("1" if other[0] == "L" else "5")

    return size, other

# Normalized (size, characteristic) label for every portfolio column, in column order
splits = list(map(splitName, df.columns))


# CALCULATIONS

In [3]:
# Relabel the portfolio columns with their (size, characteristic) pairs so
# that per-portfolio results can later be unstacked into two-way tables.
df.columns = pd.MultiIndex.from_tuples(splits)

# Empty results table: one row per portfolio, one column per factor loading
# plus the regression summary statistics.
factors = ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']
regr = pd.DataFrame(
    index=df.columns,
    columns=[*factors, 'alpha', 'tstat', 'empirical', 'theoretical'],
    dtype=float,
)

for port in regr.index:
    # Line up this portfolio's returns with the factor data by date and
    # keep only the dates where every column is present.
    subdf = df[port].to_frame(name='ret').join(ff).dropna()

    # Regress the portfolio return on a constant and the factors.
    exog = sm.add_constant(subdf[factors])
    result = sm.OLS(subdf['ret'], exog).fit()
    betas = result.params[factors]

    regr.loc[port, factors] = betas
    # Intercept and mean return are multiplied by 12 (monthly data -> annual).
    regr.loc[port, 'alpha'] = 12 * result.params['const']
    regr.loc[port, 'tstat'] = result.tvalues['const']
    regr.loc[port, 'empirical'] = 12 * subdf['ret'].mean()
    # Return implied by the loadings: betas times the mean factor values, times 12.
    regr.loc[port, 'theoretical'] = (12 * betas) @ subdf[factors].mean()

# Also keep each portfolio's (size, characteristic) label as a regular column
regr['port'] = splits

# Two-way tables: move the inner index level (the second characteristic)
# into the columns, leaving the size groups as rows.
alpha_tbl = regr['alpha'].unstack(level=-1)
tstat_tbl = regr['tstat'].unstack(level=-1)

# FIGURE (ALPHA HEAT MAP)

In [4]:
# Heat map of the alpha table: table columns on the x axis, table rows on
# the y axis, cell values shown as percentages.
trace = go.Heatmap(
    z=alpha_tbl,
    x=alpha_tbl.columns.tolist(),
    y=alpha_tbl.index.tolist(),
    colorscale='Viridis',
    texttemplate="%{z:.2%}",
    hovertemplate="%{x} / %{y}<br>%{z:.3%}<extra></extra>",
)
# NOTE(review): alpha_tbl is rebound from the table to the figure here, so
# this cell cannot be re-run without first recomputing the table.
alpha_tbl = go.Figure(data=trace)
alpha_tbl.show()

# FIGURE (t STAT HEAT MAP)

In [5]:
# Heat map of the t-statistic table: table columns on the x axis, table rows
# on the y axis, cell values shown with two decimals.
trace = go.Heatmap(
    z=tstat_tbl,
    x=tstat_tbl.columns.tolist(),
    y=tstat_tbl.index.tolist(),
    colorscale='Viridis',
    texttemplate="%{z:.2f}",
    hovertemplate="%{x} / %{y}<br>%{z:.3f}<extra></extra>",
)
# NOTE(review): tstat_tbl is rebound from the table to the figure here, so
# this cell cannot be re-run without first recomputing the table.
tstat_tbl = go.Figure(data=trace)
tstat_tbl.show()