<a href="https://colab.research.google.com/github/bbcx-investments/notebooks/blob/main/factor_investing/two_way_sorts.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from pandas_datareader import DataReader as pdr
import pandas as pd
import numpy as np
import statsmodels.api as sm

# Lookup from the human-readable characteristic label to the name of the
# Fama-French data-library file that is requested for it further down.
charsDict = {
    "Book to market ratio": '25_Portfolios_5x5',
    "Investment rate": '25_Portfolios_ME_INV_5x5',
    "Momentum": '25_Portfolios_ME_Prior_12_2',
    "Short term reversal": '25_Portfolios_ME_Prior_1_0',
    "Long term reversal": '25_Portfolios_ME_Prior_60_13',
    "Accruals": '25_Portfolios_ME_AC_5x5',
    "Beta": '25_Portfolios_ME_BETA_5x5',
    "Net equity issuance": '25_Portfolios_ME_NI_5x5',
    "Variance": '25_Portfolios_ME_VAR_5x5',
    "Residual variance": '25_Portfolios_ME_RESVAR_5x5',
}

# File names in the same order as the mapping above.
files = list(charsDict.values())

# Characteristic labels in alphabetical order.
chars = sorted(charsDict)

# Module-level slots for the currently loaded returns and characteristic.
RETS = None
CHAR = None

dates = [1980, 2010]
char = "Book to market ratio"           # example characteristic

CHAR = char

# Both downloads are divided by 100 (the later cells multiply by 100 again
# to report figures in percent).
RETS = pdr(charsDict[char], "famafrench", start=1926)[0] / 100
ff = pdr('F-F_Research_Data_Factors','famafrench', start=1900)[0] / 100

# Excess returns: subtract the risk-free rate from every portfolio, row by row.
RETS = RETS.sub(ff["RF"], axis=0)

if char == "Net equity issuance":
    # Discard the portfolios whose second name token starts with "Z" or "Ne".
    unwanted = []
    for col in RETS.columns:
        tag = col.split(" ")[1]
        if tag[0] == "Z" or tag[0:2] == "Ne":
            unwanted.append(col)
    RETS = RETS.drop(columns=unwanted)

# Restrict to January of the first year through December of the last year.
start = f"{dates[0]}-01"
stop = f"{dates[1]}-12"
df = RETS.loc[start:stop].copy()

# see what the two chars are in the two-way sort
# (taken from the second column's name, with the trailing character of each
# token removed)
s = df.columns[1].split(" ")
s1, s2 = s[0][:-1], s[1][:-1]   # market equity, other characteristic

def splitName(x, prefix=None):
    """Normalize a two-token portfolio label into a (size, characteristic) pair.

    Parameters
    ----------
    x : str
        Portfolio label made of two space-separated tokens, e.g.
        "SMALL LoBM", "ME3 BM2" or "BIG HiBM".
    prefix : str, optional
        Name prefix of the second sorting characteristic (e.g. "BM").
        When omitted, the module-level ``s2`` is used, which is what the
        one-argument call has always done.

    Returns
    -------
    tuple of (str, str)
        The size label and the characteristic label. Tokens that already
        start with "M" (size) or with the first letter of ``prefix``
        (characteristic) are returned unchanged; the remaining size tokens
        map to "ME1" (starting with "S") or "ME5", and the remaining
        characteristic tokens map to ``prefix + "1"`` (starting with "L")
        or ``prefix + "5"``.
    """
    if prefix is None:
        # Fall back to the prefix derived from the loaded data.
        prefix = s2

    tokens = x.split(" ")
    size_tok, char_tok = tokens[0], tokens[1]

    if size_tok[0] == "M":
        size = size_tok
    elif size_tok[0] == "S":
        size = "ME1"
    else:
        size = "ME5"

    if char_tok[0] == prefix[0]:
        other = char_tok
    elif char_tok[0] == "L":
        other = prefix + "1"
    else:
        other = prefix + "5"

    return size, other

# Normalized (size, characteristic) pair for every portfolio column.
splits = list(map(splitName, df.columns))

# Flat "size-characteristic" labels shared by the two summary series below.
labels = ["-".join((size, other)) for size, other in splits]

# Monthly mean scaled by 12 and monthly standard deviation scaled by sqrt(12).
mns = df.mean() * 12
sds = df.std() * np.sqrt(12)
mns.index = labels
sds.index = labels

# 5x5 table calculations

# Two-level column labels so that column-wise statistics can be unstacked
# into a size-by-characteristic grid.
df.columns = pd.MultiIndex.from_tuples(splits)

In [None]:
# Monthly mean times 12 and times 100, laid out as a grid with the first
# column level as rows and the second as columns.
means = (df.mean() * (100 * 12)).unstack().round(2)
means

Unnamed: 0,BM1,BM2,BM3,BM4,BM5
ME1,0.03,9.53,9.64,12.68,12.69
ME2,4.38,9.93,10.43,11.15,11.79
ME3,6.33,10.21,9.04,10.42,13.27
ME4,8.45,9.02,8.56,9.48,10.51
ME5,6.88,7.48,7.6,5.72,8.83


In [None]:
# Mean over standard deviation, scaled by sqrt(12) and by 100, shown as a grid.
annualizer = 100 * np.sqrt(12)
sharpes = (annualizer * df.mean()).div(df.std())
sharpes = sharpes.unstack().round(2)
sharpes

Unnamed: 0,BM1,BM2,BM3,BM4,BM5
ME1,0.1,39.07,48.2,66.55,64.17
ME2,17.33,47.39,58.17,63.64,57.0
ME3,26.99,53.05,53.11,61.4,69.92
ME4,39.35,49.65,48.76,56.65,54.65
ME5,41.21,45.21,47.59,32.44,45.21


In [None]:
# Market-model regressions: regress each portfolio's return series on the
# market factor and collect alpha, beta, the alpha t-statistic, the realized
# mean ('empirical') and the beta-implied mean ('theoretical').

# The market factor is still stored on df, as before. It must not be treated
# as a test portfolio, though: once the column exists, re-running this cell
# would otherwise add a spurious 'Mkt-RF' row (and an unnamed column) to every
# unstacked table.
df['Mkt-RF'] = ff['Mkt-RF']
ports = pd.MultiIndex.from_tuples([c for c in df.columns if c[0] != 'Mkt-RF'])

# multi-indexed index, for unstacking
regr = pd.DataFrame(dtype=float, index=ports, columns=['alpha', 'beta', 'tstat', 'empirical', 'theoretical'])

# The regressor matrix and the market mean are identical for every portfolio,
# so build them once outside the loop.
mkt = ff['Mkt-RF'].reindex(df.index)
exog = sm.add_constant(mkt)
mkt_mean = mkt.mean()

for port in ports:
    result = sm.OLS(df[port], exog).fit()
    regr.loc[port, 'alpha'] = 100 * 12 * result.params['const']
    regr.loc[port, 'beta'] = result.params['Mkt-RF']
    regr.loc[port, 'tstat'] = result.tvalues['const']
    regr.loc[port, 'empirical'] = 12 * df[port].mean()
    regr.loc[port, 'theoretical'] = 12 * result.params['Mkt-RF'] * mkt_mean

# Alphas (annualized, in percent)
regr.alpha.unstack().round(2)

Unnamed: 0,BM1,BM2,BM3,BM4,BM5,Unnamed: 6
ME1,-3.01,0.54,1.26,2.92,2.72,
ME2,-2.11,1.14,2.32,2.84,2.1,
ME3,-1.25,1.84,1.8,2.65,3.38,
ME4,0.04,1.46,1.28,2.17,1.89,
ME5,0.13,0.8,1.15,-0.23,1.09,
Mkt-RF,,,,,,9.22


In [None]:
# t statistics of the regression intercepts, shown as a grid
tstats = regr['tstat'].unstack()
tstats.round(2)

Unnamed: 0,BM1,BM2,BM3,BM4,BM5,Unnamed: 6
ME1,-3.01,0.54,1.26,2.92,2.72,
ME2,-2.11,1.14,2.32,2.84,2.1,
ME3,-1.25,1.84,1.8,2.65,3.38,
ME4,0.04,1.46,1.28,2.17,1.89,
ME5,0.13,0.8,1.15,-0.23,1.09,
Mkt-RF,,,,,,9.22
