<a href="https://colab.research.google.com/github/bbcx-investments/notebooks/blob/main/factor_investing/ff_characteristics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import statsmodels.api as sm
from pandas_datareader import DataReader as pdr

# Characteristic name -> dataset name requested from the "famafrench" source.
# Each dataset is a two-way sort; judging by the names, market equity (ME)
# crossed with one other characteristic in a 5 x 5 grid.
charsDict = {
    "Book to market ratio": "25_Portfolios_5x5",
    "Investment rate": "25_Portfolios_ME_INV_5x5",
    "Momentum": "25_Portfolios_ME_Prior_12_2",
    "Short term reversal": "25_Portfolios_ME_Prior_1_0",
    "Long term reversal": "25_Portfolios_ME_Prior_60_13",
    "Accruals": "25_Portfolios_ME_AC_5x5",
    "Beta": "25_Portfolios_ME_BETA_5x5",
    "Net equity issuance": "25_Portfolios_ME_NI_5x5",
    "Variance": "25_Portfolios_ME_VAR_5x5",
    "Residual variance": "25_Portfolios_ME_RESVAR_5x5",
}
files = list(charsDict.values())    # dataset names, in declaration order
chars = sorted(charsDict)           # characteristic names, alphabetical

# Five-factor data set; the first table of the download, divided by 100
# (presumably converting percent to decimal returns -- confirm).
ff = pdr("F-F_Research_Data_5_Factors_2x3", "famafrench", start=1900)[0] / 100

dates = [1980, 2010]                    # example start and end dates
char = "Book to market ratio"           # example characteristic

# Portfolio returns for the chosen characteristic, in excess of the RF column
# of the factor data (subtracted row by row along the index).
RETS = (pdr(charsDict[char], "famafrench", start=1963)[0] / 100).subtract(
    ff.RF, axis="index"
)

# For net equity issuance, discard the columns whose second label token starts
# with "Z" or "Ne" so only the remaining sort columns are kept.
if char == "Net equity issuance":
    to_drop = [
        label
        for label in RETS.columns
        if label.split(" ")[1][0] == "Z" or label.split(" ")[1][0:2] == "Ne"
    ]
    RETS = RETS.drop(columns=to_drop)

# Restrict to January of the first year through December of the last year.
start = f"{dates[0]}-01"
stop = f"{dates[1]}-12"
df = RETS.loc[start:stop].copy()

# Read the two sort variables off the second column's label: each token is
# the variable name followed by one trailing character, which is stripped.
s = df.columns[1].split(" ")
s1 = s[0][:-1]             # market equity
s2 = s[1][:-1]             # other characteristic

def splitName(x):
    """Normalize a portfolio column label to a (size, characteristic) pair.

    The label is split on single spaces; the first token is the size bucket
    and the second is the bucket of the other sort variable, whose name is
    read from the module-level ``s2``.

    * Size token: kept as-is when it starts with "M"; a token starting with
      "S" becomes "ME1"; anything else becomes "ME5".
    * Characteristic token: kept as-is when it starts with the same letter
      as ``s2``; a token starting with "L" becomes ``s2 + "1"``; anything
      else becomes ``s2 + "5"``.

    Returns the two normalized tokens as a tuple.
    """
    tokens = x.split(" ")

    size = tokens[0]
    if size[0] == "S":
        size = "ME1"
    elif size[0] != "M":
        size = "ME5"

    other = tokens[1]
    lead = other[0]
    if lead != s2[0]:
        other = s2 + ("1" if lead == "L" else "5")

    return size, other

# Normalize every raw column label to a (size, characteristic) pair such as
# ("ME1", "BM1").
splits = [splitName(x) for x in df.columns]

df.columns = pd.MultiIndex.from_tuples(splits)

# multi-indexed index, for unstacking
factors = ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']
regr = pd.DataFrame(
    dtype=float,
    index=df.columns,
    columns=factors + ['alpha', 'tstat', 'empirical', 'theoretical']
)

# The portfolio frame has two-level columns while the factor frame has
# single-level columns.  Joining frames with different numbers of column
# levels was deprecated and is rejected by newer pandas, so flatten the
# portfolio labels to plain tuples first.  Each portfolio is still addressed
# by its (size, characteristic) tuple afterwards.
df.columns = df.columns.to_flat_index()
df = df.join(ff).dropna()

# The regressors (constant + five factors) and the factor means are the same
# for every portfolio, so build them once instead of once per iteration.
exog = sm.add_constant(df[factors])
factor_means = df[factors].mean()

for port in regr.index:
    result = sm.OLS(df[port], exog).fit()
    regr.loc[port, factors] = result.params[factors]
    # intercept scaled by 12 and by 100
    regr.loc[port, 'alpha'] = 100 * 12 * result.params['const']
    regr.loc[port, 'tstat'] = result.tvalues['const']
    # 12 x the sample mean of the portfolio's excess return
    regr.loc[port, 'empirical'] = 12 * df[port].mean()
    # 12 x (factor loadings dotted with the factor sample means)
    regr.loc[port, 'theoretical'] = 12 * result.params[factors] @ factor_means

regr['port'] = splits

# 5 x 5 tables

alpha_tbl = regr.alpha.unstack().round(2)
tstat_tbl = regr.tstat.unstack().round(2)

  df = df.join(ff).dropna()


In [None]:
# Display the alpha table: regression intercepts scaled by 100 * 12, rounded
# to 2 decimals, with size buckets as rows and characteristic buckets as columns.
alpha_tbl

Unnamed: 0,BM1,BM2,BM3,BM4,BM5
ME1,-4.47,3.99,0.8,3.58,1.8
ME2,-1.63,0.18,-0.51,0.4,-0.42
ME3,1.57,0.37,-1.91,-1.04,0.41
ME4,3.45,-2.14,-3.65,-0.69,-1.55
ME5,1.2,-1.64,-1.37,-4.85,-0.06


In [None]:
# Display the t-statistics of the regression intercepts, rounded to 2
# decimals, with size buckets as rows and characteristic buckets as columns.
tstat_tbl

Unnamed: 0,BM1,BM2,BM3,BM4,BM5
ME1,-2.97,3.42,0.92,3.91,1.83
ME2,-1.78,0.2,-0.58,0.48,-0.49
ME3,1.66,0.35,-1.86,-1.01,0.32
ME4,3.39,-2.01,-3.21,-0.59,-1.1
ME5,1.69,-1.63,-1.16,-4.26,-0.04
