<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc" style="margin-top: 1em;"><ul class="toc-item"><li><span><a href="#Utility-functions" data-toc-modified-id="Utility-functions-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Utility functions</a></span></li><li><span><a href="#Regression-Logic" data-toc-modified-id="Regression-Logic-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Regression Logic</a></span></li><li><span><a href="#SMB_3F-on-Long-Leg" data-toc-modified-id="SMB_3F-on-Long-Leg-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>SMB_3F on Long Leg</a></span></li><li><span><a href="#SMB_3F-on-Short-Leg" data-toc-modified-id="SMB_3F-on-Short-Leg-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>SMB_3F on Short Leg</a></span></li></ul></div>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys
import os
import statsmodels.api as sm
import datetime

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Data folder one level above the notebook; built with os.path.join so the
# separator is correct on every OS (identical to "..\\data" on Windows).
data_dir = os.path.join(os.pardir, "data")

# Utility functions

In [3]:
def read_ff(filename):
    """
    Load an Excel sheet from ``data_dir`` and index it by monthly period.

    The ``Date`` column is parsed with the ``%Y%m`` format, converted to a
    monthly period and used as the index. Cells equal to -99.99 are read as
    missing values. Only rows from 1963-07 through 2019-12 are returned.
    """
    path = os.path.join(data_dir, filename)
    raw = pd.read_excel(path, na_values=[-99.99])
    months = pd.to_datetime(raw['Date'], format='%Y%m').dt.to_period('M')
    indexed = raw.assign(Date=months).set_index('Date')
    return indexed.loc['1963-07':'2019-12']

def read_qmj(filename):
    """
    Load a QMJ (AQR factor) Excel sheet from ``data_dir``, indexed by month.

    The ``Date`` column is parsed with the ``%m/%d/%Y`` format, converted to a
    monthly period and used as the index. Only rows from 1963-01 through
    2019-12 are returned.
    """
    path = os.path.join(data_dir, filename)
    raw = pd.read_excel(path)
    months = pd.to_datetime(raw['Date'], format='%m/%d/%Y').dt.to_period('M')
    indexed = raw.assign(Date=months).set_index('Date')
    return indexed.loc['1963-01':'2019-12']


def read_qfactor(filename):
    """
    Load a Q-factor Excel sheet from ``data_dir``, indexed by monthly period.

    The separate ``year`` and ``month`` columns are glued into a ``YYYYMM``
    string (month zero-padded to two digits), parsed into a monthly period and
    used as the ``Date`` index; the two source columns are then dropped. Only
    rows from 1967-01 through 2019-12 are returned.
    """
    raw = pd.read_excel(os.path.join(data_dir, filename))
    yyyymm = raw['year'].astype(str) + raw['month'].astype(str).str.zfill(2)
    months = pd.to_datetime(yyyymm, format='%Y%m').dt.to_period('M')
    indexed = (raw.assign(Date=months)
                  .drop(columns=['year', 'month'])
                  .set_index('Date'))
    return indexed.loc['1967-01':'2019-12']


def create_long_short_leg_for_hml(df):
    """
    Create long and short leg for hml factor.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'SMALL LoBM', 'ME1 BM2', 'SMALL HiBM',
        'BIG LoBM', 'ME2 BM2' and 'BIG HiBM'. It is not modified.

    Returns
    -------
    pandas.DataFrame
        Two columns on the index of ``df``:
        'HML_L' = mean of the two 'HiBM' columns minus mean of the two 'BM2'
        columns, and 'HML_S' = mean of the two 'BM2' columns minus mean of the
        two 'LoBM' columns.
    """
    high = (1/2)*(df['BIG HiBM'] + df['SMALL HiBM'])
    middle = (1/2)*(df['ME1 BM2'] + df['ME2 BM2'])
    low = (1/2)*(df['SMALL LoBM'] + df['BIG LoBM'])
    # assign() works on a copy, so the caller's frame (often a .loc slice)
    # is never written to.
    return df.assign(HML_L=high - middle, HML_S=middle - low)[['HML_L', 'HML_S']]
    

def create_long_short_leg_for_wml(df):
    """
    Create long and short leg for wml factor.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'SMALL LoPRIOR', 'ME1 PRIOR2',
        'SMALL HiPRIOR', 'BIG LoPRIOR', 'ME2 PRIOR2' and 'BIG HiPRIOR'.
        It is not modified.

    Returns
    -------
    pandas.DataFrame
        Two columns on the index of ``df``:
        'WML_L' = mean of the two 'HiPRIOR' columns minus mean of the two
        'PRIOR2' columns, and 'WML_S' = mean of the two 'PRIOR2' columns minus
        mean of the two 'LoPRIOR' columns.
    """
    high = (1/2)*(df['BIG HiPRIOR'] + df['SMALL HiPRIOR'])
    middle = (1/2)*(df['ME2 PRIOR2'] + df['ME1 PRIOR2'])
    low = (1/2)*(df['SMALL LoPRIOR'] + df['BIG LoPRIOR'])
    # assign() works on a copy, so the caller's frame (often a .loc slice)
    # is never written to.
    return df.assign(WML_L=high - middle, WML_S=middle - low)[['WML_L', 'WML_S']]


def create_long_short_leg_for_rmw(df):
    """
    Create long and short leg for rmw factor.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'SMALL LoOP', 'ME1 OP2', 'SMALL HiOP',
        'BIG LoOP', 'ME2 OP2' and 'BIG HiOP'. It is not modified.

    Returns
    -------
    pandas.DataFrame
        Two columns on the index of ``df``:
        'RMW_L' = mean of the two 'HiOP' columns minus mean of the two 'OP2'
        columns, and 'RMW_S' = mean of the two 'OP2' columns minus mean of the
        two 'LoOP' columns.
    """
    high = (1/2)*(df['BIG HiOP'] + df['SMALL HiOP'])
    middle = (1/2)*(df['ME2 OP2'] + df['ME1 OP2'])
    low = (1/2)*(df['SMALL LoOP'] + df['BIG LoOP'])
    # assign() works on a copy, so the caller's frame (often a .loc slice)
    # is never written to.
    return df.assign(RMW_L=high - middle, RMW_S=middle - low)[['RMW_L', 'RMW_S']]


def create_long_short_leg_for_cma(df):
    """
    Create long and short leg for cma factor.

    Unlike the other factor helpers, the long leg here is built from the
    'LoINV' columns and the short leg from the 'HiINV' columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'SMALL LoINV', 'ME1 INV2', 'SMALL HiINV',
        'BIG LoINV', 'ME2 INV2' and 'BIG HiINV'. It is not modified.

    Returns
    -------
    pandas.DataFrame
        Two columns on the index of ``df``:
        'CMA_L' = mean of the two 'LoINV' columns minus mean of the two 'INV2'
        columns, and 'CMA_S' = mean of the two 'INV2' columns minus mean of the
        two 'HiINV' columns.
    """
    low = (1/2)*(df['BIG LoINV'] + df['SMALL LoINV'])
    middle = (1/2)*(df['ME1 INV2'] + df['ME2 INV2'])
    high = (1/2)*(df['SMALL HiINV'] + df['BIG HiINV'])
    # assign() works on a copy, so the caller's frame (often a .loc slice)
    # is never written to.
    return df.assign(CMA_L=low - middle, CMA_S=middle - high)[['CMA_L', 'CMA_S']]
    

def create_long_short_leg_for_qmj(df):
    """
    Create long and short leg for qmj factor.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Small_Low', 'Small_Medium', 'Small_Large',
        'Big_Low', 'Big_Medium' and 'Big_Large'. It is not modified.

    Returns
    -------
    pandas.DataFrame
        Two columns on the index of ``df``:
        'QMJ_L' = mean of the two '*_Large' columns minus mean of the two
        '*_Medium' columns, and 'QMJ_S' = mean of the two '*_Medium' columns
        minus mean of the two '*_Low' columns.
    """
    large = (1/2)*(df['Big_Large'] + df['Small_Large'])
    medium = (1/2)*(df['Big_Medium'] + df['Small_Medium'])
    low = (1/2)*(df['Big_Low'] + df['Small_Low'])
    # assign() works on a copy, so the caller's frame (often a .loc slice)
    # is never written to.
    return df.assign(QMJ_L=large - medium, QMJ_S=medium - low)[['QMJ_L', 'QMJ_S']]
    
    
def _rank_sorted_legs(df, rank_col, prefix, long_rank, short_rank):
    """
    Build the '<prefix>_L' / '<prefix>_S' legs from rank-sorted returns.

    For every ``Date`` the mean of ``ret_vw`` is taken within each value of
    ``rank_col`` (ranks are compared as strings, e.g. '1', '2', '3'). The long
    leg is rank ``long_rank`` minus rank '2'; the short leg is rank '2' minus
    rank ``short_rank``.

    Parameters
    ----------
    df : pandas.DataFrame
        Indexed by 'Date'; must contain ``rank_col`` and 'ret_vw'.
        It is not modified.
    rank_col : str
        Name of the rank column to group on.
    prefix : str
        Prefix of the two output column names.
    long_rank, short_rank : str
        String rank labels used for the long and the short side.

    Returns
    -------
    pandas.DataFrame
        Columns ['<prefix>_L', '<prefix>_S'], indexed by 'Date'.
    """
    # Work on the reset-index copy so the caller's rank column keeps its dtype.
    ranked = df.reset_index()
    ranked[rank_col] = ranked[rank_col].astype(str)

    # pivot_table averages by default, so a separate groupby-mean is not needed.
    by_rank = pd.pivot_table(ranked, values='ret_vw', index=['Date'], columns=[rank_col])

    long_name, short_name = prefix + '_L', prefix + '_S'
    by_rank[long_name] = by_rank[long_rank] - by_rank['2']
    by_rank[short_name] = by_rank['2'] - by_rank[short_rank]
    return by_rank[[long_name, short_name]]


def create_long_short_leg_for_ia(df):
    """
    Create long and short leg for ia factor.

    'IA_L' is rank 1 minus rank 2 and 'IA_S' is rank 2 minus rank 3 of the
    per-date mean ``ret_vw`` grouped by ``rank_IA``. ``df`` is not modified.
    """
    return _rank_sorted_legs(df, 'rank_IA', 'IA', long_rank='1', short_rank='3')


def create_long_short_leg_for_roe(df):
    """
    Create long and short leg for roe factor.

    'ROE_L' is rank 3 minus rank 2 and 'ROE_S' is rank 2 minus rank 1 of the
    per-date mean ``ret_vw`` grouped by ``rank_ROE``. ``df`` is not modified.
    """
    return _rank_sorted_legs(df, 'rank_ROE', 'ROE', long_rank='3', short_rank='1')


def create_long_short_leg_for_eg(df):
    """
    Create long and short leg for eg factor.

    'EG_L' is rank 3 minus rank 2 and 'EG_S' is rank 2 minus rank 1 of the
    per-date mean ``ret_vw`` grouped by ``rank_EG``. ``df`` is not modified.
    """
    return _rank_sorted_legs(df, 'rank_EG', 'EG', long_rank='3', short_rank='1')
    
    
def describe(df, n=2):
    """
    Print the shape of ``df`` and display its first and last ``n`` rows.
    """
    print(df.shape)
    for preview in (df.head(n), df.tail(n)):
        display(preview)

# Regression Logic

In [4]:
def run_regression(y, X=None):
    """
    Fit an OLS regression of ``y`` on ``X`` and return formatted results.

    When ``X`` is given, a constant is added to it with ``sm.add_constant``;
    when it is omitted, ``y`` is regressed on a column of ones only.

    Returns a pair (coefficients, t-statistics). Values are strings with two
    decimals and the t-statistics are wrapped in parentheses. With a single
    regressor column each element of the pair is one string; otherwise each
    element is a list of strings in the column order of the design matrix.
    """
    format_coeff = '{0:.2f}'.format
    format_tstat = '({0:.2f})'.format

    design = np.ones((len(y), 1)) if X is None else sm.add_constant(X)

    fitted = sm.OLS(endog=y, exog=design).fit()
    betas = fitted.params.values
    tstats = fitted.tvalues.values

    # Single-column design: hand back bare strings rather than lists
    if design.shape[1] == 1:
        return format_coeff(betas[0]), format_tstat(tstats[0])

    return [format_coeff(b) for b in betas], [format_tstat(t) for t in tstats]


def get_exog(df, col):
    """
    Prepare X dataframe as per given column.

    ``col`` selects which set of factor-leg columns accompanies the market
    factor. A one-period lag of 'Mkt-RF' is added as 'Mkt-RF(-1)' and every
    row containing a missing value (including the first row, which has no
    lag) is dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Mkt-RF' and the leg columns of the chosen set.
        It is not modified.
    col : str
        One of 'CMA_L', 'QMJ_L', 'EG_L', 'CMA_S', 'QMJ_S', 'EG_S'.

    Returns
    -------
    pandas.DataFrame
        Columns ordered as ['Mkt-RF', 'Mkt-RF(-1)', <leg columns...>].

    Raises
    ------
    ValueError
        If ``col`` is not one of the supported keys.
    """
    factors_by_key = {
        'CMA_L': ['HML_L', 'WML_L', 'RMW_L', 'CMA_L'],
        'QMJ_L': ['HML_L', 'WML_L', 'QMJ_L'],
        'EG_L': ['IA_L', 'ROE_L', 'EG_L'],
        'CMA_S': ['HML_S', 'WML_S', 'RMW_S', 'CMA_S'],
        'QMJ_S': ['HML_S', 'WML_S', 'QMJ_S'],
        'EG_S': ['IA_S', 'ROE_S', 'EG_S'],
    }
    if col not in factors_by_key:
        raise ValueError(
            "Unsupported col {0!r}; expected one of {1}".format(col, sorted(factors_by_key))
        )

    # Explicit copy: adding the lag column must not write through to ``df``.
    X = df[['Mkt-RF'] + factors_by_key[col]].copy()
    # Insert the lag right after the contemporaneous market factor.
    X.insert(1, 'Mkt-RF(-1)', X['Mkt-RF'].shift(1))
    return X.dropna()

In [5]:
def get_exhibit_for_long_leg(ff3, ff5, name=''):
    """
    Get exhibit results for a long leg.

    Regresses ``ff3['SMB']`` on three sets of regressors obtained from
    ``get_exog(ff5, ...)`` (keyed by 'CMA_L', 'QMJ_L' and 'EG_L') and collects
    the formatted coefficients and t-statistics in one table.

    Parameters
    ----------
    ff3 : pandas.DataFrame
        Must contain an 'SMB' column covering every index label of the
        regressor frames.
    ff5 : pandas.DataFrame
        Source of the regressors; passed to ``get_exog``.
    name : str, optional
        Name of the second level of the row MultiIndex.

    Returns
    -------
    pandas.DataFrame
        Rows are (model number 1-3) x ('coeff.', 't-stat.'); columns are
        'alpha', 'Mkt', 'Mkt(-1)' and all long-leg factors. Cells of factors
        that are not part of a model hold '-'.

    Raises
    ------
    KeyError
        If ``ff3`` lacks a label that is present in a regressor frame.
    """
    base_cols = ['alpha', 'Mkt', 'Mkt(-1)']
    model_factors = [['HML_L', 'WML_L', 'RMW_L', 'CMA_L'],
                     ['HML_L', 'WML_L', 'QMJ_L'],
                     ['IA_L', 'ROE_L', 'EG_L']]
    all_factors = ['HML_L', 'WML_L', 'RMW_L', 'CMA_L', 'QMJ_L', 'IA_L', 'ROE_L', 'EG_L']

    # Exhibit template, pre-filled with the '-' placeholder
    rows = pd.MultiIndex.from_product([[1, 2, 3], ['coeff.', 't-stat.']], names=['', name])
    exhibit = pd.DataFrame('-', index=rows, columns=base_cols + all_factors, dtype=object)

    for idx, factors in enumerate(model_factors, 1):
        cols = base_cols + factors
        # The last factor of each model is the key get_exog dispatches on.
        X = get_exog(df=ff5, col=factors[-1])
        # Align on X's labels: X lost rows to the Mkt(-1) lag and to factors
        # whose data start later, and OLS itself does not align indexes.
        y = ff3['SMB'].loc[X.index]
        coeff, tvalues = run_regression(y=y, X=X)
        exhibit.loc[(idx, 'coeff.'), cols] = coeff
        exhibit.loc[(idx, 't-stat.'), cols] = tvalues

    return exhibit

In [6]:
def get_exhibit_for_short_leg(ff3, ff5, name=''):
    """
    Get exhibit results for a short leg.

    Regresses ``ff3['SMB']`` on three sets of regressors obtained from
    ``get_exog(ff5, ...)`` (keyed by 'CMA_S', 'QMJ_S' and 'EG_S') and collects
    the formatted coefficients and t-statistics in one table.

    Parameters
    ----------
    ff3 : pandas.DataFrame
        Must contain an 'SMB' column covering every index label of the
        regressor frames.
    ff5 : pandas.DataFrame
        Source of the regressors; passed to ``get_exog``.
    name : str, optional
        Name of the second level of the row MultiIndex.

    Returns
    -------
    pandas.DataFrame
        Rows are (model number 1-3) x ('coeff.', 't-stat.'); columns are
        'alpha', 'Mkt', 'Mkt(-1)' and all short-leg factors. Cells of factors
        that are not part of a model hold '-'.

    Raises
    ------
    KeyError
        If ``ff3`` lacks a label that is present in a regressor frame.
    """
    base_cols = ['alpha', 'Mkt', 'Mkt(-1)']
    model_factors = [['HML_S', 'WML_S', 'RMW_S', 'CMA_S'],
                     ['HML_S', 'WML_S', 'QMJ_S'],
                     ['IA_S', 'ROE_S', 'EG_S']]
    all_factors = ['HML_S', 'WML_S', 'RMW_S', 'CMA_S', 'QMJ_S', 'IA_S', 'ROE_S', 'EG_S']

    # Exhibit template, pre-filled with the '-' placeholder
    rows = pd.MultiIndex.from_product([[1, 2, 3], ['coeff.', 't-stat.']], names=['', name])
    exhibit = pd.DataFrame('-', index=rows, columns=base_cols + all_factors, dtype=object)

    for idx, factors in enumerate(model_factors, 1):
        cols = base_cols + factors
        # The last factor of each model is the key get_exog dispatches on.
        X = get_exog(df=ff5, col=factors[-1])
        # Align on X's labels: X lost rows to the Mkt(-1) lag and to factors
        # whose data start later, and OLS itself does not align indexes.
        y = ff3['SMB'].loc[X.index]
        coeff, tvalues = run_regression(y=y, X=X)
        exhibit.loc[(idx, 'coeff.'), cols] = coeff
        exhibit.loc[(idx, 't-stat.'), cols] = tvalues

    return exhibit

In [7]:
# FF3 and FF5 factor returns
ff3_us = read_ff("FF3_US.xlsx")
ff5_us = read_ff("FF5_US.xlsx")

# Calculate long and short legs of each factor; they are joined onto the
# ff5 dataframe below.
# HML
ff_hml_us = create_long_short_leg_for_hml(read_ff("FF_SMB_HML_US.xlsx"))

# WML
ff_wml_us = create_long_short_leg_for_wml(read_ff("FF_SMB_WML_US.xlsx"))

# RMW
ff_rmw_us = create_long_short_leg_for_rmw(read_ff("FF_SMB_RMW_US.xlsx"))

# CMA
ff_cma_us = create_long_short_leg_for_cma(read_ff("FF_SMB_CMA_US.xlsx"))

# QMJ
aqr_qmj_us = create_long_short_leg_for_qmj(read_qmj("QMJ_SMB_QMJ_US.xlsx"))

# ---------------------------- Q-factors -------------------------- #
# IA and ROE come from the same workbook: parse it once and reuse it.
# Each leg builder only groups on its own rank column and 'ret_vw'.
qfactor_ia_roe_raw = read_qfactor("QFactor_ME_IA_ROE_US.xlsx")

# IA
qfactor_ia_us = create_long_short_leg_for_ia(qfactor_ia_roe_raw)

# ROE
qfactor_roe_us = create_long_short_leg_for_roe(qfactor_ia_roe_raw)

# EG
qfactor_eg_us = create_long_short_leg_for_eg(read_qfactor("QFactor_ME_EG_US.xlsx"))

# Join with original df (left joins on the monthly Date index, so factors
# that start later than ff5 are NaN in the early rows)
ff5_us = (ff5_us.join(ff_hml_us)
                .join(ff_wml_us)
                .join(ff_rmw_us)
                .join(ff_cma_us)
                .join(aqr_qmj_us)
                .join(qfactor_ia_us)
                .join(qfactor_roe_us)
                .join(qfactor_eg_us))

describe(ff5_us)

(678, 22)


Unnamed: 0_level_0,Mkt-RF,SMB,HML,RMW,CMA,RF,HML_L,HML_S,WML_L,WML_S,...,CMA_L,CMA_S,QMJ_L,QMJ_S,IA_L,IA_S,ROE_L,ROE_S,EG_L,EG_S
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1963-07,-0.39,-0.47,-0.83,0.66,-1.15,0.27,-1.30135,0.4754,0.075,0.92,...,-1.1226,-0.02725,0.476944,0.207492,,,,,,
1963-08,5.07,-0.79,1.67,0.4,-0.4,0.25,2.01925,-0.35125,1.5,-0.47,...,0.77935,-1.178,-0.348245,-0.643217,,,,,,


Unnamed: 0_level_0,Mkt-RF,SMB,HML,RMW,CMA,RF,HML_L,HML_S,WML_L,WML_S,...,CMA_L,CMA_S,QMJ_L,QMJ_S,IA_L,IA_S,ROE_L,ROE_S,EG_L,EG_S
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-11,3.87,0.5,-1.86,-1.5,-1.29,0.12,0.2445,-2.10515,-1.355,-1.26,...,0.37995,-1.67,-0.061807,-1.763146,0.973383,-1.9861,0.854633,-2.0213,0.22,-0.55315
2019-12,2.77,0.96,1.83,0.21,1.31,0.14,1.13545,0.6918,0.32,-2.45,...,1.32215,-0.00935,-0.627093,-1.760763,0.791433,1.044583,-0.635733,-0.684233,0.66615,-1.46425


# SMB_3F on Long Leg

In [8]:
exhibit6_long_leg = get_exhibit_for_long_leg(ff3=ff3_us, ff5=ff5_us, name='')
exhibit6_long_leg

Unnamed: 0,Unnamed: 1,alpha,Mkt,Mkt(-1),HML_L,WML_L,RMW_L,CMA_L,QMJ_L,IA_L,ROE_L,EG_L
,,,,,,,,,,,,
1.0,coeff.,-0.10,0.08,0.09,0.20,0.32,0.19,0.77,-,-,-,-
1.0,t-stat.,(-1.00),(3.23),(3.93),(2.75),(7.23),(1.94),(8.42),-,-,-,-
2.0,coeff.,-0.09,0.13,0.12,0.29,0.41,-,-,-0.22,-,-,-
2.0,t-stat.,(-0.82),(5.09),(5.04),(3.86),(9.01),-,-,(-2.09),-,-,-
3.0,coeff.,-0.07,0.16,0.11,-,-,-,-,-,0.51,0.00,0.18
3.0,t-stat.,(-0.63),(6.10),(4.22),-,-,-,-,-,(4.78),(0.02),(1.74)


# SMB_3F on Short Leg

In [9]:
exhibit6_short_leg = get_exhibit_for_short_leg(ff3=ff3_us, ff5=ff5_us, name='')
exhibit6_short_leg

Unnamed: 0,Unnamed: 1,alpha,Mkt,Mkt(-1),HML_S,WML_S,RMW_S,CMA_S,QMJ_S,IA_S,ROE_S,EG_S
,,,,,,,,,,,,
1.0,coeff.,0.31,0.01,0.09,0.03,-0.04,-0.63,-0.29,-,-,-,-
1.0,t-stat.,(3.09),(0.26),(3.99),(0.44),(-1.04),(-9.50),(-2.95),-,-,-,-
2.0,coeff.,0.40,-0.00,0.08,-0.19,0.00,-,-,-0.78,-,-,-
2.0,t-stat.,(3.89),(-0.01),(3.49),(-3.71),(0.01),-,-,(-10.30),-,-,-
3.0,coeff.,0.51,-0.01,0.08,-,-,-,-,-,-0.26,-0.35,-0.43
3.0,t-stat.,(4.67),(-0.26),(3.54),-,-,-,-,-,(-3.25),(-5.76),(-4.81)
