In [1]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf

In [2]:
# Directory containing the CSV inputs read by the loading cells below.
data_dir = "./data"

In [3]:
# Missing data are indicated by -99.99 or -999.

# Five-factor file, indexed by Year_Month and cut to the 196307..201606 window.
ff_5_factors = (
    pd.read_csv(
        f"{data_dir}/F-F_Research_Data_5_Factors_2x3.csv",
        skiprows=4,
        names=["Year_Month", "Mkt", "SMB", "HML", "RMW", "CMA", "RF"],
    )
    .set_index("Year_Month")
    .loc['196307':'201606']
)

# Background note on the profitability factor (kept from the original notebook):
#   The first concerns the profitability factor RMWO in Eq. (4). It is
#   constructed by sorting stocks on the accruals-based operating profitability
#   (OP) measure suggested by Novy-Marx (2013), and it is the profitability
#   factor in the five-factor model of FF (2015, 2016, 2017).

# Momentum factor; the 'Mom' column is renamed to 'UMD' to match the factor
# names used in the rest of the notebook.
ff_mom = (
    pd.read_csv(
        f"{data_dir}/F-F_Momentum_Factor.csv",
        skiprows=14,
        names=["Year_Month", "Mom"],
    )
    .rename(columns={'Mom': 'UMD'})
    .set_index("Year_Month")
)

# Left merge keeps exactly the rows of the five-factor window; values are then
# cast to float.
ff_6_factors = ff_5_factors.merge(ff_mom, on='Year_Month', how='left').astype(float)

In [4]:
# Display the merged factor frame (five factors, RF and UMD) for a visual check.
ff_6_factors

Unnamed: 0_level_0,Mkt,SMB,HML,RMW,CMA,RF,UMD
Year_Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
196307,-0.39,-0.41,-0.97,0.68,-1.18,0.27,0.90
196308,5.07,-0.80,1.80,0.36,-0.35,0.25,1.01
196309,-1.57,-0.52,0.13,-0.71,0.29,0.27,0.19
196310,2.53,-1.39,-0.10,2.80,-2.01,0.29,3.12
196311,-0.85,-0.88,1.75,-0.51,2.24,0.27,-0.74
...,...,...,...,...,...,...,...
201602,-0.07,0.88,-0.57,3.25,2.02,0.02,-4.38
201603,6.96,1.07,1.19,0.77,-0.08,0.02,-5.01
201604,0.91,1.23,3.28,-2.97,1.90,0.01,-6.02
201605,1.78,-0.61,-1.66,-1.09,-2.48,0.01,1.42


In [5]:
# Risk-free rate column, kept as its own Series; later cells subtract it from
# portfolio returns to form the '-F' excess-return columns.
RF = ff_6_factors['RF']

In [6]:
# Show every row that carries a missing-data sentinel (-99.99 or -999) in any
# column; an empty result means the window is clean. A row-wise mask lists each
# offending row once, however many of its cells match.
ff_6_factors[ff_6_factors.isin([-99.99, -999]).any(axis=1)]

Unnamed: 0_level_0,Mkt,SMB,HML,RMW,CMA,RF,UMD
Year_Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1


### Table 1

#### Panel A

In [7]:
# Size-sorted portfolio returns: read the first five columns, keep the three
# size buckets, and cut to the same Year_Month window as the factor data.
formed_on_size = (
    pd.read_csv(
        f"{data_dir}/Portfolios_Formed_on_ME.csv",
        skiprows=13,
        usecols=range(5),
        names=["Year_Month", "<= 0", "Lo 30", "Med 40", "Hi 30"],
    )
    .loc[:, ["Year_Month", "Lo 30", "Med 40", "Hi 30"]]
    # NOTE(review): 1176 is presumably the row count of the first table in the
    # file (later tables would repeat the same labels) - confirm.
    .iloc[:1176]
    .set_index("Year_Month")
    .loc['196307':'201606']
)
formed_on_size

Unnamed: 0_level_0,Lo 30,Med 40,Hi 30
Year_Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
196307,-0.89,-1.38,0.06
196308,3.64,5.42,5.39
196309,-1.30,-2.11,-1.18
196310,1.34,1.59,3.06
196311,-1.53,-1.01,-0.48
...,...,...,...
201602,-0.04,1.34,-0.17
201603,7.14,8.12,6.79
201604,3.38,1.60,0.70
201605,1.24,1.88,1.76


In [8]:
# Attach the size-portfolio columns to the factor rows (left merge on Year_Month).
ff_6_factors_size = pd.merge(
    ff_6_factors,
    formed_on_size,
    how='left',
    on='Year_Month',
)
ff_6_factors_size

Unnamed: 0_level_0,Mkt,SMB,HML,RMW,CMA,RF,UMD,Lo 30,Med 40,Hi 30
Year_Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
196307,-0.39,-0.41,-0.97,0.68,-1.18,0.27,0.90,-0.89,-1.38,0.06
196308,5.07,-0.80,1.80,0.36,-0.35,0.25,1.01,3.64,5.42,5.39
196309,-1.57,-0.52,0.13,-0.71,0.29,0.27,0.19,-1.30,-2.11,-1.18
196310,2.53,-1.39,-0.10,2.80,-2.01,0.29,3.12,1.34,1.59,3.06
196311,-0.85,-0.88,1.75,-0.51,2.24,0.27,-0.74,-1.53,-1.01,-0.48
...,...,...,...,...,...,...,...,...,...,...
201602,-0.07,0.88,-0.57,3.25,2.02,0.02,-4.38,-0.04,1.34,-0.17
201603,6.96,1.07,1.19,0.77,-0.08,0.02,-5.01,7.14,8.12,6.79
201604,0.91,1.23,3.28,-2.97,1.90,0.01,-6.02,3.38,1.60,0.70
201605,1.78,-0.61,-1.66,-1.09,-2.48,0.01,1.42,1.24,1.88,1.76


In [9]:
# Cast every column to float; the size-portfolio columns were merged in without
# an explicit numeric conversion.
ff_6_factors_size = ff_6_factors_size.astype(float)

In [10]:
# Show every row that carries a missing-data sentinel (-99.99 or -999) in any
# column; an empty result means the merged frame is clean. A row-wise mask
# lists each offending row once, however many of its cells match.
ff_6_factors_size[ff_6_factors_size.isin([-99.99, -999]).any(axis=1)]


Unnamed: 0_level_0,Mkt,SMB,HML,RMW,CMA,RF,UMD,Lo 30,Med 40,Hi 30
Year_Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1


In [11]:
# Excess returns of the small ('Lo 30') and big ('Hi 30') size portfolios over
# the risk-free rate; the subtraction aligns on the Year_Month index.
ff_6_factors_size['S-F'] = ff_6_factors_size['Lo 30'].sub(RF)
ff_6_factors_size['B-F'] = ff_6_factors_size['Hi 30'].sub(RF)
ff_6_factors_size.describe()

Unnamed: 0,Mkt,SMB,HML,RMW,CMA,RF,UMD,Lo 30,Med 40,Hi 30,S-F,B-F
count,636.0,636.0,636.0,636.0,636.0,636.0,636.0,636.0,636.0,636.0,636.0,636.0
mean,0.500487,0.25022,0.344937,0.264135,0.310079,0.395865,0.690991,1.126887,1.091918,0.874119,0.731022,0.478255
std,4.437925,3.035771,2.794975,2.22285,1.989572,0.262893,4.235675,6.137846,5.313594,4.273633,6.154996,4.284977
min,-23.24,-15.32,-11.29,-18.65,-6.8,0.0,-34.3,-29.43,-27.18,-20.8,-30.03,-21.4
25%,-1.9825,-1.465,-1.2225,-0.78,-0.95,0.25,-0.74,-2.34,-2.0725,-1.6025,-2.905,-1.885
50%,0.805,0.1,0.315,0.235,0.14,0.4,0.78,1.45,1.505,1.16,1.06,0.8
75%,3.4075,2.0725,1.73,1.3025,1.52,0.53,2.9225,4.69,4.5125,3.6125,4.4,3.2675
max,16.1,18.28,12.47,13.07,9.07,1.35,18.2,26.91,22.76,17.75,26.33,17.24


In [12]:
t1_panel_a_factors = ['Mkt', 'SMB', 'S-F', 'B-F']

# Sample mean and sample standard deviation of each Panel A series. The mean is
# taken directly rather than read out of a full describe() summary.
average_return = ff_6_factors_size[t1_panel_a_factors].mean()
std_returns = ff_6_factors_size[t1_panel_a_factors].std()
n = len(ff_6_factors_size)

# t-statistic of the mean: mean divided by its standard error, std / sqrt(n).
t_stats = average_return / (std_returns / np.sqrt(n))
t_stats

Mkt    2.844080
SMB    2.078652
S-F    2.995237
B-F    2.814747
dtype: float64

In [13]:
# Panel A summary: one column per factor, one row per statistic.
t1_panel_a = pd.DataFrame(
    [average_return.values, t_stats.values],
    index=['Average Return', 't-statistic'],
    columns=pd.Index(t1_panel_a_factors, name='Factors'),
)
t1_panel_a

Factors,Mkt,SMB,S-F,B-F
Average Return,0.500487,0.25022,0.731022,0.478255
t-statistic,2.84408,2.078652,2.995237,2.814747


In [None]:
# Export with the index: after the transpose it holds the row labels
# ('Average Return', 't-statistic'), which index=False would drop from the sheet.
t1_panel_a.to_excel('./result/table1_a.xlsx')

In [15]:
# Inspect the available columns before choosing which ones to drop for `master`.
ff_6_factors_size.columns

Index(['Mkt', 'SMB', 'HML', 'RMW', 'CMA', 'RF', 'UMD', 'Lo 30', 'Med 40',
       'Hi 30', 'S-F', 'B-F'],
      dtype='object')

In [16]:
# Base of the master factor panel: drop the risk-free rate and the raw size
# portfolios, keeping the factors and the S-F / B-F excess returns.
master = ff_6_factors_size.drop(columns=['RF', 'Lo 30', 'Med 40', 'Hi 30'])
master

Unnamed: 0_level_0,Mkt,SMB,HML,RMW,CMA,UMD,S-F,B-F
Year_Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
196307,-0.39,-0.41,-0.97,0.68,-1.18,0.90,-1.16,-0.21
196308,5.07,-0.80,1.80,0.36,-0.35,1.01,3.39,5.14
196309,-1.57,-0.52,0.13,-0.71,0.29,0.19,-1.57,-1.45
196310,2.53,-1.39,-0.10,2.80,-2.01,3.12,1.05,2.77
196311,-0.85,-0.88,1.75,-0.51,2.24,-0.74,-1.80,-0.75
...,...,...,...,...,...,...,...,...
201602,-0.07,0.88,-0.57,3.25,2.02,-4.38,-0.06,-0.19
201603,6.96,1.07,1.19,0.77,-0.08,-5.01,7.12,6.77
201604,0.91,1.23,3.28,-2.97,1.90,-6.02,3.37,0.69
201605,1.78,-0.61,-1.66,-1.09,-2.48,1.42,1.23,1.75


#### Panel B

In [17]:
# 6 size x book-to-market portfolios: keep the four corner portfolios, rename
# them to short leg labels (L/H = low/high BM, _s/_b = small/big), and cut to
# the common Year_Month window.
port_6_value = (
    pd.read_csv(
        f"{data_dir}/6_Portfolios_2x3.csv",
        skiprows=16,
        names=["Year_Month", "SMALL LoBM", "ME1 BM2", "SMALL HiBM", "BIG LoBM", "ME2 BM2", "BIG HiBM"],
    )
    .loc[:, ["Year_Month", "SMALL LoBM", "SMALL HiBM", "BIG LoBM", "BIG HiBM"]]
    .rename(columns={'SMALL LoBM': 'L_s', 'SMALL HiBM': 'H_s', 'BIG LoBM': 'L_b', 'BIG HiBM': 'H_b'})
    # NOTE(review): 1176 is presumably the row count of the first table in the
    # file - confirm.
    .iloc[:1176]
    .set_index("Year_Month")
    .loc['196307':'201606']
    .astype(float)
)

port_6_value

Unnamed: 0_level_0,L_s,H_s,L_b,H_b
Year_Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
196307,-0.9960,-1.1202,-0.0411,-1.8554
196308,4.5361,5.7786,5.3890,7.7374
196309,-3.0338,-1.9949,-1.0644,-1.8500
196310,1.2001,2.7144,3.8445,2.1333
196311,-2.9288,-0.3615,-0.2458,0.6863
...,...,...,...,...
201602,-0.7099,0.3780,0.0215,-2.2142
201603,7.7058,8.4662,6.5760,8.2047
201604,2.6434,3.8478,-0.6295,4.7317
201605,2.3290,0.5247,2.1877,0.6654


In [18]:
# High-minus-low spread within each size group.
for size in ('s', 'b'):
    port_6_value[f'HML_{size}'] = port_6_value[f'H_{size}'] - port_6_value[f'L_{size}']

# Each leg averaged over small and big, then the overall spread.
for leg in ('H', 'L'):
    port_6_value[leg] = (port_6_value[f'{leg}_s'] + port_6_value[f'{leg}_b']) / 2
port_6_value['HML'] = port_6_value['H'] - port_6_value['L']

# Excess returns over RF. The subtraction is positional (.values), so the rows
# must already match RF in number and order.
for leg in ('H', 'H_s', 'H_b', 'L', 'L_s', 'L_b'):
    port_6_value[f'{leg}-F'] = port_6_value[leg].values - RF.values

# Small-minus-big differences of the spread and of each leg.
for leg in ('HML', 'H', 'L'):
    port_6_value[f'{leg}_s-b'] = port_6_value[f'{leg}_s'] - port_6_value[f'{leg}_b']

port_6_value

Unnamed: 0_level_0,L_s,H_s,L_b,H_b,HML_s,HML_b,H,L,HML,H-F,H_s-F,H_b-F,L-F,L_s-F,L_b-F,HML_s-b,H_s-b,L_s-b
Year_Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
196307,-0.9960,-1.1202,-0.0411,-1.8554,-0.1242,-1.8143,-1.48780,-0.51855,-0.96925,-1.75780,-1.3902,-2.1254,-0.78855,-1.2660,-0.3111,1.6901,0.7352,-0.9549
196308,4.5361,5.7786,5.3890,7.7374,1.2425,2.3484,6.75800,4.96255,1.79545,6.50800,5.5286,7.4874,4.71255,4.2861,5.1390,-1.1059,-1.9588,-0.8529
196309,-3.0338,-1.9949,-1.0644,-1.8500,1.0389,-0.7856,-1.92245,-2.04910,0.12665,-2.19245,-2.2649,-2.1200,-2.31910,-3.3038,-1.3344,1.8245,-0.1449,-1.9694
196310,1.2001,2.7144,3.8445,2.1333,1.5143,-1.7112,2.42385,2.52230,-0.09845,2.13385,2.4244,1.8433,2.23230,0.9101,3.5545,3.2255,0.5811,-2.6444
196311,-2.9288,-0.3615,-0.2458,0.6863,2.5673,0.9321,0.16240,-1.58730,1.74970,-0.10760,-0.6315,0.4163,-1.85730,-3.1988,-0.5158,1.6352,-1.0478,-2.6830
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201602,-0.7099,0.3780,0.0215,-2.2142,1.0879,-2.2357,-0.91810,-0.34420,-0.57390,-0.93810,0.3580,-2.2342,-0.36420,-0.7299,0.0015,3.3236,2.5922,-0.7314
201603,7.7058,8.4662,6.5760,8.2047,0.7604,1.6287,8.33545,7.14090,1.19455,8.31545,8.4462,8.1847,7.12090,7.6858,6.5560,-0.8683,0.2615,1.1298
201604,2.6434,3.8478,-0.6295,4.7317,1.2044,5.3612,4.28975,1.00695,3.28280,4.27975,3.8378,4.7217,0.99695,2.6334,-0.6395,-4.1568,-0.8839,3.2729
201605,2.3290,0.5247,2.1877,0.6654,-1.8043,-1.5223,0.59505,2.25835,-1.66330,0.58505,0.5147,0.6554,2.24835,2.3190,2.1777,-0.2820,-0.1407,0.1413


In [19]:
# Six candidate factor sets. Position 0 is the market factor, 1 the size factor,
# and positions 2-5 the value, profitability, investment and momentum entries
# that later cells pick out by index.
factors = [
    ['Mkt', 'SMB', 'HML', 'RMW', 'CMA', 'UMD'],
    ['Mkt', 'SMB', 'HML_s', 'RMW_s', 'CMA_s', 'UMD_s'],
    ['Mkt', 'S-F', 'H-F', 'R-F', 'C-F', 'U-F'],
    ['Mkt', 'S-F', 'H_s-F', 'R_s-F', 'C_s-F', 'U_s-F'],
    ['Mkt', 'S-F', 'L-F', 'W-F', 'A-F', 'D-F'],
    ['Mkt', 'S-F', 'L_s-F', 'W_s-F', 'A_s-F', 'D_s-F'],
]

# Individual names for each set, bound to the same list objects.
factor_1, factor_2, factor_3, factor_4, factor_5, factor_6 = factors

In [20]:
value_i = 2
# Value entry of every factor set except the first, whose 'HML' column is
# already part of master.
value_factors = [factor[value_i] for factor in factors][1:]

# Drop any copies left by an earlier run first, so re-executing this cell cannot
# append duplicate columns to master.
master = pd.concat(
    [master.drop(columns=value_factors, errors='ignore'), port_6_value[value_factors]],
    axis=1,
)
master

Unnamed: 0_level_0,Mkt,SMB,HML,RMW,CMA,UMD,S-F,B-F,HML_s,H-F,H_s-F,L-F,L_s-F
Year_Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
196307,-0.39,-0.41,-0.97,0.68,-1.18,0.90,-1.16,-0.21,-0.1242,-1.75780,-1.3902,-0.78855,-1.2660
196308,5.07,-0.80,1.80,0.36,-0.35,1.01,3.39,5.14,1.2425,6.50800,5.5286,4.71255,4.2861
196309,-1.57,-0.52,0.13,-0.71,0.29,0.19,-1.57,-1.45,1.0389,-2.19245,-2.2649,-2.31910,-3.3038
196310,2.53,-1.39,-0.10,2.80,-2.01,3.12,1.05,2.77,1.5143,2.13385,2.4244,2.23230,0.9101
196311,-0.85,-0.88,1.75,-0.51,2.24,-0.74,-1.80,-0.75,2.5673,-0.10760,-0.6315,-1.85730,-3.1988
...,...,...,...,...,...,...,...,...,...,...,...,...,...
201602,-0.07,0.88,-0.57,3.25,2.02,-4.38,-0.06,-0.19,1.0879,-0.93810,0.3580,-0.36420,-0.7299
201603,6.96,1.07,1.19,0.77,-0.08,-5.01,7.12,6.77,0.7604,8.31545,8.4462,7.12090,7.6858
201604,0.91,1.23,3.28,-2.97,1.90,-6.02,3.37,0.69,1.2044,4.27975,3.8378,0.99695,2.6334
201605,1.78,-0.61,-1.66,-1.09,-2.48,1.42,1.23,1.75,-1.8043,0.58505,0.5147,2.24835,2.3190


In [21]:
def panel_b_avg_return_t_stat(factor_df, factors):
    """Return the mean and the t-statistic of the mean for selected columns.

    Parameters
    ----------
    factor_df : pandas.DataFrame
        Frame holding one return series per column.
    factors : list of str
        Column names of ``factor_df`` to summarise, in output order.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``factors`` with the columns 'Factors',
        'Average Return' and 't-statistic'.
    """
    returns = factor_df[factors]

    average_return = returns.mean()
    std_returns = returns.std()

    # mean() and std() skip missing values, so the standard error must use the
    # number of non-missing observations per column rather than the row count.
    # Without missing values this equals len(factor_df).
    n_obs = returns.count()

    # t-statistic of the mean: mean / (std / sqrt(n)).
    t_stats = average_return / (std_returns / np.sqrt(n_obs))

    return pd.DataFrame({
        'Factors': factors,
        'Average Return': average_return.values,
        't-statistic': t_stats.values,
    })

In [22]:
# Empty starting frame; the Panel B blocks are concatenated onto it column-wise
# by the cells below.
t1_panel_b = pd.DataFrame()

In [23]:
t1_panel_b_value = ['HML', 'HML_s', 'HML_b', 'H-F', 'H_s-F', 'H_b-F', 'L-F', 'L_s-F', 'L_b-F', 'HML_s-b', 'H_s-b', 'L_s-b']

t1_panel_b_value_table = panel_b_avg_return_t_stat(port_6_value, t1_panel_b_value)

# Start Panel B from the value block itself rather than concatenating onto the
# running t1_panel_b, so re-running this cell cannot append the block twice.
t1_panel_b = t1_panel_b_value_table.copy()
t1_panel_b_value_table

Unnamed: 0,Factors,Average Return,t-statistic
0,HML,0.34499,3.112855
1,HML_s,0.490823,3.898458
2,HML_b,0.199158,1.618725
3,H-F,0.829006,4.165839
4,H_s-F,0.980765,4.428705
5,H_b-F,0.677248,3.517978
6,L-F,0.484016,2.232661
7,L_s-F,0.489942,1.809908
8,L_b-F,0.47809,2.620844
9,HML_s-b,0.291665,2.573347


In [24]:
# Display Panel B as assembled so far.
t1_panel_b

Unnamed: 0,Factors,Average Return,t-statistic
0,HML,0.34499,3.112855
1,HML_s,0.490823,3.898458
2,HML_b,0.199158,1.618725
3,H-F,0.829006,4.165839
4,H_s-F,0.980765,4.428705
5,H_b-F,0.677248,3.517978
6,L-F,0.484016,2.232661
7,L_s-F,0.489942,1.809908
8,L_b-F,0.47809,2.620844
9,HML_s-b,0.291665,2.573347


In [25]:
# 6 size x operating-profitability portfolios: keep the four corner portfolios,
# rename them to short leg labels (W/R = low/high OP, _s/_b = small/big), and
# cut to the common Year_Month window.
port_6_op = (
    pd.read_csv(
        f"{data_dir}/6_Portfolios_ME_OP_2x3.csv",
        # NOTE(review): skiprows=1 differs from the other portfolio loaders;
        # presumably this CSV has a shorter header - confirm.
        skiprows=1,
        names=["Year_Month", "SMALL LoOP", "ME1 OP2", "SMALL HiOP", "BIG LoOP", "ME2 OP2", "BIG HiOP"],
    )
    .loc[:, ["Year_Month", "SMALL LoOP", "SMALL HiOP", "BIG LoOP", "BIG HiOP"]]
    .rename(columns={'SMALL LoOP': 'W_s', 'SMALL HiOP': 'R_s', 'BIG LoOP': 'W_b', 'BIG HiOP': 'R_b'})
    .iloc[:1176]
    .set_index("Year_Month")
    .loc['196307':'201606']
    .astype(float)
)

port_6_op

Unnamed: 0_level_0,W_s,R_s,W_b,R_b
Year_Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
196307,-0.8182,-0.4156,-0.8456,0.1095
196308,4.8507,5.2868,5.5121,5.7862
196309,-1.0166,-2.6991,-1.7189,-1.4500
196310,1.3664,1.6166,0.8173,6.1639
196311,-1.2569,-2.3721,-1.4554,-1.3506
...,...,...,...,...
201602,-1.6093,1.8174,-2.0077,1.0676
201603,7.3877,8.8999,6.9678,7.0027
201604,2.9992,1.2516,3.3480,-0.8534
201605,2.0455,0.3533,2.2697,1.7814


In [26]:
# Robust-minus-weak spread within each size group.
for size in ('s', 'b'):
    port_6_op[f'RMW_{size}'] = port_6_op[f'R_{size}'] - port_6_op[f'W_{size}']

# Each leg averaged over small and big, then the overall spread.
for leg in ('R', 'W'):
    port_6_op[leg] = (port_6_op[f'{leg}_s'] + port_6_op[f'{leg}_b']) / 2
port_6_op['RMW'] = port_6_op['R'] - port_6_op['W']

# Excess returns over RF. The subtraction is positional (.values), so the rows
# must already match RF in number and order.
for leg in ('R', 'R_s', 'R_b', 'W', 'W_s', 'W_b'):
    port_6_op[f'{leg}-F'] = port_6_op[leg].values - RF.values

# Small-minus-big differences of the spread and of each leg.
for leg in ('RMW', 'R', 'W'):
    port_6_op[f'{leg}_s-b'] = port_6_op[f'{leg}_s'] - port_6_op[f'{leg}_b']

port_6_op

Unnamed: 0_level_0,W_s,R_s,W_b,R_b,RMW_s,RMW_b,R,W,RMW,R-F,R_s-F,R_b-F,W-F,W_s-F,W_b-F,RMW_s-b,R_s-b,W_s-b
Year_Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
196307,-0.8182,-0.4156,-0.8456,0.1095,0.4026,0.9551,-0.15305,-0.83190,0.67885,-0.42305,-0.6856,-0.1605,-1.10190,-1.0882,-1.1156,-0.5525,-0.5251,0.0274
196308,4.8507,5.2868,5.5121,5.7862,0.4361,0.2741,5.53650,5.18140,0.35510,5.28650,5.0368,5.5362,4.93140,4.6007,5.2621,0.1620,-0.4994,-0.6614
196309,-1.0166,-2.6991,-1.7189,-1.4500,-1.6825,0.2689,-2.07455,-1.36775,-0.70680,-2.34455,-2.9691,-1.7200,-1.63775,-1.2866,-1.9889,-1.9514,-1.2491,0.7023
196310,1.3664,1.6166,0.8173,6.1639,0.2502,5.3466,3.89025,1.09185,2.79840,3.60025,1.3266,5.8739,0.80185,1.0764,0.5273,-5.0964,-4.5473,0.5491
196311,-1.2569,-2.3721,-1.4554,-1.3506,-1.1152,0.1048,-1.86135,-1.35615,-0.50520,-2.13135,-2.6421,-1.6206,-1.62615,-1.5269,-1.7254,-1.2200,-1.0215,0.1985
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201602,-1.6093,1.8174,-2.0077,1.0676,3.4267,3.0753,1.44250,-1.80850,3.25100,1.42250,1.7974,1.0476,-1.82850,-1.6293,-2.0277,0.3514,0.7498,0.3984
201603,7.3877,8.8999,6.9678,7.0027,1.5122,0.0349,7.95130,7.17775,0.77355,7.93130,8.8799,6.9827,7.15775,7.3677,6.9478,1.4773,1.8972,0.4199
201604,2.9992,1.2516,3.3480,-0.8534,-1.7476,-4.2014,0.19910,3.17360,-2.97450,0.18910,1.2416,-0.8634,3.16360,2.9892,3.3380,2.4538,2.1050,-0.3488
201605,2.0455,0.3533,2.2697,1.7814,-1.6922,-0.4883,1.06735,2.15760,-1.09025,1.05735,0.3433,1.7714,2.14760,2.0355,2.2597,-1.2039,-1.4281,-0.2242


In [27]:
op_i = 3
# Profitability entry of every factor set except the first, whose 'RMW' column
# is already part of master.
op_factors = [factor[op_i] for factor in factors][1:]
# Use string labels so the rows line up with master when concatenating.
port_6_op.index = port_6_op.index.astype(str)

# Drop any copies left by an earlier run first, so re-executing this cell cannot
# append duplicate columns to master.
master = pd.concat(
    [master.drop(columns=op_factors, errors='ignore'), port_6_op[op_factors]],
    axis=1,
)
master

Unnamed: 0_level_0,Mkt,SMB,HML,RMW,CMA,UMD,S-F,B-F,HML_s,H-F,H_s-F,L-F,L_s-F,RMW_s,R-F,R_s-F,W-F,W_s-F
Year_Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
196307,-0.39,-0.41,-0.97,0.68,-1.18,0.90,-1.16,-0.21,-0.1242,-1.75780,-1.3902,-0.78855,-1.2660,0.4026,-0.42305,-0.6856,-1.10190,-1.0882
196308,5.07,-0.80,1.80,0.36,-0.35,1.01,3.39,5.14,1.2425,6.50800,5.5286,4.71255,4.2861,0.4361,5.28650,5.0368,4.93140,4.6007
196309,-1.57,-0.52,0.13,-0.71,0.29,0.19,-1.57,-1.45,1.0389,-2.19245,-2.2649,-2.31910,-3.3038,-1.6825,-2.34455,-2.9691,-1.63775,-1.2866
196310,2.53,-1.39,-0.10,2.80,-2.01,3.12,1.05,2.77,1.5143,2.13385,2.4244,2.23230,0.9101,0.2502,3.60025,1.3266,0.80185,1.0764
196311,-0.85,-0.88,1.75,-0.51,2.24,-0.74,-1.80,-0.75,2.5673,-0.10760,-0.6315,-1.85730,-3.1988,-1.1152,-2.13135,-2.6421,-1.62615,-1.5269
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201602,-0.07,0.88,-0.57,3.25,2.02,-4.38,-0.06,-0.19,1.0879,-0.93810,0.3580,-0.36420,-0.7299,3.4267,1.42250,1.7974,-1.82850,-1.6293
201603,6.96,1.07,1.19,0.77,-0.08,-5.01,7.12,6.77,0.7604,8.31545,8.4462,7.12090,7.6858,1.5122,7.93130,8.8799,7.15775,7.3677
201604,0.91,1.23,3.28,-2.97,1.90,-6.02,3.37,0.69,1.2044,4.27975,3.8378,0.99695,2.6334,-1.7476,0.18910,1.2416,3.16360,2.9892
201605,1.78,-0.61,-1.66,-1.09,-2.48,1.42,1.23,1.75,-1.8043,0.58505,0.5147,2.24835,2.3190,-1.6922,1.05735,0.3433,2.14760,2.0355


In [28]:
# Profitability block of Panel B: spreads, excess-return legs and small-minus-big rows.
t1_panel_b_op = [
    'RMW', 'RMW_s', 'RMW_b',
    'R-F', 'R_s-F', 'R_b-F',
    'W-F', 'W_s-F', 'W_b-F',
    'RMW_s-b', 'R_s-b', 'W_s-b',
]

t1_panel_b_op_table = panel_b_avg_return_t_stat(factor_df=port_6_op, factors=t1_panel_b_op)
t1_panel_b_op_table

Unnamed: 0,Factors,Average Return,t-statistic
0,RMW,0.264262,2.998224
1,RMW_s,0.317856,3.046044
2,RMW_b,0.210668,2.107735
3,R-F,0.739218,3.782048
4,R_s-F,0.900725,3.823715
5,R_b-F,0.577711,3.343212
6,W-F,0.474956,2.127369
7,W_s-F,0.582869,2.23328
8,W_b-F,0.367043,1.805045
9,RMW_s-b,0.107188,1.037014


In [29]:
# Rebuild Panel B from its component tables instead of extending the running
# frame, so re-running this cell cannot duplicate the profitability columns.
t1_panel_b = pd.concat([t1_panel_b_value_table, t1_panel_b_op_table], axis=1)
t1_panel_b

Unnamed: 0,Factors,Average Return,t-statistic,Factors.1,Average Return.1,t-statistic.1
0,HML,0.34499,3.112855,RMW,0.264262,2.998224
1,HML_s,0.490823,3.898458,RMW_s,0.317856,3.046044
2,HML_b,0.199158,1.618725,RMW_b,0.210668,2.107735
3,H-F,0.829006,4.165839,R-F,0.739218,3.782048
4,H_s-F,0.980765,4.428705,R_s-F,0.900725,3.823715
5,H_b-F,0.677248,3.517978,R_b-F,0.577711,3.343212
6,L-F,0.484016,2.232661,W-F,0.474956,2.127369
7,L_s-F,0.489942,1.809908,W_s-F,0.582869,2.23328
8,L_b-F,0.47809,2.620844,W_b-F,0.367043,1.805045
9,HML_s-b,0.291665,2.573347,RMW_s-b,0.107188,1.037014


In [30]:
# 6 size x investment portfolios: keep the four corner portfolios, rename them
# to short leg labels (C/A = low/high INV, _s/_b = small/big), and cut to the
# common Year_Month window.
port_6_inv = (
    pd.read_csv(
        f"{data_dir}/6_Portfolios_ME_INV_2x3.csv",
        skiprows=17,
        names=["Year_Month", "SMALL LoINV", "ME1 INV2", "SMALL HiINV", "BIG LoINV", "ME2 INV2", "BIG HiINV"],
    )
    .loc[:, ["Year_Month", "SMALL LoINV", "SMALL HiINV", "BIG LoINV", "BIG HiINV"]]
    .rename(columns={'SMALL LoINV': 'C_s', 'SMALL HiINV': 'A_s', 'BIG LoINV': 'C_b', 'BIG HiINV': 'A_b'})
    # NOTE(review): 732 is presumably the row count of the first table in this
    # file - confirm.
    .iloc[:732]
    .set_index("Year_Month")
    .loc['196307':'201606']
    .astype(float)
)

port_6_inv

Unnamed: 0_level_0,C_s,A_s,C_b,A_b
Year_Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
196307,-0.9899,-0.0320,-1.2596,0.1385
196308,4.6587,5.4120,6.1424,6.0871
196309,-1.0788,-2.3590,-1.2092,-0.5186
196310,2.0494,1.2419,2.2012,7.0302
196311,-0.7633,-2.4620,0.1809,-2.6006
...,...,...,...,...
201602,1.0003,-0.1388,0.6245,-2.2822
201603,8.7504,8.2395,7.0482,7.7229
201604,3.4702,2.4567,1.2671,-1.5156
201605,0.1007,1.6006,0.6394,4.1068


In [31]:
# Conservative-minus-aggressive spread within each size group.
for size in ('s', 'b'):
    port_6_inv[f'CMA_{size}'] = port_6_inv[f'C_{size}'] - port_6_inv[f'A_{size}']

# Each leg averaged over small and big, then the overall spread.
for leg in ('C', 'A'):
    port_6_inv[leg] = (port_6_inv[f'{leg}_s'] + port_6_inv[f'{leg}_b']) / 2
port_6_inv['CMA'] = port_6_inv['C'] - port_6_inv['A']

# Excess returns over RF. The subtraction is positional (.values), so the rows
# must already match RF in number and order.
for leg in ('C', 'C_s', 'C_b', 'A', 'A_s', 'A_b'):
    port_6_inv[f'{leg}-F'] = port_6_inv[leg].values - RF.values

# Small-minus-big differences of the spread and of each leg.
for leg in ('CMA', 'C', 'A'):
    port_6_inv[f'{leg}_s-b'] = port_6_inv[f'{leg}_s'] - port_6_inv[f'{leg}_b']

port_6_inv

Unnamed: 0_level_0,C_s,A_s,C_b,A_b,CMA_s,CMA_b,C,A,CMA,C-F,C_s-F,C_b-F,A-F,A_s-F,A_b-F,CMA_s-b,C_s-b,A_s-b
Year_Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
196307,-0.9899,-0.0320,-1.2596,0.1385,-0.9579,-1.3981,-1.12475,0.05325,-1.17800,-1.39475,-1.2599,-1.5296,-0.21675,-0.3020,-0.1315,0.4402,0.2697,-0.1705
196308,4.6587,5.4120,6.1424,6.0871,-0.7533,0.0553,5.40055,5.74955,-0.34900,5.15055,4.4087,5.8924,5.49955,5.1620,5.8371,-0.8086,-1.4837,-0.6751
196309,-1.0788,-2.3590,-1.2092,-0.5186,1.2802,-0.6906,-1.14400,-1.43880,0.29480,-1.41400,-1.3488,-1.4792,-1.70880,-2.6290,-0.7886,1.9708,0.1304,-1.8404
196310,2.0494,1.2419,2.2012,7.0302,0.8075,-4.8290,2.12530,4.13605,-2.01075,1.83530,1.7594,1.9112,3.84605,0.9519,6.7402,5.6365,-0.1518,-5.7883
196311,-0.7633,-2.4620,0.1809,-2.6006,1.6987,2.7815,-0.29120,-2.53130,2.24010,-0.56120,-1.0333,-0.0891,-2.80130,-2.7320,-2.8706,-1.0828,-0.9442,0.1386
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201602,1.0003,-0.1388,0.6245,-2.2822,1.1391,2.9067,0.81240,-1.21050,2.02290,0.79240,0.9803,0.6045,-1.23050,-0.1588,-2.3022,-1.7676,0.3758,2.1434
201603,8.7504,8.2395,7.0482,7.7229,0.5109,-0.6747,7.89930,7.98120,-0.08190,7.87930,8.7304,7.0282,7.96120,8.2195,7.7029,1.1856,1.7022,0.5166
201604,3.4702,2.4567,1.2671,-1.5156,1.0135,2.7827,2.36865,0.47055,1.89810,2.35865,3.4602,1.2571,0.46055,2.4467,-1.5256,-1.7692,2.2031,3.9723
201605,0.1007,1.6006,0.6394,4.1068,-1.4999,-3.4674,0.37005,2.85370,-2.48365,0.36005,0.0907,0.6294,2.84370,1.5906,4.0968,1.9675,-0.5387,-2.5062


In [32]:
inv_i = 4
# Investment entry of every factor set except the first, whose 'CMA' column is
# already part of master.
inv_factors = [factor[inv_i] for factor in factors][1:]
# Use string labels so the rows line up with master when concatenating.
port_6_inv.index = port_6_inv.index.astype(str)

# Drop any copies left by an earlier run first, so re-executing this cell cannot
# append duplicate columns to master.
master = pd.concat(
    [master.drop(columns=inv_factors, errors='ignore'), port_6_inv[inv_factors]],
    axis=1,
)
master

Unnamed: 0_level_0,Mkt,SMB,HML,RMW,CMA,UMD,S-F,B-F,HML_s,H-F,...,RMW_s,R-F,R_s-F,W-F,W_s-F,CMA_s,C-F,C_s-F,A-F,A_s-F
Year_Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
196307,-0.39,-0.41,-0.97,0.68,-1.18,0.90,-1.16,-0.21,-0.1242,-1.75780,...,0.4026,-0.42305,-0.6856,-1.10190,-1.0882,-0.9579,-1.39475,-1.2599,-0.21675,-0.3020
196308,5.07,-0.80,1.80,0.36,-0.35,1.01,3.39,5.14,1.2425,6.50800,...,0.4361,5.28650,5.0368,4.93140,4.6007,-0.7533,5.15055,4.4087,5.49955,5.1620
196309,-1.57,-0.52,0.13,-0.71,0.29,0.19,-1.57,-1.45,1.0389,-2.19245,...,-1.6825,-2.34455,-2.9691,-1.63775,-1.2866,1.2802,-1.41400,-1.3488,-1.70880,-2.6290
196310,2.53,-1.39,-0.10,2.80,-2.01,3.12,1.05,2.77,1.5143,2.13385,...,0.2502,3.60025,1.3266,0.80185,1.0764,0.8075,1.83530,1.7594,3.84605,0.9519
196311,-0.85,-0.88,1.75,-0.51,2.24,-0.74,-1.80,-0.75,2.5673,-0.10760,...,-1.1152,-2.13135,-2.6421,-1.62615,-1.5269,1.6987,-0.56120,-1.0333,-2.80130,-2.7320
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201602,-0.07,0.88,-0.57,3.25,2.02,-4.38,-0.06,-0.19,1.0879,-0.93810,...,3.4267,1.42250,1.7974,-1.82850,-1.6293,1.1391,0.79240,0.9803,-1.23050,-0.1588
201603,6.96,1.07,1.19,0.77,-0.08,-5.01,7.12,6.77,0.7604,8.31545,...,1.5122,7.93130,8.8799,7.15775,7.3677,0.5109,7.87930,8.7304,7.96120,8.2195
201604,0.91,1.23,3.28,-2.97,1.90,-6.02,3.37,0.69,1.2044,4.27975,...,-1.7476,0.18910,1.2416,3.16360,2.9892,1.0135,2.35865,3.4602,0.46055,2.4467
201605,1.78,-0.61,-1.66,-1.09,-2.48,1.42,1.23,1.75,-1.8043,0.58505,...,-1.6922,1.05735,0.3433,2.14760,2.0355,-1.4999,0.36005,0.0907,2.84370,1.5906


In [33]:
# Investment block of Panel B: spreads, excess-return legs and small-minus-big rows.
t1_panel_b_inv = [
    'CMA', 'CMA_s', 'CMA_b',
    'C-F', 'C_s-F', 'C_b-F',
    'A-F', 'A_s-F', 'A_b-F',
    'CMA_s-b', 'C_s-b', 'A_s-b',
]

t1_panel_b_inv_table = panel_b_avg_return_t_stat(factor_df=port_6_inv, factors=t1_panel_b_inv)
t1_panel_b_inv_table

Unnamed: 0,Factors,Average Return,t-statistic
0,CMA,0.310078,3.930435
1,CMA_s,0.409638,5.24051
2,CMA_b,0.210517,2.002416
3,C-F,0.795687,4.066081
4,C_s-F,0.932753,3.874555
5,C_b-F,0.658622,3.823236
6,A-F,0.48561,2.168432
7,A_s-F,0.523116,2.01626
8,A_b-F,0.448104,2.188981
9,CMA_s-b,0.19912,2.050469


In [34]:
# Rebuild Panel B from its component tables instead of extending the running
# frame, so re-running this cell cannot duplicate the investment columns.
t1_panel_b = pd.concat(
    [t1_panel_b_value_table, t1_panel_b_op_table, t1_panel_b_inv_table],
    axis=1,
)
t1_panel_b

Unnamed: 0,Factors,Average Return,t-statistic,Factors.1,Average Return.1,t-statistic.1,Factors.2,Average Return.2,t-statistic.2
0,HML,0.34499,3.112855,RMW,0.264262,2.998224,CMA,0.310078,3.930435
1,HML_s,0.490823,3.898458,RMW_s,0.317856,3.046044,CMA_s,0.409638,5.24051
2,HML_b,0.199158,1.618725,RMW_b,0.210668,2.107735,CMA_b,0.210517,2.002416
3,H-F,0.829006,4.165839,R-F,0.739218,3.782048,C-F,0.795687,4.066081
4,H_s-F,0.980765,4.428705,R_s-F,0.900725,3.823715,C_s-F,0.932753,3.874555
5,H_b-F,0.677248,3.517978,R_b-F,0.577711,3.343212,C_b-F,0.658622,3.823236
6,L-F,0.484016,2.232661,W-F,0.474956,2.127369,A-F,0.48561,2.168432
7,L_s-F,0.489942,1.809908,W_s-F,0.582869,2.23328,A_s-F,0.523116,2.01626
8,L_b-F,0.47809,2.620844,W_b-F,0.367043,1.805045,A_b-F,0.448104,2.188981
9,HML_s-b,0.291665,2.573347,RMW_s-b,0.107188,1.037014,CMA_s-b,0.19912,2.050469


In [35]:
# 6 size x prior-return portfolios: keep the four corner portfolios, rename
# them to short leg labels (D/U = low/high prior return, _s/_b = small/big),
# and cut to the common Year_Month window.
port_6_mom = (
    pd.read_csv(
        f"{data_dir}/6_Portfolios_Formed_on_Size_Momentum_2x3.csv",
        skiprows=12,
        names=["Year_Month", "SMALL LoPRIOR", "ME1 PRIOR2", "SMALL HiPRIOR", "BIG LoPRIOR", "ME2 PRIOR2", "BIG HiPRIOR"],
    )
    .loc[:, ["Year_Month", "SMALL LoPRIOR", "SMALL HiPRIOR", "BIG LoPRIOR", "BIG HiPRIOR"]]
    .rename(columns={'SMALL LoPRIOR': 'D_s', 'SMALL HiPRIOR': 'U_s', 'BIG LoPRIOR': 'D_b', 'BIG HiPRIOR': 'U_b'})
    # NOTE(review): 1170 is presumably the row count of the first table in this
    # file - confirm.
    .iloc[:1170]
    .set_index("Year_Month")
    .loc['196307':'201606']
    .astype(float)
)

port_6_mom

Unnamed: 0_level_0,D_s,U_s,D_b,U_b
Year_Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
196307,-1.33,-1.07,-1.53,0.00
196308,4.09,6.36,5.86,5.62
196309,-1.87,-1.27,-2.45,-2.66
196310,1.05,3.52,0.81,4.57
196311,-2.32,-1.41,-0.33,-2.72
...,...,...,...,...
201602,3.21,-1.81,1.70,-2.04
201603,12.88,6.20,9.36,6.01
201604,6.68,1.92,6.73,-0.54
201605,-0.03,1.69,1.09,2.22


In [36]:
# Up-minus-down spread within each size group.
for size in ('s', 'b'):
    port_6_mom[f'UMD_{size}'] = port_6_mom[f'U_{size}'] - port_6_mom[f'D_{size}']

# Each leg averaged over small and big, then the overall spread.
for leg in ('U', 'D'):
    port_6_mom[leg] = (port_6_mom[f'{leg}_s'] + port_6_mom[f'{leg}_b']) / 2
port_6_mom['UMD'] = port_6_mom['U'] - port_6_mom['D']

# Excess returns over RF. The subtraction is positional (.values), so the rows
# must already match RF in number and order.
for leg in ('U', 'U_s', 'U_b', 'D', 'D_s', 'D_b'):
    port_6_mom[f'{leg}-F'] = port_6_mom[leg].values - RF.values

# Small-minus-big differences of the spread and of each leg.
for leg in ('UMD', 'U', 'D'):
    port_6_mom[f'{leg}_s-b'] = port_6_mom[f'{leg}_s'] - port_6_mom[f'{leg}_b']

port_6_mom

Unnamed: 0_level_0,D_s,U_s,D_b,U_b,UMD_s,UMD_b,U,D,UMD,U-F,U_s-F,U_b-F,D-F,D_s-F,D_b-F,UMD_s-b,U_s-b,D_s-b
Year_Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
196307,-1.33,-1.07,-1.53,0.00,0.26,1.53,-0.535,-1.430,0.895,-0.805,-1.34,-0.27,-1.700,-1.60,-1.80,-1.27,-1.07,0.20
196308,4.09,6.36,5.86,5.62,2.27,-0.24,5.990,4.975,1.015,5.740,6.11,5.37,4.725,3.84,5.61,2.51,0.74,-1.77
196309,-1.87,-1.27,-2.45,-2.66,0.60,-0.21,-1.965,-2.160,0.195,-2.235,-1.54,-2.93,-2.430,-2.14,-2.72,0.81,1.39,0.58
196310,1.05,3.52,0.81,4.57,2.47,3.76,4.045,0.930,3.115,3.755,3.23,4.28,0.640,0.76,0.52,-1.29,-1.05,0.24
196311,-2.32,-1.41,-0.33,-2.72,0.91,-2.39,-2.065,-1.325,-0.740,-2.335,-1.68,-2.99,-1.595,-2.59,-0.60,3.30,1.31,-1.99
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201602,3.21,-1.81,1.70,-2.04,-5.02,-3.74,-1.925,2.455,-4.380,-1.945,-1.83,-2.06,2.435,3.19,1.68,-1.28,0.23,1.51
201603,12.88,6.20,9.36,6.01,-6.68,-3.35,6.105,11.120,-5.015,6.085,6.18,5.99,11.100,12.86,9.34,-3.33,0.19,3.52
201604,6.68,1.92,6.73,-0.54,-4.76,-7.27,0.690,6.705,-6.015,0.680,1.91,-0.55,6.695,6.67,6.72,2.51,2.46,-0.05
201605,-0.03,1.69,1.09,2.22,1.72,1.13,1.955,0.530,1.425,1.945,1.68,2.21,0.520,-0.04,1.08,0.59,-0.53,-1.12


In [37]:
mom_i = 5
# Momentum entry of every factor set except the first, whose 'UMD' column is
# already part of master.
mom_factors = [factor[mom_i] for factor in factors][1:]
# Use string labels so the rows line up with master when concatenating.
port_6_mom.index = port_6_mom.index.astype(str)

# Drop any copies left by an earlier run first, so re-executing this cell cannot
# append duplicate columns to master.
master = pd.concat(
    [master.drop(columns=mom_factors, errors='ignore'), port_6_mom[mom_factors]],
    axis=1,
)
master

Unnamed: 0_level_0,Mkt,SMB,HML,RMW,CMA,UMD,S-F,B-F,HML_s,H-F,...,CMA_s,C-F,C_s-F,A-F,A_s-F,UMD_s,U-F,U_s-F,D-F,D_s-F
Year_Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
196307,-0.39,-0.41,-0.97,0.68,-1.18,0.90,-1.16,-0.21,-0.1242,-1.75780,...,-0.9579,-1.39475,-1.2599,-0.21675,-0.3020,0.26,-0.805,-1.34,-1.700,-1.60
196308,5.07,-0.80,1.80,0.36,-0.35,1.01,3.39,5.14,1.2425,6.50800,...,-0.7533,5.15055,4.4087,5.49955,5.1620,2.27,5.740,6.11,4.725,3.84
196309,-1.57,-0.52,0.13,-0.71,0.29,0.19,-1.57,-1.45,1.0389,-2.19245,...,1.2802,-1.41400,-1.3488,-1.70880,-2.6290,0.60,-2.235,-1.54,-2.430,-2.14
196310,2.53,-1.39,-0.10,2.80,-2.01,3.12,1.05,2.77,1.5143,2.13385,...,0.8075,1.83530,1.7594,3.84605,0.9519,2.47,3.755,3.23,0.640,0.76
196311,-0.85,-0.88,1.75,-0.51,2.24,-0.74,-1.80,-0.75,2.5673,-0.10760,...,1.6987,-0.56120,-1.0333,-2.80130,-2.7320,0.91,-2.335,-1.68,-1.595,-2.59
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201602,-0.07,0.88,-0.57,3.25,2.02,-4.38,-0.06,-0.19,1.0879,-0.93810,...,1.1391,0.79240,0.9803,-1.23050,-0.1588,-5.02,-1.945,-1.83,2.435,3.19
201603,6.96,1.07,1.19,0.77,-0.08,-5.01,7.12,6.77,0.7604,8.31545,...,0.5109,7.87930,8.7304,7.96120,8.2195,-6.68,6.085,6.18,11.100,12.86
201604,0.91,1.23,3.28,-2.97,1.90,-6.02,3.37,0.69,1.2044,4.27975,...,1.0135,2.35865,3.4602,0.46055,2.4467,-4.76,0.680,1.91,6.695,6.67
201605,1.78,-0.61,-1.66,-1.09,-2.48,1.42,1.23,1.75,-1.8043,0.58505,...,-1.4999,0.36005,0.0907,2.84370,1.5906,1.72,1.945,1.68,0.520,-0.04


In [41]:
# Persist the assembled factor panel to CSV.
# NOTE(review): index=False leaves the Year_Month index out of the file, so the
# CSV carries no date column - confirm that downstream readers expect this.
master.to_csv('master_data_all_factors.csv', index=False)

In [38]:
# Momentum block of Panel B: spreads, excess-return legs and small-minus-big rows.
t1_panel_b_mom = [
    'UMD', 'UMD_s', 'UMD_b',
    'U-F', 'U_s-F', 'U_b-F',
    'D-F', 'D_s-F', 'D_b-F',
    'UMD_s-b', 'U_s-b', 'D_s-b',
]

t1_panel_b_mom_table = panel_b_avg_return_t_stat(factor_df=port_6_mom, factors=t1_panel_b_mom)
t1_panel_b_mom_table

Unnamed: 0,Factors,Average Return,t-statistic
0,UMD,0.690708,4.112541
1,UMD_s,0.914701,5.469551
2,UMD_b,0.466714,2.508793
3,U-F,0.96228,4.56285
4,U_s-F,1.169135,4.774086
5,U_b-F,0.755425,3.939266
6,D-F,0.271572,1.092782
7,D_s-F,0.254434,0.899359
8,D_b-F,0.288711,1.242741
9,UMD_s-b,0.447987,4.03616


In [39]:
# Rebuild Panel B from its four component tables instead of extending the
# running frame, so re-running this cell cannot duplicate the momentum columns.
t1_panel_b = pd.concat(
    [
        t1_panel_b_value_table,
        t1_panel_b_op_table,
        t1_panel_b_inv_table,
        t1_panel_b_mom_table,
    ],
    axis=1,
)
t1_panel_b

Unnamed: 0,Factors,Average Return,t-statistic,Factors.1,Average Return.1,t-statistic.1,Factors.2,Average Return.2,t-statistic.2,Factors.3,Average Return.3,t-statistic.3
0,HML,0.34499,3.112855,RMW,0.264262,2.998224,CMA,0.310078,3.930435,UMD,0.690708,4.112541
1,HML_s,0.490823,3.898458,RMW_s,0.317856,3.046044,CMA_s,0.409638,5.24051,UMD_s,0.914701,5.469551
2,HML_b,0.199158,1.618725,RMW_b,0.210668,2.107735,CMA_b,0.210517,2.002416,UMD_b,0.466714,2.508793
3,H-F,0.829006,4.165839,R-F,0.739218,3.782048,C-F,0.795687,4.066081,U-F,0.96228,4.56285
4,H_s-F,0.980765,4.428705,R_s-F,0.900725,3.823715,C_s-F,0.932753,3.874555,U_s-F,1.169135,4.774086
5,H_b-F,0.677248,3.517978,R_b-F,0.577711,3.343212,C_b-F,0.658622,3.823236,U_b-F,0.755425,3.939266
6,L-F,0.484016,2.232661,W-F,0.474956,2.127369,A-F,0.48561,2.168432,D-F,0.271572,1.092782
7,L_s-F,0.489942,1.809908,W_s-F,0.582869,2.23328,A_s-F,0.523116,2.01626,D_s-F,0.254434,0.899359
8,L_b-F,0.47809,2.620844,W_b-F,0.367043,1.805045,A_b-F,0.448104,2.188981,D_b-F,0.288711,1.242741
9,HML_s-b,0.291665,2.573347,RMW_s-b,0.107188,1.037014,CMA_s-b,0.19912,2.050469,UMD_s-b,0.447987,4.03616


In [178]:
# Export Panel B. The index is only a positional row counter, so it is left out.
t1_panel_b.to_excel('./result/table1_b.xlsx', index=False)

### Table 3

In [156]:
# Seed NumPy's global random state so the np.random draws used by the bootstrap
# below are repeatable from run to run.
np.random.seed(42)

def simulate_sharpe_ratios(returns, factor, n_simulations=100):
    """Bootstrap full-sample (FS), in-sample (IS) and out-of-sample (OS) squared Sharpe ratios.

    Each simulation run does two things:

    * FS: draws ``len(returns)`` rows from ``returns`` with replacement and
      evaluates ``squared_sharpe_ratio`` on the draw.
    * IS/OS: rows are grouped into adjacent position pairs (0, 1), (2, 3), ...;
      ``len(returns) // 2`` pairs are drawn with replacement and, for every drawn
      pair, a uniform draw decides which row goes to the IS sample and which to
      the OS sample. Tangency weights are estimated on the IS sample and then
      applied to the OS sample.

    :param returns: DataFrame holding the return columns named in ``factor``
    :param factor: column labels selecting the factors of the model
    :param n_simulations: number of simulation runs
    :return: DataFrame with columns 'FS', 'IS' and 'OS', one row per run
    """
    n_months = len(returns)
    n_pairs = n_months // 2
    # Adjacent row positions; only the first n_pairs entries can ever be drawn.
    month_pairs = [(start, start + 1) for start in range(0, n_months, 2)]

    fs_draws, is_draws, os_draws = [], [], []

    for _ in range(n_simulations):
        # Full sample: plain row bootstrap.
        resampled = returns.sample(n=n_months, replace=True)
        fs_draws.append(squared_sharpe_ratio(resampled, factor))

        # Split sample: draw pairs, then assign one row of each pair to IS and
        # the other to OS.
        drawn_pairs = np.random.choice(range(n_pairs), size=n_pairs, replace=True)
        is_rows, os_rows = [], []
        for pair_id in drawn_pairs:
            is_row, os_row = month_pairs[pair_id]
            if np.random.rand() >= 0.5:
                is_row, os_row = os_row, is_row
            is_rows.append(is_row)
            os_rows.append(os_row)

        is_months = returns.iloc[is_rows]
        os_months = returns.iloc[os_rows]

        # In-sample ratio and the tangency weights estimated on the same rows.
        is_draws.append(squared_sharpe_ratio(is_months, factor))
        is_weights = tangency_portfolio_weights(is_months, factor).ravel()

        # Out-of-sample ratio of the portfolio that uses the in-sample weights.
        os_draws.append(squared_sharpe_ratio(os_months, factor, is_weights))

    return pd.DataFrame({
        'FS': fs_draws,
        'IS': is_draws,
        'OS': os_draws
    })

def squared_sharpe_ratio(sample, factor, weights=None):
    """Return the squared Sharpe ratio of a set of factor returns.

    :param sample: DataFrame holding the return columns named in ``factor``
    :param factor: list of column labels selecting the factors
    :param weights: optional portfolio weights, one per factor. When omitted,
        the maximum squared Sharpe ratio ``mu' Sigma^-1 mu`` of the factors is
        returned. When given, the ratio of the fixed-weight portfolio
        ``(mean / std) ** 2`` is returned. A ``(k, 1)`` NumPy column vector
        (the shape produced by ``tangency_portfolio_weights``) is accepted and
        treated as ``k`` per-factor weights.
    :return: the squared Sharpe ratio as a scalar
    """
    factor_returns = sample[factor]

    if weights is None:
        mu_f = factor_returns.mean().values.reshape(-1, 1)
        sigma_f_inv = np.linalg.inv(factor_returns.cov().values)

        # Maximum squared Sharpe ratio; the product is a 1x1 matrix.
        return (mu_f.T @ sigma_f_inv @ mu_f)[0, 0]

    # A (k, 1) column vector cannot be broadcast across the k factor columns
    # by pandas, so flatten it to one weight per column first.
    if isinstance(weights, np.ndarray) and weights.shape == (factor_returns.shape[1], 1):
        weights = weights.ravel()

    # Per-period return of the weighted portfolio.
    returns = (factor_returns * weights).sum(axis=1)

    return (returns.mean() / returns.std()) ** 2

def tangency_portfolio_weights(is_sample, factor):
    """
    Calculate the tangency portfolio weights of a set of factors.

    :param is_sample: DataFrame holding the return columns named in ``factor``
    :param factor: list of column labels selecting the factors
    :return: a (k, 1) NumPy array of weights, scaled so they sum to one

    The sample means are used as given; no risk-free rate is subtracted here.
    NOTE(review): presumably the inputs are already excess / zero-investment
    returns -- confirm against the data preparation.

    source: https://bookdown.org/compfinezbook/introcompfinr/Efficient-portfolios-of.html - eq. 12.26
    """
    factor_returns = is_sample[factor]
    mu_f = factor_returns.mean().values.reshape(-1, 1)

    # Inverse of the sample covariance matrix (computed once).
    sigma_f_inv = np.linalg.inv(factor_returns.cov().values)

    # Numerator: Sigma^(-1) * mu
    numerator = sigma_f_inv @ mu_f

    # Denominator: 1' * Sigma^(-1) * mu, which rescales the weights to sum to one
    denominator = np.ones(len(mu_f)) @ numerator

    return numerator / denominator

In [157]:
# Column layout of the Table 3 summary.
summary_columns = [
    'Model', 'Actual', 'FS Average', 'FS Median',
    'IS Average', 'IS Median', 'OS Average', 'OS Median'
]

# Number of bootstrap simulation runs per model.
N = 100000

# One record per model. The frame is built once after the loop instead of being
# grown with pd.concat on every iteration, which re-copies all earlier rows and
# starts from an empty object-dtype frame.
summary_rows = []

for factor in factors:

    # Squared Sharpe ratio of the model on the actual (non-resampled) data
    actual_sharpe = squared_sharpe_ratio(master, factor)

    # Run the simulation for the current set of factors
    sharpe_ratios = simulate_sharpe_ratios(master, factor, n_simulations=N)

    # Average and median of the simulated FS / IS / OS ratios
    sharpe_avg = sharpe_ratios.mean()
    sharpe_median = sharpe_ratios.median()

    summary_rows.append({
        # Label the model by its factor names
        'Model': ', '.join(factor),
        'Actual': actual_sharpe,
        'FS Average': sharpe_avg['FS'],
        'FS Median': sharpe_median['FS'],
        'IS Average': sharpe_avg['IS'],
        'IS Median': sharpe_median['IS'],
        'OS Average': sharpe_avg['OS'],
        'OS Median': sharpe_median['OS']
    })

summary_df = pd.DataFrame(summary_rows, columns=summary_columns)

# Display the summary table
summary_df

Unnamed: 0,Model,Actual,FS Average,FS Median,IS Average,IS Median,OS Average,OS Median
0,"Mkt, SMB, HML, RMW, CMA, UMD",0.141268,0.15558,0.153001,0.170338,0.165171,0.133163,0.128445
1,"Mkt, SMB, HML_s, RMW_s, CMA_s, UMD_s",0.198995,0.214116,0.211034,0.230197,0.224304,0.190063,0.184377
2,"Mkt, S-F, H-F, R-F, C-F, U-F",0.135936,0.149377,0.147047,0.16294,0.158365,0.127966,0.12344
3,"Mkt, S-F, H_s-F, R_s-F, C_s-F, U_s-F",0.14857,0.162039,0.159611,0.17552,0.170653,0.140439,0.135646
4,"Mkt, S-F, L-F, W-F, A-F, D-F",0.085411,0.097704,0.095662,0.109932,0.105778,0.077369,0.073373
5,"Mkt, S-F, L_s-F, W_s-F, A_s-F, D_s-F",0.103723,0.116428,0.114248,0.129727,0.125173,0.095185,0.091009


In [158]:
# Export the Table 3 summary to Excel; the positional row index is not written.
summary_df.to_excel(excel_writer='./result/table3.xlsx', index=False)

In [179]:
# Three models with the highest actual squared Sharpe ratio.
summary_df.sort_values(by='Actual', ascending=False).iloc[:3]

Unnamed: 0,Model,Actual,FS Average,FS Median,IS Average,IS Median,OS Average,OS Median
1,"Mkt, SMB, HML_s, RMW_s, CMA_s, UMD_s",0.198995,0.214116,0.211034,0.230197,0.224304,0.190063,0.184377
3,"Mkt, S-F, H_s-F, R_s-F, C_s-F, U_s-F",0.14857,0.162039,0.159611,0.17552,0.170653,0.140439,0.135646
0,"Mkt, SMB, HML, RMW, CMA, UMD",0.141268,0.15558,0.153001,0.170338,0.165171,0.133163,0.128445
