In [82]:
import os
import sys
import numpy as np
import pandas as pd
import statsmodels.api as sm
from functools import reduce

In [9]:
fama5 = pd.read_csv('F-F_Research_Data_5_Factors_2x3.csv', header=2)
fama5.rename(columns={'Unnamed: 0': 'date'}, inplace=True)
fama5 = fama5.loc[fama5['date'] > '100000']
fama5['date'] = fama5['date'].astype(np.int32)
fama5[['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']] = fama5[['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']].astype(np.float32)
fama5

Unnamed: 0,date,Mkt-RF,SMB,HML,RMW,CMA,RF
0,196307,-0.39,-0.41,-0.97,0.68,-1.18,0.27
1,196308,5.07,-0.80,1.80,0.36,-0.35,0.25
2,196309,-1.57,-0.52,0.13,-0.71,0.29,0.27
3,196310,2.53,-1.39,-0.10,2.80,-2.01,0.29
4,196311,-0.85,-0.88,1.75,-0.51,2.24,0.27
...,...,...,...,...,...,...,...
718,202305,0.35,-0.38,-7.72,-1.81,-7.22,0.36
719,202306,6.46,1.34,-0.26,2.18,-1.62,0.40
720,202307,3.21,2.86,4.11,-0.57,0.57,0.45
721,202308,-2.39,-3.65,-1.06,3.43,-2.37,0.45


In [46]:
momentum = pd.read_csv("F-F_Momentum_Factor.csv", header=11)
momentum.rename(columns={"Unnamed: 0": "date", "Mom   ": "UMD"}, inplace=True)
momentum = momentum.loc[(momentum['date'] > '100000') & (momentum['date'] < '202312')]
momentum['date'] = momentum['date'].astype(np.int32)
momentum['UMD'] = momentum['UMD'].astype(np.float32)
momentum

Unnamed: 0,date,UMD
0,192701,0.36
1,192702,-2.14
2,192703,3.61
3,192704,4.30
4,192705,3.00
...,...,...
1156,202305,-0.63
1157,202306,-2.37
1158,202307,-3.98
1159,202308,3.77


In [61]:
hml_dev = pd.read_excel("The Devil in HMLs Details Factors Monthly.xlsx", header=18)
hml_dev.rename(columns={'DATE': 'date', 'USA': 'HML-DEV'}, inplace=True)
hml_dev = hml_dev.loc[:,['date', 'HML-DEV']]
# Convert the 'date' column to datetime format
hml_dev['date'] = pd.to_datetime(hml_dev['date'])
# Extract year and month and combine them to yyyymm format
hml_dev['date'] = hml_dev['date'].dt.year * 100 + hml_dev['date'].dt.month
# Convert the 'yyyymm' column to integer format (it's actually already in int64 format due to the above operation, but this is just to be explicit)
hml_dev['date'] = hml_dev['date'].astype(np.int32)
hml_dev['HML-DEV'] = hml_dev['HML-DEV'].astype(np.float32)
hml_dev['HML-DEV'] = hml_dev['HML-DEV'] * 100
hml_dev

Unnamed: 0,date,HML-DEV
0,192607,-1.985575
1,192608,5.235148
2,192609,-2.334913
3,192610,0.825291
4,192611,0.202532
...,...,...
1159,202302,-1.986047
1160,202303,-8.347044
1161,202304,-0.228598
1162,202305,-6.129721


In [47]:
fama5_cols = ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']

coefs, t_vals, r2 = [], [], []

for f in fama5_cols:
    # Define the independent variables (add a constant to the model)
    fama5_fs = [i for i in fama5_cols if i != f]
    X = fama5[fama5_fs]
    X = sm.add_constant(X)  # Adds a constant term to the predictor

    # Define the dependent variable
    y = fama5[f]

    # Fit the regression model
    model = sm.OLS(y, X).fit()

    # To extract the coefficients, t-values, and R-squared you can use:
    coefficients = model.params.to_frame()
    t_values = model.tvalues.to_frame()
    r_squared = model.rsquared

    coefficients.rename(columns={0: f}, inplace=True)
    t_values.rename(columns={0: f}, inplace=True)

    coefs.append(coefficients)
    t_vals.append(t_values)
    r2.append(r_squared)

# # Merge all DataFrames on their index in one line
coefs = reduce(lambda left, right: pd.merge(left, right, left_index=True, right_index=True, how='outer'), coefs)
coefs = coefs.T[['const']+fama5_cols]
coefs['R-squared'] = r2
t_vals = reduce(lambda left, right: pd.merge(left, right, left_index=True, right_index=True, how='outer'), t_vals)
t_vals = t_vals.T[['const']+fama5_cols]
t_vals['R-squared'] = r2
print('The Coefficients of Different Regressions:')
print(coefs)
print('--------------------------------------------------------')
print('The t-values of Different Regressions:')
print(t_vals)

The Coefficients of Different Regressions:
           const    Mkt-RF       SMB       HML       RMW       CMA  R-squared
Mkt-RF  0.766368       NaN  0.299654  0.114905 -0.246179 -0.855874   0.205702
SMB     0.270752  0.139725       NaN  0.139969 -0.447136 -0.171200   0.182744
HML    -0.088493  0.032783  0.085640       NaN  0.184782  1.028124   0.490605
RMW     0.402177 -0.063264 -0.246429  0.166442       NaN -0.260684   0.164304
CMA     0.252601 -0.107041 -0.045919  0.450695 -0.126866       NaN   0.536888
--------------------------------------------------------
The t-values of Different Regressions:
           const    Mkt-RF       SMB        HML       RMW        CMA  \
Mkt-RF  4.998020       NaN  5.601404   1.647681 -3.370357  -8.509546   
SMB     2.553534  5.601404       NaN   2.951449 -9.429283  -2.385184   
HML    -1.062997  1.647681  2.951449        NaN  4.773168  24.899440   
RMW     5.180435 -3.370357 -9.429283   4.773168       NaN  -4.955590   
CMA     4.647668 -8.509546 -2.385

In [48]:
fama6 = pd.merge(fama5, momentum, on='date', how='left')
fama6

Unnamed: 0,date,Mkt-RF,SMB,HML,RMW,CMA,RF,UMD
0,196307,-0.39,-0.41,-0.97,0.68,-1.18,0.27,0.90
1,196308,5.07,-0.80,1.80,0.36,-0.35,0.25,1.01
2,196309,-1.57,-0.52,0.13,-0.71,0.29,0.27,0.19
3,196310,2.53,-1.39,-0.10,2.80,-2.01,0.29,3.12
4,196311,-0.85,-0.88,1.75,-0.51,2.24,0.27,-0.74
...,...,...,...,...,...,...,...,...
718,202305,0.35,-0.38,-7.72,-1.81,-7.22,0.36,-0.63
719,202306,6.46,1.34,-0.26,2.18,-1.62,0.40,-2.37
720,202307,3.21,2.86,4.11,-0.57,0.57,0.45,-3.98
721,202308,-2.39,-3.65,-1.06,3.43,-2.37,0.45,3.77


In [50]:
fama6_cols = ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'UMD']

coefs, t_vals, r2 = [], [], []

for f in fama6_cols:
    # Define the independent variables (add a constant to the model)
    fama6_fs = [i for i in fama6_cols if i != f]
    X = fama6[fama6_fs]
    X = sm.add_constant(X)  # Adds a constant term to the predictor

    # Define the dependent variable
    y = fama6[f]

    # Fit the regression model
    model = sm.OLS(y, X).fit()

    # To extract the coefficients, t-values, and R-squared you can use:
    coefficients = model.params.to_frame()
    t_values = model.tvalues.to_frame()
    r_squared = model.rsquared

    coefficients.rename(columns={0: f}, inplace=True)
    t_values.rename(columns={0: f}, inplace=True)

    coefs.append(coefficients)
    t_vals.append(t_values)
    r2.append(r_squared)

# # Merge all DataFrames on their index in one line
coefs = reduce(lambda left, right: pd.merge(left, right, left_index=True, right_index=True, how='outer'), coefs)
coefs = coefs.T[['const']+fama6_cols]
coefs['R-squared'] = r2
t_vals = reduce(lambda left, right: pd.merge(left, right, left_index=True, right_index=True, how='outer'), t_vals)
t_vals = t_vals.T[['const']+fama6_cols]
t_vals['R-squared'] = r2
print('The Coefficients of Different Regressions:')
print(coefs)
print('--------------------------------------------------------')
print('The t-values of Different Regressions:')
print(t_vals)

The Coefficients of Different Regressions:
           const    Mkt-RF       SMB       HML       RMW       CMA       UMD  \
Mkt-RF  0.852509       NaN  0.297845  0.033243 -0.210620 -0.776524 -0.156686   
SMB     0.259133  0.142362       NaN  0.148446 -0.450054 -0.177389  0.017033   
HML     0.007880  0.009070  0.084740       NaN  0.197511  1.008311 -0.134450   
RMW     0.366270 -0.055017 -0.245955  0.189088       NaN -0.276083  0.048021   
CMA     0.217627 -0.097931 -0.046804  0.466054 -0.133293       NaN  0.045678   
UMD     0.672296 -0.159882  0.036363 -0.502812  0.187586  0.369584       NaN   

        R-squared  
Mkt-RF   0.225601  
SMB      0.183251  
HML      0.525042  
RMW      0.171832  
CMA      0.544706  
UMD      0.102429  
--------------------------------------------------------
The t-values of Different Regressions:
           const    Mkt-RF       SMB        HML       RMW        CMA  \
Mkt-RF  5.578135       NaN  5.634554   0.465035 -2.899265  -7.681942   
SMB     2.410677

In [62]:
fama5_dev = pd.merge(fama5, hml_dev, on='date', how='left')
del fama5_dev['HML']
fama5_dev.dropna(inplace=True)
fama5_dev

Unnamed: 0,date,Mkt-RF,SMB,RMW,CMA,RF,HML-DEV
0,196307,-0.39,-0.41,0.68,-1.18,0.27,-0.592258
1,196308,5.07,-0.80,0.36,-0.35,0.25,0.280529
2,196309,-1.57,-0.52,-0.71,0.29,0.27,0.265286
3,196310,2.53,-1.39,2.80,-2.01,0.29,-2.112853
4,196311,-0.85,-0.88,-0.51,2.24,0.27,0.907441
...,...,...,...,...,...,...,...
715,202302,-2.58,0.66,1.01,-1.33,0.34,-1.986047
716,202303,2.51,-6.94,2.24,-2.37,0.36,-8.347044
717,202304,0.61,-2.56,2.42,2.86,0.35,-0.228598
718,202305,0.35,-0.38,-1.81,-7.22,0.36,-6.129721


In [63]:
fama5_dev_cols = ['Mkt-RF', 'SMB', 'HML-DEV', 'RMW', 'CMA']

coefs, t_vals, r2 = [], [], []

for f in fama5_dev_cols:
    # Define the independent variables (add a constant to the model)
    fama5_dev_fs = [i for i in fama5_dev_cols if i != f]
    X = fama5_dev[fama5_dev_fs]
    X = sm.add_constant(X)  # Adds a constant term to the predictor

    # Define the dependent variable
    y = fama5_dev[f]

    # Fit the regression model
    model = sm.OLS(y, X).fit()

    # To extract the coefficients, t-values, and R-squared you can use:
    coefficients = model.params.to_frame()
    t_values = model.tvalues.to_frame()
    r_squared = model.rsquared

    coefficients.rename(columns={0: f}, inplace=True)
    t_values.rename(columns={0: f}, inplace=True)

    coefs.append(coefficients)
    t_vals.append(t_values)
    r2.append(r_squared)

# # Merge all DataFrames on their index in one line
coefs = reduce(lambda left, right: pd.merge(left, right, left_index=True, right_index=True, how='outer'), coefs)
coefs = coefs.T[['const']+fama5_dev_cols]
coefs['R-squared'] = r2
t_vals = reduce(lambda left, right: pd.merge(left, right, left_index=True, right_index=True, how='outer'), t_vals)
t_vals = t_vals.T[['const']+fama5_dev_cols]
t_vals['R-squared'] = r2
print('The Coefficients of Different Regressions:')
print(coefs)
print('--------------------------------------------------------')
print('The t-values of Different Regressions:')
print(t_vals)

The Coefficients of Different Regressions:
            const    Mkt-RF       SMB   HML-DEV       RMW       CMA  R-squared
Mkt-RF   0.770172       NaN  0.285259  0.181748 -0.224663 -0.910788   0.217877
SMB      0.268061  0.136059       NaN  0.075078 -0.424306 -0.104595   0.176398
HML-DEV -0.082124  0.102386  0.088674       NaN  0.037795  0.984710   0.295727
RMW      0.396166 -0.060585 -0.239899  0.018093       NaN -0.107561   0.136228
CMA      0.310178 -0.153002 -0.036839  0.293643 -0.067004       NaN   0.387934
--------------------------------------------------------
The t-values of Different Regressions:
            const     Mkt-RF       SMB    HML-DEV       RMW        CMA  \
Mkt-RF   5.051263        NaN  5.373191   3.682025 -3.141076 -10.759642   
SMB      2.512443   5.373191       NaN   2.189065 -9.001572  -1.663024   
HML-DEV -0.705401   3.682025  2.189065        NaN  0.699469  17.054123   
RMW      5.001785  -3.141076 -9.001572   0.699469       NaN  -2.278244   
CMA      4.960393

In [64]:
fama6_dev = pd.merge(fama6, hml_dev, on='date', how='left')
del fama6_dev['HML']
fama6_dev.dropna(inplace=True)
fama6_dev

Unnamed: 0,date,Mkt-RF,SMB,RMW,CMA,RF,UMD,HML-DEV
0,196307,-0.39,-0.41,0.68,-1.18,0.27,0.90,-0.592258
1,196308,5.07,-0.80,0.36,-0.35,0.25,1.01,0.280529
2,196309,-1.57,-0.52,-0.71,0.29,0.27,0.19,0.265286
3,196310,2.53,-1.39,2.80,-2.01,0.29,3.12,-2.112853
4,196311,-0.85,-0.88,-0.51,2.24,0.27,-0.74,0.907441
...,...,...,...,...,...,...,...,...
715,202302,-2.58,0.66,1.01,-1.33,0.34,0.15,-1.986047
716,202303,2.51,-6.94,2.24,-2.37,0.36,-2.47,-8.347044
717,202304,0.61,-2.56,2.42,2.86,0.35,1.62,-0.228598
718,202305,0.35,-0.38,-1.81,-7.22,0.36,-0.63,-6.129721


In [65]:
fama6_dev_cols = ['Mkt-RF', 'SMB', 'HML-DEV', 'RMW', 'CMA', 'UMD']

coefs, t_vals, r2 = [], [], []

for f in fama6_dev_cols:
    # Define the independent variables (add a constant to the model)
    fama6_dev_fs = [i for i in fama6_dev_cols if i != f]
    X = fama6_dev[fama6_dev_fs]
    X = sm.add_constant(X)  # Adds a constant term to the predictor

    # Define the dependent variable
    y = fama6_dev[f]

    # Fit the regression model
    model = sm.OLS(y, X).fit()

    # To extract the coefficients, t-values, and R-squared you can use:
    coefficients = model.params.to_frame()
    t_values = model.tvalues.to_frame()
    r_squared = model.rsquared

    coefficients.rename(columns={0: f}, inplace=True)
    t_values.rename(columns={0: f}, inplace=True)

    coefs.append(coefficients)
    t_vals.append(t_values)
    r2.append(r_squared)

# # Merge all DataFrames on their index in one line
coefs = reduce(lambda left, right: pd.merge(left, right, left_index=True, right_index=True, how='outer'), coefs)
coefs = coefs.T[['const']+fama6_dev_cols]
coefs['R-squared'] = r2
t_vals = reduce(lambda left, right: pd.merge(left, right, left_index=True, right_index=True, how='outer'), t_vals)
t_vals = t_vals.T[['const']+fama6_dev_cols]
t_vals['R-squared'] = r2
print('The Coefficients of Different Regressions:')
print(coefs)
print('--------------------------------------------------------')
print('The t-values of Different Regressions:')
print(t_vals)

The Coefficients of Different Regressions:
            const    Mkt-RF       SMB   HML-DEV       RMW       CMA       UMD  \
Mkt-RF   0.853119       NaN  0.294628  0.034046 -0.203498 -0.778638 -0.142483   
SMB      0.213694  0.140962       NaN  0.158859 -0.432214 -0.174898  0.082362   
HML-DEV  0.301040  0.008836  0.086178       NaN  0.087663  0.907428 -0.531367   
RMW      0.346844 -0.054915 -0.243782  0.091146       NaN -0.168243  0.071531   
CMA      0.098738 -0.105500 -0.049531  0.473717 -0.084474       NaN  0.233744   
UMD      0.637022 -0.071243  0.086076 -1.023681  0.132538  0.862588       NaN   

         R-squared  
Mkt-RF    0.225816  
SMB       0.182236  
HML-DEV   0.678817  
RMW       0.144417  
CMA       0.511342  
UMD       0.560717  
--------------------------------------------------------
The t-values of Different Regressions:
            const    Mkt-RF       SMB    HML-DEV       RMW        CMA  \
Mkt-RF   5.508771       NaN  5.562223   0.463540 -2.840627  -7.993873   


In [106]:
port_25 = pd.read_csv("25_Portfolios_5x5.csv", header=9)
port_25 = port_25.iloc[:1167]
port_25.rename(columns={"Unnamed: 0": "date"}, inplace=True)
port_25 = port_25.loc[(port_25['date'] >= '196307') & (port_25['date'] < '202312')].reset_index(drop=True)
port_25 = port_25.astype(np.float32)
port_25['date'] = port_25['date'].astype(np.int32)
port_25

Unnamed: 0,date,SMALL LoBM,ME1 BM2,ME1 BM3,ME1 BM4,SMALL HiBM,ME2 BM1,ME2 BM2,ME2 BM3,ME2 BM4,...,ME4 BM1,ME4 BM2,ME4 BM3,ME4 BM4,ME4 BM5,BIG LoBM,ME5 BM2,ME5 BM3,ME5 BM4,BIG HiBM
0,196307,1.1307,-0.3091,0.7079,0.1062,-1.3211,-1.8071,0.1899,-1.0105,-1.9644,...,-0.9806,-1.6781,-1.9184,-1.5741,-1.8567,0.1547,0.4833,1.2286,-0.5862,-1.1026
1,196308,4.2370,1.3834,1.4977,2.3755,4.7567,5.5665,4.5191,4.4424,4.4188,...,5.4259,4.7306,6.2332,7.6782,5.3469,5.7691,4.2550,4.5936,8.2831,6.3824
2,196309,-2.8878,0.6263,-1.0204,-1.6000,-0.4320,-4.0502,-1.5034,-0.8798,-1.1812,...,-2.7974,-2.0801,-1.7800,-3.9639,-1.9943,-1.3595,-0.8054,-0.8135,-0.2145,-3.4963
3,196310,1.2885,-0.7071,1.3132,0.0855,2.3988,1.1916,4.2342,2.3524,2.2012,...,-0.3902,0.6829,2.6229,4.8492,0.6113,5.3339,1.7427,-0.2469,2.3915,0.4857
4,196311,-3.3751,-3.7534,-1.8055,-1.0487,-1.0538,-4.2561,-1.7534,-0.7809,-0.1002,...,-0.8798,-0.6434,-0.7933,1.3610,3.5388,-1.2556,1.0072,-1.7425,-2.0838,1.3455
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
718,202305,4.7931,2.5041,3.2341,-1.1028,-1.8267,1.1439,3.4223,2.0769,1.9823,...,0.1583,-2.3246,-3.9899,-3.1398,-6.4185,5.5713,-1.2059,-5.0786,-7.2895,-4.6959
719,202306,6.7915,3.2369,5.5211,6.5630,7.2663,6.8976,7.0767,9.2302,8.7954,...,8.1501,9.2344,9.2168,9.8213,6.8319,7.1733,5.7064,6.1545,5.6528,5.3506
720,202307,2.6098,3.0337,3.9666,6.8317,10.1916,4.1133,5.5433,6.1513,8.4454,...,1.2442,4.3803,4.9389,9.2916,8.5163,2.3796,6.1497,4.0021,5.2123,6.7310
721,202308,-12.2584,-7.6627,-10.5101,-5.6379,-7.3597,-6.9345,-7.3292,-3.9622,-5.0425,...,-2.4000,-2.3534,-2.8719,-3.8686,-2.1140,-0.8338,-0.9250,-2.6918,-2.9058,-7.3853


In [123]:
time_periods = port_25.shape[0]
num_portfolios = port_25.shape[1] - 1

# Initialize a DataFrame to store the alphas and t-values
alphas_and_tstats = pd.DataFrame(columns=['Alpha', 'T-Value'])
residuals = pd.DataFrame()

# Fama-French factors excluding the risk-free rate
factors = ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']

for column in port_25.columns:
    if column != 'date':
        # Regression: portfolio returns ~ Fama-French five factors
        Y = port_25[column] - fama5['RF']  # Excess returns
        X = fama5[factors]
        X = sm.add_constant(X)  # Add a constant term

        model = sm.OLS(Y, X).fit()
        alpha = model.params['const']
        t_value = model.tvalues
        
        # Collect the residuals
        residuals[column] = model.resid

        # Store alpha
        alphas_and_tstats.loc[column] = [model.params['const'], model.tvalues['const']]

# Calculate the covariance matrix of residuals
cov_matrix = residuals.cov().values * (time_periods - 1) / time_periods  # To get sample covariance

# Alphas as a vector
alphas_vector = alphas_and_tstats['Alpha'].values.flatten()

# Lambda is the average value of the Fama-French factors
lambda_ = fama5[factors].mean(axis=0).values

# The GRS test statistic
GRS_stat = (time_periods - len(factors) - num_portfolios) / len(factors) * \
           (alphas_vector.T @ np.linalg.inv(cov_matrix) @ alphas_vector) / \
           (1 + lambda_.T @ lambda_)

# Calculate the p-value
from scipy.stats import f
df1 = len(factors)
df2 = time_periods - len(factors) - 1
p_value = 1 - f.cdf(GRS_stat, df1, df2)

# Display results
print("Alphas and T-values for each portfolio:")
print(alphas_and_tstats)
print("GRS Test Statistic:")
print(GRS_stat)
print("P-value:")
print(p_value)

Alphas and T-values for each portfolio:
               Alpha   T-Value
SMALL LoBM -0.291354 -3.405250
ME1 BM2     0.146882  2.178582
ME1 BM3     0.003307  0.064629
ME1 BM4     0.159721  3.167580
SMALL HiBM  0.140960  1.847922
ME2 BM1    -0.096749 -1.598048
ME2 BM2     0.015943  0.314508
ME2 BM3     0.012078  0.233857
ME2 BM4     0.030545  0.637823
ME2 BM5     0.006936  0.130115
ME3 BM1     0.003085  0.053973
ME3 BM2     0.038951  0.685458
ME3 BM3    -0.073773 -1.288603
ME3 BM4     0.009042  0.161500
ME3 BM5    -0.025089 -0.357210
ME4 BM1     0.157739  2.691043
ME4 BM2    -0.136900 -2.232358
ME4 BM3    -0.110499 -1.749243
ME4 BM4     0.032519  0.487873
ME4 BM5    -0.055982 -0.711067
BIG LoBM    0.114858  2.882012
ME5 BM2    -0.075392 -1.401893
ME5 BM3    -0.058550 -0.903427
ME5 BM4    -0.220175 -3.588987
BIG HiBM   -0.049386 -0.523162
GRS Test Statistic:
10.626742889101152
P-value:
7.285221315100898e-10


In [128]:
time_periods = port_25.shape[0]
num_portfolios = port_25.shape[1] - 1

# Initialize a DataFrame to store the alphas and t-values
alphas_and_tstats = pd.DataFrame(columns=['Alpha', 'T-Value'])
residuals = pd.DataFrame()

# Fama-French factors excluding the risk-free rate
factors = ['Mkt-RF', 'SMB', 'HML-DEV', 'RMW', 'CMA', 'UMD']

for column in port_25.columns:
    if column != 'date':
        # Regression: portfolio returns ~ Fama-French five factors
        Y = port_25[column] - fama6_dev['RF']  # Excess returns
        Y = Y.dropna()
        X = fama6_dev[factors]
        X = sm.add_constant(X)  # Add a constant term

        model = sm.OLS(Y, X).fit()
        alpha = model.params['const']
        t_value = model.tvalues
        
        # Collect the residuals
        residuals[column] = model.resid

        # Store alpha
        alphas_and_tstats.loc[column] = [model.params['const'], model.tvalues['const']]

# Calculate the covariance matrix of residuals
cov_matrix = residuals.cov().values * (time_periods - 1) / time_periods  # To get sample covariance

# Alphas as a vector
alphas_vector = alphas_and_tstats['Alpha'].values.flatten()

# Lambda is the average value of the Fama-French factors
lambda_ = fama6_dev[factors].mean(axis=0).values

# The GRS test statistic
GRS_stat = (time_periods - len(factors) - num_portfolios) / len(factors) * \
           (alphas_vector.T @ np.linalg.inv(cov_matrix) @ alphas_vector) / \
           (1 + lambda_.T @ lambda_)

# Calculate the p-value
from scipy.stats import f
df1 = len(factors)
df2 = time_periods - len(factors) - 1
p_value = 1 - f.cdf(GRS_stat, df1, df2)

# Display results
print("Alphas and T-values for each portfolio:")
print(alphas_and_tstats)
print("GRS Test Statistic:")
print(GRS_stat)
print("P-value:")
print(p_value)

Alphas and T-values for each portfolio:
               Alpha   T-Value
SMALL LoBM -0.170778 -1.875627
ME1 BM2     0.171318  2.456444
ME1 BM3     0.009159  0.175875
ME1 BM4     0.084259  1.538304
SMALL HiBM  0.045805  0.567037
ME2 BM1     0.037728  0.567383
ME2 BM2     0.074160  1.444487
ME2 BM3    -0.008288 -0.151939
ME2 BM4    -0.059647 -1.092705
ME2 BM5    -0.166455 -2.666321
ME3 BM1     0.114734  1.853921
ME3 BM2     0.045072  0.774998
ME3 BM3    -0.095457 -1.570862
ME3 BM4    -0.087013 -1.396088
ME3 BM5    -0.161287 -1.999393
ME4 BM1     0.253649  4.116447
ME4 BM2    -0.116443 -1.861718
ME4 BM3    -0.172138 -2.652049
ME4 BM4    -0.079356 -1.126142
ME4 BM5    -0.207495 -2.332723
BIG LoBM    0.213570  5.071133
ME5 BM2    -0.092252 -1.679097
ME5 BM3    -0.163501 -2.536103
ME5 BM4    -0.380395 -5.452243
BIG HiBM   -0.231183 -2.052332
GRS Test Statistic:
10.609763783057167
P-value:
2.621380890133196e-11


In [135]:
port_10 = pd.read_csv("10_Industry_Portfolios.csv", header=6)
port_10 = port_10.iloc[:1167]
port_10.rename(columns={"Unnamed: 0": "date"}, inplace=True)
port_10 = port_10.loc[(port_10['date'] >= '196307') & (port_10['date'] < '202312')].reset_index(drop=True)
port_10 = port_10.astype(np.float32)
port_10['date'] = port_10['date'].astype(np.int32)
port_10

Unnamed: 0,date,NoDur,Durbl,Manuf,Enrgy,HiTec,Telcm,Shops,Hlth,Utils,Other
0,196307,-0.48,-0.07,-1.39,2.30,-0.68,-0.25,-1.05,0.57,0.81,-1.59
1,196308,4.87,6.54,6.19,3.94,5.13,4.28,6.42,9.56,4.20,5.44
2,196309,-1.69,-0.25,-0.78,-3.65,0.14,2.36,0.93,-4.07,-2.50,-3.18
3,196310,2.66,10.71,2.54,-0.33,8.30,3.43,0.51,3.38,-0.67,1.39
4,196311,-1.12,-5.17,0.30,-1.15,-0.29,4.14,-1.25,-1.65,-1.02,0.10
...,...,...,...,...,...,...,...,...,...,...,...
718,202305,-5.35,13.52,-5.54,-9.51,8.23,-9.40,0.62,-3.67,-5.82,-2.65
719,202306,2.84,24.66,10.69,6.42,5.91,4.63,7.89,4.66,2.61,7.44
720,202307,2.32,2.73,3.19,7.34,4.30,0.98,2.28,-0.11,2.79,5.24
721,202308,-3.77,-4.31,-2.37,1.95,-1.68,0.14,-0.40,-0.22,-5.29,-3.35


In [136]:
time_periods = port_10.shape[0]
num_portfolios = port_10.shape[1] - 1

# Initialize a DataFrame to store the alphas and t-values
alphas_and_tstats = pd.DataFrame(columns=['Alpha', 'T-Value'])
residuals = pd.DataFrame()

# Fama-French factors excluding the risk-free rate
factors = ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']

for column in port_10.columns:
    if column != 'date':
        # Regression: portfolio returns ~ Fama-French five factors
        Y = port_10[column] - fama5['RF']  # Excess returns
        X = fama5[factors]
        X = sm.add_constant(X)  # Add a constant term

        model = sm.OLS(Y, X).fit()
        alpha = model.params['const']
        t_value = model.tvalues
        
        # Collect the residuals
        residuals[column] = model.resid

        # Store alpha
        alphas_and_tstats.loc[column] = [model.params['const'], model.tvalues['const']]

# Calculate the covariance matrix of residuals
cov_matrix = residuals.cov().values * (time_periods - 1) / time_periods  # To get sample covariance

# Alphas as a vector
alphas_vector = alphas_and_tstats['Alpha'].values.flatten()

# Lambda is the average value of the Fama-French factors
lambda_ = fama5[factors].mean(axis=0).values

# The GRS test statistic
GRS_stat = (time_periods - len(factors) - num_portfolios) / len(factors) * \
           (alphas_vector.T @ np.linalg.inv(cov_matrix) @ alphas_vector) / \
           (1 + lambda_.T @ lambda_)

# Calculate the p-value
from scipy.stats import f
df1 = len(factors)
df2 = time_periods - len(factors) - 1
p_value = 1 - f.cdf(GRS_stat, df1, df2)

# Display results
print("Alphas and T-values for each portfolio:")
print(alphas_and_tstats)
print("GRS Test Statistic:")
print(GRS_stat)
print("P-value:")
print(p_value)

Alphas and T-values for each portfolio:
          Alpha   T-Value
NoDur -0.094454 -1.176600
Durbl -0.197323 -1.127878
Manuf -0.197674 -3.253975
Enrgy -0.101493 -0.594925
HiTec  0.412763  4.074493
Telcm  0.013472  0.115439
Shops -0.023156 -0.248699
Hlth   0.163995  1.469812
Utils -0.065615 -0.550754
Other -0.185035 -2.970589
GRS Test Statistic:
5.199946777400814
P-value:
0.00010702136616935753


In [137]:
time_periods = port_10.shape[0]
num_portfolios = port_10.shape[1] - 1

# Initialize a DataFrame to store the alphas and t-values
alphas_and_tstats = pd.DataFrame(columns=['Alpha', 'T-Value'])
residuals = pd.DataFrame()

# Fama-French factors excluding the risk-free rate
factors = ['Mkt-RF', 'SMB', 'HML-DEV', 'RMW', 'CMA', 'UMD']

for column in port_10.columns:
    if column != 'date':
        # Regression: portfolio returns ~ Fama-French five factors
        Y = port_10[column] - fama6_dev['RF']  # Excess returns
        Y = Y.dropna()
        X = fama6_dev[factors]
        X = sm.add_constant(X)  # Add a constant term

        model = sm.OLS(Y, X).fit()
        alpha = model.params['const']
        t_value = model.tvalues
        
        # Collect the residuals
        residuals[column] = model.resid

        # Store alpha
        alphas_and_tstats.loc[column] = [model.params['const'], model.tvalues['const']]

# Calculate the covariance matrix of residuals
cov_matrix = residuals.cov().values * (time_periods - 1) / time_periods  # To get sample covariance

# Alphas as a vector
alphas_vector = alphas_and_tstats['Alpha'].values.flatten()

# Lambda is the average value of the Fama-French factors
lambda_ = fama6_dev[factors].mean(axis=0).values

# The GRS test statistic
GRS_stat = (time_periods - len(factors) - num_portfolios) / len(factors) * \
           (alphas_vector.T @ np.linalg.inv(cov_matrix) @ alphas_vector) / \
           (1 + lambda_.T @ lambda_)

# Calculate the p-value
from scipy.stats import f
df1 = len(factors)
df2 = time_periods - len(factors) - 1
p_value = 1 - f.cdf(GRS_stat, df1, df2)

# Display results
print("Alphas and T-values for each portfolio:")
print(alphas_and_tstats)
print("GRS Test Statistic:")
print(GRS_stat)
print("P-value:")
print(p_value)

Alphas and T-values for each portfolio:
          Alpha   T-Value
NoDur -0.051006 -0.620400
Durbl -0.114658 -0.671080
Manuf -0.175582 -2.819367
Enrgy -0.341363 -1.994059
HiTec  0.555729  5.339196
Telcm -0.002747 -0.023376
Shops  0.081350  0.861343
Hlth   0.257223  2.267521
Utils -0.180254 -1.496455
Other -0.255412 -3.682611
GRS Test Statistic:
5.713508648017589
P-value:
8.111973652580318e-06
