In [1]:
import pandas as pd

# Load the monthly factor table and the monthly returns of the 25 (5x5) portfolios.
# The portfolio file is created in build_portfolios_returns_25.5x5.py.
factors = pd.read_parquet(path="../data/processed/factors_monthly.parquet")
ports = pd.read_parquet(path="../data/processed/portfolios_25_5x5_monthly.parquet")

# Preview the first rows of each table.
display(factors.head(n=5))
display(ports.head(n=5))

# Report the date coverage and row count of each table; the two tables do not
# have to start on the same month.
factors_dates, ports_dates = factors["date"], ports["date"]
print("factors:", factors_dates.min(), "→", factors_dates.max(), "rows", len(factors))
print("ports  :", ports_dates.min(), "→", ports_dates.max(), "rows", len(ports))

# Every column except `date` is counted as one portfolio.
print("n portfolios:", len(ports.columns) - 1)

Unnamed: 0,date,Mkt-RF,SMB,HML,RF,Mom
0,1927-01-01,-0.0005,-0.0032,0.0458,0.0025,0.0057
1,1927-02-01,0.0417,0.0007,0.0272,0.0026,-0.015
2,1927-03-01,0.0014,-0.0177,-0.0238,0.003,0.0352
3,1927-04-01,0.0047,0.0039,0.0065,0.0025,0.0436
4,1927-05-01,0.0545,0.0155,0.048,0.003,0.0278


Unnamed: 0,date,SMALL LoBM,ME1 BM2,ME1 BM3,ME1 BM4,SMALL HiBM,ME2 BM1,ME2 BM2,ME2 BM3,ME2 BM4,...,ME4 BM1,ME4 BM2,ME4 BM3,ME4 BM4,ME4 BM5,BIG LoBM,ME5 BM2,ME5 BM3,ME5 BM4,BIG HiBM
0,1926-07-01,0.058276,-0.017006,0.005118,-0.021477,0.019583,0.012118,0.024107,0.006056,-0.026082,...,0.015376,0.01546,0.013389,0.002765,0.024678,0.033248,0.060909,0.020285,0.031263,0.005623
1,1926-08-01,-0.020206,-0.080282,0.013968,0.021483,0.085104,0.02362,-0.007525,0.038984,0.002299,...,0.013858,0.038587,0.019738,0.021336,0.053422,0.010169,0.041975,0.019769,0.054924,0.077576
2,1926-09-01,-0.048291,-0.026806,-0.043417,-0.032683,0.008586,-0.026849,-0.005252,0.010789,-0.032877,...,0.016897,-0.005246,-0.017724,0.014806,0.00873,-0.012951,0.03661,0.001384,-0.007497,-0.024284
3,1926-10-01,-0.093633,-0.035519,-0.035024,0.034413,-0.025452,-0.028014,-0.044191,-0.050767,-0.080271,...,-0.039136,-0.026528,-0.021058,-0.032532,-0.053525,-0.027382,-0.030061,-0.022467,-0.046725,-0.058129
4,1926-11-01,0.055888,0.041877,0.024384,-0.044495,0.00511,0.031023,-0.017317,0.030425,0.049538,...,0.034492,0.023823,0.037315,0.051102,0.018213,0.044331,0.025355,0.01528,0.036596,0.025636


factors: 1927-01-01 00:00:00 → 2025-11-01 00:00:00 rows 1187
ports  : 1926-07-01 00:00:00 → 2025-11-01 00:00:00 rows 1193
n portfolios: 25


In [2]:
import statsmodels.api as sm

# Three-factor time-series regression for a single portfolio.
# Keep only the months that appear in both the portfolio and the factor table.
df = ports.merge(right=factors, how="inner", on="date")

# The first non-date column of `ports` is the portfolio being regressed.
portfolio_columns = [name for name in ports.columns if name != "date"]
col = portfolio_columns[0]

# Regressors: an intercept plus the three factor columns.
ff3_factor_columns = ["Mkt-RF", "SMB", "HML"]
X = sm.add_constant(df[ff3_factor_columns])

# Dependent variable: portfolio return minus the RF column (excess return).
y = df[col] - df["RF"]

# OLS with a HAC covariance estimator limited to 3 lags.
ols_model = sm.OLS(y, X)
res = ols_model.fit(cov_type="HAC", cov_kwds=dict(maxlags=3))

# Print the portfolio name followed by the full regression summary.
print(col, res.summary(), sep="\n")

SMALL LoBM
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.664
Model:                            OLS   Adj. R-squared:                  0.663
Method:                 Least Squares   F-statistic:                     294.0
Date:                Sun, 25 Jan 2026   Prob (F-statistic):          1.37e-142
Time:                        02:03:27   Log-Likelihood:                 1488.3
No. Observations:                1187   AIC:                            -2969.
Df Residuals:                    1183   BIC:                            -2948.
Df Model:                           3                                         
Covariance Type:                  HAC                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0068      0.001     -4.7

In [6]:
# Read the saved FF3 regression results (one table covering all portfolios).
panel_res_ff3 = pd.read_csv(filepath_or_buffer='../reports/ff3_25_5x5_results.csv')

# Show the first five rows as the cell output.
panel_res_ff3.head(n=5)

Unnamed: 0,portfolio,n,alpha,alpha_t,beta_mkt,beta_mkt_t,beta_smb,beta_smb_t,beta_hml,beta_hml_t,r2
0,BIG HiBM,1187,-0.00159,-1.543908,1.175775,33.631071,-0.142866,-2.092104,0.996226,17.612441,0.835471
1,BIG LoBM,1187,0.001018,2.939126,1.031247,111.434891,-0.150407,-6.241646,-0.269215,-16.353286,0.956016
2,ME1 BM2,1187,-0.003864,-3.698154,1.071569,38.103683,1.527719,9.767417,0.207719,3.203007,0.823581
3,ME1 BM3,1187,-0.001148,-1.686165,1.04938,28.240314,1.237211,24.970157,0.493332,12.645775,0.889145
4,ME1 BM4,1187,0.000737,1.374042,0.943929,48.866868,1.231773,13.124119,0.574384,16.555899,0.929893


### Carhart Model

In [7]:
# Carhart regression for one portfolio: the three FF factors plus `Mom`.
# Keep only the months that appear in both the portfolio and the factor table.
df = ports.merge(right=factors, how="inner", on="date")

# The first non-date column of `ports` is the portfolio being regressed.
portfolio_columns = [name for name in ports.columns if name != "date"]
col = portfolio_columns[0]

# Regressors: an intercept plus the four factor columns.
carhart_factor_columns = ["Mkt-RF", "SMB", "HML", "Mom"]
X = sm.add_constant(df[carhart_factor_columns])

# Dependent variable: portfolio return minus the RF column (excess return).
y = df[col] - df["RF"]

# OLS with a HAC covariance estimator limited to 3 lags.
ols_model = sm.OLS(y, X)
res = ols_model.fit(cov_type="HAC", cov_kwds=dict(maxlags=3))

# Print the portfolio name followed by the full regression summary.
print(col, res.summary(), sep="\n")

SMALL LoBM
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.667
Model:                            OLS   Adj. R-squared:                  0.666
Method:                 Least Squares   F-statistic:                     212.9
Date:                Sun, 25 Jan 2026   Prob (F-statistic):          1.32e-137
Time:                        02:16:50   Log-Likelihood:                 1493.9
No. Observations:                1187   AIC:                            -2978.
Df Residuals:                    1182   BIC:                            -2952.
Df Model:                           4                                         
Covariance Type:                  HAC                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0053      0.002     -3.2

In [8]:
# carhart_25_5x5_results.csv is created by panel_carhart_25_5x5.py.
# Read the saved Carhart regression results (one table covering all portfolios).
panel_res_carhart = pd.read_csv(filepath_or_buffer='../reports/carhart_25_5x5_results.csv')

# Show the first five rows as the cell output.
panel_res_carhart.head(n=5)

Unnamed: 0,portfolio,n,alpha,alpha_t,beta_mkt,beta_mkt_t,beta_smb,beta_smb_t,beta_hml,beta_hml_t,beta_mom,beta_mom_t,r2
0,BIG HiBM,1187,-0.000367,-0.358699,1.146388,35.630448,-0.150951,-2.333951,0.93637,18.71172,-0.131064,-3.040813,0.83948
1,BIG LoBM,1187,0.001176,3.281886,1.027453,111.396359,-0.15145,-6.291539,-0.276943,-18.244977,-0.016922,-1.270135,0.956183
2,ME1 BM2,1187,-0.00378,-3.734827,1.069561,35.522587,1.527167,9.646801,0.203628,3.012383,-0.008957,-0.175911,0.823595
3,ME1 BM3,1187,8.5e-05,0.113272,1.019741,28.497756,1.229058,27.156555,0.432963,12.347326,-0.132186,-4.028023,0.892868
4,ME1 BM4,1187,0.00088,1.627608,0.940503,51.145016,1.230831,12.945694,0.567407,14.682952,-0.015278,-0.620465,0.92995


In [12]:
# Compare the Carhart and FF3 alphas portfolio by portfolio.
# The two results tables are inner-joined on `portfolio` (the merge default);
# columns present in both tables get the `_carhart` / `_ff3` suffixes.
# `validate='one_to_one'` makes the merge raise instead of silently multiplying
# rows if a portfolio name appears more than once in either table.
merged_res = panel_res_carhart.merge(
    panel_res_ff3,
    on='portfolio',
    suffixes=('_carhart', '_ff3'),
    validate='one_to_one',
)

# Only the last expression of a cell is rendered, so the former mid-cell
# `merged_res.head()` (whose value was discarded) has been dropped.
# Display the alphas and their t-stats for both models side by side.
merged_res[['portfolio', 'alpha_carhart', 'alpha_ff3', 'alpha_t_carhart', 'alpha_t_ff3']]

Unnamed: 0,portfolio,alpha_carhart,alpha_ff3,alpha_t_carhart,alpha_t_ff3
0,BIG HiBM,-0.000367,-0.00159,-0.358699,-1.543908
1,BIG LoBM,0.001176,0.001018,3.281886,2.939126
2,ME1 BM2,-0.00378,-0.003864,-3.734827,-3.698154
3,ME1 BM3,8.5e-05,-0.001148,0.113272,-1.686165
4,ME1 BM4,0.00088,0.000737,1.627608,1.374042
5,ME2 BM1,-0.00197,-0.002193,-2.989762,-3.421228
6,ME2 BM2,0.000622,0.000214,1.161466,0.406894
7,ME2 BM3,0.000311,0.000232,0.650224,0.459973
8,ME2 BM4,0.000317,0.000309,0.709182,0.735308
9,ME2 BM5,0.00046,0.000272,0.90901,0.579427
