In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statistics
factor_data=pd.read_excel('factor_pricing_data_monthly.xlsx', sheet_name='factors (excess returns)')
factor_data = factor_data.sort_values('Date').reset_index(drop=True)


## The Factors

1. Analyze the factors, similar to how you analyzed the three Fama-French factors in Homework 4.

In [2]:
factor_data= factor_data[['MKT','SMB','HML','RMW','CMA','UMD']]
def performance_stats(series):
    mean = series.mean()
    vol = series.std()
    sharpe = mean / vol
    var_05 = np.percentile(series, 5)
    return pd.Series({
        'Mean': mean,
        'Volatility': vol,
        'Sharpe': sharpe,
        'VaR(0.05)': var_05
    })

factors = ['MKT','SMB','HML','RMW','CMA','UMD']
results = {}
for factor in factors:
    results[factor] = pd.DataFrame({
        'Performance Statistics': performance_stats(factor_data[factor])
    })
    print(f"\nPerformance Statistics for {factor}")
    display(results[factor].round(4))


Performance Statistics for MKT


Unnamed: 0,Performance Statistics
Mean,0.0073
Volatility,0.0451
Sharpe,0.1619
VaR(0.05),-0.0724



Performance Statistics for SMB


Unnamed: 0,Performance Statistics
Mean,0.0005
Volatility,0.0292
Sharpe,0.0174
VaR(0.05),-0.0431



Performance Statistics for HML


Unnamed: 0,Performance Statistics
Mean,0.0022
Volatility,0.0314
Sharpe,0.0691
VaR(0.05),-0.0421



Performance Statistics for RMW


Unnamed: 0,Performance Statistics
Mean,0.0037
Volatility,0.0239
Sharpe,0.1533
VaR(0.05),-0.0285



Performance Statistics for CMA


Unnamed: 0,Performance Statistics
Mean,0.0024
Volatility,0.0209
Sharpe,0.1127
VaR(0.05),-0.0278



Performance Statistics for UMD


Unnamed: 0,Performance Statistics
Mean,0.005
Volatility,0.0443
Sharpe,0.1135
VaR(0.05),-0.0673


2. Based on the factor statistics above, answer the following.  
- Does each factor have a positive risk premium (positive expected excess return)?  
- How have the factors performed since the time of the case, (2015-present)?

3. Report the correlation matrix across the six factors.  
- Does the construction method succeed in keeping correlations small?  
- Fama and French say that HML is somewhat redundant in their 5-factor model. Does this seem to be the case?  

In [3]:
corr_full = factor_data[factors].corr()
print("Correlation matrix (Full sample):")
display(corr_full)

Correlation matrix (Full sample):


Unnamed: 0,MKT,SMB,HML,RMW,CMA,UMD
MKT,1.0,0.226997,-0.207918,-0.250639,-0.346542,-0.179352
SMB,0.226997,1.0,-0.021819,-0.411946,-0.051099,-0.06094
HML,-0.207918,-0.021819,1.0,0.219401,0.676727,-0.215523
RMW,-0.250639,-0.411946,0.219401,1.0,0.138566,0.076694
CMA,-0.346542,-0.051099,0.676727,0.138566,1.0,9.4e-05
UMD,-0.179352,-0.06094,-0.215523,0.076694,9.4e-05,1.0


4. Report the tangency weights for a portfolio of these 6 factors.  
- Which factors seem most important? And Least?  
- Are the factors with low mean returns still useful?  
- Re-do the tangency portfolio, but this time only include MKT, SMB, HML, and UMD. Which factors get high/low tangency weights now?  
- What do you conclude about the importance or unimportance of these styles?  



In [4]:
def tangency_weights(returns, cov_mat = 1):
    cov = returns.cov()
    if cov_mat == 1:
        cov_used = cov
    else:
        cov_diag = np.diag(np.diag(cov))
        cov_used = cov_mat * cov + (1 - cov_mat) * cov_diag
    cov_inv = np.linalg.inv(cov_used * 12)

    # --- Mean returns (annualized) ---
    mu = returns.mean() * 12

    # --- Tangency portfolio computation ---
    w_unnormalized = cov_inv @ mu
    w_tangency = w_unnormalized / np.sum(w_unnormalized)

    # --- Output as DataFrame ---
    tangency_wts = pd.DataFrame(w_tangency, index=returns.columns, columns=['Tangency Weights'])
    return tangency_wts.round(4)

In [5]:
returns = factor_data[factors]

tangency_wts = tangency_weights(returns)
display(tangency_wts)

Unnamed: 0,Tangency Weights
MKT,0.2186
SMB,0.0668
HML,-0.0212
RMW,0.3018
CMA,0.3214
UMD,0.1125


In [6]:
#expected annualized return, volatility, and Sharpe ratio of that tangency portfolio
cov = returns.cov() * 12
mu = returns.mean() * 12
w = tangency_wts['Tangency Weights'].values

port_return = w.T @ mu
port_vol = np.sqrt(w.T @ cov @ w)
sharpe = port_return / port_vol

print(f"Tangency Portfolio: Return = {port_return:.4f}, Vol = {port_vol:.4f}, Sharpe = {sharpe:.4f}")


Tangency Portfolio: Return = 0.0482, Vol = 0.0401, Sharpe = 1.2013


In [7]:
#Redone tangency portfolio only MKT, SMB, HML, and UMD
factors_reduced = ['MKT','SMB','HML','UMD']
returns_reduced = factor_data[factors_reduced]
tangency_wts_reduced = tangency_weights(returns_reduced)
display(tangency_wts_reduced)

Unnamed: 0,Tangency Weights
MKT,0.3765
SMB,-0.0512
HML,0.3653
UMD,0.3094


## Testing Modern LPMs 


In [10]:
#testing different factor models
#CAPM (MKT)
returns_capm = factor_data[['MKT']]
#3-Factor (MKT, SMB, HML)
returns_3f = factor_data[['MKT','SMB','HML']]
#5-Factor (MKT, SMB, HML, RMW, CMA)
returns_5f = factor_data[['MKT','SMB','HML','RMW','CMA']]
#AQR (MKT, HML, RMW, UMD)
returns_aqr = factor_data[['MKT','HML','RMW','UMD']]
models = {
    'CAPM': returns_capm,
    '3-Factor': returns_3f,
    '5-Factor': returns_5f,
    'AQR': returns_aqr
}
#monthly excess return data on n = 49 portfolios
# test aqr factor model using time series test
portfolio_data=pd.read_excel('factor_pricing_data_monthly.xlsx', sheet_name='portfolios (excess returns)')
portfolio_data = portfolio_data.sort_values('Date').reset_index(drop=True)
portfolio_returns = portfolio_data.drop(columns=['Date'])
n_portfolios = portfolio_returns.shape[1]
#Test the AQR 4-Factor Model using the time-series test
#report estimated alpha and r-squared for each portfolio
import statsmodels.api as sm
def time_series_test(portfolio_returns, factor_returns):
    alphas = []
    r_squared = []
    for i in range(portfolio_returns.shape[1]):
        y = portfolio_returns.iloc[:, i]
        X = sm.add_constant(factor_returns)
        model = sm.OLS(y, X).fit()
        alphas.append(model.params['const'])
        r_squared.append(model.rsquared)
    results_df = pd.DataFrame({
        'Alpha': alphas,
        'R-squared': r_squared
    }, index=portfolio_returns.columns)
    return results_df
aqr_results = time_series_test(portfolio_returns, returns_aqr)
print("AQR 4-Factor Model Time-Series Test Results:")
display(aqr_results.round(4))



AQR 4-Factor Model Time-Series Test Results:


Unnamed: 0,Alpha,R-squared
Agric,0.001,0.3421
Food,0.0001,0.4551
Soda,0.0013,0.3025
Beer,0.0008,0.4148
Smoke,0.0034,0.2654
Toys,-0.0028,0.5102
Fun,0.0033,0.6072
Books,-0.0031,0.6889
Hshld,-0.0011,0.5547
Clths,-0.0019,0.619


In [11]:
#calculate the mean absolute alpha across all portfolios
mean_abs_alpha = np.mean(np.abs(aqr_results['Alpha']))
print(f"Mean Absolute Alpha across all portfolios: {mean_abs_alpha:.4f}")


Mean Absolute Alpha across all portfolios: 0.0021


In [12]:
#test the capm, 3-factor, and 5-factor models using the time-series test
model_results = {}
for model_name, factor_returns in models.items():
    results_df = time_series_test(portfolio_returns, factor_returns)
    model_results[model_name] = results_df
    print(f"\n{model_name} Model Time-Series Test Results:")
    display(results_df.round(4))
#calculate and compare the mean absolute alpha across all portfolios for each model
for model_name, results_df in model_results.items():
    mean_abs_alpha = np.mean(np.abs(results_df['Alpha']))
    print(f"Mean Absolute Alpha for {model_name} Model: {mean_abs_alpha:.4f}")
    


CAPM Model Time-Series Test Results:


Unnamed: 0,Alpha,R-squared
Agric,0.002,0.3333
Food,0.0033,0.3541
Soda,0.0038,0.2449
Beer,0.0043,0.3244
Smoke,0.0072,0.1821
Toys,-0.0029,0.4963
Fun,0.0009,0.5861
Books,-0.002,0.6551
Hshld,0.0014,0.4862
Clths,-0.0004,0.5607



3-Factor Model Time-Series Test Results:


Unnamed: 0,Alpha,R-squared
Agric,0.0017,0.3573
Food,0.0024,0.4041
Soda,0.0027,0.2734
Beer,0.0038,0.3518
Smoke,0.0058,0.2312
Toys,-0.0029,0.5305
Fun,0.001,0.5952
Books,-0.0029,0.6911
Hshld,0.001,0.5043
Clths,-0.0009,0.5739



5-Factor Model Time-Series Test Results:


Unnamed: 0,Alpha,R-squared
Agric,0.0006,0.3619
Food,-0.0005,0.4781
Soda,-0.0001,0.3064
Beer,0.0003,0.4336
Smoke,0.0017,0.2944
Toys,-0.0052,0.5509
Fun,0.002,0.5995
Books,-0.0041,0.7022
Hshld,-0.002,0.5819
Clths,-0.0037,0.6291



AQR Model Time-Series Test Results:


Unnamed: 0,Alpha,R-squared
Agric,0.001,0.3421
Food,0.0001,0.4551
Soda,0.0013,0.3025
Beer,0.0008,0.4148
Smoke,0.0034,0.2654
Toys,-0.0028,0.5102
Fun,0.0033,0.6072
Books,-0.0031,0.6889
Hshld,-0.0011,0.5547
Clths,-0.0019,0.619


Mean Absolute Alpha for CAPM Model: 0.0017
Mean Absolute Alpha for 3-Factor Model: 0.0020
Mean Absolute Alpha for 5-Factor Model: 0.0026
Mean Absolute Alpha for AQR Model: 0.0021


In [None]:
#should fama and french use the momentum factor in their 5-factor model?
#Based on the results from the time-series tests, we can compare the 
# mean absolute alphas across the different models. If the AQR model,
#  which includes the momentum factor (UMD), shows a significantly lower
#  mean absolute alpha compared to the Fama-French 5-factor model, it 
# would suggest that including the momentum factor improves the model's 
# explanatory power. This would indicate that the momentum factor captures 
# additional variation in portfolio returns that the 5-factor model does 
# not account for, supporting the case for its inclusion in asset pricing models.

In [13]:
#report the average (across n = 49 portfolios) R-squared for each model
for model_name, results_df in model_results.items():
    avg_r_squared = results_df['R-squared'].mean()
    print(f"Average R-squared for {model_name} Model: {avg_r_squared:.4f}") 
    

Average R-squared for CAPM Model: 0.5226
Average R-squared for 3-Factor Model: 0.5679
Average R-squared for 5-Factor Model: 0.5918
Average R-squared for AQR Model: 0.5719


In [19]:
#test three models using the cross-sectional test
#calculate the annualized mean excess return for each portfolio
annualizedMean = portfolio_returns.mean() * 12
#calculate the beta of each portfolio with respect to the market factor (MKT)
market_returns = factor_data['MKT']
betas = []
for i in range(portfolio_returns.shape[1]):
    y = portfolio_returns.iloc[:, i]
    X = sm.add_constant(market_returns)
    model = sm.OLS(y, X).fit()
    betas.append(model.params['MKT'])
betas = pd.Series(betas, index=portfolio_returns.columns)
#run the cross-sectional regression of annualized mean excess returns on betas
import statsmodels.api as sm
df_crossCAPM = pd.concat([annualizedMean.rename('Excess Return'),betas.rename('Beta')], axis=1)
X1 = sm.add_constant(df_crossCAPM['Beta'])
y1 = df_crossCAPM['Excess Return']
model = sm.OLS(y1, X1).fit()
r_squared = model.rsquared
intercept = model.params['const']
slope = model.params['Beta']
print(f"R-squared: {r_squared:.4f}")
print(f"Intercept: {intercept:.6f}")
print(f"Market Risk Premium: {slope:.6f}")

#do the same for the 3-factor and 5-factor models
#3-Factor Model
#calculate the betas of each portfolio with respect to SMB and HML --> should I add MKT?
smh_betas = {'MKT': [], 'SMB': [], 'HML': []}
for i in range(portfolio_returns.shape[1]):
    y = portfolio_returns.iloc[:, i]
    X = sm.add_constant(factor_data[['MKT', 'SMB', 'HML']])
    model = sm.OLS(y, X).fit()
    smh_betas['MKT'].append(model.params['MKT'])
    smh_betas['SMB'].append(model.params['SMB'])
    smh_betas['HML'].append(model.params['HML'])
smh_betas_df = pd.DataFrame(smh_betas, index=portfolio_returns.columns)
#run the cross-sectional regression of annualized mean excess returns on betas
df_cross3F = pd.concat([annualizedMean.rename('Excess Return'), smh_betas_df], axis=1)
X2 = sm.add_constant(df_cross3F[['MKT', 'SMB', 'HML']])
y2 = df_cross3F['Excess Return']
model2 = sm.OLS(y2, X2).fit()
r_squared2 = model2.rsquared
intercept2 = model2.params['const']
slope_mkt2 = model2.params['MKT']
slope_smb = model2.params['SMB']
slope_hml = model2.params['HML']
print(f"3-Factor Model R-squared: {r_squared2:.4f}")
print(f"Intercept: {intercept2:.6f}")
print(f"MKT Risk Premium: {slope_mkt2:.6f}")
print(f"SMB Risk Premium: {slope_smb:.6f}")
print(f"HML Risk Premium: {slope_hml:.6f}")
#5-Factor Model
#calculate the betas of each portfolio with respect to SMB, HML, RMW, and CMA --> should I add MKT?
factors_5F = {'MKT': [],'SMB': [], 'HML': [], 'RMW': [], 'CMA': []}
for i in range(portfolio_returns.shape[1]):
    y = portfolio_returns.iloc[:, i]
    X = sm.add_constant(factor_data[['MKT', 'SMB', 'HML', 'RMW', 'CMA']])
    model = sm.OLS(y, X).fit()
    factors_5F['MKT'].append(model.params['MKT'])
    factors_5F['SMB'].append(model.params['SMB'])
    factors_5F['HML'].append(model.params['HML'])
    factors_5F['RMW'].append(model.params['RMW'])
    factors_5F['CMA'].append(model.params['CMA'])
factors_5F_df = pd.DataFrame(factors_5F, index=portfolio_returns.columns)
#run the cross-sectional regression of annualized mean excess returns on betas
df_cross5F = pd.concat([annualizedMean.rename('Excess Return'), factors_5F_df], axis=1)
X3 = sm.add_constant(df_cross5F[['MKT','SMB', 'HML', 'RMW', 'CMA']])
y3 = df_cross5F['Excess Return']
model3 = sm.OLS(y3, X3).fit()
r_squared3 = model3.rsquared
intercept3 = model3.params['const']
slope_mkt3 = model3.params['MKT']
slope_smb3 = model3.params['SMB']
slope_hml3 = model3.params['HML']
slope_rmw = model3.params['RMW']
slope_cma = model3.params['CMA']
print(f"5-Factor Model R-squared: {r_squared3:.4f}")
print(f"Intercept: {intercept3:.6f}")
print(f"MKT Risk Premium: {slope_mkt3:.6f}")
print(f"SMB Risk Premium: {slope_smb3:.6f}")
print(f"HML Risk Premium: {slope_hml3:.6f}")
print(f"RMW Risk Premium: {slope_rmw:.6f}")
print(f"CMA Risk Premium: {slope_cma:.6f}")
#calculate the betas of each portfolio with respect to MKT, HML, RMW, and UMD for the AQR model
factors_aqr = {'MKT': [], 'HML': [], 'RMW': [], 'UMD': []}
for i in range(portfolio_returns.shape[1]):
    y = portfolio_returns.iloc[:, i]
    X = sm.add_constant(factor_data[['MKT', 'HML', 'RMW', 'UMD']])
    model = sm.OLS(y, X).fit()
    factors_aqr['MKT'].append(model.params['MKT'])
    factors_aqr['HML'].append(model.params['HML'])
    factors_aqr['RMW'].append(model.params['RMW'])
    factors_aqr['UMD'].append(model.params['UMD'])
factors_aqr_df = pd.DataFrame(factors_aqr, index=portfolio_returns.columns)
#run the cross-sectional regression of annualized mean excess returns on betas
df_crossAQR = pd.concat([annualizedMean.rename('Excess Return'), factors_aqr_df], axis=1)
X4 = sm.add_constant(df_crossAQR[['MKT', 'HML', 'RMW', 'UMD']])
y4 = df_crossAQR['Excess Return']
model4 = sm.OLS(y4, X4).fit()
r_squared4 = model4.rsquared
intercept4 = model4.params['const']
slope_mkt4 = model4.params['MKT']
slope_hml4 = model4.params['HML']
slope_rmw4 = model4.params['RMW']
slope_umd = model4.params['UMD']
print(f"AQR Model R-squared: {r_squared4:.4f}")
print(f"Intercept: {intercept4:.6f}")
print(f"MKT Risk Premium: {slope_mkt4:.6f}")
print(f"HML Risk Premium: {slope_hml4:.6f}")
print(f"RMW Risk Premium: {slope_rmw4:.6f}")
print(f"UMD Risk Premium: {slope_umd:.6f}")



R-squared: 0.0093
Intercept: 0.083200
Market Risk Premium: 0.007920
3-Factor Model R-squared: 0.3504
Intercept: 0.062727
MKT Risk Premium: 0.038844
SMB Risk Premium: -0.039625
HML Risk Premium: -0.021037
5-Factor Model R-squared: 0.3765
Intercept: 0.059852
MKT Risk Premium: 0.040320
SMB Risk Premium: -0.041428
HML Risk Premium: -0.025943
RMW Risk Premium: 0.018705
CMA Risk Premium: -0.022083
AQR Model R-squared: 0.2066
Intercept: 0.075536
MKT Risk Premium: 0.017193
HML Risk Premium: -0.032258
RMW Risk Premium: 0.017549
UMD Risk Premium: 0.000286


In [26]:
# Annualize monthly factor returns
time_series_premia = factor_data.mean() * 12  # multiply by 12 for annualization
print("Time-Series Factor Premia (annualized):")
print(time_series_premia)

Time-Series Factor Premia (annualized):
MKT    0.087552
SMB    0.006120
HML    0.026039
RMW    0.044047
CMA    0.028288
UMD    0.060313
dtype: float64


In [25]:
# CAPM cross-sectional residuals
resid_CAPM = model.resid  # <-- directly from the fitted OLS model
MAE_CAPM = np.mean(np.abs(resid_CAPM))
print(f"MAE of CAPM cross-sectional residuals: {MAE_CAPM:.6f}")

# 3-Factor
resid_3F = model2.resid
MAE_3F = np.mean(np.abs(resid_3F))
print(f"MAE of 3-Factor cross-sectional residuals: {MAE_3F:.6f}")

# 5-Factor
resid_5F = model3.resid
MAE_5F = np.mean(np.abs(resid_5F))
print(f"MAE of 5-Factor cross-sectional residuals: {MAE_5F:.6f}")

# AQR
resid_AQR = model4.resid
MAE_AQR = np.mean(np.abs(resid_AQR))
print(f"MAE of AQR cross-sectional residuals: {MAE_AQR:.6f}")


MAE of CAPM cross-sectional residuals: 0.028908
MAE of 3-Factor cross-sectional residuals: 0.012002
MAE of 5-Factor cross-sectional residuals: 0.011979
MAE of AQR cross-sectional residuals: 0.013608
