In [1]:

import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt

In [2]:

# Load stocks returns data
stocks_df = pd.read_csv('data/optimal_portfolio_returns.csv', parse_dates=['Date'])
stocks_df = stocks_df.sort_values(by='Date')
stocks_df.set_index('Date', inplace=True)
tickers = stocks_df.columns[1:]

# Fama-French 3 Factor Model

The Fama-French 3 Factor Model, like the CAPM, can be seen as a metric for determining the excess return that should be expected from holding a risky asset such as an equity. The model was introduced in Fama & French's paper titled "Common risk factors in the returns on stocks and bonds" (1993), published in the Journal of Financial Economics. It tends to outperform the CAPM, managing to explain more of the variance in the data. Also, it makes sense that investors should be rewarded more for holding smaller companies than large ones, or that investors should be compensated for holding high-value assets rather than inflated popular ones into which value has already been priced in.

$ R_P = RFR + \beta_M(R_M - RFR) + b_{SMB} \cdot SMB + b_{HML} \cdot HML + \alpha$,

where: 
- $SMB$ is the Small minus Big factor,
- $HML$ is the High minus Low factor,
- $\alpha$ is the performance which remains unexplained by all other factors
- $R_P$ is the expected portfolio return,
- $\beta_M$ is the portfolio's exposure to the broad market return.

In [3]:

# The CAPM accomodates one factor (market excess) to explain portfolio returns
# The Fama-French model accomodates more -- initially 3, now 5 -- well researched factors
# The market excess remains one of the Fama-French factors, typically the most important one!
# Fama-French data goes back to 1926. Merge/join on date with current stock returns (e.g. equal-weighted) to get condensed data set
FamaFrench_data = pd.read_csv('data/FF_factors_daily.csv', parse_dates=['Date'])
FamaFrench_data.set_index('Date', inplace=True)
FamaFrench_data = pd.merge(FamaFrench_data, stocks_df[['EqualPortfolio']], left_on='Date', right_on='Date')
FamaFrench_data.rename(columns={'Mkt-RF':'MarketExcess'}, inplace=True)
FamaFrench_data['PortfolioExcess'] = FamaFrench_data['EqualPortfolio'] - FamaFrench_data['RF']

In [4]:
FamaFrench_data.head()

Unnamed: 0_level_0,MarketExcess,SMB,HML,RF,EqualPortfolio,PortfolioExcess
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-11-18,-0.93,0.0,0.0,0.0,0.0,0.0
2020-11-19,0.59,0.48,-0.74,0.0,0.008681,0.008681
2020-11-20,-0.51,0.68,-0.61,0.0,-0.010867,-0.010867
2020-11-23,0.83,0.89,1.57,0.0,-0.000434,-0.000434
2020-11-24,1.56,-0.15,2.75,0.0,0.017612,0.017612


In [5]:
# Fit the excess returns data to the model
model = smf.ols(formula='PortfolioExcess ~ MarketExcess + SMB + HML', data=FamaFrench_data)
fitted_model = model.fit()
print(fitted_model.params)

Intercept       0.001014
MarketExcess    0.012061
SMB            -0.002154
HML            -0.005368
dtype: float64


In [6]:

adjusted_r_sq = fitted_model.rsquared_adj
print("The model's adjusted r-squared is {:.2f}".format(adjusted_r_sq*100)+"%.")

The model's adjusted r-squared is 82.54%.


In [7]:

# Are the factor coeffients statistically significant?
# SMB coeffiecient is neg(-) and significant --> portfolio dominated by large caps
# HML coeffiecient is neg(-) and significant --> portfolio dominated by popular stocks
def pval_significance(factor):
    pval = fitted_model.pvalues[factor]
    if pval < 0.05:
        significant_msg = 'significant.'
    else:
        significant_msg = 'not significant.'
    factor_coefficient = fitted_model.params[factor]
    print("The "+factor+" coefficient is "+str(factor_coefficient)+" and is "+significant_msg)
    return 0

factors = ['SMB', 'HML']
for f in factors:
    pval_significance(f)

The SMB coefficient is -0.0021540054176143186 and is significant.
The HML coefficient is -0.0053682633229130815 and is significant.


In [8]:

# Positive alpha is construed as outperformance due to skill or timing, not due to luck or the broad market
pos_alpha = fitted_model.params['Intercept']
pos_alpha_annualised = ((1+pos_alpha)**252) - 1
print("The portfolio's positive alpha is {:.2f}".format(pos_alpha_annualised*100)+"%.")

The portfolio's positive alpha is 29.10%.


# Fama-French 5 Factor Model

Fama and French extended their model to 5 factors in, with 2 extra factors being added. More information can be found in their paper, "A Five-Factor Asset Pricing Model" (2014).

$ R_P = RFR + \beta_M(R_M - RFR) + b_{SMB} \cdot SMB + b_{HML} \cdot HML + b_{RMW} \cdot RMW + b_{CMA} \cdot CMA + \alpha$,

where:
- $RMW$ is the robust minus weak operating profitability factor,
- $CMA$ is the conservative minus aggressive investment factor.

It is worth noting that some scholars, such as Cliff Asness, suggested Momentum as a potential factor in 2016 (see Fama on Momentum:https://www.aqr.com/Insights/Perspectives/Fama-on-Momentum). Fama and French disagreed. The 5-Factor model has also been criticised for other shortcomings, such as failing to explain returns results in the UK.

In [9]:

# Load FF 5 factor data
FF_5factor_data = pd.read_csv('data/FF_5factor_daily.csv', parse_dates=['Date'])
FF_5factor_data.set_index('Date', inplace=True)
FF_5factor_data = pd.merge(FF_5factor_data, stocks_df[['EqualPortfolio']], left_on='Date', right_on='Date')
FF_5factor_data.rename(columns={'Mkt-RF':'MarketExcess'}, inplace=True)
FF_5factor_data['PortfolioExcess'] = FF_5factor_data['EqualPortfolio'] - FF_5factor_data['RF']

In [10]:
FF_5factor_data.head()

Unnamed: 0_level_0,MarketExcess,SMB,HML,RMW,CMA,RF,EqualPortfolio,PortfolioExcess
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-11-18,-0.93,-0.03,0.0,0.07,-0.4,0.0,0.0,0.0
2020-11-19,0.59,0.29,-0.74,-0.64,-0.2,0.0,0.008681,0.008681
2020-11-20,-0.51,0.6,-0.61,-0.73,-0.11,0.0,-0.010867,-0.010867
2020-11-23,0.83,1.42,1.57,0.31,0.05,0.0,-0.000434,-0.000434
2020-11-24,1.56,0.48,2.75,0.53,0.53,0.0,0.017612,0.017612


In [11]:

model = smf.ols(formula='PortfolioExcess ~ MarketExcess + SMB + HML + RMW + CMA', data=FF_5factor_data)
fitted_model = model.fit()
print(fitted_model.params)

Intercept       0.000979
MarketExcess    0.012205
SMB            -0.002174
HML            -0.005089
RMW             0.000655
CMA             0.000106
dtype: float64


In [12]:

# NOTE: Adding more factors increases model performance very marginally
adjusted_r_sq = fitted_model.rsquared_adj
print("The model's adjusted r-squared is {:.2f}".format(adjusted_r_sq*100)+"%.")

The model's adjusted r-squared is 82.92%.


In [13]:

# NOTE: Although positively influencing the model result, the new factors are not statistically significant
def pval_significance(factor):
    pval = fitted_model.pvalues[factor]
    if pval < 0.05:
        significant_msg = 'significant.'
    else:
        significant_msg = 'not significant.'
    factor_coefficient = fitted_model.params[factor]
    print("The "+factor+" coefficient is "+str(factor_coefficient)+" and is "+significant_msg)
    return 0

factors = ['RMW', 'CMA']
for f in factors:
    pval_significance(f)

The RMW coefficient is 0.0006547954241482954 and is not significant.
The CMA coefficient is 0.0001061740485300681 and is not significant.


In [14]:

# NOTE: More factors have led to a lower portfolio alpha, as more of the returns are now being explained away
pos_alpha = fitted_model.params['Intercept']
pos_alpha_annualised = ((1+pos_alpha)**252) - 1
print("The portfolio's positive alpha is {:.2f}".format(pos_alpha_annualised*100)+"%.")

The portfolio's positive alpha is 27.96%.
