In [2]:
# How to download and clean Fama-French data
# https://www.youtube.com/watch?v=0eQv4xVbVfQ
# https://randlow.github.io/posts/finance-economics/pandas-datareader-KF/

import pandas_datareader.data as web
from pandas_datareader.famafrench import get_available_datasets
import datetime
import pickleshare
import pandas as pd
import numpy as np
import yfinance as yf
import statsmodels.api as sm
import matplotlib.pyplot as plt

In [3]:
# Retrieve the names of every dataset exposed by the pandas-datareader
# Fama-French reader and report how many there are.
datasets = get_available_datasets()
n_datasets = len(datasets)
print(f'No. of datasets:{n_datasets}')

No. of datasets:297


In [4]:
# Group the available dataset names by the kind of factor file they describe.
# All groups are plain lists of dataset-name strings, in the order returned
# by get_available_datasets().

# Every dataset whose name mentions 'Factor' (3-factor, 5-factor, momentum, ...).
datasets_factors = [name for name in datasets if 'Factor' in name]

# Sub-groups, selected by the markers that appear in the dataset names.
datasets_factors_5F = [name for name in datasets_factors if '_5_' in name]
datasets_factors_3F = [name for name in datasets_factors if '_3_' in name]

# The match is case-sensitive: only names containing lowercase 'weekly' are kept.
datasets_factors_weekly = [name for name in datasets_factors if 'weekly' in name]

datasets_factors

['F-F_Research_Data_Factors',
 'F-F_Research_Data_Factors_weekly',
 'F-F_Research_Data_Factors_daily',
 'F-F_Research_Data_5_Factors_2x3',
 'F-F_Research_Data_5_Factors_2x3_daily',
 'F-F_Momentum_Factor',
 'F-F_Momentum_Factor_daily',
 'F-F_ST_Reversal_Factor',
 'F-F_ST_Reversal_Factor_daily',
 'F-F_LT_Reversal_Factor',
 'F-F_LT_Reversal_Factor_daily',
 'Developed_3_Factors',
 'Developed_3_Factors_Daily',
 'Developed_ex_US_3_Factors',
 'Developed_ex_US_3_Factors_Daily',
 'Europe_3_Factors',
 'Europe_3_Factors_Daily',
 'Japan_3_Factors',
 'Japan_3_Factors_Daily',
 'Asia_Pacific_ex_Japan_3_Factors',
 'Asia_Pacific_ex_Japan_3_Factors_Daily',
 'North_America_3_Factors',
 'North_America_3_Factors_Daily',
 'Developed_5_Factors',
 'Developed_5_Factors_Daily',
 'Developed_ex_US_5_Factors',
 'Developed_ex_US_5_Factors_Daily',
 'Europe_5_Factors',
 'Europe_5_Factors_Daily',
 'Japan_5_Factors',
 'Japan_5_Factors_Daily',
 'Asia_Pacific_ex_Japan_5_Factors',
 'Asia_Pacific_ex_Japan_5_Factors_Daily',

In [5]:
# List of the factor datasets that belong to neither the 5-factor group
# (datasets_factors_5F) nor the 3-factor group (datasets_factors_3F).
_names_3F_or_5F = set(datasets_factors_5F) | set(datasets_factors_3F)
datasets_factors_others = [name for name in datasets_factors if name not in _names_3F_or_5F]
datasets_factors_others

['F-F_Research_Data_Factors',
 'F-F_Research_Data_Factors_weekly',
 'F-F_Research_Data_Factors_daily',
 'F-F_Momentum_Factor',
 'F-F_Momentum_Factor_daily',
 'F-F_ST_Reversal_Factor',
 'F-F_ST_Reversal_Factor_daily',
 'F-F_LT_Reversal_Factor',
 'F-F_LT_Reversal_Factor_daily',
 'Developed_Mom_Factor',
 'Developed_Mom_Factor_Daily',
 'Developed_ex_US_Mom_Factor',
 'Developed_ex_US_Mom_Factor_Daily',
 'Europe_Mom_Factor',
 'Europe_Mom_Factor_Daily',
 'Japan_Mom_Factor',
 'Japan_Mom_Factor_Daily',
 'Asia_Pacific_ex_Japan_MOM_Factor',
 'Asia_Pacific_ex_Japan_MOM_Factor_Daily',
 'North_America_Mom_Factor',
 'North_America_Mom_Factor_Daily',
 'Emerging_MOM_Factor']

In [6]:
# Print the size of each group of factor datasets.
# (When this notebook was last run the counts were 49, 15, 12, 22 and 1.)
factor_groups = {
    "datasets_factors": datasets_factors,
    "datasets_factors_5F": datasets_factors_5F,
    "datasets_factors_3F": datasets_factors_3F,
    "datasets_factors_others": datasets_factors_others,
    "datasets_factors_weekly": datasets_factors_weekly,
}
for group_name, group_members in factor_groups.items():
    print(f"Number of {group_name}: {len(group_members)}")

Number of datasets_factors: 49
Number of datasets_factors_5F: 15
Number of datasets_factors_3F: 12
Number of datasets_factors_others: 22
Number of datasets_factors_weekly: 1


In [7]:
# Fama-French 3-factor and 5-factor models
#
# Both datasets are downloaded through pandas-datareader, converted from
# percent to decimal returns and re-indexed on month-end timestamps, then
# combined into a single frame with a two-level column header.


def _load_ff_factors(dataset_name, start):
    """Download one Fama-French dataset and return its first table, cleaned.

    Parameters
    ----------
    dataset_name : str
        Dataset name as listed by ``get_available_datasets()``.
    start : datetime.datetime
        First date requested from the reader.

    Returns
    -------
    pandas.DataFrame
        Table ``[0]`` of the dataset with values divided by 100 and the
        period index replaced by end-of-month timestamps.
    """
    # DataReader returns a collection of tables; [0] is the first one
    # (the monthly series for the two datasets used in this notebook).
    factors = web.DataReader(dataset_name, 'famafrench', start)[0]

    # convert from percent to decimal format
    factors = factors / 100

    # convert the PeriodDtype index to timestamps: to_timestamp() gives the
    # first day of each period, MonthEnd() then moves it to the end of that month
    factors.index = factors.index.to_timestamp() + pd.offsets.MonthEnd()
    return factors


# Selecting the datasets of interest in string format
ff3_dataset_name = 'F-F_Research_Data_Factors'
ff5_dataset_name = 'F-F_Research_Data_5_Factors_2x3'
start = datetime.datetime(1979, 12, 31)

ff3 = _load_ff_factors(ff3_dataset_name, start)
ff5 = _load_ff_factors(ff5_dataset_name, start)

# Display names used as the top header level of the combined frame
ff3_name = 'Fama-French 3-Factor Model'
ff5_name = 'Fama-French 5-Factor Model'

# Two header lines per model: the model name repeated once per column,
# and the original factor column names
ff3_superheader = [ff3_name] * len(ff3.columns)
ff3_headers = list(ff3.columns)
ff5_superheader = [ff5_name] * len(ff5.columns)
ff5_headers = list(ff5.columns)
ff3_two_lines_headers = [ff3_superheader, ff3_headers]
ff5_two_lines_headers = [ff5_superheader, ff5_headers]

# Concatenate the two frames side by side (rows are aligned on the date index)
ff3_5 = pd.concat([ff3, ff5], axis=1)

# Replace the flat column names with a (model name, factor name) MultiIndex
ff3_5.columns = pd.MultiIndex.from_arrays(
    [ff3_superheader + ff5_superheader, ff3_headers + ff5_headers]
)
ff3_5

  ff3 = web.DataReader(ff3_dataset_name, 'famafrench', start)[0]
  ff3 = web.DataReader(ff3_dataset_name, 'famafrench', start)[0]
  ff5 = web.DataReader(ff5_dataset_name, 'famafrench', start)[0]
  ff5 = web.DataReader(ff5_dataset_name, 'famafrench', start)[0]


Unnamed: 0_level_0,Fama-French 3-Factor Model,Fama-French 3-Factor Model,Fama-French 3-Factor Model,Fama-French 3-Factor Model,Fama-French 5-Factor Model,Fama-French 5-Factor Model,Fama-French 5-Factor Model,Fama-French 5-Factor Model,Fama-French 5-Factor Model,Fama-French 5-Factor Model
Unnamed: 0_level_1,Mkt-RF,SMB,HML,RF,Mkt-RF,SMB,HML,RMW,CMA,RF
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
1979-12-31,0.0179,0.0417,-0.0210,0.0095,0.0179,0.0432,-0.0210,-0.0070,-0.0092,0.0095
1980-01-31,0.0551,0.0162,0.0175,0.0080,0.0551,0.0183,0.0175,-0.0170,0.0164,0.0080
1980-02-29,-0.0122,-0.0185,0.0061,0.0089,-0.0122,-0.0157,0.0061,0.0004,0.0268,0.0089
1980-03-31,-0.1290,-0.0664,-0.0101,0.0121,-0.1290,-0.0693,-0.0101,0.0146,-0.0119,0.0121
1980-04-30,0.0397,0.0105,0.0106,0.0126,0.0397,0.0105,0.0106,-0.0210,0.0029,0.0126
...,...,...,...,...,...,...,...,...,...,...
2023-03-31,0.0251,-0.0551,-0.0885,0.0036,0.0251,-0.0694,-0.0885,0.0224,-0.0237,0.0036
2023-04-30,0.0061,-0.0335,-0.0004,0.0035,0.0061,-0.0256,-0.0004,0.0242,0.0286,0.0035
2023-05-31,0.0035,0.0161,-0.0772,0.0036,0.0035,-0.0038,-0.0772,-0.0181,-0.0722,0.0036
2023-06-30,0.0646,0.0154,-0.0026,0.0040,0.0646,0.0134,-0.0026,0.0218,-0.0162,0.0040


In [8]:
# Display the three-factor frame (values in decimal form, month-end timestamp index).
ff3

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1979-12-31,0.0179,0.0417,-0.0210,0.0095
1980-01-31,0.0551,0.0162,0.0175,0.0080
1980-02-29,-0.0122,-0.0185,0.0061,0.0089
1980-03-31,-0.1290,-0.0664,-0.0101,0.0121
1980-04-30,0.0397,0.0105,0.0106,0.0126
...,...,...,...,...
2023-03-31,0.0251,-0.0551,-0.0885,0.0036
2023-04-30,0.0061,-0.0335,-0.0004,0.0035
2023-05-31,0.0035,0.0161,-0.0772,0.0036
2023-06-30,0.0646,0.0154,-0.0026,0.0040


In [9]:
# Last date in the Fama-French three-factor frame (final label of its index);
# used further down to cut the asset price history at the same date.
ff_last = ff3.index[-1]
ff_last

Timestamp('2023-07-31 00:00:00')

In [10]:
# Download the daily price history of FCNTX.
# - period="max" is requested explicitly: the regressions below need the full
#   history, and the default window used when no start/end is given is not the
#   same in every yfinance release.
# - auto_adjust=False is pinned so that 'Close' is the unadjusted closing price
#   regardless of the installed yfinance version's default.
# NOTE(review): unadjusted closes ignore fund distributions, so the returns
# derived from them are price returns, not total returns - confirm that this
# is what the regressions are meant to use.
asset_prices_raw = yf.download("FCNTX", period="max", auto_adjust=False)

# Keep only the rows up to the last Fama-French date and only the closing
# price, renamed to 'Asset'.
asset_prices = (
    asset_prices_raw
    .loc[:ff_last, ['Close']]
    .set_axis(['Asset'], axis=1)
)

asset_prices.tail()

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Asset
Date,Unnamed: 1_level_1
2023-07-25,15.28
2023-07-26,15.3
2023-07-27,15.31
2023-07-28,15.53
2023-07-31,15.54


In [11]:
# Collapse the daily prices to one observation per calendar month: the last
# available price, labelled with the month-end date ('M' frequency).
# NOTE(review): newer pandas releases prefer the 'ME' alias for month-end - confirm
# against the installed version before changing it.
asset_prices_monthly = asset_prices.resample('M').last()
# Month-over-month simple returns; dropna() discards rows without a return,
# such as the first month, which has no previous price.
asset = asset_prices_monthly.pct_change().dropna()

In [12]:
# Excess return of the asset: monthly return minus the risk-free rate from the
# three-factor frame. The subtraction is aligned on the date index.
risk_free = ff3['RF']
asset['Asset-RF'] = asset['Asset'].sub(risk_free)
asset

Unnamed: 0_level_0,Asset,Asset-RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1980-02-29,-0.016874,-0.025774
1980-03-31,-0.089431,-0.101531
1980-04-30,0.017857,0.005257
1980-05-31,0.078947,0.070847
1980-06-30,0.011743,0.005643
...,...,...
2023-03-31,0.058964,0.055364
2023-04-30,0.032355,0.028855
2023-05-31,0.025510,0.021910
2023-06-30,0.060412,0.056412


In [13]:
# Run the single-factor (market model) regression on the most recent months:
#     Asset-RF = const + beta * (Mkt-RF) + error

# Number of most recent monthly observations used in the regression
n_months = 50

# Pair the asset excess return with the market excess return on their common
# dates BEFORE taking the window, so each row of y and x refers to the same
# month even if the two series end on different dates or contain gaps.
capm_sample = (
    pd.concat([asset['Asset-RF'], ff3['Mkt-RF']], axis=1, join='inner')
    .dropna()
    .tail(n_months)
)

# dependent variable
y = capm_sample['Asset-RF']
# independent variable, plus a constant column for the intercept
x = sm.add_constant(capm_sample['Mkt-RF'])

# model
model = sm.OLS(y, x)
results = model.fit()
display(results.summary())

0,1,2,3
Dep. Variable:,Asset-RF,R-squared:,0.799
Model:,OLS,Adj. R-squared:,0.795
Method:,Least Squares,F-statistic:,191.4
Date:,"Wed, 11 Oct 2023",Prob (F-statistic):,2.28e-18
Time:,23:36:18,Log-Likelihood:,108.83
No. Observations:,50,AIC:,-213.7
Df Residuals:,48,BIC:,-209.8
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0061,0.004,-1.511,0.137,-0.014,0.002
Mkt-RF,0.9851,0.071,13.834,0.000,0.842,1.128

0,1,2,3
Omnibus:,23.962,Durbin-Watson:,1.737
Prob(Omnibus):,0.0,Jarque-Bera (JB):,36.859
Skew:,-1.598,Prob(JB):,9.91e-09
Kurtosis:,5.734,Cond. No.,18.0


In [14]:
# Fama-French three-factor regression over a fixed sample window:
#     Asset-RF = const + b1 * (Mkt-RF) + b2 * SMB + b3 * HML + error
sample_window = slice('2016-01-31', '2019-05-31')

# dependent variable: excess return of the asset inside the window
y = asset.loc[sample_window, 'Asset-RF']

# independent variables: the three factors inside the window, plus a constant
factor_columns = ['Mkt-RF', 'SMB', 'HML']
x = sm.add_constant(ff3.loc[sample_window, factor_columns])

# model
model = sm.OLS(y, x)
display(model.fit().summary())

0,1,2,3
Dep. Variable:,Asset-RF,R-squared:,0.854
Model:,OLS,Adj. R-squared:,0.842
Method:,Least Squares,F-statistic:,72.22
Date:,"Wed, 11 Oct 2023",Prob (F-statistic):,1.57e-15
Time:,23:36:18,Log-Likelihood:,109.02
No. Observations:,41,AIC:,-210.0
Df Residuals:,37,BIC:,-203.2
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0058,0.003,-2.004,0.052,-0.012,6.53e-05
Mkt-RF,1.1606,0.084,13.784,0.000,0.990,1.331
SMB,-0.1856,0.122,-1.516,0.138,-0.434,0.062
HML,-0.3479,0.109,-3.204,0.003,-0.568,-0.128

0,1,2,3
Omnibus:,15.829,Durbin-Watson:,1.947
Prob(Omnibus):,0.0,Jarque-Bera (JB):,18.658
Skew:,-1.286,Prob(JB):,8.88e-05
Kurtosis:,5.076,Cond. No.,46.9


In [15]:
# Estimated coefficients (intercept and factor loadings) of the most recently defined model.
# Note: fit() is called again here, so the model is re-estimated just to read its params.
model.fit().params

const    -0.005811
Mkt-RF    1.160639
SMB      -0.185583
HML      -0.347898
dtype: float64