In [14]:
# How to download and clean Fama-French data
# https://www.codingfinance.com/post/2019-07-01-analyze-ff-factor-python/
# https://randlow.github.io/posts/finance-economics/pandas-datareader-KF/

import pandas_datareader.data as web
from pandas_datareader.famafrench import get_available_datasets
import datetime
#import pickleshare
import pandas as pd
import numpy as np
import yfinance as yf
import statsmodels.api as sm
import matplotlib.pyplot as plt

In [15]:
# Report the installed version of each main library used in this notebook
print(
    f'numpy version: {np.__version__}',
    f'pandas version: {pd.__version__}',
    f'statsmodels version: {sm.__version__}',
    f'yfinance version: {yf.__version__}',
    sep='\n',
)

numpy version: 2.0.2
pandas version: 2.2.2
statsmodels version: 0.14.4
yfinance version: 0.2.54


In [3]:
# Fetch the names of all datasets exposed by pandas-datareader's Fama-French
# module, then report how many there are
datasets = get_available_datasets()
print(f'No. of datasets: {len(datasets)}')
# (evaluate `datasets` on its own to list every name)

No. of datasets: 297


In [4]:
# Sort the dataset names into groups by factor model, industry breakdown and
# sampling frequency. Only the names are inspected; nothing is downloaded here.

# Every dataset whose name mentions 'Factor'; the factor groups below are subsets of it
datasets_factors = [dataset for dataset in datasets if 'Factor' in dataset]

# Factor model, recognised by the '_<n>_' token in the name. Names without such
# a token (e.g. 'F-F_Research_Data_Factors') fall in none of these three groups.
datasets_factors_5F = [dataset for dataset in datasets_factors if '_5_' in dataset]
datasets_factors_3F = [dataset for dataset in datasets_factors if '_3_' in dataset]
datasets_factors_6F = [dataset for dataset in datasets_factors if '_6_' in dataset]


def _industry_datasets(n_industries):
    """Return the dataset names that start with '<n_industries>_Industry'.

    Matching on the full prefix instead of a bare digit substring keeps a
    group from picking up names that merely contain the same digits.
    """
    prefix = f'{n_industries}_Industry'
    return [dataset for dataset in datasets if dataset.startswith(prefix)]


datasets_industry_5 = _industry_datasets(5)
datasets_industry_10 = _industry_datasets(10)
datasets_industry_12 = _industry_datasets(12)
datasets_industry_17 = _industry_datasets(17)
datasets_industry_30 = _industry_datasets(30)
datasets_industry_38 = _industry_datasets(38)
datasets_industry_48 = _industry_datasets(48)
datasets_industry_49 = _industry_datasets(49)

# Sampling frequency. The suffix is spelled both '_daily' and '_Daily' in the
# dataset names, so the comparison is made on the lower-cased name.
datasets_factors_daily = [dataset for dataset in datasets_factors if 'daily' in dataset.lower()]
datasets_factors_weekly = [dataset for dataset in datasets_factors if 'weekly' in dataset.lower()]
# No name carries a 'monthly' suffix: the monthly files are the ones without
# any frequency suffix, i.e. the factor datasets that are neither daily nor weekly.
datasets_factors_monthly = [
    dataset
    for dataset in datasets_factors
    if dataset not in datasets_factors_daily and dataset not in datasets_factors_weekly
]

datasets_factors

['F-F_Research_Data_Factors',
 'F-F_Research_Data_Factors_weekly',
 'F-F_Research_Data_Factors_daily',
 'F-F_Research_Data_5_Factors_2x3',
 'F-F_Research_Data_5_Factors_2x3_daily',
 'F-F_Momentum_Factor',
 'F-F_Momentum_Factor_daily',
 'F-F_ST_Reversal_Factor',
 'F-F_ST_Reversal_Factor_daily',
 'F-F_LT_Reversal_Factor',
 'F-F_LT_Reversal_Factor_daily',
 'Developed_3_Factors',
 'Developed_3_Factors_Daily',
 'Developed_ex_US_3_Factors',
 'Developed_ex_US_3_Factors_Daily',
 'Europe_3_Factors',
 'Europe_3_Factors_Daily',
 'Japan_3_Factors',
 'Japan_3_Factors_Daily',
 'Asia_Pacific_ex_Japan_3_Factors',
 'Asia_Pacific_ex_Japan_3_Factors_Daily',
 'North_America_3_Factors',
 'North_America_3_Factors_Daily',
 'Developed_5_Factors',
 'Developed_5_Factors_Daily',
 'Developed_ex_US_5_Factors',
 'Developed_ex_US_5_Factors_Daily',
 'Europe_5_Factors',
 'Europe_5_Factors_Daily',
 'Japan_5_Factors',
 'Japan_5_Factors_Daily',
 'Asia_Pacific_ex_Japan_5_Factors',
 'Asia_Pacific_ex_Japan_5_Factors_Daily',

In [5]:
# Show the factor datasets whose name contains 'weekly'
datasets_factors_weekly

['F-F_Research_Data_Factors_weekly']

In [6]:
# Factor datasets that belong to neither the 5-factor nor the 3-factor group
# (the result is a plain list of dataset names, in their original order)
datasets_factors_others = [
    name
    for name in datasets_factors
    if not (name in datasets_factors_5F or name in datasets_factors_3F)
]
datasets_factors_others

['F-F_Research_Data_Factors',
 'F-F_Research_Data_Factors_weekly',
 'F-F_Research_Data_Factors_daily',
 'F-F_Momentum_Factor',
 'F-F_Momentum_Factor_daily',
 'F-F_ST_Reversal_Factor',
 'F-F_ST_Reversal_Factor_daily',
 'F-F_LT_Reversal_Factor',
 'F-F_LT_Reversal_Factor_daily',
 'Developed_Mom_Factor',
 'Developed_Mom_Factor_Daily',
 'Developed_ex_US_Mom_Factor',
 'Developed_ex_US_Mom_Factor_Daily',
 'Europe_Mom_Factor',
 'Europe_Mom_Factor_Daily',
 'Japan_Mom_Factor',
 'Japan_Mom_Factor_Daily',
 'Asia_Pacific_ex_Japan_MOM_Factor',
 'Asia_Pacific_ex_Japan_MOM_Factor_Daily',
 'North_America_Mom_Factor',
 'North_America_Mom_Factor_Daily',
 'Emerging_MOM_Factor']

In [7]:
# Dataset names that are not part of the factor group
# (the result is a plain list of names, in their original order)
datasets_not_factors = [
    name
    for name in datasets
    if not (name in datasets_factors)
]
datasets_not_factors

['Portfolios_Formed_on_ME',
 'Portfolios_Formed_on_ME_Wout_Div',
 'Portfolios_Formed_on_ME_Daily',
 'Portfolios_Formed_on_BE-ME',
 'Portfolios_Formed_on_BE-ME_Wout_Div',
 'Portfolios_Formed_on_BE-ME_Daily',
 'Portfolios_Formed_on_OP',
 'Portfolios_Formed_on_OP_Wout_Div',
 'Portfolios_Formed_on_OP_Daily',
 'Portfolios_Formed_on_INV',
 'Portfolios_Formed_on_INV_Wout_Div',
 'Portfolios_Formed_on_INV_Daily',
 '6_Portfolios_2x3',
 '6_Portfolios_2x3_Wout_Div',
 '6_Portfolios_2x3_weekly',
 '6_Portfolios_2x3_daily',
 '25_Portfolios_5x5',
 '25_Portfolios_5x5_Wout_Div',
 '25_Portfolios_5x5_Daily',
 '100_Portfolios_10x10',
 '100_Portfolios_10x10_Wout_Div',
 '100_Portfolios_10x10_Daily',
 '6_Portfolios_ME_OP_2x3',
 '6_Portfolios_ME_OP_2x3_Wout_Div',
 '6_Portfolios_ME_OP_2x3_daily',
 '25_Portfolios_ME_OP_5x5',
 '25_Portfolios_ME_OP_5x5_Wout_Div',
 '25_Portfolios_ME_OP_5x5_daily',
 '100_Portfolios_ME_OP_10x10',
 '100_Portfolios_10x10_ME_OP_Wout_Div',
 '100_Portfolios_ME_OP_10x10_daily',
 '6_Portfoli

In [None]:
# Size of each dataset group. The trailing comments record the counts observed
# at the last update on 2025-02-25 (the total was the same one year earlier, on 2024-02-27).
print(
    f'No. of datasets: {len(datasets)}',                                     # 297
    f'Number of datasets_factors: {len(datasets_factors)}',                  # 49
    f'Number of datasets_factors_5F: {len(datasets_factors_5F)}',            # 15
    f'Number of datasets_factors_3F: {len(datasets_factors_3F)}',            # 12
    f'Number of datasets_factors_others: {len(datasets_factors_others)}',    # 22
    f'Number of datasets_factors_weekly: {len(datasets_factors_weekly)}',    # 1
    f'Number of datasets_not_factors: {len(datasets_not_factors)}',          # 248
    sep='\n',
)

No. of datasets: 297
Number of datasets_factors: 49
Number of datasets_factors_5F: 15
Number of datasets_factors_3F: 12
Number of datasets_factors_others: 22
Number of datasets_factors_weekly: 1
Number of datasets_not_factors: 248


In [9]:
# Fama-French 3-factor and 5-factor models

# Names of the two datasets to download
ff3_dataset_name = 'F-F_Research_Data_Factors'
ff5_dataset_name = 'F-F_Research_Data_5_Factors_2x3'
# First date requested from the data source
start = datetime.datetime(1979, 12, 31)


def _load_factors(dataset_name):
    """Download one Fama-French dataset and return its table 0, cleaned.

    The values are divided by 100 (percent to decimal) and the period index
    is replaced by timestamps falling on the last day of each month.
    """
    table = web.DataReader(dataset_name, 'famafrench', start)[0]
    table = table / 100
    # to_timestamp() gives the first day of each period; adding MonthEnd
    # rolls it forward to the last day of that same month
    table.index = table.index.to_timestamp() + pd.offsets.MonthEnd()
    return table


ff3 = _load_factors(ff3_dataset_name)
ff5 = _load_factors(ff5_dataset_name)

# Display names of the two models
ff3_name = 'Fama-French 3-Factor Model'
ff5_name = 'Fama-French 5-Factor Model'

# Two header rows per model: the model name repeated above each original column name
ff3_headers = list(ff3.columns)
ff5_headers = list(ff5.columns)
ff3_superheader = [ff3_name] * len(ff3_headers)
ff5_superheader = [ff5_name] * len(ff5_headers)
ff3_two_lines_headers = [ff3_superheader, ff3_headers]
ff5_two_lines_headers = [ff5_superheader, ff5_headers]

# Put the two tables side by side, matched on date
ff3_5 = pd.concat([ff3, ff5], axis=1)

# Replace the flat column names by the two header rows (model name, factor name);
# assigning a list of two lists gives the frame two-level columns
ff3_5.columns = [
    ff3_superheader + ff5_superheader,
    ff3_headers + ff5_headers,
]
ff3_5

  ff3 = web.DataReader(ff3_dataset_name, 'famafrench', start)[0]
  ff3 = web.DataReader(ff3_dataset_name, 'famafrench', start)[0]
  ff5 = web.DataReader(ff5_dataset_name, 'famafrench', start)[0]
  ff5 = web.DataReader(ff5_dataset_name, 'famafrench', start)[0]


Unnamed: 0_level_0,Fama-French 3-Factor Model,Fama-French 3-Factor Model,Fama-French 3-Factor Model,Fama-French 3-Factor Model,Fama-French 5-Factor Model,Fama-French 5-Factor Model,Fama-French 5-Factor Model,Fama-French 5-Factor Model,Fama-French 5-Factor Model,Fama-French 5-Factor Model
Unnamed: 0_level_1,Mkt-RF,SMB,HML,RF,Mkt-RF,SMB,HML,RMW,CMA,RF
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
1979-12-31,0.0179,0.0417,-0.0210,0.0095,0.0179,0.0432,-0.0210,-0.0070,-0.0092,0.0095
1980-01-31,0.0551,0.0162,0.0175,0.0080,0.0551,0.0183,0.0175,-0.0170,0.0164,0.0080
1980-02-29,-0.0122,-0.0185,0.0061,0.0089,-0.0122,-0.0157,0.0061,0.0004,0.0268,0.0089
1980-03-31,-0.1290,-0.0664,-0.0101,0.0121,-0.1290,-0.0693,-0.0101,0.0146,-0.0119,0.0121
1980-04-30,0.0397,0.0105,0.0106,0.0126,0.0397,0.0105,0.0106,-0.0210,0.0029,0.0126
...,...,...,...,...,...,...,...,...,...,...
2024-08-31,0.0161,-0.0355,-0.0113,0.0048,0.0161,-0.0365,-0.0113,0.0085,0.0086,0.0048
2024-09-30,0.0174,-0.0017,-0.0259,0.0040,0.0174,-0.0102,-0.0259,0.0004,-0.0026,0.0040
2024-10-31,-0.0097,-0.0101,0.0089,0.0039,-0.0097,-0.0088,0.0089,-0.0138,0.0103,0.0039
2024-11-30,0.0651,0.0463,-0.0005,0.0040,0.0651,0.0478,-0.0005,-0.0262,-0.0217,0.0040


In [10]:
# Show the cleaned three-factor table (values in decimal form, month-end dates)
ff3

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1979-12-31,0.0179,0.0417,-0.0210,0.0095
1980-01-31,0.0551,0.0162,0.0175,0.0080
1980-02-29,-0.0122,-0.0185,0.0061,0.0089
1980-03-31,-0.1290,-0.0664,-0.0101,0.0121
1980-04-30,0.0397,0.0105,0.0106,0.0126
...,...,...,...,...
2024-08-31,0.0161,-0.0355,-0.0113,0.0048
2024-09-30,0.0174,-0.0017,-0.0259,0.0040
2024-10-31,-0.0097,-0.0101,0.0089,0.0039
2024-11-30,0.0651,0.0463,-0.0005,0.0040


In [11]:
# Last date in the index of the three-factor table (positional lookup, so it
# is the latest date only if the index is in chronological order)
ff_last = ff3.index[-1]
ff_last

Timestamp('2024-12-31 00:00:00')

In [18]:
# Download the daily price history of FCNTX.
# auto_adjust is passed explicitly instead of relying on the library default,
# so that 'Close' means the same thing whichever yfinance version is installed.
# NOTE(review): per the yfinance documentation, auto_adjust=True returns
# adjusted prices and its default has changed between releases - confirm.
asset_prices = yf.download('FCNTX', auto_adjust=True)
# Keep the rows up to and including the last Fama-French date (label-based slice)
asset_prices = asset_prices.loc[:ff_last]
# Keep the closing price only, as a one-column frame named 'Asset'
asset_prices = asset_prices[['Close']]
asset_prices.columns = ['Asset']

asset_prices.tail()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Asset
Date,Unnamed: 1_level_1
2024-12-24,21.639999
2024-12-26,21.6
2024-12-27,21.370001
2024-12-30,21.16
2024-12-31,21.030001


In [19]:
# Month-end prices: the last available daily price of each calendar month.
# 'ME' is the month-end alias; the older 'M' spelling is deprecated in
# pandas 2.2 and emits a FutureWarning.
asset_prices_monthly = asset_prices.resample('ME').last()
# Restrict prices and factors to the study window, which ends on 2019-06-30
asset_prices_monthly = asset_prices_monthly.loc[:'2019-06-30']
# NOTE: this rebinds ff3 to the sub-sample, so later cells only see this window
ff3 = ff3.loc['1980-02-29':'2019-06-30']
# Simple monthly returns; the first month has no previous price and is dropped
asset = asset_prices_monthly.pct_change().dropna()

  asset_prices_monthly = asset_prices.resample('M').last()


In [20]:
# Excess return of the asset: its monthly return minus the RF column of the
# factor table, matched on the date index
asset['Asset-RF'] = asset['Asset'].sub(ff3['RF'])
asset

Unnamed: 0_level_0,Asset,Asset-RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1980-02-29,-0.016874,-0.025774
1980-03-31,-0.089431,-0.101531
1980-04-30,0.017858,0.005258
1980-05-31,0.078947,0.070847
1980-06-30,0.011743,0.005643
...,...,...
2019-02-28,0.023960,0.022160
2019-03-31,0.022077,0.020177
2019-04-30,0.048800,0.046700
2019-05-31,-0.057208,-0.059308


In [21]:
# Single-factor regression: asset excess return on the market excess return,
# using the last 50 observations of each series. Both slices are positional,
# so they line up only if asset and ff3 end on the same month.
# dependent variable
y = asset['Asset-RF'].iloc[-50:]
# independent variable, with a constant column for the intercept
x = sm.add_constant(ff3['Mkt-RF'].iloc[-50:])
# ordinary least squares: build, fit, report
model = sm.OLS(y, x)
results = model.fit()
print(results.summary())

                             OLS Regression Results                            
Dep. Variable:                Asset-RF   R-squared:                       0.866
Model:                             OLS   Adj. R-squared:                  0.863
Method:                  Least Squares   F-statistic:                     309.0
Date:              mar., 25 févr. 2025   Prob (F-statistic):           1.50e-22
Time:                         21:18:57   Log-Likelihood:                 141.86
No. Observations:                   50   AIC:                            -279.7
Df Residuals:                       48   BIC:                            -275.9
Df Model:                            1                                         
Covariance Type:             nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0020      0.002      0.96

In [22]:
# Three-factor regression: asset excess return on Mkt-RF, SMB and HML,
# using the rows dated from 1980-01-31 to 2019-05-31 (both ends included)
# dependent variable
y = asset.loc['1980-01-31':'2019-05-31', 'Asset-RF']
# the three independent variables, with a constant column for the intercept
x = sm.add_constant(ff3.loc['1980-01-31':'2019-05-31', ['Mkt-RF', 'SMB', 'HML']])
# ordinary least squares: build the model, then fit it and print the report
model = sm.OLS(y, x)
print(model.fit().summary())

                             OLS Regression Results                            
Dep. Variable:                Asset-RF   R-squared:                       0.788
Model:                             OLS   Adj. R-squared:                  0.787
Method:                  Least Squares   F-statistic:                     580.6
Date:              mar., 25 févr. 2025   Prob (F-statistic):          2.82e-157
Time:                         21:19:03   Log-Likelihood:                 1152.9
No. Observations:                  472   AIC:                            -2298.
Df Residuals:                      468   BIC:                            -2281.
Df Model:                            3                                         
Covariance Type:             nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0012      0.001      1.20

In [23]:
# Estimated coefficients of the current `model` (this calls fit() again
# rather than reusing an earlier fit result)
model.fit().params

const     0.001195
Mkt-RF    0.893397
SMB       0.029186
HML      -0.107796
dtype: float64

In [24]:
# Regressors (with the constant column) and the dependent variable side by
# side, matched on date, to inspect the data fed to the regression
pd.concat([x, y], axis=1)

Unnamed: 0_level_0,const,Mkt-RF,SMB,HML,Asset-RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1980-02-29,1.0,-0.0122,-0.0185,0.0061,-0.025774
1980-03-31,1.0,-0.1290,-0.0664,-0.0101,-0.101531
1980-04-30,1.0,0.0397,0.0105,0.0106,0.005258
1980-05-31,1.0,0.0526,0.0213,0.0038,0.070847
1980-06-30,1.0,0.0306,0.0166,-0.0076,0.005643
...,...,...,...,...,...
2019-01-31,1.0,0.0840,0.0288,-0.0045,0.092360
2019-02-28,1.0,0.0340,0.0206,-0.0271,0.022160
2019-03-31,1.0,0.0110,-0.0305,-0.0412,0.020177
2019-04-30,1.0,0.0397,-0.0172,0.0216,0.046700
