In [None]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [None]:
import matplotlib.pyplot as plt
import warnings

# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*'
# and 3.8 removed the old names, so use whichever name this install provides.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
# alternative: the '-colorblind' variant of the style name chosen above
plt.rcParams['figure.figsize'] = [8, 4.5]
plt.rcParams['figure.dpi'] = 300
# hide FutureWarning messages for the rest of the notebook
warnings.simplefilter(action='ignore', category=FutureWarning)

# Multi-Factor Models

## Implementing the CAPM in Python

1. Import the libraries:

In [None]:
import pandas as pd
import yfinance as yf
import statsmodels.api as sm

2. Specify the risky asset and the time horizon:

In [None]:
# Yahoo Finance symbols handed to yf.download in the next step: the asset
# whose returns are modelled and the index used as the market proxy.
RISKY_ASSET = 'AMZN'
MARKET_BENCHMARK = '^GSPC'
# Date range (YYYY-MM-DD) passed as start/end to the download call.
START_DATE = '2014-01-01'
END_DATE = '2018-12-31'

3. Download data from Yahoo Finance:

In [None]:
# `adjusted` is not a yf.download parameter: older releases silently ignored it
# and newer ones reject unknown keywords. `auto_adjust=False` is the supported
# way to keep the separate 'Adj Close' column that the next step reads.
df = yf.download([RISKY_ASSET, MARKET_BENCHMARK],
                 start=START_DATE,
                 end=END_DATE,
                 auto_adjust=False,
                 progress=False)

print(f'Downloaded {df.shape[0]} rows of data.')

4. Resample to monthly data and calculate simple returns:

In [None]:
# last adjusted close of every month, with the ticker columns renamed by role
monthly_prices = (
    df['Adj Close']
    .rename(columns={RISKY_ASSET: 'asset', MARKET_BENCHMARK: 'market'})
    .resample('M')
    .last()
)

# simple month-over-month returns; rows with missing values
# (such as the first month, which has no predecessor) are dropped
X = monthly_prices.pct_change().dropna()

X.head()

5. Calculate beta using the covariance approach: 

In [None]:
# beta = Cov(asset, market) / Var(market)
covariance = X.cov().loc['asset', 'market']
benchmark_variance = X['market'].var()
beta = covariance / benchmark_variance
beta

6. Prepare the input and estimate CAPM as a linear regression:

In [None]:
# separate target -- select rather than pop, so X is left untouched and this
# cell (as well as the covariance cell above) can be re-run safely
y = X['asset']

# regressors: every column except the target, plus a constant for the intercept
X_capm = sm.add_constant(X.drop(columns='asset'))

# define and fit the regression model
capm_model = sm.OLS(y, X_capm).fit()

# print results
print(capm_model.summary())

## Implementing the Fama-French three-factor model in Python

1. Import the libraries:

In [None]:
import pandas as pd
import yfinance as yf
import statsmodels.formula.api as smf

2. Define parameters:

In [None]:
# Facebook's ticker changed from 'FB' to 'META' in June 2022; the company's
# price history (including the period below) is served under the new symbol.
RISKY_ASSET = 'META'
# Date range (YYYY-MM-DD) used both to filter the factors and to download prices.
START_DATE = '2013-12-31'
END_DATE = '2018-12-31'

3. Load data from the source CSV file and keep only the monthly data:

In [None]:
# load data from csv
factor_df = pd.read_csv('Research_Data_Factors.csv', skiprows=3)

# identify where the annual data starts; compare on stripped text so the match
# does not depend on the exact padding around the marker in the file
STR_TO_MATCH = ' Annual Factors: January-December '
first_column = factor_df.iloc[:, 0].astype(str).str.strip()
indices = first_column == STR_TO_MATCH.strip()
if not indices.any():
    raise ValueError(
        f'Marker {STR_TO_MATCH.strip()!r} not found in the first column of the factor file.'
    )
start_of_annual = factor_df[indices].index[0]

# keep only monthly data; copy so that later column assignments write to an
# independent frame instead of a slice (avoids SettingWithCopyWarning)
factor_df = factor_df[factor_df.index < start_of_annual].copy()

4. Rename columns of the DataFrame, set a datetime index and filter by dates:

In [None]:
# give the columns short, formula-friendly names
factor_df.columns = ['date', 'mkt', 'smb', 'hml', 'rf']

# parse the YYYYMM stamps, then store them back as 'YYYY-MM' strings
parsed_dates = pd.to_datetime(factor_df['date'], format='%Y%m')
factor_df['date'] = parsed_dates.dt.strftime("%Y-%m")

# index by the date labels and keep only the requested period
factor_df = factor_df.set_index('date').loc[START_DATE:END_DATE]

5. Convert the values to numeric and divide by 100:

In [None]:
# coerce every column to numbers (unparseable entries become NaN),
# then scale all values by 1/100
factor_df = factor_df.apply(pd.to_numeric, errors='coerce') / 100
factor_df.head()

6. Download the prices of the risky asset:

In [None]:
# `adjusted` is not a yf.download parameter: older releases silently ignored it
# and newer ones reject unknown keywords. `auto_adjust=False` is the supported
# way to keep the separate 'Adj Close' column that the next step reads.
asset_df = yf.download(RISKY_ASSET,
                       start=START_DATE,
                       end=END_DATE,
                       auto_adjust=False,
                       progress=False)

print(f'Downloaded {asset_df.shape[0]} rows of data.')

7. Calculate monthly returns on the risky asset:

In [None]:
# last adjusted close of every month -> simple monthly returns
month_end_close = asset_df['Adj Close'].resample('M').last()
y = month_end_close.pct_change().dropna()

# 'YYYY-MM' string labels, the same form as the factor table's index
y.index = y.index.strftime('%Y-%m')
y.name = 'rtn'
y.head()

8. Merge the datasets and calculate excess returns:

In [None]:
# attach the asset returns to the factors (matched on the index labels) and
# add the return in excess of the `rf` column
ff_data = (
    factor_df
    .join(y)
    .assign(excess_rtn=lambda frame: frame['rtn'] - frame['rf'])
)

9. Estimate the three-factor model:

In [None]:
# excess return regressed on the three factors (intercept added by the formula API)
ff_model = smf.ols('excess_rtn ~ mkt + smb + hml', data=ff_data).fit()

# show the regression table
print(ff_model.summary())

## Implementing the rolling three-factor model on a portfolio of assets

1. Import the libraries:

In [None]:
import pandas as pd
import yfinance as yf
import statsmodels.formula.api as smf
import pandas_datareader.data as web
import numpy as np

2. Define the parameters:

In [None]:
# Portfolio constituents and their weights; WEIGHTS[i] applies to ASSETS[i]
# when the portfolio return is computed (here every asset gets 0.25).
ASSETS = ['AMZN', 'GOOG', 'AAPL', 'MSFT']
WEIGHTS = [0.25, 0.25, 0.25, 0.25]
# Date range (YYYY-MM-DD) used for the factor and price downloads.
START_DATE = '2009-12-31'
END_DATE = '2018-12-31'

3. Download the factor-related data:

In [None]:
# take table 0 of the reader's result and scale all values by 1/100
df_three_factor = web.DataReader('F-F_Research_Data_Factors', 'famafrench',
                                 start=START_DATE)[0]
df_three_factor = df_three_factor.div(100)
# Index.format() is deprecated since pandas 2.2; strftime produces the 'YYYY-MM'
# labels explicitly and mirrors how the asset returns index is formatted
df_three_factor.index = df_three_factor.index.strftime('%Y-%m')

4. Download the prices of risky assets from Yahoo Finance:

In [None]:
# `adjusted` is not a yf.download parameter: older releases silently ignored it
# and newer ones reject unknown keywords. `auto_adjust=False` is the supported
# way to keep the separate 'Adj Close' column that the next step reads.
asset_df = yf.download(ASSETS,
                       start=START_DATE,
                       end=END_DATE,
                       auto_adjust=False,
                       progress=False)

print(f'Downloaded {asset_df.shape[0]} rows of data.')

5. Calculate the monthly returns on the risky assets:

In [None]:
# last adjusted close of every month for each asset -> simple monthly returns
month_end_prices = asset_df['Adj Close'].resample('M').last()
asset_df = month_end_prices.pct_change().dropna()

# 'YYYY-MM' string labels so the frame can be joined with the factor table
asset_df.index = asset_df.index.strftime('%Y-%m')

6. Calculate the portfolio returns:

In [None]:
# weighted sum of the asset returns in each month (columns taken in ASSETS order)
asset_returns = asset_df[ASSETS].to_numpy()
asset_df['portfolio_returns'] = asset_returns @ np.asarray(WEIGHTS)
asset_df.head()

In [None]:
# line chart of the monthly returns: one series per asset plus the portfolio
asset_df.plot()

7. Merge the datasets:

In [None]:
# attach the factors to the returns, then keep only the portfolio column
# (the individual asset columns are not needed for the regression)
ff_data = asset_df.join(df_three_factor).drop(columns=ASSETS)
ff_data.columns = ['portf_rtn', 'mkt', 'smb', 'hml', 'rf']

# portfolio return in excess of the `rf` column
ff_data['portf_ex_rtn'] = ff_data['portf_rtn'] - ff_data['rf']

In [None]:
# preview the merged frame (portfolio return, factors, rf and excess return)
ff_data.head()

8. Define a function for the rolling n-factor model:

In [None]:
def rolling_factor_model(input_data, formula, window_size):
    '''
    Estimate a factor model by OLS on a rolling window of fixed size.

    Parameters
    ----------
    input_data : pd.DataFrame
        Ordered observations containing every variable named in `formula`
        (the factors and the asset/portfolio returns).
    formula : str
        `statsmodels`-compatible formula describing the OLS regression.
    window_size : int
        Number of consecutive rows used for each fit. Must be at least 1.

    Returns
    -------
    coeffs_df : pd.DataFrame
        One row of fitted parameters (intercept and factor loadings) per
        window, labelled with the index value of the window's last
        observation. Empty when `input_data` has fewer rows than
        `window_size`.

    Raises
    ------
    ValueError
        If `window_size` is smaller than 1.
    '''
    # A zero or negative window would fit on empty/misaligned slices and
    # produce an index that no longer matches the coefficients.
    if window_size < 1:
        raise ValueError(
            f'window_size must be a positive integer, got {window_size!r}.'
        )

    # non-positive when there are fewer rows than the window -> no iterations
    n_windows = len(input_data) - window_size + 1

    coeffs = []
    for start_index in range(n_windows):
        # explicit positional slice of `window_size` consecutive rows
        window = input_data.iloc[start_index:start_index + window_size]

        # fit the regression on this window and keep only its coefficients
        coeffs.append(smf.ols(formula=formula, data=window).fit().params)

    # the i-th window ends at row (window_size - 1 + i), which labels its result
    coeffs_df = pd.DataFrame(
        coeffs,
        index=input_data.index[window_size - 1:]
    )

    return coeffs_df

9. Estimate the rolling three-factor model and plot the results:

In [None]:
# portfolio excess return regressed on the three factors, 60 rows per window
MODEL_FORMULA = 'portf_ex_rtn ~ mkt + smb + hml'
results_df = rolling_factor_model(ff_data, MODEL_FORMULA, window_size=60)

# one line per estimated coefficient across the windows
results_df.plot(title='Rolling Fama-French Three-Factor model')
plt.tight_layout()
plt.show()

## Implementing the four- and five-factor models in Python

1. Import the libraries:

In [None]:
import pandas as pd
import yfinance as yf
import statsmodels.formula.api as smf
import pandas_datareader.data as web

2. Specify the risky asset and the time horizon:

In [None]:
# Ticker passed to yf.download below, and the date range (YYYY-MM-DD) used
# for the downloads and for selecting the period of interest.
RISKY_ASSET = 'AMZN'
START_DATE = '2013-12-31'
END_DATE = '2018-12-31'

3. Download the risk factors from Prof. French's website:

In [None]:
# Each table's index is converted to 'YYYY-MM' strings so it can be joined
# with the asset returns. Index.format() is deprecated since pandas 2.2, so
# strftime is used instead; it also mirrors how the returns index is built.
# The values are left in their source scale here and divided by 100 after merging.

# three factors
df_three_factor = web.DataReader('F-F_Research_Data_Factors', 'famafrench',
                                 start=START_DATE)[0]
df_three_factor.index = df_three_factor.index.strftime('%Y-%m')

# momentum factor
df_mom = web.DataReader('F-F_Momentum_Factor', 'famafrench',
                        start=START_DATE)[0]
df_mom.index = df_mom.index.strftime('%Y-%m')

# five factors
df_five_factor = web.DataReader('F-F_Research_Data_5_Factors_2x3',
                                'famafrench',
                                start=START_DATE)[0]
df_five_factor.index = df_five_factor.index.strftime('%Y-%m')

4. Download the data of the risky asset from Yahoo Finance:

In [None]:
# `adjusted` is not a yf.download parameter: older releases silently ignored it
# and newer ones reject unknown keywords. `auto_adjust=False` is the supported
# way to keep the separate 'Adj Close' column that the next step reads.
asset_df = yf.download(RISKY_ASSET,
                       start=START_DATE,
                       end=END_DATE,
                       auto_adjust=False,
                       progress=False)

print(f'Downloaded {asset_df.shape[0]} rows of data.')

5. Calculate monthly returns:

In [None]:
# last adjusted close of every month -> simple monthly returns
month_end_close = asset_df['Adj Close'].resample('M').last()
y = month_end_close.pct_change().dropna()

# 'YYYY-MM' string labels, the same form as the factor tables' indices
y.index = y.index.strftime('%Y-%m')
y.name = 'return'

6. Merge the datasets for the four-factor model:

In [None]:
# join all datasets on the index
four_factor_data = df_three_factor.join(df_mom).join(y)

# rename columns (positional: three factors + rf, momentum, asset return)
four_factor_data.columns = ['mkt', 'smb', 'hml', 'rf', 'mom', 'rtn']

# divide everything (except returns) by 100
four_factor_data.loc[:, four_factor_data.columns != 'rtn'] /= 100

# select period of interest; copy so the column added below is written to an
# independent frame rather than a slice (avoids SettingWithCopyWarning)
four_factor_data = four_factor_data.loc[START_DATE:END_DATE].copy()

# calculate excess returns
four_factor_data['excess_rtn'] = four_factor_data['rtn'] - four_factor_data['rf']

four_factor_data.head()

7. Merge the datasets for the five-factor model:

In [None]:
# join all datasets on the index
five_factor_data = df_five_factor.join(y)

# rename columns (positional: five factors, rf, asset return)
five_factor_data.columns = ['mkt', 'smb', 'hml', 'rmw', 'cma', 'rf', 'rtn']

# divide everything (except returns) by 100
five_factor_data.loc[:, five_factor_data.columns != 'rtn'] /= 100

# select period of interest; copy so the column added below is written to an
# independent frame rather than a slice (avoids SettingWithCopyWarning)
five_factor_data = five_factor_data.loc[START_DATE:END_DATE].copy()

# calculate excess returns
five_factor_data['excess_rtn'] = five_factor_data['rtn'] - five_factor_data['rf']

five_factor_data.head()

8. Estimate the four-factor model:

In [None]:
# excess return regressed on the three factors plus momentum
four_factor_model = smf.ols(
    formula='excess_rtn ~ mkt + smb + hml + mom',
    data=four_factor_data,
).fit()

print(four_factor_model.summary())

9. Estimate the five-factor model:

In [None]:
# excess return regressed on the five factors
five_factor_spec = smf.ols(
    formula='excess_rtn ~ mkt + smb + hml + rmw + cma',
    data=five_factor_data,
)
five_factor_model = five_factor_spec.fit()

print(five_factor_model.summary())