In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Factors Models


## Data Cleaning

In [51]:
df = pd.read_csv('./data/Stock_Bond_2004_to_2006.csv')
df = df[252:504]
df.head()

Unnamed: 0,Date,DATE,Three_month_treasury,GM_Volume,GM_AC,F_Volume,F_AC,UTX_Volume,UTX_AC,CAT_Volume,...,S&P_Volume,1 year Treasury Constant Maturity Rate,3-Year Treasury Constant Maturity Rate,10 year Treasury Constant Maturity Rate,30 year Treasury Constant Maturity Rate,Aaa Bond Yield,Baa Bond Yield,$/Euro,Yen/$,Brazil Real/$
252,3-Jan-05,1/3/2005,2.29,6518500,36.77,9852200,13.68,4697600,50.02,6294000,...,1510800000,2.79,3.28,4.23,,5.43,6.09,1.3476,102.83,2.6695
253,4-Jan-05,1/4/2005,2.3,6590400,36.4,9035400,13.63,4759400,49.44,7579400,...,1720999936,2.82,3.38,4.29,,5.49,6.14,1.3295,104.27,2.702
254,5-Jan-05,1/5/2005,2.29,5459400,35.69,11376200,13.42,4754200,49.07,5932800,...,1738899968,2.83,3.39,4.29,,5.47,6.12,1.3292,103.95,2.697
255,6-Jan-05,1/6/2005,2.27,4508600,35.99,6672600,13.44,3942800,49.1,6300400,...,1569100032,2.82,3.36,4.29,,5.48,6.13,1.3187,104.87,2.719
256,7-Jan-05,1/7/2005,2.29,5575800,35.58,11452500,13.62,3676800,48.57,5815400,...,1477900032,2.82,3.4,4.29,,5.47,6.12,1.3062,104.93,2.7095


In [88]:
factors = pd.read_csv('./data/factors.CSV', delimiter = ',')
factors.rename(columns={'Unnamed: 0': 'Date'}, inplace=True)
factors = factors.iloc[:-1,]
factors['Date'] = pd.to_datetime(factors['Date'], format = '%Y%m%d')
factors = factors.set_index(['Date'])
factors_2005 = factors.loc['2005']
factors_2005.head()

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2005-01-03,-0.97,-0.6,-0.07,0.008
2005-01-04,-1.3,-0.61,0.46,0.008
2005-01-05,-0.51,-1.12,0.01,0.008
2005-01-06,0.34,-0.15,0.14,0.008
2005-01-07,-0.22,-0.81,-0.09,0.008


In [103]:
stock_subset = df[['GM_AC', 'F_AC', 'UTX_AC', 'MRK_AC']]
stock_subset = stock_subset.set_index(factors_2005.index)
stock_diff = stock_subset.diff().dropna().sub(factors_2005[1:]['RF'], axis = 0)
stock_diff.head()

Unnamed: 0_level_0,GM_AC,F_AC,UTX_AC,MRK_AC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2005-01-04,-0.378,-0.058,-0.588,-0.128
2005-01-05,-0.718,-0.218,-0.378,0.182
2005-01-06,0.292,0.012,0.022,0.192
2005-01-07,-0.418,0.172,-0.538,-0.398
2005-01-10,-0.468,-0.098,-0.138,0.332


# Linear Model

In [104]:
import statsmodels.api as sm

In [138]:
n_investments = len(stock_diff.columns)
res_matrix = np.zeros((len(stock_diff), n_investments))
betas = np.zeros(n_investments)

X = sm.add_constant(factors_2005.iloc[1:, ]['Mkt-RF'])

for i in range(n_investments):
    y = stock_diff.iloc[:, i]
    model = sm.OLS(y, X).fit()
    res_matrix[:, i] = model.resid
    betas[i] = model.params.iloc[1]
    print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                  GM_AC   R-squared:                       0.129
Model:                            OLS   Adj. R-squared:                  0.126
Method:                 Least Squares   F-statistic:                     37.02
Date:                Tue, 30 Apr 2024   Prob (F-statistic):           4.39e-09
Time:                        12:01:33   Log-Likelihood:                -260.62
No. Observations:                 251   AIC:                             525.2
Df Residuals:                     249   BIC:                             532.3
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0901      0.043     -2.079      0.0

Based on the summary, the intercepts of GM and F have p value that are less than 0.05, which indicates that we can reject the null hypothesis that their intercepts are zeros.

The p value of intercept of GM is 0.039, and the p value of intercept of F is 0.000.

Since both intercepts are negative (-0.091, -0.03), it means that both stocks are overpriced.

# Correlation matrix

We can see that the correlation of residual between GM and F is relatively large.

In [139]:
np.corrcoef(res_matrix.T)

array([[ 1.        ,  0.54544422, -0.08135563,  0.00523007],
       [ 0.54544422,  1.        , -0.03939744,  0.08670985],
       [-0.08135563, -0.03939744,  1.        , -0.03639009],
       [ 0.00523007,  0.08670985, -0.03639009,  1.        ]])

# Covariance matrix estimation

## Sample Covariance Matrix

In [160]:
stock_diff.cov()

Unnamed: 0,GM_AC,F_AC,UTX_AC,MRK_AC
GM_AC,0.538668,0.07814,0.066804,0.034455
F_AC,0.07814,0.029656,0.024905,0.01617
UTX_AC,0.066804,0.024905,0.276662,0.035156
MRK_AC,0.034455,0.01617,0.035156,0.231836


## CAPM estimation of covariance

In [157]:
sigF = np.var(factors_2005['Mkt-RF'])
sigep = np.diag(np.var(res_matrix, axis=0))
betas = betas.reshape(-1, 1)
cov = betas @ betas.T * sigF + sigep
cov

array([[0.53686003, 0.02135826, 0.08933317, 0.03281785],
       [0.02135826, 0.02956989, 0.02734358, 0.01004507],
       [0.08933317, 0.02734358, 0.27611449, 0.04201454],
       [0.03281785, 0.01004507, 0.04201454, 0.23098688]])

There is no any large discrepancies between the two estimates of covariance matrix.

## Fit the Fama-French three-factor model

The Capital Asset Pricing Model (CAPM) provides a simple and intuitive prediction regarding how expected returns on stocks are related to their market risk, quantified through the beta coefficient. The model posits that the expected return on a security is solely a function of its systematic risk relative to the overall market (captured by the beta coefficient), plus the risk-free rate. It asserts that the market is the only relevant risk factor, and hence, the only source of rewarded risk.

For UTX and MRK, the slope is significant where is not zero.

In [168]:
X = sm.add_constant(factors_2005.iloc[1:][['Mkt-RF', 'SMB', 'HML']])

for i in range(n_investments):
    y = stock_diff.iloc[:, i]
    model = sm.OLS(y, X).fit()
    print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                  GM_AC   R-squared:                       0.148
Model:                            OLS   Adj. R-squared:                  0.137
Method:                 Least Squares   F-statistic:                     14.26
Date:                Tue, 30 Apr 2024   Prob (F-statistic):           1.33e-08
Time:                        12:12:20   Log-Likelihood:                -257.96
No. Observations:                 251   AIC:                             523.9
Df Residuals:                     247   BIC:                             538.0
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.1052      0.044     -2.417      0.0