## Actual Performance
### Fama-French Regressions
### Import Packages

Try different factor models (on the RHS), at least:
* a 0-factor (relative to risk-free rate),
* a 1-factor (relative to CAPM),
* a more general factor portfolio.

Our four-factor benchmark:
* VOO (S&P500) - RF, where RF is not yield but return!
* VTV (Value) - VUG (Growth) aka HML
* VIG (Dividends) - VOO (S&P500) aka CMA
* VV (Large-Cap) - VBR (Small-Cap) aka SMB

In [97]:
import pandas as pd
import numpy as np
import datetime as dt
import psycopg2 
import matplotlib.pyplot as plt
from dateutil.relativedelta import *
from pandas.tseries.offsets import *
from scipy import stats
import statsmodels.api as sm
import statistics
import sys
sys.path.insert(0, "../")
import util
from importlib import reload
util=reload(util)

In [98]:
#Set a start date
StartDate='01/10/2022'
# EndDate='05/07/2022'
EndDate='05/04/2022'

Math notes:
Closing of inception / closing of last day -1

## Neglected Beta
### Import Data
#### Daily Returns of Portfolio

In [99]:
data = pd.read_csv('Neglected_Beta_Daily_Returns_5.5.22.csv', usecols=lambda x: x.lower())

# Rename columns
data.columns = ['Date','Daily_Returns','Russell_2000','SP_500']

# Remove % signs
data['Daily_Returns'] = data['Daily_Returns'].str.replace('%', '')
data['Russell_2000'] = data['Russell_2000'].str.replace('%', '')
data['SP_500'] = data['SP_500'].str.replace('%', '')

# Format returns
data['daily_returns'] = data['Daily_Returns'].astype(float)
data['russell_2000'] = data['Russell_2000'].astype(float)
data['sp_500'] = data['SP_500'].astype(float)

# Convert to % to numbers
data['daily_returns'] = data['daily_returns']/100
data['russell_2000'] = data['russell_2000']/100
data['sp_500'] = data['sp_500']/100

# Format Date
data['date'] = pd.to_datetime(data['Date'], format='%m/%d/%y')

data.head()

Unnamed: 0,Date,Daily_Returns,Russell_2000,SP_500,daily_returns,russell_2000,sp_500,date
0,1/10/22,0.0,0.0,0.0,0.0,0.0,0.0,2022-01-10
1,1/11/22,1.26,2.61,0.92,0.0126,0.0261,0.0092,2022-01-11
2,1/12/22,-0.38,-0.82,0.28,-0.0038,-0.0082,0.0028,2022-01-12
3,1/13/22,-2.14,-0.76,-1.42,-0.0214,-0.0076,-0.0142,2022-01-13
4,1/14/22,0.16,0.14,0.08,0.0016,0.0014,0.0008,2022-01-14


In [100]:
# Drop extra variables
data.drop(['Date'], axis = 1, inplace=True)
data.drop(['Daily_Returns'], axis = 1, inplace=True)
data.drop(['Russell_2000'], axis = 1, inplace=True)
data.drop(['SP_500'], axis = 1, inplace=True)

data.head()

Unnamed: 0,daily_returns,russell_2000,sp_500,date
0,0.0,0.0,0.0,2022-01-10
1,0.0126,0.0261,0.0092,2022-01-11
2,-0.0038,-0.0082,0.0028,2022-01-12
3,-0.0214,-0.0076,-0.0142,2022-01-13
4,0.0016,0.0014,0.0008,2022-01-14


## Vanguard Factors
### Import Data

In [101]:
SHY = pd.read_csv('SHY.csv')
VBR = pd.read_csv('VBR.csv')
VIG = pd.read_csv('VIG.csv')
VOO = pd.read_csv('VOO.csv')
VTV = pd.read_csv('VTV.csv')
VUG = pd.read_csv('VUG.csv')
VV = pd.read_csv('VV.csv')

VOO.head()

Unnamed: 0,Date,Adj Close,VOO_Daily_Returns
0,1/3/22,437.775574,
1,1/4/22,437.596191,-0.00041
2,1/5/22,429.214417,-0.019154
3,1/6/22,428.636353,-0.001347
4,1/7/22,427.151337,-0.003465


In [102]:
# Convert Date to an actual date format
SHY['date'] = pd.to_datetime(SHY['Date'].astype(str), format='%m/%d/%y')
VBR['date'] = pd.to_datetime(VBR['Date'].astype(str), format='%m/%d/%y')
VIG['date'] = pd.to_datetime(VIG['Date'].astype(str), format='%m/%d/%y')
VOO['date'] = pd.to_datetime(VOO['Date'].astype(str), format='%m/%d/%y')
VTV['date'] = pd.to_datetime(VTV['Date'].astype(str), format='%m/%d/%y')
VUG['date'] = pd.to_datetime(VUG['Date'].astype(str), format='%m/%d/%y')
VV['date'] = pd.to_datetime(VV['Date'].astype(str), format='%m/%d/%y')

SHY.head()

Unnamed: 0,Date,Adj Close,RF_Daily_Returns,date
0,1/3/22,85.309433,,2022-01-03
1,1/4/22,85.359344,0.000585,2022-01-04
2,1/5/22,85.249527,-0.001287,2022-01-05
3,1/6/22,85.179634,-0.00082,2022-01-06
4,1/7/22,85.159668,-0.000234,2022-01-07


In [103]:
# Drop extra variables
SHY.drop(['Date'], axis = 1, inplace=True)
VBR.drop(['Date'], axis = 1, inplace=True)
VIG.drop(['Date'], axis = 1, inplace=True)
VOO.drop(['Date'], axis = 1, inplace=True)
VTV.drop(['Date'], axis = 1, inplace=True)
VUG.drop(['Date'], axis = 1, inplace=True)
VV.drop(['Date'], axis = 1, inplace=True)

SHY.drop(['Adj Close'], axis = 1, inplace=True)
VBR.drop(['Adj Close'], axis = 1, inplace=True)
VIG.drop(['Adj Close'], axis = 1, inplace=True)
VOO.drop(['Adj Close'], axis = 1, inplace=True)
VTV.drop(['Adj Close'], axis = 1, inplace=True)
VUG.drop(['Adj Close'], axis = 1, inplace=True)
VV.drop(['Adj Close'], axis = 1, inplace=True)

In [104]:
# Exclude days outside of window 1/10/2022 - 5/7/2022
SHY = SHY[(SHY['date'] >= StartDate) & (SHY['date'] <= EndDate)]
VBR = VBR[(VBR['date'] >= StartDate) & (VBR['date'] <= EndDate)]
VIG = VIG[(VIG['date'] >= StartDate) & (VIG['date'] <= EndDate)]
VOO = VOO[(VOO['date'] >= StartDate) & (VOO['date'] <= EndDate)]
VTV = VTV[(VTV['date'] >= StartDate) & (VTV['date'] <= EndDate)]
VUG = VUG[(VUG['date'] >= StartDate) & (VUG['date'] <= EndDate)]
VV = VV[(VV['date'] >= StartDate) & (VV['date'] <= EndDate)]

SHY.head()

Unnamed: 0,RF_Daily_Returns,date
5,-0.000586,2022-01-10
6,0.000352,2022-01-11
7,-0.000352,2022-01-12
8,0.000469,2022-01-13
9,-0.00129,2022-01-14


In [105]:
SHY.dtypes

RF_Daily_Returns           float64
date                datetime64[ns]
dtype: object

## Combine Datasets

In [106]:
returnReg1 = pd.merge(data,SHY,how='inner',on=['date'])
returnReg2 = pd.merge(returnReg1,VBR,how='inner',on=['date'])
returnReg3 = pd.merge(returnReg2,VIG,how='inner',on=['date'])
returnReg4 = pd.merge(returnReg3,VOO,how='inner',on=['date'])
returnReg5 = pd.merge(returnReg4,VTV,how='inner',on=['date'])
returnReg6 = pd.merge(returnReg5,VUG,how='inner',on=['date'])
returnReg = pd.merge(returnReg6,VV,how='inner',on=['date'])

In [107]:
returnReg.tail()

Unnamed: 0,daily_returns,russell_2000,sp_500,date,RF_Daily_Returns,VBR_Daily_Returns,VIG_Daily_Returns,VOO_Daily_Returns,VTV_Daily_Returns,VUG_Daily_Returns,VV_Daily_Returns
75,0.0175,0.018,0.0247,2022-04-28,-0.000722,0.018879,0.018712,0.025057,0.014618,0.035356,0.02549
76,-0.0282,-0.0281,-0.0363,2022-04-29,-0.001926,-0.027558,-0.032144,-0.03673,-0.030057,-0.043954,-0.036979
77,0.0137,0.0101,0.0057,2022-05-02,-7.2e-05,0.003823,0.001235,0.006285,0.000426,0.01313,0.006664
78,0.0077,0.0085,0.0048,2022-05-03,-0.000483,0.015476,0.001363,0.00433,0.008525,-0.000867,0.003941
79,0.0298,0.0269,0.0299,2022-05-04,0.002657,0.026848,0.027745,0.030335,0.026698,0.032526,0.029569


* VOO (S&P500) - RF, where RF is not yield but return!
* VTV (Value) - VUG (Growth)
* VIG (Dividends) - VOO (S&P500)
* VV (Large-Cap) - VBR (Small-Cap)

In [108]:
# Create Excess Returns column
returnReg['XSret'] =  returnReg['daily_returns']-returnReg['RF_Daily_Returns']
returnReg['MKTRF'] =  returnReg['VOO_Daily_Returns']-returnReg['RF_Daily_Returns']
returnReg['VTV_VUG'] =  returnReg['VTV_Daily_Returns']-returnReg['VUG_Daily_Returns']
returnReg['VIG_VOO'] =  returnReg['VIG_Daily_Returns']-returnReg['VOO_Daily_Returns']
returnReg['VV_VBR'] =  returnReg['VV_Daily_Returns']-returnReg['VBR_Daily_Returns']

In [109]:
# Normalize start date to 0 and convert Ken French data from percent back to number
returnReg['MKTRF']=np.where(returnReg['date']==StartDate,0, returnReg['MKTRF'])
returnReg['VTV_VUG']=np.where(returnReg['date']==StartDate,0, returnReg['VTV_VUG'])
returnReg['VIG_VOO']=np.where(returnReg['date']==StartDate,0, returnReg['VIG_VOO'])
returnReg['VV_VBR']=np.where(returnReg['date']==StartDate,0, returnReg['VV_VBR'])
returnReg['RF']=np.where(returnReg['date']==StartDate,0, returnReg['RF_Daily_Returns'])

In [110]:
# Keep Only
returnReg_sub = returnReg[['XSret', 'RF', 'MKTRF', 'VTV_VUG', 'VIG_VOO', 'VV_VBR']]

returnReg_sub.head()

Unnamed: 0,XSret,RF,MKTRF,VTV_VUG,VIG_VOO,VV_VBR
0,0.000586,0.0,0.0,0.0,0.0,0.0
1,0.012248,0.000352,0.008409,-0.006113,-0.004654,8.34e-07
2,-0.003448,-0.000352,0.00334,-0.003444,-0.001091,0.004568589
3,-0.021869,0.000469,-0.014255,0.024313,0.006627,-0.01714997
4,0.00289,-0.00129,0.001594,-0.003596,-0.006502,-0.000104613


Try different factor models (on the RHS), at least:
* a 0-factor (relative to risk-free rate),
* a 1-factor (relative to CAPM),
* a more general factor portfolio.

In [111]:
# Define Fama-French Factors as independent varialbles
# 0 Factor Model
X1=sm.add_constant(returnReg[['RF']])
# CAPM
X2=sm.add_constant(returnReg[['MKTRF']])
# Vanguard Multi-Factor Model
X3=sm.add_constant(returnReg[['MKTRF','VTV_VUG','VIG_VOO','VV_VBR']])

In [112]:
# Define Excess Returns as the dependent variable
Y=returnReg['XSret']

In [113]:
# RF Only (0-factor)
print("0-Factor Regression")
model1F = sm.OLS(Y,X1).fit()
print(model1F.summary())

0-Factor Regression
                            OLS Regression Results                            
Dep. Variable:                  XSret   R-squared:                       0.009
Model:                            OLS   Adj. R-squared:                 -0.004
Method:                 Least Squares   F-statistic:                    0.6973
Date:                Mon, 16 May 2022   Prob (F-statistic):              0.406
Time:                        20:59:10   Log-Likelihood:                 208.86
No. Observations:                  80   AIC:                            -413.7
Df Residuals:                      78   BIC:                            -408.9
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0014      0.002

In [114]:
# CAPM (FF 1-factor)
print("1-Factor Regression (CAPM)")
model2F = sm.OLS(Y,X2).fit()
print(model2F.summary())

1-Factor Regression (CAPM)
                            OLS Regression Results                            
Dep. Variable:                  XSret   R-squared:                       0.818
Model:                            OLS   Adj. R-squared:                  0.816
Method:                 Least Squares   F-statistic:                     350.4
Date:                Mon, 16 May 2022   Prob (F-statistic):           1.40e-30
Time:                        20:59:11   Log-Likelihood:                 276.64
No. Observations:                  80   AIC:                            -549.3
Df Residuals:                      78   BIC:                            -544.5
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0004    

In [115]:
# Vanguard Multi-Factor Model
print("Vanguard Multi-Factor Model")
model3F = sm.OLS(Y,X3).fit()
print(model3F.summary())

Vanguard Multi-Factor Model
                            OLS Regression Results                            
Dep. Variable:                  XSret   R-squared:                       0.860
Model:                            OLS   Adj. R-squared:                  0.853
Method:                 Least Squares   F-statistic:                     115.4
Date:                Mon, 16 May 2022   Prob (F-statistic):           3.00e-31
Time:                        20:59:11   Log-Likelihood:                 287.21
No. Observations:                  80   AIC:                            -564.4
Df Residuals:                      75   BIC:                            -552.5
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0002   

## Attribution

In [116]:
returnReg.describe()

Unnamed: 0,daily_returns,russell_2000,sp_500,RF_Daily_Returns,VBR_Daily_Returns,VIG_Daily_Returns,VOO_Daily_Returns,VTV_Daily_Returns,VUG_Daily_Returns,VV_Daily_Returns,XSret,MKTRF,VTV_VUG,VIG_VOO,VV_VBR,RF
count,80.0,80.0,80.0,80.0,80.0,80.0,80.0,80.0,80.0,80.0,80.0,80.0,80.0,80.0,80.0,80.0
mean,-0.001308,-0.001007,-0.000925,-0.000316,-0.000384,-0.000675,-0.000894,-0.000189,-0.001649,-0.00099,-0.000992,-0.00057,0.001445,0.000278,-0.000651,-0.000308
std,0.017895,0.01716,0.014485,0.001394,0.014277,0.012135,0.014533,0.010556,0.020556,0.015051,0.017973,0.014535,0.01431,0.004642,0.006646,0.001394
min,-0.0363,-0.0326,-0.0363,-0.005081,-0.033005,-0.032144,-0.03673,-0.030057,-0.043954,-0.036979,-0.034999,-0.034804,-0.037155,-0.010769,-0.01715,-0.005081
25%,-0.018425,-0.0155,-0.012125,-0.000986,-0.012024,-0.009466,-0.012144,-0.0076,-0.016744,-0.012648,-0.016554,-0.010883,-0.009178,-0.002912,-0.005439,-0.000986
50%,-0.00315,-0.00285,-0.0005,-0.000237,-0.000295,-0.002331,-0.001032,-0.000208,-0.001414,-0.001559,-0.002541,-0.000301,0.002137,0.000273,-0.000188,-0.000177
75%,0.013,0.011075,0.00925,0.00048,0.009758,0.006713,0.008954,0.006759,0.01486,0.008628,0.013335,0.009196,0.013283,0.003588,0.004497,0.00048
max,0.0419,0.0314,0.0299,0.003088,0.029103,0.027745,0.030335,0.029831,0.039165,0.029569,0.042493,0.027678,0.029192,0.010266,0.012011,0.003088


In [None]:
# Do it ((final close / inception close) -1) - another one

In [117]:
# Coefficients of multi-factor regression
print(model3F.params)

const     -0.000218
MKTRF      0.892907
VTV_VUG   -0.354215
VIG_VOO   -0.205344
VV_VBR    -0.466118
dtype: float64


In [118]:
averages = [1, -0.000570, 0.001445, 0.000278, -0.000651]

In [119]:
# Attribution in %s
print(model3F.params * averages * 100)

const     -0.021781
MKTRF     -0.050896
VTV_VUG   -0.051184
VIG_VOO   -0.005709
VV_VBR     0.030344
dtype: float64


In [120]:
# What should the sum equal? To the total return - risk free rate
sum(model3F.params * averages * 100)

-0.09922548957569233