In [2]:
import numpy as np
import pandas as pd

In [3]:
#path = 'C:/Users/mphan/Documents/Behavioral-Finance/hmwk1/'
path = ''

# Jan 1963 - Dec 2017
aqr_ff_data = pd.read_csv(path + 'aqr_ff_data.csv')
aqr_ff_data['date'] = pd.to_datetime(aqr_ff_data['date'], format="%Y%m")

# 1955 - 2014
kent_data = pd.read_csv(path + 'bf_hw1.csv')
kent_data['date'] = pd.to_datetime(kent_data['date'], format="%m-%d-%Y")
# Converting to percent since the aqr data is in percent
kent_data[['isu', 'acr', 'ivol1m']] = kent_data[['isu', 'acr', 'ivol1m']]*100

In [4]:
combined = pd.merge(aqr_ff_data, kent_data, how='outer')
combined.columns = map(str.upper, combined.columns)
combined.sort_values(by=['DATE'], inplace=True)

ISU, ACR, IVOL1M factors (starts in 1955)

In [5]:
combined.head()

Unnamed: 0,DATE,MKT-RF,SMB,HML,RMW,CMA,MOM,STR,BAB,QMJ,RF,ISU,ACR,IVOL1M
654,1955-07-01,,,,,,,,,,,0.744056,3.622477,6.539146
655,1955-08-01,,,,,,,,,,,-2.14892,-1.764342,-1.066654
656,1955-09-01,,,,,,,,,,,0.086056,0.362991,4.214729
657,1955-10-01,,,,,,,,,,,-0.588581,-0.47622,1.555679
658,1955-11-01,,,,,,,,,,,-2.367149,-1.754872,1.63382


Mkt-Rf, SMB, HML, etc.. factors (starts in 1963)

In [6]:
combined.loc[(~combined['SMB'].isin([np.nan])),].head()

Unnamed: 0,DATE,MKT-RF,SMB,HML,RMW,CMA,MOM,STR,BAB,QMJ,RF,ISU,ACR,IVOL1M
0,1963-07-01,-0.39,-0.46,-0.81,0.72,-1.16,0.99,-0.04,0.81,0.47,0.27,-0.205056,-0.473696,1.245273
1,1963-08-01,5.07,-0.81,1.65,0.42,-0.4,1.08,1.36,-1.22,-0.63,0.25,0.20868,1.021123,0.40425
2,1963-09-01,-1.57,-0.48,0.19,-0.8,0.23,0.13,1.2,1.07,0.61,0.27,1.130358,0.061623,0.774335
3,1963-10-01,2.53,-1.29,-0.09,2.75,-2.26,3.14,-0.66,-1.88,2.08,0.29,1.426423,3.028919,0.218016
4,1963-11-01,-0.85,-0.85,1.71,-0.34,2.22,-0.75,1.15,-0.52,-0.88,0.27,0.910432,-1.318862,2.087017


# Question 1

Number of observations

In [7]:
combined.iloc[:, 1:combined.columns.size].count()

MKT-RF    654
SMB       654
HML       654
RMW       654
CMA       654
MOM       654
STR       654
BAB       654
QMJ       654
RF        654
ISU       707
ACR       707
IVOL1M    707
dtype: int64

Annualized factor returns, volatilities, and sharpe ratios

In [8]:
factor_returns = combined.iloc[:, 1:combined.columns.size].agg(['mean'])*12
factor_returns

Unnamed: 0,MKT-RF,SMB,HML,RMW,CMA,MOM,STR,BAB,QMJ,RF,ISU,ACR,IVOL1M
mean,6.371743,3.005688,4.144587,3.000734,3.441468,7.911009,5.791009,9.539266,4.369541,4.63633,3.89799,1.947849,8.89787


In [9]:
factor_vol = combined.iloc[:, 1:combined.columns.size].agg(['std'])*np.sqrt(12)
factor_vol

Unnamed: 0,MKT-RF,SMB,HML,RMW,CMA,MOM,STR,BAB,QMJ,RF,ISU,ACR,IVOL1M
std,15.200588,10.477238,9.729386,7.664816,6.94564,14.529008,10.730704,10.962232,8.228669,0.919295,8.165429,5.760879,22.164692


In [10]:
sharpe = factor_returns.iloc[0,]/factor_vol.iloc[0,]
sharpe

MKT-RF    0.419177
SMB       0.286878
HML       0.425987
RMW       0.391495
CMA       0.495486
MOM       0.544498
STR       0.539667
BAB       0.870194
QMJ       0.531014
RF        5.043352
ISU       0.477377
ACR       0.338117
IVOL1M    0.401443
dtype: float64

# Question 2

In [11]:
factor_corr = combined.corr(method='pearson')
factor_corr

Unnamed: 0,MKT-RF,SMB,HML,RMW,CMA,MOM,STR,BAB,QMJ,RF,ISU,ACR,IVOL1M
MKT-RF,1.0,0.273734,-0.258937,-0.232352,-0.383175,-0.132408,0.282365,-0.085063,-0.524648,-0.084408,0.472874,-0.000292,-0.501423
SMB,0.273734,1.0,-0.068692,-0.349837,-0.100964,-0.028809,0.158468,-0.020474,-0.514492,-0.043436,0.156561,0.158716,-0.667716
HML,-0.258937,-0.068692,1.0,0.065707,0.696096,-0.183351,0.002098,0.319836,-0.021364,0.073936,-0.550018,-0.153199,0.317864
RMW,-0.232352,-0.349837,0.065707,1.0,-0.032732,0.1118,-0.089537,0.282811,0.739996,0.002009,-0.092415,-0.324595,0.606138
CMA,-0.383175,-0.100964,0.696096,-0.032732,1.0,-0.02112,-0.119658,0.313925,0.088641,0.069021,-0.628201,-0.057307,0.366229
MOM,-0.132408,-0.028809,-0.183351,0.1118,-0.02112,1.0,-0.288706,0.179076,0.249623,0.064349,0.058403,0.13062,0.193672
STR,0.282365,0.158468,0.002098,-0.089537,-0.119658,-0.288706,1.0,-0.05701,-0.248608,0.033714,0.141484,-0.050521,-0.204246
BAB,-0.085063,-0.020474,0.319836,0.282811,0.313925,0.179076,-0.05701,1.0,0.203389,0.002031,-0.207414,-0.126594,0.358085
QMJ,-0.524648,-0.514492,-0.021364,0.739996,0.088641,0.249623,-0.248608,0.203389,1.0,0.045346,-0.27051,-0.206491,0.762861
RF,-0.084408,-0.043436,0.073936,0.002009,0.069021,0.064349,0.033714,0.002031,0.045346,1.0,-0.066411,0.05234,0.107295


**The correlation between the IVOL1M (low idiosyncratic volatility stocks earn higher returns) and SMB is the most negative**

In [12]:
# This is the correlation between IVOL and SMB
factor_corr.unstack().min()

-0.6677161204219095

The SMB factor is negatively correlated with the IVOL1M factor because the portfolio containinng stocks with high idiosyncratic volatility most likely is in large part composed of stocks with small market equity. Thus, when the SMB factor earns higher returns (small stocks outperform big stocks), the IVOL1M factor earns lower returns (because the factor shorts high idiosyncratic volatility stocks).

**The correlation between QMJ and IVOL1M is the most positive**

In [13]:
# This is the correlation between QMJ and IVOL1M
factor_corr[factor_corr!=1].unstack().max()

0.7628607822394143

It makes sense that the IVOL1M factor is positively correlated with the QMJ factor. According to Asness et al. the quality score of a particular stock is based on its profitability, growth, safeness (based on return volatility/beta and financial statement metrics), and payout. They found that 'high' quality stocks (i.e those with high profitability, growing, low return volatility, and high payout) earn higher expected returns. You would expect these stocks to also have low idiosyncratic volatility as they are more likely to be owned by index investors and value investors. Though it is not necessarily causal, there could also be a positive correlation between low idiosyncratic volatility and low total volatility (which Asness et al. used to construct their quality score)

# Question 3

In [14]:
subsample = combined[combined['DATE'].between('1963-07', '2014-05', inclusive=True)]
subsample.head()

Unnamed: 0,DATE,MKT-RF,SMB,HML,RMW,CMA,MOM,STR,BAB,QMJ,RF,ISU,ACR,IVOL1M
0,1963-07-01,-0.39,-0.46,-0.81,0.72,-1.16,0.99,-0.04,0.81,0.47,0.27,-0.205056,-0.473696,1.245273
1,1963-08-01,5.07,-0.81,1.65,0.42,-0.4,1.08,1.36,-1.22,-0.63,0.25,0.20868,1.021123,0.40425
2,1963-09-01,-1.57,-0.48,0.19,-0.8,0.23,0.13,1.2,1.07,0.61,0.27,1.130358,0.061623,0.774335
3,1963-10-01,2.53,-1.29,-0.09,2.75,-2.26,3.14,-0.66,-1.88,2.08,0.29,1.426423,3.028919,0.218016
4,1963-11-01,-0.85,-0.85,1.71,-0.34,2.22,-0.75,1.15,-0.52,-0.88,0.27,0.910432,-1.318862,2.087017


The portfolio of the highest Sharpe ratio is the tangent portfolio of the mean-variance frontier with and without the risk-free asset. 

Let $\mu$ be the mean factor returns, $\Sigma$ be the factor covariance matrix, $l$ be a $12\times1$ vector of ones, and $r_f$ be the risk free rate. To get the maximum Sharpe ratio portfolio, we consider the following problem:
\begin{equation*}
\begin{aligned}
	& \underset{\alpha}{\text{max}}
	& & \alpha'(\mu-\bar{r}\mathbf{1})-w\alpha'\Sigma\alpha
\end{aligned}
\end{equation*}
The F.O.C. is
$$\mu-\bar{r}\mathbf{1}=2w\Sigma\alpha^*\implies\alpha^*=\frac{\Sigma^{-1}(\mu-\bar{r}\mathbf{1})}{2w}.$$
We then choose $w$ so that $\alpha^{*\prime}\mathbf{1}=1$ to get tangency portfolio.
$$1=\mathbf{1}'\alpha^*=\frac{\mathbf{1}'\Sigma^{-1}(\mu-\bar{r}\mathbf{1})}{2w}\implies 2w=\mathbf{1}'\Sigma^{-1}(\mu-\bar{r}\mathbf{1})$$
$$\implies \alpha^*=\frac{\Sigma^{-1}(\mu-\bar{r}\mathbf{1})}{\mathbf{1}'\Sigma^{-1}(\mu-\bar{r}\mathbf{1})}.$$
Thus
$$\mu_T-\bar{r}=(\mu-\bar{r}\mathbf{1})'\alpha^*=\frac{(\mu-\bar{r}\mathbf{1})'\Sigma^{-1}(\mu-\bar{r}\mathbf{1})}{\mathbf{1}'\Sigma^{-1}(\mu-\bar{r}\mathbf{1})}$$
and
$$\sigma_T^2=\frac{(\mu-\bar{r}\mathbf{1})'\Sigma^{-1}\Sigma\Sigma^{-1}(\mu-\bar{r}\mathbf{1})}{(\mathbf{1}'\Sigma^{-1}(\mu-\bar{r}\mathbf{1}))^2}\implies\sigma_T=\frac{\sqrt{(\mu-\bar{r}\mathbf{1})'\Sigma^{-1}(\mu-\bar{r}\mathbf{1})}}{\mathbf{1}'\Sigma^{-1}(\mu-\bar{r}\mathbf{1})}.$$
Therefore the maximum Sharpe Ratio is 
$$SR^*=\frac{\mu_T-\bar{r}}{\sigma_T}=\sqrt{(\mu-r_fl)'\Sigma^{-1}(\mu-r_fl)}.$$
Since we only have one observation of the realized values of the factors, we cannot calculate emsemble moments. With resort to stationarity and ergodicity, we use the time-series moments as of June 1963 to estimate emsemble moments.

In [21]:
risky = subsample.drop(['DATE', 'RF'], axis=1)
mu = risky.mean()
Sigma = risky.cov()
rf = subsample['RF'].mean()
l = np.ones([np.shape(subsample)[1]-2, 1])

from numpy.linalg import inv
alpha = np.dot(inv(Sigma), mu - rf)
SR = np.sqrt(np.dot(np.dot(mu - rf, inv(Sigma)), mu - rf))

Assume that the scaling constant is $c$, then 
$$(c\alpha)'\Sigma^{-1}(c\alpha)=(20\%)^2\implies c=\frac{20\%}{\sqrt{\alpha'\Sigma^{-1}\alpha}}.$$

In [22]:
alpha = alpha * 0.2 / np.sqrt(np.dot(np.dot(alpha, inv(Sigma)), alpha))

In [23]:
print(alpha)
print(SR)

[-0.01030915 -0.0307987  -0.01538546 -0.11722824 -0.12137739  0.0272793
  0.01591952  0.06219642 -0.04746366 -0.05872774 -0.15103461  0.01548979]
0.32509971193178366


# Question 4

In [63]:
mve = np.dot(risky, alpha)
hml = subsample['HML']
df = np.array([hml, mve])
df = pd.DataFrame(data=np.transpose(df))
df.columns = ['HML', 'MVE']

In [75]:
import statsmodels.formula.api as sm
result = sm.ols(formula="HML ~ MVE", data=df).fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:                    HML   R-squared:                       0.001
Model:                            OLS   Adj. R-squared:                 -0.001
Method:                 Least Squares   F-statistic:                    0.6690
Date:                Tue, 06 Feb 2018   Prob (F-statistic):              0.414
Time:                        14:32:37   Log-Likelihood:                -1501.4
No. Observations:                 611   AIC:                             3007.
Df Residuals:                     609   BIC:                             3016.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.3652      0.116      3.143      0.0

In [78]:
SMB = subsample['SMB']
df = np.array([SMB, mve])
df = pd.DataFrame(data=np.transpose(df))
df.columns = ['SMB', 'MVE']
result = sm.ols(formula="SMB ~ MVE", data=df).fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:                    SMB   R-squared:                       0.020
Model:                            OLS   Adj. R-squared:                  0.019
Method:                 Least Squares   F-statistic:                     12.54
Date:                Tue, 06 Feb 2018   Prob (F-statistic):           0.000429
Time:                        14:34:29   Log-Likelihood:                -1541.3
No. Observations:                 611   AIC:                             3087.
Df Residuals:                     609   BIC:                             3095.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.1959      0.124      1.580      0.1

# Question 5

In [81]:
pre = combined[combined['DATE'].between('1963-07', '1988-12', inclusive=True)]
post = combined[combined['DATE'].between('1989-01', '2014-05', inclusive=True)]

In [105]:
SR_pre = pre.mean()*12/(pre.std()*np.sqrt(12))
print(SR_pre)

MKT-RF    0.265861
SMB       0.440098
HML       0.687344
RMW       0.315589
CMA       0.635310
MOM       0.758511
STR       1.089496
BAB       1.152317
QMJ       0.542929
RF        8.338218
ISU       0.199198
ACR       0.499931
IVOL1M    0.453934
dtype: float64


In [106]:
SR_post = post.mean()*12/(post.std()*np.sqrt(12))
print(SR_post)

MKT-RF    0.516345
SMB       0.177808
HML       0.276392
RMW       0.458646
CMA       0.485853
MOM       0.432734
STR       0.192094
BAB       0.661038
QMJ       0.479629
RF        4.688866
ISU       0.910353
ACR       0.108419
IVOL1M    0.332248
dtype: float64
