In [59]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from sklearn import linear_model
import scipy.stats as stats
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
pd.set_option("display.precision", 4)

## Q1

1. The Value factor (HML) takes a long position in firms with high book-to-market ratios and a short position in firms with low book-to-market ratios. This long-short construction reduces its correlation to MKT by isolating a specific characteristic of the stocks and exploiting it. It also differs from the Size factor: the Size factor (SMB) takes a long position in firms with small market capitalizations and a short position in firms with large market capitalizations. The classification of small and large can be based on percentiles, quintiles, or deciles.

2. Even if Value has underperformed the market in these subsamples, its imperfect correlation with the market could lead to diversification benefits, as we saw in the MV optimization. This imperfect correlation can reduce risk at the margin for any investor who is completely invested in funds tracking the broader equity index.

3. All these style factors can come in handy both for a linear factor decomposition and in a linear pricing model, as long as the factors in the combination are not heavily correlated with one another. All the factors have posted positive risk premia over long horizons, but due to regime changes and changes in market microstructure resulting from advancing technologies and changing investor behavior, some factors might now prove to be redundant or might no longer display positive risk premia over long horizons.

4. False for TR and True for SR. The Treynor Ratio of every security is equal to the market's expected excess return. The market's TR is also equal to that value, since the market beta is 1. The Sharpe Ratio of any security is equal to its correlation with the market factor times the market SR. Therefore, the market SR is the largest, as the market's correlation with itself is 1.

5. The biggest difference between long-only and long-short is that a long-only portfolio is highly correlated with the Market factor while a long-short portfolio is NOT. That is the reason we don't want to invest in the long-only version, even though its mean and SR are sometimes good.

6. From class, I think we can take advantage of this small beta and R^2. The small beta indicates a small autocorrelation, but a small autoregressive coefficient times large returns still gives sizable expected returns in the following period, and we can magnify this by shorting the losers. Also, because the R^2 is small, we can hold a large number of stocks to diversify.

7. The Size and Value factors have positive exposures, while the Market and Momentum factors should have zero exposure.

8. The residuals must be uncorrelated across regressions so that the idiosyncratic risks can be diversified away. Also, there must be no arbitrage. The factors must describe expected returns across assets.

9. From class, we know that the APT gives conditions under which an LFD of return variation implies an LFPM for risk premia. That means an LFPM needs more restrictions than an LFD in order to hold. Therefore, we would expect that if the LFPM fits perfectly, the LFD should also fit well.

## Q2

In [104]:
# Factor excess returns, one column per factor, indexed by the 'Date' column.
factors = pd.read_excel(
    'midterm_2.xlsx',
    sheet_name='factors (excess returns)',
    index_col='Date',
)
factors.tail()

Unnamed: 0_level_0,MKT,RMW
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-05-31,-0.0034,0.0144
2022-06-30,-0.0843,0.0185
2022-07-31,0.0957,0.0068
2022-08-31,-0.0378,-0.048
2022-09-30,-0.0936,-0.014


In [105]:
# Asset excess returns, one column per ticker, indexed by the 'Date' column.
assets = pd.read_excel(
    'midterm_2.xlsx',
    sheet_name='assets (excess returns)',
    index_col='Date',
)
assets.tail()

Unnamed: 0_level_0,BWX,DBC,EEM,EFA,HYG,IEF,IYR,PSP,QAI,SPY,TIP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-05-31,0.0048,0.0455,0.0055,0.0193,0.0156,0.0055,-0.0451,0.0144,-0.0047,0.0016,-0.0106
2022-06-30,-0.0457,-0.0742,-0.0508,-0.0869,-0.0697,-0.0079,-0.0682,-0.1317,-0.0329,-0.0817,-0.0304
2022-07-31,0.0196,-0.0208,-0.0044,0.0508,0.0661,0.0287,0.0877,0.1081,0.0179,0.0912,0.0422
2022-08-31,-0.0575,-0.0162,-0.0145,-0.0625,-0.0444,-0.0398,-0.0599,-0.0848,-0.0129,-0.0421,-0.0283
2022-09-30,-0.0636,-0.0707,-0.1157,-0.0925,-0.0377,-0.0476,-0.1274,-0.1466,-0.0401,-0.0927,-0.0676


Time Series Test

In [107]:
# Time-series test: regress each asset's excess returns on the factors (with
# an intercept) and collect the annualized alpha, the factor betas and the R^2.

# The design matrix is the same for every asset, so build it once.
X = sm.add_constant(factors[['MKT','RMW']])

ts_rows = []
for asset in assets.columns:
    y = assets[asset]
    reg = sm.OLS(y, X).fit()
    # Select coefficients by label: integer keys on a label-indexed Series are
    # deprecated as positional lookups in recent pandas releases.
    ts_rows.append([
        reg.params['const'] * 12,  # intercept scaled by 12 (monthly -> annual)
        reg.params['MKT'],
        reg.params['RMW'],
        reg.rsquared,
    ])

# Build the summary in one step with a numeric dtype instead of filling an
# empty object-dtype frame cell by cell.
ts = pd.DataFrame(ts_rows, index=assets.columns, columns=['a', 'MKT', 'RMW', 'R^2'], dtype=float)

ts

Unnamed: 0,a,MKT,RMW,R^2
BWX,-0.0406,0.2363,0.1024,0.2047
DBC,-0.052,0.5932,-0.1086,0.2568
EEM,-0.0689,0.9436,-0.0948,0.5601
EFA,-0.0582,0.9313,-0.0775,0.7622
HYG,-0.0004,0.4532,0.0362,0.5886
IEF,0.024,-0.0818,0.1693,0.082
IYR,0.001,0.9134,0.2731,0.5514
PSP,-0.0659,1.3408,-0.1098,0.8393
QAI,-0.0237,0.2782,0.0028,0.7174
SPY,0.0006,0.9629,0.1259,0.9924


In [108]:
# Mean absolute time-series alpha across assets, expressed in percent.
alpha_pct = ts['a'] * 100
MAE_alpha = alpha_pct.abs().mean()
print('MAE = {:.2f} %'.format(MAE_alpha))

MAE = 3.11 %


In [109]:
# Average R^2 of the time-series regressions across assets.
r2_mean = ts.loc[:, 'R^2'].mean()
r2_mean

0.515513116322

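1. (b) Under CAPM, every alpha should be 0, and therefore the MAE should also be 0. Each asset's mean excess return should lie on the SML implied by its beta. CAPM makes no statement about the R^2 of the time-series regressions, so neither the individual R^2 values nor their mean tells us anything about the pricing fit.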

In [111]:
# Sample-mean excess return of each asset, scaled by 12 to annualize.
assets.mean().mul(12)

BWX   -0.0051
DBC    0.0272
EEM    0.0592
EFA    0.0687
HYG    0.0633
IEF    0.0172
IYR    0.1349
PSP    0.1168
QAI    0.0149
SPY    0.1373
TIP    0.0245
dtype: float64

1. (c) Relative to the mean returns of the assets, I think this MAE is large, which makes the model's pricing not very reliable.

Cross-sectional Test

In [112]:
# Cross-sectional test: regress the assets' sample-mean excess returns on
# their time-series betas (plus an intercept).
y = assets.mean().reindex(assets.columns).to_frame('Mean')
beta_matrix = ts[['MKT', 'RMW']].astype(float)
X = sm.add_constant(beta_matrix)
# missing='drop' discards any asset with a NaN mean or NaN beta.
reg = sm.OLS(y, X, missing='drop').fit()

In [113]:
# Annualized intercept of the regression held in `reg`. It is selected by
# label ('const' is the column name added by sm.add_constant) because integer
# keys on a label-indexed Series are deprecated as positional lookups in pandas.
reg.params['const'] * 12

-0.0017850143349516577

In [117]:
# Annualized slope coefficients (everything after the intercept) of `reg`.
reg.params.iloc[1:] * 12

MKT    0.1186
RMW    0.2101
dtype: float64

In [115]:
# R^2 of the most recently fitted regression (`reg`).
reg.rsquared

0.905532790584489

In [116]:
# Fitted mean excess returns from the cross-sectional regression
# (intercept + betas x estimated premia), then the mean absolute pricing
# error, scaled by 12 and reported in percent.
# Coefficients are selected by label: integer keys on a label-indexed Series
# are deprecated as positional lookups in recent pandas releases.
pred_cs = reg.params['const'] + ts[['MKT', 'RMW']] @ reg.params[['MKT', 'RMW']]
mae = (assets.mean() - pred_cs).abs().mean()
print('MAE = {:.2f} %'.format(mae*12*100))

MAE = 1.09 %


2. (b) The intercept and the MAE should both be 0. Each estimated factor premium should equal the corresponding factor's expected excess return. The R^2 should be 1.

2. (c) I think this model prices the assets fairly well. The intercept is very small, and the R^2 shows that the betas explain the mean asset returns well. The MAE also decreased compared to the time-series test. Although the MAE must decrease by construction (the cross-sectional regression has additional free parameters), this is still a good sign for the model fit.

3. I think that RMW is more lucrative according to the cross-sectional test, since its estimated premium (the regression slope on the RMW beta) in this test is even larger than that of the market factor.

In [136]:
# Compare annualized sample means with the premia implied by each model.
factor_betas = ts[['MKT', 'RMW']]

# Time-series LFPM: betas times the factors' sample-mean excess returns.
pred_ts = factor_betas.mul(factors.mean(), axis='columns').sum(axis=1)
# Cross-sectional LFPM without the intercept: betas times the estimated slopes.
pred_cs_no_inter = factor_betas.dot(reg.params.iloc[1:])

asset_pre = (assets.mean() * 12).reindex(assets.columns).to_frame('Sample Average')
asset_pre['LFPM TS'] = pred_ts * 12
asset_pre['LFPM CS'] = pred_cs_no_inter * 12
asset_pre

Unnamed: 0,Sample Average,LFPM TS,LFPM CS
BWX,-0.0051,0.0355,0.0495
DBC,0.0272,0.0792,0.0476
EEM,0.0592,0.1281,0.092
EFA,0.0687,0.1269,0.0942
HYG,0.0633,0.0637,0.0614
IEF,0.0172,-0.0068,0.0259
IYR,0.1349,0.1339,0.1657
PSP,0.1168,0.1827,0.136
QAI,0.0149,0.0386,0.0336
SPY,0.1373,0.1367,0.1407


## Q3

In [101]:
def tangency_portfolio_rfr(asset_return, cov_matrix, cov_diagnolize=False):
    """
    Compute tangency-portfolio weights, normalized so they sum to one.

    The weights are proportional to inv(Sigma) @ mu and are divided by
    1' inv(Sigma) mu so that they add up to 1.

    Inputs:
        asset_return - mean (excess) return of each asset; any length-n
            array-like (list, ndarray, pandas Series)
        cov_matrix - n x n covariance matrix of the assets; a DataFrame or
            any 2-D array-like
        cov_diagnolize - if True, keep only the diagonal of cov_matrix,
            i.e. ignore the covariances between assets

    Returns:
        numpy array of length n holding the portfolio weights.
    """
    # Coerce both inputs to plain float arrays so the function no longer
    # depends on cov_matrix being a DataFrame (the old code read `.index`).
    mean_vec = np.asarray(asset_return, dtype=float)
    asset_cov = np.asarray(cov_matrix, dtype=float)
    if cov_diagnolize:
        asset_cov = np.diag(np.diag(asset_cov))

    # Solve Sigma x = mu rather than forming the explicit inverse: same
    # answer, but cheaper and numerically more stable.
    unscaled = np.linalg.solve(asset_cov, mean_vec)

    # Sum of the unnormalized weights, i.e. 1' inv(Sigma) mu.
    den = unscaled.sum()
    return unscaled / den

In [138]:
# Tangency weights from the sample-mean excess returns and sample covariance.
sample_weights = tangency_portfolio_rfr(assets.mean(), assets.cov())
tan_wt = pd.Series(sample_weights, index=assets.columns).to_frame('Weight')
tan_wt

Unnamed: 0,Weight
BWX,-16.6834
DBC,2.6617
EEM,5.0023
EFA,-1.6528
HYG,15.8455
IEF,31.6714
IYR,-4.251
PSP,-6.9802
QAI,-60.2653
SPY,33.1711


In [135]:
# Annualized mean, volatility and Sharpe ratio of the portfolio in `tan_wt`.
weights = tan_wt['Weight']
ret = np.dot(weights, assets.mean() * 12)
variance = weights @ assets.cov() @ weights
vol = np.sqrt(variance) * np.sqrt(12)
sharpe = ret / vol
sharpe

1.8412625675044252

In [139]:
# Tangency weights when expected returns are replaced by the cross-sectional
# model's predictions (`pred_cs_no_inter`); the covariance stays the sample one.
model_weights = tangency_portfolio_rfr(pred_cs_no_inter, assets.cov())
tan_wt_cs = pd.Series(model_weights, index=assets.columns).to_frame('Weight')
tan_wt_cs

Unnamed: 0,Weight
BWX,0.337
DBC,-0.0287
EEM,-0.0203
EFA,-0.1755
HYG,0.2264
IEF,-0.1946
IYR,0.0735
PSP,-0.5109
QAI,-1.3843
SPY,1.3865


In [145]:
# Probability that a standard normal draw exceeds -3/7.
# NOTE(review): the -3/7 cutoff is not explained in this notebook; presumably
# a standardized (z-score) threshold. Confirm against the question.
threshold = -3 / 7
normal = 1 - stats.norm.cdf(threshold)
normal

0.6658824291023754