# Homework 7

### FINM 36700
### Group C 14

## Imports

In [52]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from arch import arch_model
from arch.univariate import GARCH, EWMAVariance
from sklearn import linear_model
import scipy.stats as stats
from statsmodels.regression.rolling import RollingOLS
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
pd.set_option("display.precision", 4)
sns.set(rc={'figure.figsize':(15, 10)})

## Data

In [53]:
df = pd.read_excel('../data/gmo_analysis_data.xlsx', sheet_name=1)
df = df.rename(columns={"Unnamed: 0": "Date"})
df = df.set_index("Date").dropna()

df.head()

Unnamed: 0_level_0,DP,EP,US10Y
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1993-02-28,2.82,4.44,6.03
1993-03-31,2.77,4.41,6.03
1993-04-30,2.82,4.44,6.05
1993-05-31,2.81,4.38,6.16
1993-06-30,2.79,4.31,5.8


In [54]:
rf = pd.read_excel('../data/gmo_analysis_data.xlsx', sheet_name=3)
rf = rf.rename(columns={"Unnamed: 0": "Date"})
rf.set_index("Date", inplace = True)

rf.head()

Unnamed: 0_level_0,US3M
Date,Unnamed: 1_level_1
1993-02-28,0.0025
1993-03-31,0.0025
1993-04-30,0.0025
1993-05-31,0.0026
1993-06-30,0.0026


In [55]:
GMO = pd.read_excel('../data/gmo_analysis_data.xlsx', sheet_name=2)

GMO = GMO.rename(columns={"Unnamed: 0": "Date"})
GMO.set_index("Date", inplace = True)

GMO_ex = GMO.dropna().subtract(rf['US3M'], axis=0).dropna()

GMO_ex.head()

Unnamed: 0_level_0,SPY,GMWAX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1996-11-30,0.0687,0.0409
1996-12-31,-0.0282,-0.0156
1997-01-31,0.0575,0.0104
1997-02-28,0.0052,0.0179
1997-03-31,-0.0486,-0.0196


## 2. Analyzing GMO

### 2 - 1.

In [56]:
# helper function
def summary_stats(df, annual_fac = 12):
    stats_df = pd.DataFrame(data = None)
    stats_df['Mean'] = df.mean()*annual_fac
    stats_df['Vol'] = df.std()*np.sqrt(annual_fac)
    stats_df['Sharpe'] = df.mean()/df.std()*np.sqrt(annual_fac)

    return stats_df

In [57]:
summary_stats(GMO_ex.loc[:'2011', ['GMWAX']])

Unnamed: 0,Mean,Vol,Sharpe
GMWAX,0.0158,0.125,0.1266


In [58]:
summary_stats(GMO_ex.loc['2012':, ['GMWAX']])

Unnamed: 0,Mean,Vol,Sharpe
GMWAX,0.0366,0.092,0.3982


In [59]:
summary_stats(GMO_ex[['GMWAX']])

Unnamed: 0,Mean,Vol,Sharpe
GMWAX,0.0245,0.1123,0.2181


- The mean, volatility, and Sharpe ratio has changed a lot since the case. GMO's performance from 2012 and further is much better than the period before.

### 2 - 2.

In [60]:
# helper function
def tail_risk(df):
    tr_df = pd.DataFrame(data = None)
    tr_df['Min return'] = df.min()
    tr_df['VaR-5th'] = df.quantile(.05)
    cum_ret = (1 + df).cumprod()
    rolling_max = cum_ret.cummax()
    drawdown = (cum_ret - rolling_max) / rolling_max
    tr_df['Max Drawdown'] = drawdown.min()

    return tr_df

In [61]:
tail_risk(GMO_ex.loc[:'2011', ['GMWAX']])

Unnamed: 0,Min return,VaR-5th,Max Drawdown
GMWAX,-0.1492,-0.0598,-0.4729


In [62]:
tail_risk(GMO_ex.loc[:'2011', ['SPY']])

Unnamed: 0,Min return,VaR-5th,Max Drawdown
SPY,-0.1656,-0.0802,-0.56


In [63]:
tail_risk(GMO_ex.loc['2012':, ['GMWAX']])

Unnamed: 0,Min return,VaR-5th,Max Drawdown
GMWAX,-0.1187,-0.0397,-0.226


In [64]:
tail_risk(GMO_ex.loc['2012':, ['SPY']])

Unnamed: 0,Min return,VaR-5th,Max Drawdown
SPY,-0.1247,-0.0687,-0.2481


In [65]:
tail_risk(GMO_ex[['GMWAX']])

Unnamed: 0,Min return,VaR-5th,Max Drawdown
GMWAX,-0.1492,-0.0483,-0.4729


In [66]:
tail_risk(GMO_ex[['SPY']])

Unnamed: 0,Min return,VaR-5th,Max Drawdown
SPY,-0.1656,-0.08,-0.56


#### (a):
- GMWAX has lower tail-risk than SPY as seen by these stats. Also, this applies to all three samples. Therefore, GMWAX in general has lower tail-risk across all time period.

#### (b):
- Yes, GMWAX's tail-risk does vary much across the two subsamples. It has lower VaR in the second subsample which is indicated as the year of 2012 and forward. Also, the Max Drawdown is much lower in the second subsample. Thus, GMWAX improves substantially in the second subsample

### 2 - 3.

In [67]:
# helper functino
def reg_params(df, y_col, X_col, intercept = True, annual_fac=12):
    y = df[y_col]
    if intercept == True:
        X = sm.add_constant(df[X_col])
    else:
        X = df[X_col]

    model = sm.OLS(y, X, missing = 'drop').fit()
    reg_df = model.params.to_frame('Regression Parameters')
    reg_df.loc[r'$R^{2}$'] = model.rsquared

    if intercept == True:
        reg_df.loc['const'] *= annual_fac

    return reg_df

#### (a):

In [68]:
reg_params(GMO_ex.loc[:'2011'], 'GMWAX', 'SPY')

Unnamed: 0,Regression Parameters
const,-0.0058
SPY,0.5396
$R^{2}$,0.5071


In [69]:
reg_params(GMO_ex.loc['2012':], 'GMWAX', 'SPY')

Unnamed: 0,Regression Parameters
const,-0.0345
SPY,0.5622
$R^{2}$,0.7645


In [70]:
reg_params(GMO_ex, 'GMWAX', 'SPY')

Unnamed: 0,Regression Parameters
const,-0.017
SPY,0.5456
$R^{2}$,0.5777


#### (b):
- GMWAS is a low-beta strategy. The beta has not changed that much since the case, so we could say that it is a low-beta strategy across all samples and periods.

#### (c):
- GMWAX does not provide alpha, because the alpha or the constant is negative. That has not changed since the case. The alpha is negative across all samples and periods.

## 3. Forecasting Regression
### 3 - 1.

In [71]:
df = df.shift()
df['SPY'] = GMO['SPY']

df.head()

Unnamed: 0_level_0,DP,EP,US10Y,SPY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1993-02-28,,,,0.0107
1993-03-31,2.82,4.44,6.03,0.0224
1993-04-30,2.77,4.41,6.03,-0.0256
1993-05-31,2.82,4.44,6.05,0.027
1993-06-30,2.81,4.38,6.16,0.0037


In [72]:
DP = reg_params(df, 'SPY', 'DP')

DP

Unnamed: 0,Regression Parameters
const,-0.1129
DP,0.0094
$R^{2}$,0.0094


In [73]:
EP = reg_params(df, 'SPY', 'EP')

EP

Unnamed: 0,Regression Parameters
const,-0.0712
EP,0.0032
$R^{2}$,0.0086


In [74]:
EP_DP_10Y = reg_params(df, 'SPY', ['EP','DP','US10Y'])

EP_DP_10Y

Unnamed: 0,Regression Parameters
const,-0.1792
EP,0.0027
DP,0.008
US10Y,-0.001
$R^{2}$,0.0163


### 3 - 2.

In [75]:
# scaling
w_DP = 100 * (DP.loc['const'][0]/12 + DP.loc['DP'][0] * df['DP'])

r_DP = (w_DP * df['SPY']).dropna()

w_EP = 100 * (EP.loc['const'][0]/12 + EP.loc['EP'][0] * df['EP'])

r_EP = (w_EP * df['SPY']).dropna()

w_3fac = 100 * (EP_DP_10Y.loc['const'][0]/12 + EP_DP_10Y.loc['EP'][0] * df['EP']\
                                             + EP_DP_10Y.loc['DP'][0] * df['DP']\
                                             + EP_DP_10Y.loc['US10Y'][0] * df['US10Y'])

r_3fac = (w_3fac * df['SPY']).dropna()

In [76]:
# helper function
def summary_stats_bm(series, bm, annual_fac=12):
    ss_df = pd.DataFrame(data = None, index = ['Summary Stats'])
    ss_df['Mean'] = series.mean() * annual_fac
    ss_df['Vol'] = series.std() * np.sqrt(annual_fac)
    ss_df['Sharpe (Mean/Vol)'] = ss_df['Mean'] / ss_df['Vol']

    y = series
    X = sm.add_constant(bm.loc[series.index])
    reg = sm.OLS(y,X).fit().params
    ss_df[r'$\alpha$'] = reg[0] * annual_fac
    ss_df[r'$\beta^{SPY}$'] = reg[1]

    cum_ret = (1 + series).cumprod()
    rolling_max = cum_ret.cummax()
    drawdown = (cum_ret - rolling_max) / rolling_max
    ss_df['Max Drawdown'] = drawdown.min()

    return round(ss_df, 4)

In [77]:
summary_stats_bm(r_DP, df[['SPY']])

Unnamed: 0,Mean,Vol,Sharpe (Mean/Vol),$\alpha$,$\beta^{SPY}$,Max Drawdown
Summary Stats,0.1095,0.149,0.7348,0.0207,0.8611,-0.653


In [78]:
summary_stats_bm(r_EP, df[['SPY']])

Unnamed: 0,Mean,Vol,Sharpe (Mean/Vol),$\alpha$,$\beta^{SPY}$,Max Drawdown
Summary Stats,0.1078,0.1286,0.8383,0.0322,0.7327,-0.3823


In [79]:
summary_stats_bm(r_3fac, df[['SPY']])

Unnamed: 0,Mean,Vol,Sharpe (Mean/Vol),$\alpha$,$\beta^{SPY}$,Max Drawdown
Summary Stats,0.125,0.1456,0.8588,0.0451,0.775,-0.5221


### 3 - 3.
#### (a):

In [80]:
VaR = pd.DataFrame([r_DP.quantile(.05), r_EP.quantile(.05), r_3fac.quantile(.05),
                    df['SPY'].quantile(.05),
                    GMO['GMWAX'].quantile(.05)],
                   index = ['DP Strat','EP Strat','3-factor Strat','SPY','GMO'],
                   columns = ['5% VaR'])

VaR

Unnamed: 0,5% VaR
DP Strat,-0.0523
EP Strat,-0.0541
3-factor Strat,-0.0642
SPY,-0.0739
GMO,-0.0473


#### (b):

In [81]:
summary_stats(r_DP.to_frame('DP Strat').loc['2000':'2011'])

Unnamed: 0,Mean,Vol,Sharpe
DP Strat,0.0393,0.1842,0.2135


In [82]:
summary_stats(r_EP.to_frame('EP Strat').loc['2000':'2011'])

Unnamed: 0,Mean,Vol,Sharpe
EP Strat,0.0373,0.1339,0.2784


In [83]:
summary_stats(r_3fac.to_frame('3-factor Strat').loc['2000':'2011'])

Unnamed: 0,Mean,Vol,Sharpe
3-factor Strat,0.0608,0.1574,0.3863


In [84]:
summary_stats(rf.loc['2000':'2011'])

Unnamed: 0,Mean,Vol,Sharpe
US3M,0.0231,0.0058,3.9866


- All the dynamic strategies outperform the risk-free rate during this period.

#### (c):

In [85]:
r_df = r_3fac.to_frame('3-factor Strat')
r_df['DP Strat'] = r_DP
r_df['EP Strat'] = r_EP
r_df['rf'] = rf['US3M']

df_riskprem = pd.DataFrame(data=None, index=[r'% of periods underperforming $r^{f}$'])
for col in r_df.columns[:3]:
    df_riskprem[col] = len(r_df[r_df[col] < r_df['rf']])/len(r_df) * 100

df_riskprem

Unnamed: 0,3-factor Strat,DP Strat,EP Strat
% of periods underperforming $r^{f}$,37.0787,37.3596,37.3596


#### (d):
- No, judging by the tail risk metrics and volatility compared to SPY, the dynamic strategy does not seem to take on extra risk on the whole.

- However, we must keep in mind that the strategies are dependent on running regressions with very little prediction power, so badly estimated parameters could lead to terrible performance. (Fortunately, this was not the case in our backtesting since we did not observe high volatility or tail risk)

## 4 Out-of-Sample Forecasting

In [86]:
# helper function
def OOS_r2(df, factors, start):
    y = df['SPY']
    X = sm.add_constant(df[factors])

    forecast_err, null_err = [], []

    for i,j in enumerate(df.index):
        if i >= start:
            currX = X.iloc[:i]
            currY = y.iloc[:i]
            reg = sm.OLS(currY, currX, missing = 'drop').fit()
            null_forecast = currY.mean()
            reg_predict = reg.predict(X.iloc[[i]])
            actual = y.iloc[[i]]
            forecast_err.append(reg_predict - actual)
            null_err.append(null_forecast - actual)

    RSS = (np.array(forecast_err)**2).sum()
    TSS = (np.array(null_err)**2).sum()

    return 1 - RSS/TSS

###  4 - 1.

In [87]:
EP_OOS_r2 = OOS_r2(df, ['EP'], 60)

print('EP OOS R-squared: ' + str(round(EP_OOS_r2, 4)))

EP OOS R-squared: -0.007


- No the $R^2$ value is negative

### 4 - 2

In [88]:
def OOS_strat(df, factors, start, weight):
    returns = []
    y = df['SPY']
    X = sm.add_constant(df[factors])

    for i,j in enumerate(df.index):
        if i >= start:
            currX = X.iloc[:i]
            currY = y.iloc[:i]
            reg = sm.OLS(currY, currX, missing = 'drop').fit()
            pred = reg.predict(X.iloc[[i]])
            w = pred * weight
            returns.append((df.iloc[i]['SPY'] * w)[0])

    df_strat = pd.DataFrame(data = returns, index = df.iloc[-(len(returns)):].index, columns = ['Strat Returns'])
    return df_strat

In [89]:
OOS_EP = OOS_strat(df, ['EP'], 60, 100)

In [90]:
summary_stats_bm(OOS_EP['Strat Returns'], GMO[['SPY']])

Unnamed: 0,Mean,Vol,Sharpe (Mean/Vol),$\alpha$,$\beta^{SPY}$,Max Drawdown
Summary Stats,0.0819,0.1654,0.4953,0.0353,0.5435,-0.5837


- Compared to the in-sample version in Q3.2, this strategy under-performs in terms of the expected return and the Sharpe ratio of the portfolio. Though the Max draw-down is less than the DP regression strategy, it is not much less than all 3 strategies. Therefore, even in terms of risk, this strategy does not seem to be significantly better either. (We cannot rule with certainty on this with only this summary statistics)

### 4 - 3
#### (a):

In [91]:
VaR_OOS = pd.DataFrame([OOS_EP['Strat Returns'].quantile(.05),
                    df['SPY'].quantile(.05),
                    GMO['GMWAX'].quantile(.05)],
                   index = ['EP Strat','SPY','GMO'],
                   columns = ['5% VaR'])

VaR_OOS

Unnamed: 0,5% VaR
EP Strat,-0.071
SPY,-0.0739
GMO,-0.0473


#### (b):

In [92]:
summary_stats_bm(OOS_EP.loc['2000':'2011']['Strat Returns'], GMO[['SPY']])

Unnamed: 0,Mean,Vol,Sharpe (Mean/Vol),$\alpha$,$\beta^{SPY}$,Max Drawdown
Summary Stats,0.0388,0.1959,0.1979,0.0333,0.2994,-0.5837


In [93]:
summary_stats(rf.loc['2000':'2011'])

Unnamed: 0,Mean,Vol,Sharpe
US3M,0.0231,0.0058,3.9866


- The dynamic strategy barely overperforms the risk-free rate in terms of premium.

#### (c):

In [94]:
r_df_OOS = OOS_EP.rename(columns={"Strat Returns": "EP Strat"})
r_df_OOS['rf'] = rf['US3M']

df_riskprem2 = pd.DataFrame(data=None, index=[r'% of periods underperforming $r^{f}$'])
for col in r_df_OOS.columns[:3]:
    df_riskprem2[col] = len(r_df_OOS[r_df_OOS[col] < r_df_OOS['rf']])/len(r_df_OOS) * 100

df_riskprem2

Unnamed: 0,EP Strat,rf
% of periods underperforming $r^{f}$,38.3838,0.0


#### (d):
- The dynamic strategy tends to have worse risk metrics than SPY so it seems this strategy does take on extra risk.