In [43]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm

from pypfopt import risk_models
from pypfopt import expected_returns
from pypfopt.expected_returns import ema_historical_return
from pypfopt.risk_models import exp_cov
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt.plotting import plot_efficient_frontier
from pypfopt.plotting import plot_weights
from pypfopt.cla import CLA
from statsmodels.regression.rolling import RollingOLS
from scipy.stats import norm

def print(*args):
    __builtins__.print(*("%.4f" % a if isinstance(a, float) else a for a in args))
    
pd.set_option("display.precision", 5)
pd.options.display.float_format = "{:.4f}".format

def cagr(start_p,end_p,months):
    r1 = end_p/start_p
    mon_r = (r1)**(1/months)
    cagr_ret = (mon_r-1)*12
    return cagr_ret

import warnings
warnings.filterwarnings("ignore")

In [44]:
def Max_Drawdown(ret_data):
    ret_data2 = ret_data + 1
    ret_data2 = ret_data2.cumprod()
    ret_data2 = (ret_data2 - ret_data2.cummax())/ret_data2.cummax()
#     plt.figure()
#     plt.plot(ret_data2)
#     plt.show()
    max_dd1 = ret_data2.min()
    max_dd_dt = ret_data2.idxmin()
    return [max_dd1,max_dd_dt]

def performance_statistics(df, ann_fac):
    mean_ret = []
    vol_ret = []
    sharpe = []
    min_ret = []
    VaR_5_ret = []
    Max_DD = []
    df_stats = pd.DataFrame(index=df.columns)
    for i in df.columns:
        mean_ret.append(df[i].mean()*(ann_fac))
        vol_ret.append(df[i].std()*np.sqrt(ann_fac))
        sharpe.append((df[i].mean()*(ann_fac)) / (df[i].std()*np.sqrt(ann_fac)))
        min_ret.append(df[i].min())
        VaR_5_ret.append(df[i].quantile(0.05))
        Max_DD.append(Max_Drawdown(df[i]))
    df_stats['Ann_Mean_ret'] = mean_ret
    df_stats['Ann_Vol'] = vol_ret
    df_stats['Sharpe'] = sharpe
    df_stats['Minimum_ret'] = min_ret
    df_stats['VaR_0.05'] = VaR_5_ret
    df_stats['Max_DD'] = Max_DD
    return df_stats

def regression_res(Y, X):
    X = sm.add_constant(X)
    model1 = sm.OLS(Y,X,missing='drop')
    results1 = model1.fit()
    df1 = results1.params[1:].to_frame('Beta_Values')
    df1['Annualized_Alpha'] = results1.params[0]*12
    df1['R-Squared'] = results1.rsquared
    df1['Information_ratio'] = (results1.params[0]*12)/(results1.resid.std()*np.sqrt(12))
    fitted_values = results1.fittedvalues
    return df1, fitted_values

In [45]:
df_signals = pd.read_excel('gmo_analysis_data.xlsx',sheet_name='signals',index_col=0)
df_returns = pd.read_excel('gmo_analysis_data.xlsx',sheet_name='returns (total)',index_col=0)
df_rf = pd.read_excel('gmo_analysis_data.xlsx',sheet_name='risk-free rate',index_col=0)

display(df_signals)
display(df_returns)
display(df_rf)

Unnamed: 0,DP,EP,US10Y
1993-02-28,2.8200,4.4400,6.0300
1993-03-31,2.7700,4.4100,6.0300
1993-04-30,2.8200,4.4400,6.0500
1993-05-31,2.8100,4.3800,6.1600
1993-06-30,2.7900,4.3100,5.8000
...,...,...,...
2023-06-30,1.5800,3.8800,3.8100
2023-07-31,1.5300,3.7600,3.9700
2023-08-31,1.5500,3.8900,4.0900
2023-09-30,1.5700,4.1100,4.5900


Unnamed: 0,SPY,GMWAX
1993-02-28,0.0107,
1993-03-31,0.0224,
1993-04-30,-0.0256,
1993-05-31,0.0270,
1993-06-30,0.0037,
...,...,...
2023-06-30,0.0648,0.0398
2023-07-31,0.0327,0.0244
2023-08-31,-0.0163,-0.0210
2023-09-30,-0.0474,-0.0163


Unnamed: 0,US3M
1993-02-28,0.0025
1993-03-31,0.0025
1993-04-30,0.0025
1993-05-31,0.0026
1993-06-30,0.0026
...,...
2023-06-30,0.0045
2023-07-31,0.0046
2023-08-31,0.0046
2023-09-30,0.0046


## 2 Analyzing GMO
### This section utilizes data in the file, gmo analysis data.xlsx.
### Examine GMO’s performance. Use the risk-free rate to convert the total returns to excess returns
### 1. Calculate the mean, volatility, and Sharpe ratio for GMWAX. Do this for three samples:
#####    • from inception through 2011
#####    • 2012-present
#####    • inception - present
### Has the mean, vol, and Sharpe changed much since the case?
    The Mean value is the highest and volatility is lowest for GMO in the sample 2012-present. 
    So, the sharpe ratio has been higher since the case. (this is true for SPY as well)

In [46]:
df_returns_exc = df_returns.sub(df_rf['US3M'],axis=0)
# display(df_returns_exc)
X1 = ['Ann_Mean_ret','Ann_Vol','Sharpe']
print('--------Performance statistics for excess returns Inception-2011--------')
display(performance_statistics(df_returns_exc[df_returns_exc.index<='2011-12-31'],12)[X1])
print('\n \n--------Performance statistics for excess returns 2012-Present--------')
display(performance_statistics(df_returns_exc[df_returns_exc.index>'2011-12-31'],12)[X1])
print('\n \n--------Performance statistics for excess returns Inception-Present--------')
display(performance_statistics(df_returns_exc,12)[X1])

--------Performance statistics for excess returns Inception-2011--------


Unnamed: 0,Ann_Mean_ret,Ann_Vol,Sharpe
SPY,0.0538,0.1526,0.3528
GMWAX,0.0158,0.125,0.1266



 
--------Performance statistics for excess returns 2012-Present--------


Unnamed: 0,Ann_Mean_ret,Ann_Vol,Sharpe
SPY,0.1204,0.1431,0.8417
GMWAX,0.0364,0.0945,0.3856



 
--------Performance statistics for excess returns Inception-Present--------


Unnamed: 0,Ann_Mean_ret,Ann_Vol,Sharpe
SPY,0.0795,0.1491,0.5329
GMWAX,0.0249,0.1125,0.2209


### 2. GMO believes a risk premium is compensation for a security’s tendency to lose money at “bad times”. For all three samples, analyze extreme scenarios by looking at
##### • Min return
##### • 5th percentile (VaR-5th)
##### • Maximum drawdown
##### (a) Does GMWAX have high or low tail-risk as seen by these stats?
    GMWAX has lower tail risk in comparison to SPY based on these values in all the samples.
##### (b) Does that vary much across the two subsamples?
    Among, all the samples the tail risk is lowest in the most recent sample (2012-present).
    Minimum return and Max_DD have the same values in the Inception-2011 and Inception-present
    samples. 


In [47]:
X2 = ['Minimum_ret','VaR_0.05']
X3 = ['Max_DD']

print('--------Extreme scenario statistics Inception-2011--------')
display(performance_statistics(df_returns_exc[df_returns_exc.index<='2011-12-31'],12)[X2])
display(performance_statistics(df_returns[df_returns.index<='2011-12-31'],12)[X3])

print('--------Extreme scenario statistics 2012-present--------')
display(performance_statistics(df_returns_exc[df_returns_exc.index>'2011-12-31'],12)[X2])
display(performance_statistics(df_returns[df_returns.index>'2011-12-31'],12)[X3])

print('--------Extreme scenario statistics Inception-present--------')
display(performance_statistics(df_returns_exc,12)[X2])
display(performance_statistics(df_returns,12)[X3])

--------Extreme scenario statistics Inception-2011--------


Unnamed: 0,Minimum_ret,VaR_0.05
SPY,-0.1656,-0.0783
GMWAX,-0.1492,-0.0598


Unnamed: 0,Max_DD
SPY,"[-0.5079798292463792, 2009-02-28 00:00:00]"
GMWAX,"[-0.3552187776667767, 2001-09-30 00:00:00]"


--------Extreme scenario statistics 2012-present--------


Unnamed: 0,Minimum_ret,VaR_0.05
SPY,-0.1247,-0.0655
GMWAX,-0.1187,-0.0378


Unnamed: 0,Max_DD
SPY,"[-0.23928065009333999, 2022-09-30 00:00:00]"
GMWAX,"[-0.2167733447099205, 2022-09-30 00:00:00]"


--------Extreme scenario statistics Inception-present--------


Unnamed: 0,Minimum_ret,VaR_0.05
SPY,-0.1656,-0.0735
GMWAX,-0.1492,-0.0471


Unnamed: 0,Max_DD
SPY,"[-0.5079798292463792, 2009-02-28 00:00:00]"
GMWAX,"[-0.3552187776667767, 2001-09-30 00:00:00]"


### 3. For all three samples, regress excess returns of GMWAX on excess returns of SPY.
##### (a) Report the estimated alpha, beta, and r-squared.

In [48]:
X0 = ['Beta_Values','Annualized_Alpha','R-Squared']
print('--------Inception-2011--------')
display(regression_res(df_returns_exc[df_returns_exc.index<='2011-12-31']['GMWAX'],df_returns_exc[df_returns_exc.index<='2011-12-31']['SPY'])[0][X0])
print('\n \n--------2012-Present--------')
display(regression_res(df_returns_exc[df_returns_exc.index>'2011-12-31']['GMWAX'],df_returns_exc[df_returns_exc.index>'2011-12-31']['SPY'])[0][X0])
print('\n \n--------Inception-Present--------')
display(regression_res(df_returns_exc['GMWAX'],df_returns_exc['SPY'])[0][X0])

--------Inception-2011--------


Unnamed: 0,Beta_Values,Annualized_Alpha,R-Squared
SPY,0.5396,-0.0058,0.5071



 
--------2012-Present--------


Unnamed: 0,Beta_Values,Annualized_Alpha,R-Squared
SPY,0.5738,-0.0327,0.7544



 
--------Inception-Present--------


Unnamed: 0,Beta_Values,Annualized_Alpha,R-Squared
SPY,0.5506,-0.0166,0.5821



##### (b) Is GMWAX a low-beta strategy? Has that changed since the case?
    Beta value doesn't seem to be low. The value is similar in all the samples. 
    The value is highest in most recent sample (0.574).
##### (c) Does GMWAX provide alpha? Has that changed across the subsamples?
    The alpha value was low in magnitude for Incpetion-2011 sample. However, it became much lower (on negative side) for 2012-present.
    GMWAX doesn't seem to provide positive alpha in all samples.

## 3 Forecast Regressions
### This section utilizes data in the file, gmo analysis data.xlsx.
#### 1. Consider the lagged regression, where the regressor, (X,) is a period behind the target, ($r^{SPY}$).

   $r^{SPY}_{t} = \alpha^{SPY,X} + (\beta^{SPY,X})'X_{t-1} + \epsilon^{SPY,X}_{t}$       (1)


In [49]:
df_signals['DP_t-1'] = df_signals['DP'].shift(1)
df_signals['EP_t-1'] = df_signals['EP'].shift(1)
df_signals['US10Y_t-1'] = df_signals['US10Y'].shift(1)
df_signals

Unnamed: 0,DP,EP,US10Y,DP_t-1,EP_t-1,US10Y_t-1
1993-02-28,2.8200,4.4400,6.0300,,,
1993-03-31,2.7700,4.4100,6.0300,2.8200,4.4400,6.0300
1993-04-30,2.8200,4.4400,6.0500,2.7700,4.4100,6.0300
1993-05-31,2.8100,4.3800,6.1600,2.8200,4.4400,6.0500
1993-06-30,2.7900,4.3100,5.8000,2.8100,4.3800,6.1600
...,...,...,...,...,...,...
2023-06-30,1.5800,3.8800,3.8100,1.6500,4.1300,3.6400
2023-07-31,1.5300,3.7600,3.9700,1.5800,3.8800,3.8100
2023-08-31,1.5500,3.8900,4.0900,1.5300,3.7600,3.9700
2023-09-30,1.5700,4.1100,4.5900,1.5500,3.8900,4.0900



### Estimate (1) and report the R2, as well as the OLS estimates for α and β. Do this for...
#### • X as a single regressor, the dividend-price ratio.


In [50]:
df_reg1 = regression_res(df_returns['SPY'],df_signals['DP_t-1'])[0]
display(df_reg1[X0])

Unnamed: 0,Beta_Values,Annualized_Alpha,R-Squared
DP_t-1,0.0095,-0.1138,0.0094


#### • X as a single regressor, the earnings-price ratio.

In [51]:
df_reg2 = regression_res(df_returns['SPY'],df_signals['EP_t-1'])[0]
display(df_reg2[X0])

Unnamed: 0,Beta_Values,Annualized_Alpha,R-Squared
EP_t-1,0.0033,-0.0739,0.0087


#### • X as three regressors, the dividend-price ratio, the earnings-price ratio, and the 10-year yield.
#### For each, report the r-squared.


In [52]:
df_reg3 = regression_res(df_returns['SPY'],df_signals[['DP_t-1','EP_t-1','US10Y_t-1']])[0]
print('The R-squared value: ',df_reg3['R-Squared'][0])
print('The Annualized Alpha value: ',df_reg3['Annualized_Alpha'][0])
display(df_reg3['Beta_Values'])

The R-squared value:  0.0164
The Annualized Alpha value:  -0.1808


DP_t-1       0.0080
EP_t-1       0.0027
US10Y_t-1   -0.0010
Name: Beta_Values, dtype: float64

### 2. For each of the three regressions, let’s try to utilize the resulting forecast in a trading strategy.
#### • Build the forecasted SPY returns: $r^{SPY}_{t+1}$. 
##### Note that this denotes the forecast made using $X_{t}$ to forecast the (t + 1) return.
#### • Set the scale of the investment in SPY equal to 100 times the forecasted value:
#### $w_{t} = 100 r^{SPY}_{t+1}$
#### We are not taking this scaling too seriously. We just want the strategy to go bigger in periods where the forecast is high and to withdraw in periods where the forecast is low, or even negative.
#### • Calculate the return on this strategy:
$r^{x}_{t+1} = w_{t}r^{SPY}_{t+1}$
#### You should now have the trading strategy returns, $r^{x}$ for each of the forecasts.

In [53]:
forecast_series = pd.DataFrame()
forecast_series['DP_t-1_regression'] = regression_res(df_returns['SPY'],df_signals['DP_t-1'])[1]
forecast_series['EP_t-1_regression'] = regression_res(df_returns['SPY'],df_signals['EP_t-1'])[1]
forecast_series['3_Var_regression'] = regression_res(df_returns['SPY'],df_signals[['DP_t-1','EP_t-1','US10Y_t-1']])[1]
forecast_series

Unnamed: 0,DP_t-1_regression,EP_t-1_regression,3_Var_regression
1993-03-31,0.0174,0.0083,0.0136
1993-04-30,0.0169,0.0082,0.0131
1993-05-31,0.0174,0.0083,0.0136
1993-06-30,0.0173,0.0081,0.0132
1993-07-31,0.0171,0.0079,0.0132
...,...,...,...
2023-06-30,0.0062,0.0073,0.0057
2023-07-31,0.0056,0.0065,0.0043
2023-08-31,0.0051,0.0061,0.0034
2023-09-30,0.0053,0.0065,0.0038


In [54]:
forecast_series_weights = forecast_series*100
forecast_series_weights['SPY'] = df_returns['SPY'][1:]
forecast_series_weights

Unnamed: 0,DP_t-1_regression,EP_t-1_regression,3_Var_regression,SPY
1993-03-31,1.7353,0.8279,1.3601,0.0224
1993-04-30,1.6878,0.8181,1.3119,-0.0256
1993-05-31,1.7353,0.8279,1.3581,0.0270
1993-06-30,1.7258,0.8083,1.3232,0.0037
1993-07-31,1.7068,0.7856,1.3236,-0.0049
...,...,...,...,...
2023-06-30,0.6220,0.7270,0.5725,0.0648
2023-07-31,0.5554,0.6457,0.4323,0.0327
2023-08-31,0.5078,0.6067,0.3442,-0.0163
2023-09-30,0.5268,0.6490,0.3835,-0.0474


In [55]:
forecast_series_results = forecast_series_weights
for i in forecast_series_results.columns:
    if i != 'SPY':
        forecast_series_results[i] = forecast_series_results[i]*forecast_series_results['SPY']
forecast_series_results

Unnamed: 0,DP_t-1_regression,EP_t-1_regression,3_Var_regression,SPY
1993-03-31,0.0389,0.0186,0.0305,0.0224
1993-04-30,-0.0432,-0.0209,-0.0336,-0.0256
1993-05-31,0.0468,0.0223,0.0366,0.0270
1993-06-30,0.0063,0.0030,0.0049,0.0037
1993-07-31,-0.0083,-0.0038,-0.0064,-0.0049
...,...,...,...,...
2023-06-30,0.0403,0.0471,0.0371,0.0648
2023-07-31,0.0182,0.0211,0.0142,0.0327
2023-08-31,-0.0083,-0.0099,-0.0056,-0.0163
2023-09-30,-0.0250,-0.0308,-0.0182,-0.0474


#### For each strategy, estimate
#### • mean, volatility, Sharpe

In [56]:
df_perf = (forecast_series_results.mean()*12).to_frame('Mean_returns')
df_perf['Vol'] = (forecast_series_results.std()*np.sqrt(12)).to_frame('Vol')
df_perf['Sharpe'] = df_perf['Mean_returns']/df_perf['Vol']
df_perf

Unnamed: 0,Mean_returns,Vol,Sharpe
DP_t-1_regression,0.1095,0.1489,0.7359
EP_t-1_regression,0.1081,0.1289,0.8382
3_Var_regression,0.1251,0.1456,0.8591
SPY,0.1032,0.1493,0.6914


#### • max-drawdown

In [57]:
for i in forecast_series_results.columns:
    print('Max DD values for {}: '.format(i))
    print(Max_Drawdown(forecast_series_results[i]),'\n\n')

Max DD values for DP_t-1_regression: 
[-0.656959756375434, Timestamp('2009-02-28 00:00:00')] 


Max DD values for EP_t-1_regression: 
[-0.3853137224900624, Timestamp('2010-06-30 00:00:00')] 


Max DD values for 3_Var_regression: 
[-0.5246210764733295, Timestamp('2009-02-28 00:00:00')] 


Max DD values for SPY: 
[-0.5079798292463792, Timestamp('2009-02-28 00:00:00')] 




#### • market alpha
#### • market beta
#### • market Information ratio

In [58]:
for i in forecast_series_results.columns:
    if i != 'SPY':
        print('CAPM regression statistics for {}: '.format(i))
        df_reg_res1 = regression_res(forecast_series_results[i],forecast_series_results['SPY'])[0]
        df_reg_res1.index = [i]
        display(df_reg_res1)
        print('\n')


CAPM regression statistics for DP_t-1_regression: 


Unnamed: 0,Beta_Values,Annualized_Alpha,R-Squared,Information_ratio
DP_t-1_regression,0.863,0.0205,0.7487,0.2745




CAPM regression statistics for EP_t-1_regression: 


Unnamed: 0,Beta_Values,Annualized_Alpha,R-Squared,Information_ratio
EP_t-1_regression,0.7358,0.0321,0.7258,0.4759




CAPM regression statistics for 3_Var_regression: 


Unnamed: 0,Beta_Values,Annualized_Alpha,R-Squared,Information_ratio
3_Var_regression,0.7785,0.0448,0.6368,0.5101






### 3. GMO believes a risk premium is compensation for a security’s tendency to lose money at “bad times”. Let’s consider risk characteristics.
#### (a) For both strategies, the market, and GMO, calculate the monthly VaR for π = .05. Just use the quantile of the historic data for this VaR calculation.


In [59]:
df_v1 = forecast_series_results.quantile(0.05).to_frame('VaR_0.05').T
df_v1['GMWAX'] = df_returns['GMWAX'].quantile(0.05)
df_v1

Unnamed: 0,DP_t-1_regression,EP_t-1_regression,3_Var_regression,SPY,GMWAX
VaR_0.05,-0.0523,-0.0539,-0.0641,-0.0723,-0.0468


#### (b) The GMO case mentions that stocks under-performed short-term bonds from 2000-2011. Does the dynamic portfolio above under-perform the risk-free rate over this time?
    In terms of mean returns, the dynamic portfolio performs significantly better than short term bonds. However, the volatility for bonds are very very low resulting in higher sharpe ratio.

In [60]:
X11 = ['Ann_Mean_ret','Ann_Vol','Sharpe']
df_v2 = performance_statistics(forecast_series_results[(forecast_series_results.index>='2000-01-01') & (forecast_series_results.index<='2011-12-31')],12)[X11].iloc[:-1,:].T
df_v2['US3M'] = performance_statistics(df_rf[(df_rf.index>='2000-01-01') & (df_rf.index<='2011-12-31')],12)[X11].T
df_v2

Unnamed: 0,DP_t-1_regression,EP_t-1_regression,3_Var_regression,US3M
Ann_Mean_ret,0.0397,0.0377,0.0615,0.0231
Ann_Vol,0.186,0.1348,0.1589,0.0058
Sharpe,0.2135,0.2798,0.387,3.9866


#### (c) Based on the regression estimates, in how many periods do we estimate a negative risk premium?
    Around 38% of the number of periods have negative risk premium.

In [61]:
forecast_series_results2 = forecast_series_results
forecast_series_results2['US3M'] = df_rf.iloc[1:,:]
for i in forecast_series_results2.columns:
    if i not in ['SPY','US3M']:
        forecast_series_results2['RP_'+i] = forecast_series_results2[i]-forecast_series_results2['US3M']
        forecast_series_results2['RP_'+i] = forecast_series_results2['RP_'+i].apply(lambda x: 1 if x>0 else 0)

1 - forecast_series_results2[['RP_DP_t-1_regression','RP_EP_t-1_regression','RP_3_Var_regression']].sum()/forecast_series_results2.shape[0]

RP_DP_t-1_regression   0.3777
RP_EP_t-1_regression   0.3777
RP_3_Var_regression    0.3750
dtype: float64

#### (d) Do you believe the dynamic strategy takes on extra risk??
    Judging by the tail risk metrics and volatility of the dynamic strategies compared to SPY it 
    does not seem like these strategies take on extra risk on the whole. 
    However, we must keep in mind that the strategies are dependent on running regressions with very little prediction power, so badly estimated parameters could lead to terrible performance. This is not evident in terms of very high volatility and tail risk in our backtesting period
    though.
    Also, 38% of the time periods show under-performance in comparison to short-term bonds.

## 4 Out-of-Sample Forecasting
### This section utilizes data in the file, gmo analysis data.xlsx.
### Reconsider the problem above, of estimating (1) for x. The reported $R^{2}$ was the in-sample $R^{2}$ – it examined how well the forecasts fit in the sample from which the parameters were estimated.
### In particular, focus on the case of using both dividend-price and earnings-price as signals.
### Let’s consider the out-of-sample r-squared. To do so, we need to do the following:

#### • Start at t = 60.
#### • Estimate (1) only using data through time t.
#### • Use the estimated parameters of (1), along with $x_{t+1}$ to calculate the out-of-sample forecast for the following period, t + 1.
$\hat{r}^{SPY}_{t+1} = \hat{\alpha}^{SPY,x}_{t} + (\beta^{SPY,x})
′
x_{t}$

#### • Calculate the t + 1 forecast error,
$\epsilon^{x}_{t+1} = r^{SPY}_{t+1} − \hat{r}^{SPY}_{t+1}$

#### • Move to t = 61, and loop through the rest of the sample. 

#### You now have the time-series of out-of-sample prediction errors, $\epsilon^{x}$.


#### Calculate the time-series of out-of-sample prediction errors $\epsilon^{0}$, which are based on the null forecast:
$\bar{r}^{SPY}_{t+1} = \frac{1}{t}\sum_{i=1}^{t}r^{SPY}_{i}$

$\epsilon^{0}_{t+1} = r^{SPY}_{t+1} − \bar{r}^{SPY}_{t+1}$

In [82]:
df_oos = df_signals[['DP','EP','US10Y']]
df_oos['SPY'] = df_returns['SPY']
oos_ret_arr = pd.DataFrame(index=df_oos.index)
oos_ret_arr['OOS_Ret'] = np.zeros(df_oos.shape[0])
oos_e_0 = pd.DataFrame(index=df_oos.index)
oos_e_0['OOS_E_0'] = np.zeros(df_oos.shape[0])

for i in range(len(df_oos)-60):
    X_reg = df_oos[['DP','EP','US10Y']].iloc[i:i+60,]
    X_reg = sm.add_constant(X_reg)
    Y_reg = df_oos['SPY'][i:i+60]
    model2 = sm.OLS(Y_reg,X_reg)
    results2 = model2.fit()
    oos_ret_arr.iloc[i+60,] = (np.array(results2.params[1:]).dot(np.array(df_oos[['DP','EP','US10Y']].iloc[i+60,]))) + (results2.params[0])
    oos_e_0.iloc[i+60,] = df_oos['SPY'].iloc[i+60,] - df_oos['SPY'].iloc[i:i+60,].mean()
    
df_oos['OOS_Ret'] = oos_ret_arr['OOS_Ret']
df_oos['errors_null'] = oos_e_0['OOS_E_0']
df_oos['errors_X'] = df_oos['SPY'] - df_oos['OOS_Ret']

df_oos = df_oos.iloc[60:,:]
df_oos

Unnamed: 0,DP,EP,US10Y,SPY,OOS_Ret,errors_null,errors_X
1998-02-28,1.5200,3.8700,5.6200,0.0693,0.0217,0.0535,0.0476
1998-03-31,1.4500,3.6700,5.6700,0.0488,0.0244,0.0320,0.0244
1998-04-30,1.4200,3.5400,5.6800,0.0128,0.0255,-0.0044,-0.0127
1998-05-31,1.4300,3.5300,5.5600,-0.0208,0.0257,-0.0386,-0.0465
1998-06-30,1.4400,3.5200,5.4400,0.0426,0.0220,0.0255,0.0206
...,...,...,...,...,...,...,...
2023-06-30,1.5800,3.8800,3.8100,0.0648,0.0223,0.0547,0.0425
2023-07-31,1.5300,3.7600,3.9700,0.0327,0.0295,0.0216,0.0033
2023-08-31,1.5500,3.8900,4.0900,-0.0163,0.0268,-0.0273,-0.0431
2023-09-30,1.5700,4.1100,4.5900,-0.0474,0.0191,-0.0576,-0.0665


### 1. Report the out-of-sample $R^{2}$:

### $R^{2}_{OOS} ≡ 1 − \frac{\sum_{i=61}^{T}(\epsilon^{x}_{i})^{2}}{\sum_{i=61}^{T}(\epsilon^{0}_{i})^{2}}$

#### Note that unlike an in-sample r-squared, the out-of-sample r-squared can be anywhere between (−∞, 1].
#### Did this forecasting strategy produce a positive OOS r-squared?
No, the R-squared value is coming out to be -0.0912

In [84]:
val1 = ((df_oos['errors_X']**2).sum()) / ((df_oos['errors_null']**2).sum())
print('R-Squared value for Out-of-Sample: ',1-(val1))

R-Squared value for Out-of-Sample:  -0.0912


### 2. Re-do problem 3.2 using this OOS forecast. How much better/worse is the OOS strategy compared to the in-sample version of 3.2?


In [85]:
df_oos['w_t'] = 100*df_oos['OOS_Ret']
df_oos['Strategy_ret'] = df_oos['w_t']*df_oos['SPY']
df_oos

Unnamed: 0,DP,EP,US10Y,SPY,OOS_Ret,errors_null,errors_X,w_t,Strategy_ret
1998-02-28,1.5200,3.8700,5.6200,0.0693,0.0217,0.0535,0.0476,2.1732,0.1506
1998-03-31,1.4500,3.6700,5.6700,0.0488,0.0244,0.0320,0.0244,2.4400,0.1190
1998-04-30,1.4200,3.5400,5.6800,0.0128,0.0255,-0.0044,-0.0127,2.5473,0.0326
1998-05-31,1.4300,3.5300,5.5600,-0.0208,0.0257,-0.0386,-0.0465,2.5725,-0.0534
1998-06-30,1.4400,3.5200,5.4400,0.0426,0.0220,0.0255,0.0206,2.1995,0.0937
...,...,...,...,...,...,...,...,...,...
2023-06-30,1.5800,3.8800,3.8100,0.0648,0.0223,0.0547,0.0425,2.2316,0.1446
2023-07-31,1.5300,3.7600,3.9700,0.0327,0.0295,0.0216,0.0033,2.9470,0.0965
2023-08-31,1.5500,3.8900,4.0900,-0.0163,0.0268,-0.0273,-0.0431,2.6815,-0.0436
2023-09-30,1.5700,4.1100,4.5900,-0.0474,0.0191,-0.0576,-0.0665,1.9119,-0.0906


In [93]:
print('Mean return -',df_oos['Strategy_ret'].mean()*12)
print('Volatility -',df_oos['Strategy_ret'].std()*np.sqrt(12))
print('Sharpe ratio -',(df_oos['Strategy_ret'].mean()*12) / (df_oos['Strategy_ret'].std()*np.sqrt(12)))
print('Max Drawdown - ',Max_Drawdown(df_oos['Strategy_ret']))
df_capm = regression_res(df_oos['Strategy_ret'],df_oos['SPY'])[0]
df_capm.index = ['Strategy']
display(df_capm)

Mean return - 0.2538
Volatility - 0.5686
Sharpe ratio - 0.4463
Max Drawdown -  [-0.9613317076813702, Timestamp('2009-04-30 00:00:00')]


Unnamed: 0,Beta_Values,Annualized_Alpha,R-Squared,Information_ratio
Strategy,-0.5219,0.2989,0.0204,0.5311


### 3. Re-do problem 3.3 using this OOS forecast. Is the point-in-time version of the strategy riskier?

In [95]:
print('VaR 0.05 value for this strategy : ',df_oos['Strategy_ret'].quantile(0.05))

VaR 0.05 value for this strategy :  -0.1549


    Compared to the full sub-sample, the mean returns go down significantly during 2000-2011.
    The volatility slightly increasesthus the strategy experiences a lower Sharpe Ratio. Given the lower performance of the strategy and worse tail risk measures compared to SPY, the strategy 
    does take on extra risk.