In [551]:
import pandas as pd
import numpy as np
pd.options.display.float_format = "{:,.4f}".format

import matplotlib.pyplot as plt
import seaborn as sns
import math

import statsmodels.api as sm
from sklearn.linear_model import LinearRegression
from scipy.stats import norm
import scipy.stats as stats

import warnings
warnings.filterwarnings("ignore")

import TA_utils as ta

from typing import Union, List

from collections import defaultdict

import re

In [552]:
def calc_summary_statistics(
        returns: Union[pd.DataFrame, List],
        annual_factor: int = None,
        provided_excess_returns: bool = None,
        rf: pd.Series = None,
        var_quantile: Union[float, List] = .05
    ):
    """
    Return the summary statistics for the total/excess returns passed in.

    Parameters
    ----------
    returns : pd.DataFrame or list
        Periodic returns, one column per asset. A list of named Series/DataFrames
        is outer-merged on the index into a single frame. A column called "date"
        (any capitalisation) is moved into the index.
    annual_factor : int, optional
        Periods per year used for annualisation. When None, 12 is assumed and a
        notice is printed.
    provided_excess_returns : bool, optional
        True if `returns` are already excess returns. When None, True is assumed
        and a notice is printed. When False, `rf` is subtracted from the returns
        for the mean in the Sharpe ratio only.
    rf : pd.Series or pd.DataFrame, optional
        Risk-free rate with the same number of rows as `returns`; it is
        subtracted by position, not by index label.
    var_quantile : float or list of float
        Quantile(s) for the historical VaR / CVaR columns.

    Returns
    -------
    pd.DataFrame
        One row per column of `returns` with mean, volatility, Sharpe, extremes,
        skewness, excess kurtosis, VaR/CVaR and max-drawdown information.
        If the Sharpe ratio cannot be computed, the reason is printed and the
        Sharpe columns are NaN.

    Raises
    ------
    Exception
        If `provided_excess_returns` is False and `rf` has a different length
        from `returns`.
    """
    returns = returns.copy()
    if isinstance(rf, (pd.Series, pd.DataFrame)):
        rf = rf.copy()

    if isinstance(returns, list):
        returns_list = returns[:]
        returns = pd.DataFrame({})
        for series in returns_list:
            returns = returns.merge(series, right_index=True, left_index=True, how='outer')

    # Match the date column case-insensitively and rename whatever spelling was
    # found; non-string column labels are skipped instead of breaking `.str`.
    date_cols = [c for c in returns.columns if isinstance(c, str) and c.lower() == 'date']
    if date_cols:
        returns = returns.rename({date_cols[0]: 'date'}, axis=1)
        returns = returns.set_index('date')
    returns.index.name = 'date'

    if annual_factor is None:
        print('Assuming monthly returns with annualization term of 12')
        annual_factor = 12

    if provided_excess_returns is None:
        print(
            'Assuming excess returns were provided to calculate Sharpe.'
            ' If returns were provided (steady of excess returns), the column "Sharpe" is actually "Mean/Volatility"'
        )
        provided_excess_returns = True
    elif provided_excess_returns is False:
        if rf is not None:
            if len(rf.index) != len(returns.index):
                raise Exception('"rf" index must be the same lenght as "returns"')
            print('"rf" is used to subtract returns to calculate Sharpe, but nothing else')

    summary_statistics = pd.DataFrame(index=returns.columns)
    summary_statistics['Annualized Mean'] = returns.mean() * annual_factor
    summary_statistics['Annualized Vol'] = returns.std() * np.sqrt(annual_factor)
    try:
        if not provided_excess_returns:
            if type(rf) == pd.DataFrame:
                rf = rf.iloc[:, 0].to_list()
            elif type(rf) == pd.Series:
                rf = rf.to_list()
            else:
                raise Exception('"rf" must be either a pd.DataFrame or pd.Series')
            excess_returns = returns.apply(lambda x: x - rf)
            # Volatility is deliberately taken from the raw returns: rf only
            # adjusts the mean (see the notice printed above).
            summary_statistics['Sharpe'] = excess_returns.mean() / returns.std()
        else:
            summary_statistics['Sharpe'] = returns.mean() / returns.std()
    except Exception as e:
        print(f'Could not calculate Sharpe: {e}')
        # Keep the column so the annualised Sharpe below does not raise KeyError.
        summary_statistics['Sharpe'] = np.nan
    summary_statistics['Annualized Sharpe'] = summary_statistics['Sharpe'] * np.sqrt(annual_factor)
    summary_statistics['Min'] = returns.min()
    summary_statistics['Max'] = returns.max()
    summary_statistics['Skewness'] = returns.skew()
    summary_statistics['Excess Kurtosis'] = returns.kurtosis()

    var_quantile = [var_quantile] if isinstance(var_quantile, (float, int)) else var_quantile
    for var_q in var_quantile:
        # Compute the quantile and the tail mean once and reuse them.
        hist_var = returns.quantile(var_q, axis = 0)
        hist_cvar = returns[returns <= hist_var].mean()
        summary_statistics[f'Annualized Historical VaR ({var_q:.2%})'] = hist_var * np.sqrt(annual_factor)
        summary_statistics[f'Historical VaR ({var_q:.2%})'] = hist_var
        summary_statistics[f'Annualized Historical CVaR ({var_q:.2%})'] = hist_cvar * np.sqrt(annual_factor)
        summary_statistics[f'Historical CVaR ({var_q:.2%})'] = hist_cvar

    # Drawdowns are measured on a wealth index compounded from the returns.
    wealth_index = 1000 * (1 + returns).cumprod()
    previous_peaks = wealth_index.cummax()
    drawdowns = (wealth_index - previous_peaks) / previous_peaks

    summary_statistics['Max Drawdown'] = drawdowns.min()
    summary_statistics['Peak'] = [previous_peaks[col][:drawdowns[col].idxmin()].idxmax() for col in previous_peaks.columns]
    summary_statistics['Bottom'] = drawdowns.idxmin()

    # Recovery = first date at/after the bottom on which wealth is back at the
    # pre-drawdown peak; missing when the asset never recovered in the sample.
    recovery_date = []
    for col in wealth_index.columns:
        bottom = drawdowns[col].idxmin()
        prev_max = previous_peaks[col][:bottom].max()
        wealth_after_bottom = wealth_index[col][bottom:]
        recovery_date.append(wealth_after_bottom[wealth_after_bottom >= prev_max].index.min())
    summary_statistics['Recovery'] = recovery_date
    try:
        summary_statistics["Duration (days)"] = [
            (i - j).days if i != "-" else "-" for i, j in
            zip(summary_statistics["Recovery"], summary_statistics["Bottom"])
        ]
    except (AttributeError, TypeError) as e:
        print(f'Cannot calculate "Drawdown Duration" calculation because index are not dates: {str(e)}')

    return summary_statistics

In [553]:
def calc_regression(
        y: Union[pd.DataFrame, pd.Series],
        X: Union[pd.DataFrame, pd.Series],
        intercept: bool = True,
        annual_factor: Union[None, int] = 12,
        warnings: bool = True,
        return_model: bool = False,
        calc_treynor_info_ratios: bool = True
    ):
    """
    Fit an OLS regression of `y` on `X` and summarise it in a one-row DataFrame.

    Parameters
    ----------
    y : pd.Series or single-column pd.DataFrame
        Dependent variable; its name / first column name labels the output row.
    X : pd.DataFrame or pd.Series
        Regressor(s). A Series is promoted to a one-column frame. A column
        called "date" (any capitalisation) is moved into the index.
    intercept : bool
        Add a constant to `X`. Without it, alpha and the information ratio are
        reported as "-" and the row label gets the suffix " no Intercept".
    annual_factor : int
        Periods per year used to annualise alpha, ratios and tracking error.
    warnings : bool
        Print informational notices.
    return_model : bool
        Return the fitted statsmodels results object instead of the summary.
    calc_treynor_info_ratios : bool
        Add the Treynor ratio (single-regressor case only) and the information
        ratio.

    Returns
    -------
    pd.DataFrame or statsmodels results
        One-row summary indexed by the name of `y`, or the fitted results when
        `return_model` is True.
    """
    y = y.copy()
    X = X.copy()

    # The signature allows a Series regressor; promote it up front so the
    # column-based logic below applies to it as well.
    if isinstance(X, pd.Series):
        X = X.to_frame()

    # Rename whatever spelling of "date" was found; skip non-string labels.
    date_cols = [c for c in X.columns if isinstance(c, str) and c.lower() == 'date']
    if date_cols:
        X = X.rename({date_cols[0]: 'date'}, axis=1)
        X = X.set_index('date')
    X.index.name = 'date'

    if warnings:
        print('"calc_regression" assumes excess returns to calculate Information and Treynor Ratios')
    if intercept:
        X = sm.add_constant(X)

    y_name = y.name if isinstance(y, pd.Series) else y.columns[0]

    try:
        model = sm.OLS(y, X, missing="drop")
    except ValueError:
        # Fall back to positional alignment when statsmodels rejects the indexes.
        y = y.reset_index(drop=True)
        X = X.reset_index(drop=True)
        model = sm.OLS(y, X, missing="drop")
        if warnings:
            print(f'"{y_name}" Required to reset indexes to make regression work. Try passing "y" and "X" as pd.DataFrame')
    results = model.fit()

    if return_model:
        return results

    # `params` is labelled by regressor name, so select by position explicitly
    # (.iloc) rather than relying on the deprecated integer fallback of [].
    params = results.params
    inter = params.iloc[0] if intercept else None
    betas = params.iloc[1:] if intercept else params

    summary = dict()
    summary["Alpha"] = inter if inter is not None else '-'
    summary["Annualized Alpha"] = inter * annual_factor if inter is not None else '-'
    summary["R-Squared"] = results.rsquared

    X_assets = X.columns[1:] if intercept else X.columns
    for i, asset_name in enumerate(X_assets):
        summary[f"{asset_name} Beta"] = betas.iloc[i]

    if calc_treynor_info_ratios:
        if len([c for c in X.columns if c != 'const']) == 1:
            # Reduce a one-column DataFrame mean to a scalar so the summary
            # holds plain numbers only.
            y_mean = y.mean()
            if isinstance(y_mean, pd.Series):
                y_mean = y_mean.iloc[0]
            summary["Treynor Ratio"] = (y_mean / betas.iloc[0])
            summary["Annualized Treynor Ratio"] = summary["Treynor Ratio"] * annual_factor
        summary["Information Ratio"] = (inter / results.resid.std()) if intercept else "-"
        summary["Annualized Information Ratio"] = summary["Information Ratio"] * np.sqrt(annual_factor) if intercept else "-"
    summary["Tracking Error"] = results.resid.std()
    summary["Annualized Tracking Error"] = results.resid.std() * np.sqrt(annual_factor)
    summary['Fitted Mean'] = results.fittedvalues.mean()
    summary['Annualized Fitted Mean'] = summary['Fitted Mean'] * annual_factor
    y_name = f"{y_name} no Intercept" if not intercept else y_name
    return pd.DataFrame(summary, index=[y_name])

In [554]:
def format_as_percentage(value):
    """Render a fraction as a percentage string with two decimals (0.5 -> '50.00%')."""
    return f'{value:.2%}'

## Read Data
    NOTES: 
    1) Report Annualized Alpha and Historical VaR. Annualized Historical VaR usually doesn't make sense.
    2) Check whether the question asks for the excess return or the total return!
    3) If you are getting the correct R-squared but the wrong alpha and betas, then you have confused the X and the y.
        "Regress excess returns of GMWAX on excess returns of SPY" means that y = GMWAX and X = SPY.
    4) Calling dropna() before a regression does not change the resulting parameters, since NaN values are dropped automatically.
    5) When you shift the dataframe, shift(1) is the standard procedure for X; you don't touch y, and the first value of X becomes NaN.

In [555]:
# Single source of truth for the workbook path; all three sheets are read in one
# call so the file is opened and parsed only once.
DATA_FILE = 'gmo_analysis_data.xlsx'
_sheets = pd.read_excel(
    DATA_FILE,
    sheet_name=['returns (total)', 'risk-free rate', 'signals'],
    index_col=0,
)
spy_tot_ret = _sheets['returns (total)']   # total returns (GMWAX and SPY columns are used below)
rf = _sheets['risk-free rate']             # risk-free rate (column US3M is used below)
signals = _sheets['signals']               # forecasting signals (DP, EP, US10Y are used below)


## 2.1
    The annualized mean excess return more than doubled after 2011 compared with the period before the case (through 2011), and the annualized Sharpe ratio more than tripled.
    

In [556]:
# Excess returns: subtract the US3M risk-free rate from every total-return column,
# aligned on the index, keeping exactly the rows of the total-return table.
spy_excs_ret = spy_tot_ret.sub(rf['US3M'], axis=0).reindex(spy_tot_ret.index)

In [557]:
# GMWAX excess returns split at the end of 2011, plus the full sample.
gmwax_until_2011 = spy_excs_ret.loc[:'2011', ['GMWAX']]
gmwax_after_2011 = spy_excs_ret.loc['2012':, ['GMWAX']]
gmwax_since_incept = spy_excs_ret[['GMWAX']]

# Same summary for each sample, evaluated in the order listed.
perf_until_2011, perf_after_2011, perf_since_incept = (
    calc_summary_statistics(sample)
    for sample in (gmwax_until_2011, gmwax_after_2011, gmwax_since_incept)
)

agg_perf = pd.concat([perf_until_2011, perf_after_2011, perf_since_incept])
agg_perf.index = ['Until 2011', 'After 2011', 'Since Inception']
agg_perf.loc[:, ['Annualized Mean', 'Annualized Vol', 'Annualized Sharpe']]

Assuming monthly returns with annualization term of 12
Assuming excess returns were provided to calculate Sharpe. If returns were provided (steady of excess returns), the column "Sharpe" is actually "Mean/Volatility"
Assuming monthly returns with annualization term of 12
Assuming excess returns were provided to calculate Sharpe. If returns were provided (steady of excess returns), the column "Sharpe" is actually "Mean/Volatility"
Assuming monthly returns with annualization term of 12
Assuming excess returns were provided to calculate Sharpe. If returns were provided (steady of excess returns), the column "Sharpe" is actually "Mean/Volatility"


Unnamed: 0,Annualized Mean,Annualized Vol,Annualized Sharpe
Until 2011,0.0158,0.125,0.1266
After 2011,0.0364,0.0945,0.3856
Since Inception,0.0249,0.1125,0.2209


## 2.2
    Until 2011, GMWAX had high tail risk; after 2011, its tail risk decreased markedly.
    Comparing the two subsamples (until 2011 and after 2011), the minimum returns differ by only ~3%,
    but the maximum drawdowns are significantly different from each other. The 5% historical VaR also differs substantially.

In [589]:
# GMWAX total returns split at the end of 2011, plus the full sample.
gmwax_tot_until_2011 = spy_tot_ret.loc[:'2011', ['GMWAX']]
gmwax_tot_after_2011 = spy_tot_ret.loc['2012':, ['GMWAX']]
gmwax_tot_since_incept = spy_tot_ret[['GMWAX']]

# Same summary for each sample, evaluated in the order listed.
perf_until_2011_tot, perf_after_2011_tot, perf_since_incept_tot = (
    calc_summary_statistics(sample)
    for sample in (gmwax_tot_until_2011, gmwax_tot_after_2011, gmwax_tot_since_incept)
)

agg_perf_tot = pd.concat([perf_until_2011_tot, perf_after_2011_tot, perf_since_incept_tot])
agg_perf_tot.index = ['Until 2011', 'After 2011', 'Since Inception']
agg_perf_tot.loc[:, ['Min', 'Max Drawdown', 'Historical VaR (5.00%)']]

Assuming monthly returns with annualization term of 12
Assuming excess returns were provided to calculate Sharpe. If returns were provided (steady of excess returns), the column "Sharpe" is actually "Mean/Volatility"
Assuming monthly returns with annualization term of 12
Assuming excess returns were provided to calculate Sharpe. If returns were provided (steady of excess returns), the column "Sharpe" is actually "Mean/Volatility"
Assuming monthly returns with annualization term of 12
Assuming excess returns were provided to calculate Sharpe. If returns were provided (steady of excess returns), the column "Sharpe" is actually "Mean/Volatility"


Unnamed: 0,Min,Max Drawdown,Historical VaR (5.00%)
Until 2011,-0.145,-0.3552,-0.0562
After 2011,-0.1186,-0.2168,-0.0368
Since Inception,-0.145,-0.3552,-0.0468


## 2.3
    GMWAX has a beta well below 1 with respect to SPY: about 0.54 until 2011 and about 0.57 after 2011, so its market exposure barely changed and it cannot be called a high-beta strategy. GMWAX did not deliver alpha in either subsample: the annualized alpha was about -0.6% until 2011 and about -3.3% after 2011. R-squared rose from about 0.51 until 2011 to about 0.75 after 2011, so SPY explains a much larger share of GMWAX's variation in the later period, but the part of the return not explained by the market was negative.

In [615]:
# Excess returns of GMWAX (y) and SPY (X): until 2011, after 2011, full sample.
y1 = spy_excs_ret.loc[:'2011', ['GMWAX']]
X1 = spy_excs_ret.loc[:'2011', ['SPY']]

y2 = spy_excs_ret.loc['2012':, ['GMWAX']]
X2 = spy_excs_ret.loc['2012':, ['SPY']]

y3 = spy_excs_ret[['GMWAX']]
X3 = spy_excs_ret[['SPY']]

# Regress GMWAX on SPY for each sample, in the order listed.
regress_until_2011, regress_after_2011, regress_since_intercept = (
    calc_regression(dep, indep) for dep, indep in ((y1, X1), (y2, X2), (y3, X3))
)

regress_perf = pd.concat([regress_until_2011, regress_after_2011, regress_since_intercept])
regress_perf.index = ['Until 2011', 'After 2011', 'Since Inception']
regress_perf.loc[:, ['Annualized Alpha', 'R-Squared', 'SPY Beta']]


"calc_regression" assumes excess returns to calculate Information and Treynor Ratios
"calc_regression" assumes excess returns to calculate Information and Treynor Ratios
"calc_regression" assumes excess returns to calculate Information and Treynor Ratios


Unnamed: 0,Annualized Alpha,R-Squared,SPY Beta
Until 2011,-0.0058,0.5071,0.5396
After 2011,-0.0327,0.7544,0.5738
Since Inception,-0.0166,0.5821,0.5506


## 3.1

In [524]:
# Forecasting regressions: SPY return on the previous period's signal(s).
y = spy_tot_ret['SPY']
X_dp = signals[['DP']].shift(1)
X_ep = signals[['EP']].shift(1)
X_all = signals.shift(1)

regress_per_dp, regress_per_ep, regress_all_factors = (
    calc_regression(y, lagged_signal) for lagged_signal in (X_dp, X_ep, X_all)
)

regress_perf = pd.concat([regress_per_dp, regress_per_ep, regress_all_factors])
regress_perf.index = ['Dividend-Price', 'Earnings-Price', 'All Factors']

regress_perf.loc[:, ['R-Squared']]

"calc_regression" assumes excess returns to calculate Information and Treynor Ratios
"calc_regression" assumes excess returns to calculate Information and Treynor Ratios
"calc_regression" assumes excess returns to calculate Information and Treynor Ratios


Unnamed: 0,R-Squared
Dividend-Price,0.0094
Earnings-Price,0.0087
All Factors,0.0164


## 3.2

In [525]:
# Forecast return = alpha + beta * lagged DP, scaled by 100 to obtain the weight on SPY.
# The regression summary has a single row labelled by the dependent variable, so
# the coefficients are read by position with .iloc[0] (plain [0] on a
# string-labelled Series is a deprecated positional lookup).
forecast_dp = (X_dp['DP'] * regress_per_dp['DP Beta'].iloc[0]) + regress_per_dp['Alpha'].iloc[0]
forecast_dp = forecast_dp.to_frame().rename(columns = {'DP': 'Forecast DP'}) * 100

# Strategy return = weight * realised SPY return; the column keeps the name
# 'Forecast DP' because later cells look it up under that name.
strat_dp = pd.DataFrame(forecast_dp['Forecast DP'] * spy_tot_ret['SPY'], columns = forecast_dp.columns, index = forecast_dp.index)
strat_dp

Unnamed: 0,Forecast DP
1993-02-28,
1993-03-31,0.0389
1993-04-30,-0.0432
1993-05-31,0.0468
1993-06-30,0.0063
...,...
2023-06-30,0.0403
2023-07-31,0.0182
2023-08-31,-0.0083
2023-09-30,-0.0250


In [526]:
# Forecast return = alpha + beta * lagged EP, scaled by 100 to obtain the weight on SPY.
# The regression summary has a single row labelled by the dependent variable, so
# the coefficients are read by position with .iloc[0] (plain [0] on a
# string-labelled Series is a deprecated positional lookup).
forecast_ep = (X_ep['EP'] * regress_per_ep['EP Beta'].iloc[0]) + regress_per_ep['Alpha'].iloc[0]
forecast_ep = forecast_ep.to_frame().rename(columns = {'EP': 'Forecast EP'}) * 100

# Strategy return = weight * realised SPY return; the column keeps the name
# 'Forecast EP' because later cells look it up under that name.
strat_ep = pd.DataFrame(forecast_ep['Forecast EP'] * spy_tot_ret['SPY'], columns = forecast_ep.columns, index = forecast_ep.index)
strat_ep

Unnamed: 0,Forecast EP
1993-02-28,
1993-03-31,0.0186
1993-04-30,-0.0209
1993-05-31,0.0223
1993-06-30,0.0030
...,...
2023-06-30,0.0471
2023-07-31,0.0211
2023-08-31,-0.0099
2023-09-30,-0.0308


In [527]:
# Forecast return = alpha + lagged signals @ betas, scaled by 100 to obtain the weight on SPY.
# (n x 3) signal matrix times (3 x 1) beta column -> one forecast per date.
forecast_all = (np.array(X_all[['DP', 'EP','US10Y']]) @ np.array(regress_all_factors[['DP Beta', 'EP Beta', 'US10Y Beta']].T))
forecast_all = pd.DataFrame(forecast_all, columns = ['Forecast Return All'], index = X_all.index)
# Alpha is read by position with .iloc[0]: plain [0] on the string-labelled
# summary row is a deprecated positional lookup.
forecast_all['Forecast Return All'] = (forecast_all['Forecast Return All'] + regress_all_factors['Alpha'].iloc[0]) * 100

# Strategy return = weight * realised SPY return; the column keeps the forecast
# name because later cells look it up under that name.
strat_all = pd.DataFrame(forecast_all['Forecast Return All'] * spy_tot_ret['SPY'], columns = forecast_all.columns, index = forecast_all.index)
strat_all

Unnamed: 0,Forecast Return All
1993-02-28,
1993-03-31,0.0305
1993-04-30,-0.0336
1993-05-31,0.0366
1993-06-30,0.0049
...,...
2023-06-30,0.0371
2023-07-31,0.0142
2023-08-31,-0.0056
2023-09-30,-0.0182


In [536]:
# In-sample performance of each forecast-weighted strategy.
strat_dp_summary = calc_summary_statistics(strat_dp)
strat_ep_summary = calc_summary_statistics(strat_ep)
strat_all_summary = calc_summary_statistics(strat_all)

# Number of periods in which the strategy return was below the risk-free rate.
neg_risk_prem_dp = len(strat_dp[strat_dp['Forecast DP'] - rf['US3M'] <0])
neg_risk_prem_ep = len(strat_ep[strat_ep['Forecast EP'] - rf['US3M'] <0])
neg_risk_prem_all = len(strat_all[strat_all['Forecast Return All'] - rf['US3M'] <0])
# Number of periods with a strategy return (the first row is NaN because the
# signals are lagged). The displayed index is month-end, so despite the name
# these appear to be months rather than days.
total_days = len(strat_all.dropna())

agg_perf_strat = pd.concat([strat_dp_summary, strat_ep_summary, strat_all_summary])
agg_perf_strat.index = ['DP', 'EP', 'All']

agg_perf_strat['Negative Risk Premium Period'] = [neg_risk_prem_dp, neg_risk_prem_ep, neg_risk_prem_all]
agg_perf_strat['Total Period'] = [total_days, total_days, total_days]

# The former `pd.concat([agg_perf_strat, agg_perf_market], axis=1)` line was
# removed: `agg_perf_market` is never defined in this notebook (NameError on a
# fresh kernel) and the result of the concat was discarded anyway.



Assuming monthly returns with annualization term of 12
Assuming excess returns were provided to calculate Sharpe. If returns were provided (steady of excess returns), the column "Sharpe" is actually "Mean/Volatility"
Assuming monthly returns with annualization term of 12
Assuming excess returns were provided to calculate Sharpe. If returns were provided (steady of excess returns), the column "Sharpe" is actually "Mean/Volatility"
Assuming monthly returns with annualization term of 12
Assuming excess returns were provided to calculate Sharpe. If returns were provided (steady of excess returns), the column "Sharpe" is actually "Mean/Volatility"


In [537]:
# Regress each strategy's returns on SPY excess returns and keep the market statistics.
market_cols = ['Annualized Alpha', 'SPY Beta', 'Annualized Information Ratio']
spy_excess = spy_excs_ret[['SPY']]

strat_dp_market = calc_regression(strat_dp, spy_excess)[market_cols]
strat_ep_market = calc_regression(strat_ep, spy_excess)[market_cols]
strat_all_market = calc_regression(strat_all, spy_excess)[market_cols]

strat_markets = pd.concat([strat_dp_market, strat_ep_market, strat_all_market])

strat_markets

"calc_regression" assumes excess returns to calculate Information and Treynor Ratios
"calc_regression" assumes excess returns to calculate Information and Treynor Ratios
"calc_regression" assumes excess returns to calculate Information and Treynor Ratios


Unnamed: 0,Annualized Alpha,SPY Beta,Annualized Information Ratio
Forecast DP,0.0411,0.8617,0.549
Forecast EP,0.0498,0.7335,0.7326
Forecast Return All,0.0633,0.7782,0.7212


## 3.3 a)
    All portfolios outperform the risk-free rate

In [538]:
# 5% historical VaR of the in-sample strategies.
agg_perf_strat.loc[:, ['Historical VaR (5.00%)']]

Unnamed: 0,Historical VaR (5.00%)
DP,-0.0523
EP,-0.0539
All,-0.0641


## 3.3 b)

In [539]:
# Give every strategy (and the risk-free rate) the same column name so they can
# be summarised by one loop.
strats = {
    'DP': strat_dp.dropna().rename(columns={'Forecast DP': 'Forecast Return'}),
    'EP': strat_ep.dropna().rename(columns={'Forecast EP': 'Forecast Return'}),
    'All Factors': strat_all.dropna().rename(columns={'Forecast Return All': 'Forecast Return'}),
    'Risk Free Rate': rf['US3M'].to_frame('Forecast Return'),
}

# Summary statistics restricted to 2000-2011, one row per strategy.
strat_summary_0011 = []
for k, v in strats.items():
    strat = v.loc['2000':'2011', 'Forecast Return'].to_frame('Forecast Returns')
    perf_summary = calc_summary_statistics(strat)
    perf_summary.index = [k]
    strat_summary_0011.append(perf_summary)

strat_summary_df_0011 = pd.concat(strat_summary_0011)
strat_summary_df_0011[['Annualized Mean', 'Annualized Vol', 'Annualized Sharpe', 'Max Drawdown']]

Assuming monthly returns with annualization term of 12
Assuming excess returns were provided to calculate Sharpe. If returns were provided (steady of excess returns), the column "Sharpe" is actually "Mean/Volatility"
Assuming monthly returns with annualization term of 12
Assuming excess returns were provided to calculate Sharpe. If returns were provided (steady of excess returns), the column "Sharpe" is actually "Mean/Volatility"
Assuming monthly returns with annualization term of 12
Assuming excess returns were provided to calculate Sharpe. If returns were provided (steady of excess returns), the column "Sharpe" is actually "Mean/Volatility"
Assuming monthly returns with annualization term of 12
Assuming excess returns were provided to calculate Sharpe. If returns were provided (steady of excess returns), the column "Sharpe" is actually "Mean/Volatility"


Unnamed: 0,Annualized Mean,Annualized Vol,Annualized Sharpe,Max Drawdown
DP,0.0397,0.186,0.2135,-0.657
EP,0.0377,0.1348,0.2798,-0.3853
All Factors,0.0615,0.1589,0.387,-0.5246
Risk Free Rate,0.0231,0.0058,3.9866,0.0


## 3.3 c)

In [540]:
# Share of periods in which the strategy return was below the risk-free rate.
neg_share = agg_perf_strat['Negative Risk Premium Period'] / agg_perf_strat['Total Period']
agg_perf_strat['Negative Risk Premium %'] = neg_share.apply(format_as_percentage)
agg_perf_strat.loc[:, ['Negative Risk Premium Period', 'Total Period', 'Negative Risk Premium %']]


Unnamed: 0,Negative Risk Premium Period,Total Period,Negative Risk Premium %
DP,139,368,37.77%
EP,139,368,37.77%
All,138,368,37.50%


## 3.3 d)
    
    Judging by its tail risk and volatility, I don't believe in the dynamic strategy.

## 4.1
    This forecasting strategy produces a negative OOS R-squared for every signal set, which shows that its out-of-sample forecasts are worse than the null forecast, i.e. the expanding-window historical mean of SPY returns.

In [541]:
def OOS_r2_expand(
        y: Union[pd.DataFrame, pd.Series],
        X: Union[pd.DataFrame, pd.Series],
        intercept: bool = True,
        annual_factor: Union[None, int] = 12,
        window_size=60):

    """
    Out-of-sample R-squared of an expanding-window OLS forecast.

    For every position i >= window_size, the model is fitted on rows [0, i) and
    used to forecast row i; the null forecast is the mean of y over the same
    rows. The result is 1 - sum(forecast errors^2) / sum(null errors^2).

    X: Should be shifted by 1 before plugging
    y: The specific column needs to be filtered (a one-column DataFrame is
       reduced to its only column)
    intercept: add a constant to X
    annual_factor: unused; kept for signature compatibility
    window_size: number of initial observations reserved for the first fit

    Returns a tuple (OOS R-squared, fitted results of the last window).

    Raises ValueError when len(y) <= window_size, i.e. when there is nothing to
    forecast out of sample.
    """

    # Accept the container types the signature advertises.
    if isinstance(y, pd.DataFrame):
        y = y.iloc[:, 0]
    if isinstance(X, pd.Series):
        X = X.to_frame()

    if len(y) <= window_size:
        raise ValueError(
            f'Need more than window_size={window_size} observations to forecast out of sample; got {len(y)}'
        )

    # Move a "date" column into the index once, for the training slices AND the
    # prediction rows alike, so both always have the same columns.
    date_cols = [c for c in X.columns if isinstance(c, str) and c.lower() == 'date']
    if date_cols:
        X = X.rename({date_cols[0]: 'date'}, axis=1).set_index('date')
    X = X.rename_axis('date')  # returns a new object; the caller's frame is untouched

    if intercept:
        X = sm.add_constant(X)

    forecast_err, null_err = [], []

    for i in range(window_size, len(y)):
        curry = y.iloc[:i]
        currX = X.iloc[:i]

        try:
            model = sm.OLS(curry, currX, missing="drop").fit()

        except ValueError:
            # Fall back to positional alignment when statsmodels rejects the indexes.
            model = sm.OLS(
                curry.reset_index(drop=True),
                currX.reset_index(drop=True),
                missing="drop"
            ).fit()

        null_forecast = curry.mean()
        # Read the single prediction by position; the label it is stored under
        # is irrelevant here.
        model_pred = np.asarray(model.predict(X.iloc[[i]]))[0]
        actual = y.iloc[i]

        forecast_err.append(model_pred - actual)
        null_err.append(null_forecast - actual)

    RSS = (np.array(forecast_err)**2).sum()
    TSS = (np.array(null_err)**2).sum()

    return ((1 - RSS/TSS), model)


In [542]:
# Out-of-sample R-squared of the expanding-window forecast for each signal set.
# Each call returns (OOS R-squared, fitted results of the last window).

# Dividend-price signal
X_dp = signals[['DP']].shift(1)
y_dp = spy_tot_ret['SPY']
model_dp = OOS_r2_expand(y_dp, X_dp, intercept=True, window_size=60)
OOS_r2_dp, model_dp_params = model_dp
OOS_r2_dp_df = pd.DataFrame({'OOS R-squared': [OOS_r2_dp]}, index=['DP'])

# Earnings-price signal
X_ep = signals[['EP']].shift(1)
y_ep = spy_tot_ret['SPY']
model_ep = OOS_r2_expand(y_ep, X_ep, intercept=True, window_size=60)
OOS_r2_ep, model_ep_params = model_ep
OOS_r2_ep_df = pd.DataFrame({'OOS R-squared': [OOS_r2_ep]}, index=['EP'])

# Both signals together
X_both = signals[['EP', 'DP']].shift(1)
y_both = spy_tot_ret['SPY']
model_both = OOS_r2_expand(y_both, X_both, intercept=True, window_size=60)
OOS_r2_both, model_both_params = model_both
OOS_r2_both_df = pd.DataFrame({'OOS R-squared': [OOS_r2_both]}, index=['DP & EP'])

OOS_r2_agg = pd.concat([OOS_r2_dp_df, OOS_r2_ep_df, OOS_r2_both_df])



In [543]:
# Display the out-of-sample R-squared of the DP, EP and combined DP & EP forecasts.
OOS_r2_agg

Unnamed: 0,OOS R-squared
DP,-0.0021
EP,-0.0064
DP & EP,-0.0172


## 4.2 

In [544]:
def OOS_expand_strat(
        y: Union[pd.DataFrame, pd.Series],
        X: Union[pd.DataFrame, pd.Series],
        intercept: bool = True,
        annual_factor: Union[None, int] = 12,
        window_size=60,
        weight = 100):

    """
    Returns of a strategy whose position is scaled by an expanding-window OLS forecast.

    For every position i >= window_size, the model is fitted on rows [0, i), the
    forecast for row i is multiplied by `weight` to obtain the position, and the
    strategy return is y[i] * position.

    X: Should be shifted by 1 before plugging
    y: The specific column needs to be filtered (a one-column DataFrame is
       reduced to its only column)
    intercept: add a constant to X
    annual_factor: unused; kept for signature compatibility
    window_size: number of initial observations reserved for the first fit
    weight: multiplier turning the forecast into a position

    Returns a DataFrame with one column, 'Strat Returns', indexed by the labels
    of y from position `window_size` onwards (empty when len(y) <= window_size).
    """

    # Accept the container types the signature advertises.
    if isinstance(y, pd.DataFrame):
        y = y.iloc[:, 0]
    if isinstance(X, pd.Series):
        X = X.to_frame()

    # Move a "date" column into the index once, for the training slices AND the
    # prediction rows alike, so both always have the same columns.
    date_cols = [c for c in X.columns if isinstance(c, str) and c.lower() == 'date']
    if date_cols:
        X = X.rename({date_cols[0]: 'date'}, axis=1).set_index('date')
    X = X.rename_axis('date')  # returns a new object; the caller's frame is untouched

    if intercept:
        X = sm.add_constant(X)

    returns = []

    for i in range(window_size, len(y)):
        curry = y.iloc[:i]
        currX = X.iloc[:i]

        try:
            model = sm.OLS(curry, currX, missing="drop").fit()

        except ValueError:
            # Fall back to positional alignment when statsmodels rejects the indexes.
            model = sm.OLS(
                curry.reset_index(drop=True),
                currX.reset_index(drop=True),
                missing="drop"
            ).fit()

        # Read the single prediction by position: indexing the date-labelled
        # result with [0] is a deprecated positional lookup.
        model_pred = np.asarray(model.predict(X.iloc[[i]]))[0]

        w = model_pred * weight
        returns.append(y.iloc[i] * w)

    # Slice the index from window_size instead of counting back from the end:
    # y.iloc[-0:] would return the whole index when no returns were produced.
    df_strat = pd.DataFrame(data = returns, index = y.index[window_size:], columns = ['Strat Returns'])
    return df_strat


In [545]:
# Out-of-sample strategy returns for each signal set (lagged signals, SPY returns).
fund_ret = spy_tot_ret['SPY']

factor_dp = signals[['DP']].shift(1)
OOS_DP_predict = (
    OOS_expand_strat(fund_ret, factor_dp, window_size=60, weight=100)
    .rename(columns={'Strat Returns': 'DP OOS Returns'})
)

factor_ep = signals[['EP']].shift(1)
OOS_EP_predict = (
    OOS_expand_strat(fund_ret, factor_ep, window_size=60, weight=100)
    .rename(columns={'Strat Returns': 'EP OOS Returns'})
)

factor_both = signals[['DP', 'EP']].shift(1)
OOS_BOTH_predict = (
    OOS_expand_strat(fund_ret, factor_both, window_size=60, weight=100)
    .rename(columns={'Strat Returns': 'DP & EP OOS Returns'})
)

OOS_predict_agg = pd.concat([OOS_DP_predict, OOS_EP_predict, OOS_BOTH_predict], axis=1)



In [546]:
# Display the out-of-sample strategy returns for the three signal sets.
OOS_predict_agg

Unnamed: 0,DP OOS Returns,EP OOS Returns,DP & EP OOS Returns
1998-02-28,0.1643,0.0512,0.1154
1998-03-31,0.1375,0.0456,0.1004
1998-04-30,0.0395,0.0143,0.0296
1998-05-31,-0.0619,-0.0225,-0.0446
1998-06-30,0.1092,0.0351,0.0717
...,...,...,...
2023-06-30,0.0409,0.0476,0.0364
2023-07-31,0.0193,0.0220,0.0153
2023-08-31,-0.0090,-0.0105,-0.0067
2023-09-30,-0.0267,-0.0321,-0.0213


In [547]:
# Out-of-sample strategy statistics stacked on top of the in-sample ones for comparison.
oos_perf_strat = calc_summary_statistics(OOS_predict_agg)
perf_is_oos = pd.concat([oos_perf_strat, agg_perf_strat])
perf_is_oos.loc[:, ['Annualized Mean', 'Annualized Vol', 'Annualized Sharpe', 'Max Drawdown']]


Assuming monthly returns with annualization term of 12
Assuming excess returns were provided to calculate Sharpe. If returns were provided (steady of excess returns), the column "Sharpe" is actually "Mean/Volatility"


Unnamed: 0,Annualized Mean,Annualized Vol,Annualized Sharpe,Max Drawdown
DP OOS Returns,0.0796,0.1737,0.4583,-0.5519
EP OOS Returns,0.0824,0.1637,0.5031,-0.5837
DP & EP OOS Returns,0.0968,0.2261,0.4282,-0.7609
DP,0.1095,0.1489,0.7359,-0.657
EP,0.1081,0.1289,0.8382,-0.3853
All,0.1251,0.1456,0.8591,-0.5246


## 4.3 a)

In [548]:
# 5% historical VaR of the out-of-sample strategies.
oos_perf_strat.loc[:, ['Historical VaR (5.00%)']]

Unnamed: 0,Historical VaR (5.00%)
DP OOS Returns,-0.0712
EP OOS Returns,-0.0684
DP & EP OOS Returns,-0.0717


## 4.3 b)

In [549]:
# Give every out-of-sample strategy (and the risk-free rate) the same column
# name so they can be summarised by one loop.
OOS_strats = {
    'DP OOS': OOS_DP_predict.dropna().rename(columns={'DP OOS Returns': 'Forecast Return'}),
    'EP OOS': OOS_EP_predict.dropna().rename(columns={'EP OOS Returns': 'Forecast Return'}),
    'DP & EP Factors OOS': OOS_BOTH_predict.dropna().rename(columns={'DP & EP OOS Returns': 'Forecast Return'}),
    'Risk Free Rate': rf['US3M'].to_frame('Forecast Return'),
}

# Summary statistics restricted to 2000-2011, one row per strategy.
OOS_strat_summary_0011 = []
for k, v in OOS_strats.items():
    strat = v.loc['2000':'2011', 'Forecast Return'].to_frame('Forecast Returns')
    perf_summary = calc_summary_statistics(strat)
    perf_summary.index = [k]
    OOS_strat_summary_0011.append(perf_summary)

OOS_strat_summary_0011 = pd.concat(OOS_strat_summary_0011)
OOS_strat_summary_0011[['Annualized Mean', 'Annualized Vol', 'Annualized Sharpe', 'Max Drawdown']]


Assuming monthly returns with annualization term of 12
Assuming excess returns were provided to calculate Sharpe. If returns were provided (steady of excess returns), the column "Sharpe" is actually "Mean/Volatility"
Assuming monthly returns with annualization term of 12
Assuming excess returns were provided to calculate Sharpe. If returns were provided (steady of excess returns), the column "Sharpe" is actually "Mean/Volatility"
Assuming monthly returns with annualization term of 12
Assuming excess returns were provided to calculate Sharpe. If returns were provided (steady of excess returns), the column "Sharpe" is actually "Mean/Volatility"
Assuming monthly returns with annualization term of 12
Assuming excess returns were provided to calculate Sharpe. If returns were provided (steady of excess returns), the column "Sharpe" is actually "Mean/Volatility"


Unnamed: 0,Annualized Mean,Annualized Vol,Annualized Sharpe,Max Drawdown
DP OOS,-0.0109,0.1632,-0.0667,-0.5519
EP OOS,0.0388,0.1959,0.1979,-0.5837
DP & EP Factors OOS,0.0433,0.2909,0.1488,-0.7609
Risk Free Rate,0.0231,0.0058,3.9866,0.0


In [550]:
# Regress each out-of-sample strategy on SPY excess returns over 2000-2011 and
# keep the market statistics.
oos_market_cols = ['Annualized Alpha', 'SPY Beta', 'Annualized Information Ratio']

strat_OOS_dp_market = calc_regression(
    OOS_DP_predict['2000':'2011'], spy_excs_ret['2000':'2011'][['SPY']]
)[oos_market_cols]
strat_OOS_ep_market = calc_regression(
    OOS_EP_predict['2000':'2011'], spy_excs_ret['2000':'2011'][['SPY']]
)[oos_market_cols]
strat_OOS_both_market = calc_regression(
    OOS_BOTH_predict['2000':'2011'], spy_excs_ret['2000':'2011'][['SPY']]
)[oos_market_cols]

strat_OOS_markets = pd.concat([strat_OOS_dp_market, strat_OOS_ep_market, strat_OOS_both_market])

strat_OOS_markets

"calc_regression" assumes excess returns to calculate Information and Treynor Ratios
"calc_regression" assumes excess returns to calculate Information and Treynor Ratios
"calc_regression" assumes excess returns to calculate Information and Treynor Ratios


Unnamed: 0,Annualized Alpha,SPY Beta,Annualized Information Ratio
DP OOS Returns,-0.0062,0.9522,-0.1249
EP OOS Returns,0.0402,0.2961,0.2118
DP & EP OOS Returns,0.0437,0.0761,0.1502


## 4.3 c)

In [469]:
# Number of periods in which the out-of-sample strategy return was below the risk-free rate.
neg_risk_prem_OOS_dp = len(OOS_DP_predict[OOS_DP_predict['DP OOS Returns'] - rf['US3M'] <0])
neg_risk_prem_OOS_ep = len(OOS_EP_predict[OOS_EP_predict['EP OOS Returns'] - rf['US3M'] <0])
neg_risk_prem_OOS_both = len(OOS_BOTH_predict[OOS_BOTH_predict['DP & EP OOS Returns'] - rf['US3M'] <0])
total_days_OOS = len(OOS_BOTH_predict['DP & EP OOS Returns'])

# Work on a copy: a plain assignment would only alias `oos_perf_strat`, so the
# columns added below would silently appear in the earlier summary table too.
agg_perf_strat_OOS = oos_perf_strat.copy()
agg_perf_strat_OOS['Negative Risk Premium Period'] = [neg_risk_prem_OOS_dp, neg_risk_prem_OOS_ep, neg_risk_prem_OOS_both]
agg_perf_strat_OOS['Total Period'] = [total_days_OOS, total_days_OOS, total_days_OOS]

agg_perf_strat_OOS['Negative Risk Premium %'] = (agg_perf_strat_OOS['Negative Risk Premium Period'] / agg_perf_strat_OOS['Total Period']).apply(format_as_percentage)
agg_perf_strat_OOS[['Negative Risk Premium Period', 'Total Period', 'Negative Risk Premium %']]



Unnamed: 0,Negative Risk Premium Period,Total Period,Negative Risk Premium %
DP OOS Returns,122,309,39.48%
EP OOS Returns,120,309,38.83%
DP & EP OOS Returns,121,309,39.16%


## 4.3 d)
    I don't think that the dynamic strategy takes on much extra risk. When we compare the shares of negative-risk-premium periods of the OOS and in-sample forecasts, they are very close to each other: ~37.5% in sample versus ~39.5% out of sample.