In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import statsmodels as sm
# Notebook-wide plotting defaults.
FIGURE_SIZE = (12, 9)  # (width, height) handed to matplotlib's figure.figsize
sns.set_style('darkgrid')
plt.rcParams['figure.figsize'] = FIGURE_SIZE
# Optional: uncomment to render floats with 3 decimals in DataFrame output.
# pd.options.display.float_format = '{:.3f}'.format

In [2]:
# Read the three sheets in one pass so the workbook is opened and parsed once
# and the path/parse options live in a single place.
DATA_PATH = 'gmo_analysis_data.xlsx'
SHEET_NAMES = ['signals', 'returns (total)', 'risk-free rate']

# With a list for sheet_name, read_excel returns a dict keyed by sheet name;
# index_col / parse_dates are applied to every sheet.
sheets = pd.read_excel(DATA_PATH, sheet_name=SHEET_NAMES, index_col=0, parse_dates=[0])

signals = sheets['signals']
rets = sheets['returns (total)']
rf = sheets['risk-free rate']

In [3]:
# Quick look at the first rows of the returns and the signals.
display(rets.head(), signals.head())

Unnamed: 0_level_0,SPY,GMWAX
date,Unnamed: 1_level_1,Unnamed: 2_level_1
1996-11-30,0.073004,0.045181
1996-12-31,-0.023808,-0.011289
1997-01-31,0.061786,0.014735
1997-02-28,0.009566,0.022265
1997-03-31,-0.044139,-0.015152


Unnamed: 0_level_0,DP,EP,US10Y
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1996-11-30,2.01,5.14,6.06
1996-12-31,2.0,5.21,6.43
1997-01-31,1.95,5.12,6.53
1997-02-28,1.88,4.98,6.56
1997-03-31,1.9,5.08,6.92


In [4]:
def lagged_regression(y, X, delta_shift = 1, scale = 100, verbose = True):
    """
        Regress y on lagged X (linear regression with intercept) and build a
        strategy whose weight is proportional to the fitted forecast.

        y: dated pd.Series of returns to forecast
        X: dated pd.DataFrame of signals, 1 column for each signal
        delta_shift: number of rows X is shifted by, so that the row of y at
            position t is paired with the row of X at position t - delta_shift
        scale: multiplier that turns the fitted forecast into a weight
        verbose: if True, print the fitted slope coefficients

        returns a dict with
            'stats': 1-row pd.DataFrame with the intercept ('alpha') and the
                in-sample 'r^2', indexed by the comma-joined signal names
            'strategy_rets': pd.Series of scale * forecast * realized y
            'y_pred': pd.Series of fitted values

        raises ValueError if no date has both lagged signals and a value of y

        NOTE: the model is fit and evaluated on the same rows, so forecasts,
        weights and r^2 are all in-sample.
    """
    X_lagged = X.shift(delta_shift).dropna()
    y_obs = y.dropna()

    # Align on dates present (and non-missing) in both inputs. A plain
    # y.loc[X.index] raises KeyError as soon as X carries a date y lacks,
    # and NaNs left in y would make the fit fail.
    common = X_lagged.index.intersection(y_obs.index)
    if len(common) == 0:
        raise ValueError('no overlapping non-missing observations between y and lagged X')
    X_lagged = X_lagged.loc[common]
    y_obs = y_obs.loc[common]

    reg = LinearRegression()
    reg.fit(X_lagged, y_obs)
    r2 = reg.score(X_lagged, y_obs)
    yhat = reg.predict(X_lagged)

    name = ', '.join(X_lagged.columns)
    if verbose:
        print(reg.coef_)

    # Position size is proportional to the forecast; return = weight * realized y.
    w = scale * yhat
    strategy_rets = (w * y_obs).rename(name)
    return {
        'stats': pd.DataFrame({
            'alpha': reg.intercept_,
            'r^2': r2,
        }, index = [name]),
        'strategy_rets': strategy_rets,
        'y_pred': pd.Series(yhat, index=y_obs.index)
    }

# Disabled: stacks the stats rows of the DP-only, EP-only and all-signal fits.
# pd.concat([lagged_regression(rets['SPY'], signals[['DP']])['stats'], 
#            lagged_regression(rets['SPY'], signals[['EP']])['stats'],
#            lagged_regression(rets['SPY'], signals)['stats'],],axis = 0)

# Fit SPY on the lagged DP signal alone and show its summary row.
dp_fit = lagged_regression(rets['SPY'], signals[['DP']])
dp_fit['stats']

[0.01135803]


Unnamed: 0,alpha,r^2
DP,-0.012297,0.008732


In [11]:
# Fitted SPY forecasts from each signal set, one column per specification.
forecast_specs = [
    ('DP', signals[['DP']]),
    ('EP', signals[['EP']]),
    ('Multi', signals),
]
all_yhat = pd.concat(
    [lagged_regression(rets['SPY'], spec_X)['y_pred'] for _, spec_X in forecast_specs],
    axis=1,
    keys=[spec_label for spec_label, _ in forecast_specs],
)
display(all_yhat)

[0.01135803]
[0.00257778]
[ 0.00911421  0.00214737 -0.00087332]


Unnamed: 0_level_0,DP,EP,Multi
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1996-12-31,0.010532,0.010085,0.009417
1997-01-31,0.010419,0.010265,0.009153
1997-02-28,0.009851,0.010033,0.008417
1997-03-31,0.009056,0.009673,0.007452
1997-04-30,0.009283,0.009930,0.007535
...,...,...,...
2023-08-31,0.005081,0.006528,0.003904
2023-09-30,0.005308,0.006863,0.004261
2023-10-31,0.005535,0.007610,0.004629
2023-11-30,0.006103,0.007610,0.004832


In [8]:
# Strategy returns for each signal set; every column keeps the name that
# lagged_regression assigns (the comma-joined signal names).
signal_sets = [signals[['DP']], signals[['EP']], signals]
all_strat_rets = pd.concat(
    [lagged_regression(rets['SPY'], sig)['strategy_rets'] for sig in signal_sets],
    axis=1,
)
display(all_strat_rets)
# Raw SPY returns shown underneath for comparison.
display(rets[['SPY']])

[0.01135803]
[0.00257778]
[ 0.00911421  0.00214737 -0.00087332]


Unnamed: 0_level_0,DP,EP,"DP, EP, US10Y"
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1996-12-31,-0.025076,-0.024011,-0.022421
1997-01-31,0.064374,0.063426,0.056554
1997-02-28,0.009423,0.009598,0.008051
1997-03-31,-0.039972,-0.042694,-0.032893
1997-04-30,0.058115,0.062167,0.047170
...,...,...,...
2023-08-31,-0.008257,-0.010609,-0.006345
2023-09-30,-0.025155,-0.032525,-0.020194
2023-10-31,-0.012016,-0.016521,-0.010049
2023-11-30,0.055746,0.069516,0.044134


Unnamed: 0_level_0,SPY
date,Unnamed: 1_level_1
1996-11-30,0.073004
1996-12-31,-0.023808
1997-01-31,0.061786
1997-02-28,0.009566
1997-03-31,-0.044139
...,...
2023-08-31,-0.016252
2023-09-30,-0.047393
2023-10-31,-0.021709
2023-11-30,0.091344


In [7]:
def risk_metrics(df, adj = 12):
    """
        Summary performance statistics for each column of a returns DataFrame.

        df: dated DataFrame of returns, 1 column for each ticker
        adj: number of return periods per year used for annualization
            (default 12)

        returns a pd.DataFrame with one row per column of df and the columns
        'mean_annualized', 'vol_annualized', 'sharpe_annualized', 'VaR (0.05)'
    """
    # NOTE: adj is deliberately not reassigned here; overriding it inside the
    # body would silently ignore the value passed by the caller.
    mean = df.mean()
    vol = df.std()
    return pd.DataFrame({
        'mean_annualized': mean * adj,
        'vol_annualized': vol * np.sqrt(adj),
        # Mean over standard deviation; no risk-free rate is subtracted here.
        'sharpe_annualized': mean / vol * np.sqrt(adj),
        # Empirical 5% quantile of the per-period returns (not annualized).
        'VaR (0.05)': df.quantile(0.05),
    })

# Month-end observations, so annualize with 12 periods per year.
risk_metrics(all_strat_rets, adj=12)

Unnamed: 0,mean_annualized,vol_annualized,sharpe_annualized,VaR (0.05)
DP,0.102007,0.165923,0.614784,-0.055854
EP,0.092969,0.127325,0.730168,-0.057908
"DP, EP, US10Y",0.113018,0.154491,0.731551,-0.066356
