# Backtesting estimation strategies (stocks, corp bonds, govt bonds, gold)

We will compare the following four estimation strategies.  We will locate along the capital allocation line, so that all portfolios have the same expected return as the 1/N portfolio  (which is the cross-sectional average of the historical average returns).
- Est-All
- Est-SD-Corr
- Est-SD
- Est-None

In [1]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from scipy.optimize import minimize
from cvxopt import matrix
from cvxopt.solvers import qp as Solver, options as SolverOptions

Import data

In [2]:
df = pd.read_csv('https://www.dropbox.com/s/6o4wb38za6g514q/sbbg.csv?dl=1',index_col=['Year'])
df.head()

Unnamed: 0_level_0,TBills,S&P 500,Gold,Corporates,Treasuries
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1968,0.053383,0.108149,0.192439,0.048451,0.032746
1969,0.066667,-0.082414,-0.160667,-0.020252,-0.05014
1970,0.063917,0.035611,0.06163,0.056496,0.167547
1971,0.043325,0.142212,0.166934,0.140015,0.097869
1972,0.040725,0.187554,0.483265,0.114091,0.028184


Create optimization functions for tangency, global minimum variance, and capital allocation

In [3]:
def tangency(means, cov, rf):
    n = len(means)
    def f(w):
        mn = w @ means
        sd = np.sqrt(w @ cov @ w)
        return -(mn - rf) / sd
    # Initial guess (equal-weighted)
    w0 = (1/n)*np.ones(n)
    # Constraint: fully-invested portfolio
    A = np.ones(n)
    b = 1
    cons = [{"type": "eq", "fun": lambda x: A @ x - b}]
    # No short-sale constraint
    bnds = [(None, None) for i in range(n)] 
    # Optimization
    TOL = 10**(-10)
    wgts = minimize(f, w0, bounds=bnds, constraints=cons, options={'ftol':TOL}).x
    return wgts

In [4]:
def gmv(means, cov):
    n = len(means)
    Q = matrix(cov, tc="d")
    p = matrix(np.zeros(n), (n, 1), tc="d")
    # No short-sale constraint    
    G = matrix(np.zeros((n,n)), tc="d")
    h = matrix(np.zeros(n), (n, 1), tc="d")
    # Constraint: fully-invested portfolio
    A = matrix(np.ones(n), (1, n), tc="d")
    b = matrix([1], (1, 1), tc="d")
    sol = Solver(Q, p, G, h, A, b)
    wgts = np.array(sol["x"]).flatten() if sol["status"] == "optimal" else np.array(n * [np.nan])
    return wgts
SolverOptions['show_progress'] = False

In [5]:
def capital_allocation(mean, sd, rf, target_expret):
    ''' locate on CAL to match a target expected return'''
    return (target_expret - rf) / (mean - rf)

## Estimate inputs on rolling windows

Parameters

In [6]:
# Estimation window
WINDOW = 20

# Define the risky asset list
RISKY_ASSETS = ['S&P 500', 'Gold', 'Corporates', 'Treasuries']

# Number of risky assets
n = len(RISKY_ASSETS)

# Length of out-of-sample window
T = len(df)-WINDOW

Standardize column names

In [7]:
asset_list = [str(i) for i in range(n)]
ret_list   = ['r' + asset for asset in asset_list]
df.columns = ['rf'] + ret_list
df.head(2)

Unnamed: 0_level_0,rf,r0,r1,r2,r3
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1968,0.053383,0.108149,0.192439,0.048451,0.032746
1969,0.066667,-0.082414,-0.160667,-0.020252,-0.05014


Estimate expected returns and standard deviations based on historical rolling windows

- Note: rolling command includes current year in calculation

In [8]:
for asset in asset_list:
    df['mn' + asset]=df['r'+asset].rolling(WINDOW).mean()
    df['sd' + asset]=df['r'+asset].rolling(WINDOW).std()

Estimate correlations

In [9]:
corrs = df[ret_list].rolling(WINDOW, min_periods=WINDOW).corr()
corrs.tail(8)

Unnamed: 0_level_0,Unnamed: 1_level_0,r0,r1,r2,r3
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020,r0,1.0,0.070275,0.475592,-0.602105
2020,r1,0.070275,1.0,0.556898,0.264032
2020,r2,0.475592,0.556898,1.0,-0.134618
2020,r3,-0.602105,0.264032,-0.134618,1.0
2021,r0,1.0,-0.027444,0.412879,-0.647386
2021,r1,-0.027444,1.0,0.585691,0.31351
2021,r2,0.412879,0.585691,1.0,-0.065506
2021,r3,-0.647386,0.31351,-0.065506,1.0


In [10]:
# add to dataframe
corr_list = []
for j, asset in enumerate(asset_list):
    for k in range(j+1,n):
        df['c'+asset+str(k)]=corrs.loc[(slice(None),'r'+asset),'r'+str(k)].values
# Reset index to ease looping over years
df = df.reset_index()
df

Unnamed: 0,Year,rf,r0,r1,r2,r3,mn0,sd0,mn1,sd1,mn2,sd2,mn3,sd3,c01,c02,c03,c12,c13,c23
0,1968,0.053383,0.108149,0.192439,0.048451,0.032746,,,,,,,,,,,,,,
1,1969,0.066667,-0.082414,-0.160667,-0.020252,-0.05014,,,,,,,,,,,,,,
2,1970,0.063917,0.035611,0.06163,0.056496,0.167547,,,,,,,,,,,,,,
3,1971,0.043325,0.142212,0.166934,0.140015,0.097869,,,,,,,,,,,,,,
4,1972,0.040725,0.187554,0.483265,0.114091,0.028184,,,,,,,,,,,,,,
5,1973,0.070317,-0.14308,0.73493,0.04318,0.036587,,,,,,,,,,,,,,
6,1974,0.0783,-0.259018,0.670379,-0.043807,0.019886,,,,,,,,,,,,,,
7,1975,0.05775,0.369951,-0.252,0.1105,0.036053,,,,,,,,,,,,,,
8,1976,0.049742,0.23831,-0.040642,0.197528,0.159846,,,,,,,,,,,,,,
9,1977,0.052692,-0.069797,0.230769,0.099547,0.0129,,,,,,,,,,,,,,


In [11]:
# Save a copy of the data
data = df.copy()

# Create dataframe to store the results for each strategy
results = pd.DataFrame(dtype=float, 
    columns=['sr','avg_ret','sd_ret'], 
    index = ['est_all', 'est_sd_corr', 'est_sd','est_none'])

## Est-All

In [12]:
# Load data
df = data.copy()

# Make lists of means, sds, weights columns
mn_list = ['mn'+asset for asset in asset_list]
sd_list = ['sd'+asset for asset in asset_list] 
wgt_list = ['wgt'+asset for asset in asset_list] 

# Initialize empty columns for risky asset weights, risky portfolio return, CAL location, CAL portfolio return
for wgt in wgt_list:
    df[wgt] = np.nan
df['risky_ret'] = np.nan
df['wgt_risky'] = np.nan 
df['port_ret']  = np.nan

# Choose optimal portfolios each time period
for i in np.arange(WINDOW,WINDOW+T):
    # Pull estimation inputs at each point in time
    means = df[mn_list].iloc[i-1].values
    sds   = df[sd_list].iloc[i-1].values
    C  = np.identity(n)
    for j, asset in enumerate(asset_list):
        for k in range(j+1,n):
            C[j, k] = C[k, j] = df.loc[i-1,'c'+asset+str(k)]  
    cov = np.diag(sds) @ C @ np.diag(sds)

    # Pull current period returns
    rf = df.loc[i,'rf']
    rets = df.loc[i,ret_list].values

    ##### Note: all portfolio weights considered to be beginning of period weights
    ##### (so multiply by contemporaneous realized returns)
    wgts = tangency(means,cov,rf)
    df.loc[i,wgt_list] = wgts
    df.loc[i,'risky_ret'] = wgts @ rets

    # Capital allocation and overall portfolio return
    risky_expret = wgts @ means
    risky_sd     = np.sqrt(wgts @ cov @ wgts)
    target       = means.mean()
    df.loc[i,'wgt_risky'] = capital_allocation(risky_expret, risky_sd, rf, target)
    df.loc[i,'port_ret']  = rf + df.loc[i,'wgt_risky']*(df.loc[i,'risky_ret'] -rf)

In [13]:
df = df.dropna()
df[['Year']+wgt_list+['rf','risky_ret','wgt_risky','port_ret']].head()

Unnamed: 0,Year,wgt0,wgt1,wgt2,wgt3,rf,risky_ret,wgt_risky,port_ret
20,1988,0.046741,0.230671,1.734061,-1.011472,0.066675,0.150454,0.734038,0.128172
21,1989,0.008168,0.254833,2.250273,-1.513275,0.081117,0.084443,0.500912,0.082783
22,1990,0.125408,0.194271,1.330005,-0.649684,0.074933,0.032465,0.844489,0.039069
23,1991,0.058111,0.187457,1.680626,-0.926193,0.05375,0.160623,0.776417,0.136728
24,1992,0.064038,0.174833,1.523487,-0.762359,0.034317,0.108731,0.856921,0.098084


In [14]:
avg_ret = df.port_ret.mean()
sd_ret  = df.port_ret.std()
avg_rf  = df.rf.mean()
sr = (avg_ret - avg_rf)/sd_ret
print(f'Avg return of Est-All:\t {avg_ret: .4f}')
print(f'SD(return) of Est-All:\t {sd_ret: .4f}')
print(f'Sharpe ratio of Est-All: {sr: .4f}')

# Save results
results.loc['est_all','avg_ret'] = avg_ret
results.loc['est_all','sd_ret']  = sd_ret
results.loc['est_all','sr']      = sr

Avg return of Est-All:	  0.0766
SD(return) of Est-All:	  0.0651
Sharpe ratio of Est-All:  0.7452


## Est-SD-Corr

In [15]:
# Load data
df = data.copy()

# Initialize empty columns for risky asset weights, risky portfolio return, CAL location, CAL portfolio return
for wgt in wgt_list:
    df[wgt] = np.nan
df['risky_ret'] = np.nan
df['wgt_risky'] = np.nan 
df['port_ret']  = np.nan

# Choose optimal portfolios each time period
for i in np.arange(WINDOW,WINDOW+T):
    # Pull estimation inputs at each point in time
    means = df[mn_list].iloc[i-1].values
    sds   = df[sd_list].iloc[i-1].values
    C  = np.identity(n)
    for j, asset in enumerate(asset_list):
        for k in range(j+1,n):
            C[j, k] = C[k, j] = df.loc[i-1,'c'+asset+str(k)]  
    cov = np.diag(sds) @ C @ np.diag(sds)

    # Pull current period returns
    rf = df.loc[i,'rf']
    rets = df.loc[i,ret_list].values

    ##### Note: all portfolio weights considered to be beginning of period weights
    ##### (so multiply by contemporaneous realized returns)
    wgts = gmv(means,cov)                                       # <------------ different
    df.loc[i,wgt_list] = wgts
    df.loc[i,'risky_ret'] = wgts @ rets

    # Capital allocation and overall portfolio return
    risky_expret = wgts @ (np.ones(n)*means.mean())             # <------------ different
    risky_sd     = np.sqrt(wgts @ cov @ wgts)
    target       = means.mean()
    df.loc[i,'wgt_risky'] = capital_allocation(risky_expret, risky_sd, rf, target)
    df.loc[i,'port_ret']  = rf + df.loc[i,'wgt_risky']*(df.loc[i,'risky_ret'] -rf)

In [16]:
df = df.dropna()
df[['Year']+wgt_list+['rf','risky_ret','wgt_risky','port_ret']].head()

Unnamed: 0,Year,wgt0,wgt1,wgt2,wgt3,rf,risky_ret,wgt_risky,port_ret
20,1988,0.058566,0.127085,0.864139,-0.04979,0.066675,0.116261,1.0,0.116261
21,1989,0.053563,0.127204,0.868483,-0.04925,0.081117,0.142438,1.0,0.142438
22,1990,0.072808,0.138003,0.841629,-0.05244,0.074933,0.042734,1.0,0.042734
23,1991,0.049981,0.141893,1.036918,-0.228793,0.05375,0.152264,1.0,0.152264
24,1992,0.041519,0.14452,1.061836,-0.247875,0.034317,0.100772,1.0,0.100772


In [17]:
avg_ret = df.port_ret.mean()
sd_ret  = df.port_ret.std()
avg_rf  = df.rf.mean()
sr = (avg_ret - avg_rf)/sd_ret
print(f'Avg return of Est-SD-Corr:\t {avg_ret: .4f}')
print(f'SD(return) of Est-SD-Corr:\t {sd_ret: .4f}')
print(f'Sharpe ratio of Est-SD-Corr:\t {sr: .4f}')

# Save results
results.loc['est_sd_corr','avg_ret'] = avg_ret
results.loc['est_sd_corr','sd_ret']  = sd_ret
results.loc['est_sd_corr','sr']      = sr


Avg return of Est-SD-Corr:	  0.0855
SD(return) of Est-SD-Corr:	  0.0673
Sharpe ratio of Est-SD-Corr:	  0.8534


## Est-SD

In [18]:
# Load data
df = data.copy()

# Initialize empty columns for risky asset weights, risky portfolio return, CAL location, CAL portfolio return
for wgt in wgt_list:
    df[wgt] = np.nan
df['risky_ret'] = np.nan
df['wgt_risky'] = np.nan 
df['port_ret']  = np.nan

# Choose optimal portfolios each time period
for i in np.arange(WINDOW,WINDOW+T):
    # Pull estimation inputs at each point in time
    means = df[mn_list].iloc[i-1].values
    sds   = df[sd_list].iloc[i-1].values
    C  = np.identity(n)                                         # <------------ different
    cov = np.diag(sds) @ C @ np.diag(sds)

    # Pull current period returns
    rf = df.loc[i,'rf']
    rets = df.loc[i,ret_list].values

    ##### Note: all portfolio weights considered to be beginning of period weights
    ##### (so multiply by contemporaneous realized returns)
    wgts = gmv(means,cov)                                       # <------------ different
    df.loc[i,wgt_list] = wgts
    df.loc[i,'risky_ret'] = wgts @ rets

    # Capital allocation and overall portfolio return
    risky_expret = wgts @ (np.ones(n)*means.mean())             # <------------ different
    risky_sd     = np.sqrt(wgts @ cov @ wgts)
    target       = means.mean()
    df.loc[i,'wgt_risky'] = capital_allocation(risky_expret, risky_sd, rf, target)
    df.loc[i,'port_ret']  = rf + df.loc[i,'wgt_risky']*(df.loc[i,'risky_ret'] -rf)

In [19]:
df = df.dropna()
df[['Year']+wgt_list+['rf','risky_ret','wgt_risky','port_ret']].head()

Unnamed: 0,Year,wgt0,wgt1,wgt2,wgt3,rf,risky_ret,wgt_risky,port_ret
20,1988,0.152645,0.027162,0.455033,0.36516,0.066675,0.119788,1.0,0.119788
21,1989,0.151601,0.026109,0.453626,0.368664,0.081117,0.18399,1.0,0.18399
22,1990,0.144025,0.025389,0.463993,0.366593,0.074933,0.046301,1.0,0.046301
23,1991,0.138614,0.024969,0.46208,0.374337,0.05375,0.178174,1.0,0.178174
24,1992,0.134681,0.025028,0.46337,0.376921,0.034317,0.100329,1.0,0.100329


In [20]:
avg_ret = df.port_ret.mean()
sd_ret  = df.port_ret.std()
avg_rf  = df.rf.mean()
sr = (avg_ret - avg_rf)/sd_ret
print(f'Avg return of Est-SD:\t {avg_ret: .4f}')
print(f'SD(return) of Est-SD:\t {sd_ret: .4f}')
print(f'Sharpe ratio of Est-SD:\t {sr: .4f}')

# Save results
results.loc['est_sd','avg_ret'] = avg_ret
results.loc['est_sd','sd_ret']  = sd_ret
results.loc['est_sd','sr']      = sr


Avg return of Est-SD:	  0.0860
SD(return) of Est-SD:	  0.0656
Sharpe ratio of Est-SD:	  0.8825


These weights are proportional to 1/variance_i.

In [21]:
df[['Year']+wgt_list+sd_list].head()

Unnamed: 0,Year,wgt0,wgt1,wgt2,wgt3,sd0,sd1,sd2,sd3
20,1988,0.152645,0.027162,0.455033,0.36516,0.165083,0.39779,0.095434,0.105861
21,1989,0.151601,0.026109,0.453626,0.368664,0.164925,0.392808,0.091886,0.103374
22,1990,0.144025,0.025389,0.463993,0.366593,0.16751,0.394681,0.091746,0.101932
23,1991,0.138614,0.024969,0.46208,0.374337,0.172186,0.39943,0.09283,0.102926
24,1992,0.134681,0.025028,0.46337,0.376921,0.172117,0.395042,0.092859,0.101954


In [22]:
x = 1/(df[sd_list]**2)
x['sum'] = x[sd_list].sum(axis=1)
for i, s in enumerate(sd_list):
    x['end_wgt'+str(i)] = x[s] / x['sum']
x.head()

Unnamed: 0,sd0,sd1,sd2,sd3,sum,end_wgt0,end_wgt1,end_wgt2,end_wgt3
20,36.69406,6.31963,109.797602,89.232929,242.044221,0.151601,0.026109,0.453626,0.368664
21,36.764464,6.480975,118.441023,93.578258,255.26472,0.144025,0.025389,0.463993,0.366593
22,35.638604,6.419578,118.803508,96.244458,257.106148,0.138614,0.024969,0.46208,0.374337
23,33.729013,6.267847,116.044948,94.39495,250.436758,0.134681,0.025028,0.46337,0.376921
24,33.75628,6.407872,115.971159,96.204457,252.339769,0.133773,0.025394,0.459583,0.38125


## Est-None

In [23]:
# Load data
df = data.copy()

# Initialize empty columns for risky asset weights, risky portfolio return, CAL location, CAL portfolio return
for wgt in wgt_list:
    df[wgt] = np.nan
df['risky_ret'] = np.nan
df['wgt_risky'] = np.nan 
df['port_ret']  = np.nan

# Choose optimal portfolios each time period
for i in np.arange(WINDOW,WINDOW+T):
    # Pull estimation inputs at each point in time
    means = df[mn_list].iloc[i-1].values
    sds   = df[sd_list].iloc[i-1].values
    means = np.ones(n)*means.mean()                             # <------------ different
    sds   = np.ones(n)*sds.mean()                               # <------------ different
    C  = np.identity(n)                                         # <------------ different
    cov = np.diag(sds) @ C @ np.diag(sds)

    # Pull current period returns
    rf = df.loc[i,'rf']
    rets = df.loc[i,ret_list].values

    ##### Note: all portfolio weights considered to be beginning of period weights
    ##### (so multiply by contemporaneous realized returns)
    wgts = gmv(means,cov)                                       # <------------ different
    df.loc[i,wgt_list] = wgts
    df.loc[i,'risky_ret'] = wgts @ rets

    # Capital allocation and overall portfolio return
    risky_expret = wgts @ (np.ones(n)*means.mean())             # <------------ different
    risky_sd     = np.sqrt(wgts @ cov @ wgts)
    target       = means.mean()
    df.loc[i,'wgt_risky'] = capital_allocation(risky_expret, risky_sd, rf, target)
    df.loc[i,'port_ret']  = rf + df.loc[i,'wgt_risky']*(df.loc[i,'risky_ret'] -rf)

In [24]:
df = df.dropna()
df[['Year']+wgt_list+['rf','risky_ret','wgt_risky','port_ret']].head()

Unnamed: 0,Year,wgt0,wgt1,wgt2,wgt3,rf,risky_ret,wgt_risky,port_ret
20,1988,0.25,0.25,0.25,0.25,0.066675,0.060455,1.0,0.060455
21,1989,0.25,0.25,0.25,0.25,0.081117,0.156819,1.0,0.156819
22,1990,0.25,0.25,0.25,0.25,0.074933,0.017043,1.0,0.017043
23,1991,0.25,0.25,0.25,0.25,0.05375,0.133691,1.0,0.133691
24,1992,0.25,0.25,0.25,0.25,0.034317,0.058067,1.0,0.058067


In [25]:
avg_ret = df.port_ret.mean()
sd_ret  = df.port_ret.std()
avg_rf  = df.rf.mean()
sr = (avg_ret - avg_rf)/sd_ret
print(f'Avg return of Est-None:\t\t {avg_ret: .4f}')
print(f'SD(return) of Est-None:\t\t {sd_ret: .4f}')
print(f'Sharpe ratio of Est-None:\t {sr: .4f}')

# Save results
results.loc['est_none','avg_ret'] = avg_ret
results.loc['est_none','sd_ret']  = sd_ret
results.loc['est_none','sr']      = sr


Avg return of Est-None:		  0.0825
SD(return) of Est-None:		  0.0663
Sharpe ratio of Est-None:	  0.8207


## Comparing the results

In [26]:
results.round(4)

Unnamed: 0,sr,avg_ret,sd_ret
est_all,0.7452,0.0766,0.0651
est_sd_corr,0.8534,0.0855,0.0673
est_sd,0.8825,0.086,0.0656
est_none,0.8207,0.0825,0.0663


Plot the data

In [27]:
label_dict = {'est_all': 'estimate all inputs',
            'est_sd_corr': 'estimate covariance matrix only',
            'est_sd': 'estimate standard deviations only',
            'est_none': 'equal weights'}

xaxis_label_dict = {'est_all': 'Est-All',
            'est_sd_corr': 'Est-SD-Corr',
            'est_sd': 'Est-SD',
            'est_none': 'Est-None',}
results = results.reset_index()
results['label'] = results['index'].apply(lambda x: label_dict[x])
results['xaxis_label'] = results['index'].apply(lambda x: xaxis_label_dict[x])

In [28]:
string = "Strategy: %{customdata[0]} <br>"
string += "Sharpe ratio: %{y:0.3f}<br>"
string += "Average return: %{customdata[1]:0.1%}<br>"
string += "SD(return): %{customdata[2]:0.1%}<br>"
string += "<extra></extra>"

fig = go.Figure()
fig.add_trace(go.Bar(x=results['xaxis_label'], y=results['sr'], customdata=results[['label','avg_ret','sd_ret']], hovertemplate=string))
fig.layout.yaxis["title"] = "Sharpe ratio"
fig.layout.xaxis["title"] = "Strategy"
fig.show()