# Estimation error and the estimation window

In [26]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from scipy.optimize import minimize
from cvxopt import matrix
from cvxopt.solvers import qp as Solver, options as SolverOptions
from scipy.stats import multivariate_normal as mvn

Functions for portfolios and simulation

In [27]:
def tangency(means, cov, rf, short_lb):
    '''
    Weights of the fully-invested portfolio with the highest Sharpe ratio.

    means:    expected returns, one entry per asset
    cov:      covariance matrix of the asset returns
    rf:       risk-free rate subtracted from the portfolio mean
    short_lb: lower bound on position weights
    examples: 0  = no short-selling
              -1 = no more than -100% in a given asset
              None=no restrictions on short-selling

    Returns the weight vector found by scipy's minimize (its `.x` attribute).
    '''
    num_assets = len(means)

    def neg_sharpe(weights):
        # minimize() minimizes, so hand it the negative Sharpe ratio
        excess = weights @ means - rf
        vol = np.sqrt(weights @ cov @ weights)
        return -excess / vol

    ones = np.ones(num_assets)
    # Start the search from the equal-weighted portfolio
    start = ones / num_assets
    # Weights must sum to one (fully invested)
    budget = {"type": "eq", "fun": lambda x: ones @ x - 1}
    # Same lower bound on every asset, no upper bound
    box = [(short_lb, None)] * num_assets

    result = minimize(neg_sharpe, start, bounds=box, constraints=[budget])
    return result.x

In [28]:
def gmv(cov, short_lb):
    '''
    Global minimum-variance weights of a fully-invested portfolio.

    Solves the quadratic program  min (1/2) w' cov w  subject to sum(w) = 1
    and, when short_lb is given, w >= short_lb for every asset.

    cov:      covariance matrix of the asset returns
    short_lb: lower bound on position weights
    examples: 0  = no short-selling
              -1 = no more than -100% in a given asset
              None=no restrictions on short-selling

    Returns a 1-D numpy array of weights, or an array of NaNs when the solver
    does not report an "optimal" status.
    '''
    n = len(cov)
    Q = matrix(cov, tc="d")
    # No linear term in the objective
    p = matrix(np.zeros(n), (n, 1), tc="d")
    if short_lb is None:
        # No position limits: an all-zero system 0 @ w <= 0 that never binds
        G = matrix(np.zeros((n, n)), tc="d")
        h = matrix(np.zeros(n), (n, 1), tc="d")
    else:
        # Lower bound on each weight: -w <= -short_lb  <=>  w >= short_lb
        G = matrix(-np.identity(n), tc="d")
        h = matrix(-short_lb * np.ones(n), (n, 1), tc="d")
    # Fully-invested constraint
    A = matrix(np.ones(n), (1, n), tc="d")
    b = matrix([1], (1, 1), tc="d")
    sol = Solver(Q, p, G, h, A, b, options={'show_progress': False})
    if sol["status"] != "optimal":
        # Signal failure with NaNs instead of returning a non-converged point
        return np.full(n, np.nan)
    return np.array(sol["x"]).flatten()

In [29]:
# Simulation function
def simulation(means, cov, rf, short_lb, seed, window):
	'''
	Simulate returns and report the realized Sharpe ratio of five portfolio rules.

	Draws window+T multivariate-normal return observations from (means, cov),
	estimates rolling-window means, standard deviations and correlations, and
	for each of the last T periods forms portfolios from the estimates in the
	previous row and applies them to that period's realized returns.

	Relies on module-level names that are not parameters: T (number of
	investment periods), RAVER (risk aversion), and the tangency() and gmv()
	helpers.

	means, cov: true mean vector and covariance matrix used to draw returns
	rf:         risk-free rate
	short_lb:   lower bound on position weights, passed to tangency() and gmv()
	seed:       random_state handed to the multivariate-normal draw
	window:     number of observations in each rolling estimation window

	Returns a DataFrame indexed by model name ('true', 'est_all',
	'est_sd_corr', 'est_sd', 'est_none') with a single column 'sr' holding
	(mean portfolio return - rf) / (std of portfolio return), computed by
	describe() over the rows where a portfolio return was recorded.
	'''
	# The first `window` rows only feed the initial estimates; the last T rows are invested in
	rets = mvn.rvs(means, cov, size=window+T, random_state = seed)
	n = len(means)
	return_list = ['r' + str(i) for i in range(n)]
	mean_list   = ['mn'+ str(i) for i in range(n)]
	sd_list     = ['sd'+ str(i) for i in range(n)]
	# NOTE(review): corr_list and wgt_list are built but never used below
	corr_list   = ['c' + str(i) + str(j) for i in np.arange(n) for j in np.arange(i+1,n)]
	wgt_list    = ['wgt' + str(i) for i in range(n)]
	df = pd.DataFrame(data=rets, columns=return_list)

	# Estimate rolling window historical inputs
	df[mean_list] = df[return_list].rolling(window).mean()
	df[sd_list]   = df[return_list].rolling(window).std()
	corrs = df[return_list].rolling(window, min_periods=window).corr()
	# Pull each pairwise correlation series (i < j) out of the rolling result into its own column.
	# NOTE(review): the 'c'+i+j naming is ambiguous once n > 10 (pairs (1,12) and (11,2) both give 'c112')
	for i in np.arange(n):
		for j in np.arange(i+1,n):
			df['c'+str(i)+str(j)]=corrs.loc[(slice(None),'r'+str(i)),'r'+str(j)].values

	# Calculate (unobservable) true optimal risky and CAL weights
	# (must happen before the loop below rebinds `means` and `cov` to estimates)
	wgts_true = tangency(means,cov,rf,short_lb)
	# CAL weight = portfolio excess return / (risk aversion * portfolio variance)
	wgt_cal_true = (wgts_true @ means - rf) / (RAVER * (wgts_true @ cov @ wgts_true))

	# Initialize empty columns for risky portfolio return, CAL location, CAL portfolio return
	model_list = ['true', 'est_all', 'est_sd_corr', 'est_sd', 'est_none']
	for model in model_list:
		df['risky_ret_'+model] = np.nan  # risky_ret is the realized portfolio return of the 100% risky asset portfolio
		df['wgt_risky_'+model] = np.nan
		df['port_ret_' +model] = np.nan  # port_ret is the realized return of the CAL choice of the raver investor

	# Choose optimal portfolios each time period
	for i in np.arange(window,window+T):
		# Full estimation inputs at each point in time.
		# Estimates come from row i-1, so period i's own return is never used to form its weights.
		# From here on `means` and `cov` hold the estimates, not the true parameters passed in.
		means = df[mean_list].iloc[i-1].values
		sds   = df[sd_list].iloc[i-1].values
		# Rebuild the estimated correlation matrix from the pairwise columns
		C = np.identity(n)
		for i2 in np.arange(n):
			for j in np.arange(i2+1,n):
				C[i2,j] = C[j,i2] = df.loc[i-1,'c'+str(i2)+str(j)]
		cov = np.diag(sds) @ C @ np.diag(sds)

		# Pull current period returns
		rets = df.loc[i,return_list].values

		##### Note: all portfolio weights considered to be beginning of period weights
		##### (so multiply by contemporaneous realized returns)
		# Theoretical optimal weights
		# NOTE(review): 'wgt_risky_true' is never filled in, and wgt_cal_true is not floored at 0
		# the way the estimated models' weights are
		model = 'true'
		df.loc[i,'risky_ret_'+model] = rets @ wgts_true
		df.loc[i,'port_ret_' +model] = rf + wgt_cal_true * (df.loc[i,'risky_ret_'+model] -rf)


		# Full estimation tangency portfolio
		model = 'est_all'
		wgts = tangency(means,cov,rf,short_lb)
		df.loc[i,'risky_ret_'+model] = rets @ wgts
		# max(0, ...) keeps the investor from shorting the risky portfolio
		df.loc[i,'wgt_risky_'+model] = max(0,(wgts @ means - rf) / (RAVER * (wgts @ cov @ wgts)))
		df.loc[i,'port_ret_' +model] = rf + df.loc[i,'wgt_risky_'+model]  * (df.loc[i,'risky_ret_'+model] -rf)


		# Estimate only covariance matrix
		# (expected return of the risky portfolio is taken to be the average of the estimated means)
		model = 'est_sd_corr'
		wgts = gmv(cov,short_lb)
		df.loc[i,'risky_ret_'+model] = rets @ wgts
		df.loc[i,'wgt_risky_'+model] = max(0,(means.mean() - rf) / (RAVER * (wgts @ cov @ wgts)))
		df.loc[i,'port_ret_' +model] = rf + df.loc[i,'wgt_risky_'+model]  * (df.loc[i,'risky_ret_'+model] -rf)


		# Estimate only standard deviations in covariance matrix
		# `cov` is modified in place: off-diagonals are zeroed and stay zero for the next model too
		model = 'est_sd'
		for i2 in np.arange(n):
			for j in np.arange(i2+1,n):
				cov[i2,j] = cov[j,i2] =0.0
		wgts = gmv(cov,short_lb)
		df.loc[i,'risky_ret_'+model] = rets @ wgts
		df.loc[i,'wgt_risky_'+model] = max(0,(means.mean() - rf) / (RAVER * (wgts @ cov @ wgts)))
		df.loc[i,'port_ret_' +model] = rf + df.loc[i,'wgt_risky_'+model]  * (df.loc[i,'risky_ret_'+model] -rf)

		# Equal-weighted portfolio
		# `cov` is already diagonal here; every variance is replaced by the squared average sd
		model = 'est_none'
		for i2 in np.arange(n):
			cov[i2,i2] = (sds.mean())**2
		wgts = (1/n)*np.ones(n)
		df.loc[i,'risky_ret_'+model] = rets @ wgts
		df.loc[i,'wgt_risky_'+model] = max(0,(means.mean() - rf) / (RAVER * (wgts @ cov @ wgts)))
		df.loc[i,'port_ret_' +model] = rf + df.loc[i,'wgt_risky_'+model]  * (df.loc[i,'risky_ret_'+model] -rf)



	# Summary statistics of the realized CAL portfolio returns for each model
	portret_list = ['port_ret_'+model for model in model_list]
	stats = df[portret_list].describe()

	# Realized Sharpe ratio per model
	sr_df = pd.DataFrame(dtype=float, columns = ['sr'], index = model_list)
	for model in model_list:
		sr_df.loc[model,'sr'] = (stats.loc['mean','port_ret_'+model] - rf)/stats.loc['std','port_ret_'+model]

	return sr_df

# Varying Window Length

In [30]:
## Run for a systematic list of inputs (varying window length)
# Took 1 hour to run 10 parms @ 250 sims each

# Risk aversion (read as a global inside simulation())
RAVER = 2

# Risk-free rate
RF = 0.02

# Investment period (read as a global inside simulation())
T = 50

# Number of simulations
NUM_SIMS = 250

# Asset Parameters
mns1 = np.array([0.06, 0.10, 0.14])
mns2 = np.array([0.08, 0.10, 0.12])
mns3 = np.array([0.10, 0.10, 0.10])

sds1 = np.array([0.16, 0.20, 0.24])
sds2 = np.array([0.32, 0.40, 0.48])
sds3 = np.array([0.18, 0.20, 0.22])

c1 = 0.75
c2 = 0.50

# Estimation window lengths
w1 = 10
w2 = 20
w3 = 30
w4 = 40
w5 = 50

# Only the scenarios listed in these dicts are simulated
mns_dict = {'mns1': mns1, 'mns2': mns2}
sds_dict = {'sds1': sds1}
corr_dict = {'c1': c1}
window_dict = {'w1': w1, 'w2': w2, 'w3': w3, 'w4': w4, 'w5': w5}

# One row per (means, sds, corrs, window, sim); one column per strategy
iterables = [list(mns_dict),
             list(sds_dict),
             list(corr_dict),
             list(window_dict),
             np.arange(NUM_SIMS)]
idx = pd.MultiIndex.from_product(iterables, names=["means", "sds", "corrs", "window", "sim"])
sim_results = pd.DataFrame(dtype='float', columns=['true', 'est_all', 'est_sd_corr', 'est_sd', 'est_none'], index=idx)

for m, means in mns_dict.items():
    n = len(means)
    for s, sds in sds_dict.items():
        for c, rho in corr_dict.items():
            # Covariance matrix: every pair of assets shares the correlation rho.
            # Sized by n so the sweep is not tied to exactly three assets.
            C = np.full((n, n), rho, dtype=float)
            np.fill_diagonal(C, 1.0)
            cov = np.diag(sds) @ C @ np.diag(sds)

            for w, window in window_dict.items():

                print(m + "\t" + s + "\t" + c + "\t" + w)

                # Run the simulations (the simulation index doubles as the random seed)
                for sim in range(NUM_SIMS):
                    if sim % 25 == 0:
                        print('Simulation number: ' + str(sim))
                    sim_results.loc[(m, s, c, w, sim)] = simulation(
                        means, cov, RF, short_lb=None, seed=sim, window=window
                    ).T.values

# Save results
sim_results.to_csv('../data/sim_results_vary_window.csv')

mns1	sds1	c1	w1
Simulation number: 0
Simulation number: 25
Simulation number: 50
Simulation number: 75
Simulation number: 100
Simulation number: 125
Simulation number: 150
Simulation number: 175
Simulation number: 200
Simulation number: 225
mns1	sds1	c1	w2
Simulation number: 0
Simulation number: 25
Simulation number: 50
Simulation number: 75
Simulation number: 100
Simulation number: 125
Simulation number: 150
Simulation number: 175
Simulation number: 200
Simulation number: 225
mns1	sds1	c1	w3
Simulation number: 0
Simulation number: 25
Simulation number: 50
Simulation number: 75
Simulation number: 100
Simulation number: 125
Simulation number: 150
Simulation number: 175
Simulation number: 200
Simulation number: 225
mns1	sds1	c1	w4
Simulation number: 0
Simulation number: 25
Simulation number: 50
Simulation number: 75
Simulation number: 100
Simulation number: 125
Simulation number: 150
Simulation number: 175
Simulation number: 200
Simulation number: 225
mns1	sds1	c1	w5
Simulation number: 0

### Read saved data and compare Sharpe ratios


In [31]:
# Reload the saved simulation output from disk and preview the first rows
results_path = '../data/sim_results_vary_window.csv'
sim_results = pd.read_csv(results_path)
sim_results.head()


Unnamed: 0,means,sds,corrs,window,sim,true,est_all,est_sd_corr,est_sd,est_none
0,mns1,sds1,c1,w1,0,0.485014,0.244027,-0.174769,0.144301,0.184277
1,mns1,sds1,c1,w1,1,0.471938,0.307943,0.216782,0.348175,0.365837
2,mns1,sds1,c1,w1,2,0.35554,0.268477,0.080505,0.206039,0.230139
3,mns1,sds1,c1,w1,3,0.639033,0.537514,0.121981,0.483934,0.527965
4,mns1,sds1,c1,w1,4,0.456634,0.263591,0.287175,0.42816,0.446544


In [32]:
# Average Sharpe ratio per scenario and window, keeping only the strategy columns
scenario_keys = ['means', 'sds', 'corrs', 'window']
strategy_cols = ['true', 'est_all', 'est_sd_corr', 'est_sd', 'est_none']
stats = sim_results.groupby(scenario_keys).mean().loc[:, strategy_cols]
stats

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,true,est_all,est_sd_corr,est_sd,est_none
means,sds,corrs,window,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
mns1,sds1,c1,w1,0.553081,0.295428,0.11559,0.286633,0.317707
mns1,sds1,c1,w2,0.546252,0.392292,0.144398,0.3168,0.348379
mns1,sds1,c1,w3,0.546227,0.428758,0.151906,0.333061,0.366756
mns1,sds1,c1,w4,0.555677,0.456301,0.162364,0.351472,0.385965
mns1,sds1,c1,w5,0.56129,0.478236,0.180459,0.374443,0.409465
mns2,sds1,c1,w1,0.439653,0.205701,0.209793,0.309128,0.317707
mns2,sds1,c1,w2,0.434277,0.275294,0.26763,0.343015,0.348379
mns2,sds1,c1,w3,0.43193,0.307575,0.288028,0.361346,0.366756
mns2,sds1,c1,w4,0.441196,0.333877,0.304365,0.380328,0.385965
mns2,sds1,c1,w5,0.448782,0.35872,0.327169,0.40424,0.409465


In [33]:
def compare_plot(mns,sds,corr):
    '''
    Grouped bar chart of average Sharpe ratio by strategy, one bar per window.

    mns, sds, corr: scenario keys (e.g. 'mns1', 'sds1', 'c1') used to select
                    rows from the module-level `stats` table.

    Reads the module-level `stats` and `window_dict`; shows the figure and
    returns nothing.
    '''
    import plotly.express as px

    label_dict = {'true':'True',
                'est_none': 'Est-None',
                'est_all': 'Est-All',
                'est_sd_corr': 'Est-SD-Corr',
                'est_sd': 'Est-SD'}

    # Long format: one row per (window, strategy) pair
    newdf = stats.loc[(mns,sds,corr,slice(None))].stack().reset_index()
    newdf.columns=['window','strategy','sr']

    # Replace the internal keys with display labels / window lengths
    newdf['strategy'] = [label_dict[name] for name in newdf['strategy']]
    newdf['window'] = [window_dict[key] for key in newdf['window']]

    # px.histogram builds the figure itself, so no empty go.Figure() is needed first
    fig = px.histogram(newdf, x="strategy", y="sr",
                color='window', barmode='group', histfunc='avg',
                height=400)
    fig.update_layout(yaxis_title="Sharpe ratio", xaxis_title="Strategy")
    fig.show()


### More expected return variation

In [34]:
# Scenario with the wider spread of expected returns across assets
compare_plot(mns='mns1', sds='sds1', corr='c1')

### Less expected return variation

In [35]:

# Scenario with the narrower spread of expected returns across assets
compare_plot(mns='mns2', sds='sds1', corr='c1')

### Compare average Sharpe ratios across strategies for a single estimation window size

In [36]:
def compare_means_singlehorizon(mns,sds,corr,w):
    '''
    Bar chart of average Sharpe ratio by strategy for one estimation window.

    mns, sds, corr: scenario keys (e.g. 'mns1', 'sds1', 'c1')
    w:              window key (e.g. 'w3'); its length is looked up in
                    `window_dict` for the figure title

    Reads the module-level `stats` and `window_dict`; shows the figure and
    returns nothing.
    '''
    import plotly.express as px

    label_dict = {'true':'True',
                'est_none': 'Est-None',
                'est_all': 'Est-All',
                'est_sd_corr': 'Est-SD-Corr',
                'est_sd': 'Est-SD'}

    # One row of `stats` -> two-column frame (strategy, Sharpe ratio)
    newdf = stats.loc[(mns,sds,corr,w)].reset_index()
    newdf.columns=['strategy','sr']
    newdf['strategy'] = [label_dict[name] for name in newdf['strategy']]

    # px.histogram builds the figure itself, so no empty go.Figure() is needed first
    fig = px.histogram(newdf, x="strategy", y="sr", barmode='group', histfunc='avg',
                height=400)
    fig.update_layout(yaxis_title="Sharpe ratio",
                      xaxis_title="Strategy",
                      title_text='Estimation window is: ' + str(window_dict[w]) + ' years')
    fig.show()

### More expected return variation

In [37]:
# Middle window (w3); pass w='w1' or w='w5' to see the shortest or longest window instead
compare_means_singlehorizon(mns='mns1', sds='sds1', corr='c1', w='w3')

### Less expected return variation

In [38]:
# Middle window (w3); pass w='w1' or w='w5' to see the shortest or longest window instead
compare_means_singlehorizon(mns='mns2', sds='sds1', corr='c1', w='w3')

### Plot distribution of SRs as a function of strategy for a given estimation window

In [39]:
import plotly.graph_objects as go
def boxplots(df, w):
    '''
    Box plots of simulated Sharpe ratios, one box per strategy, for window `w`.

    df: simulation results with a 'window' column and one column per strategy
    w:  window key (e.g. 'w3'); its length is looked up in `window_dict`
        for the figure title

    Shows the figure and returns nothing.
    '''
    # Display names, listed in the order the boxes are drawn
    display_names = {'true': 'True',
                     'est_all': 'Est-All',
                     'est_sd_corr': 'Est-SD-Corr',
                     'est_sd': 'Est-SD',
                     'est_none': 'Est-None'}

    # Keep only the rows for this window and the strategy columns
    strategy_cols = ['true', 'est_none', 'est_all', 'est_sd_corr', 'est_sd']
    subset = df.loc[df.window == w, strategy_cols].copy()

    traces = [go.Box(y=subset[col], boxmean='sd', name=label)
              for col, label in display_names.items()]
    fig = go.Figure(data=traces)
    fig.update_layout(title_text='Estimation window is: ' + str(window_dict[w]) + ' years')
    fig.show()
# One figure per estimation window for the mns1 / sds1 / c1 scenario
mns1_scenario = sim_results[(sim_results.means=='mns1') & (sim_results.sds=='sds1') & (sim_results.corrs=='c1')]
for w in window_dict:
    boxplots(mns1_scenario, w)

### Plot distribution of SRs as a function of estimation window for a given strategy

In [40]:
import plotly.graph_objects as go
def boxplots_time(df, method):
    '''
    Box plots of simulated Sharpe ratios for one strategy, one box per window.

    df:     simulation results with a 'window' column and one column per strategy
    method: strategy column to plot (e.g. 'est_all')

    Iterates over the module-level `window_dict`; shows the figure and
    returns nothing.
    '''
    pretty = {'true': 'True',
              'est_none': 'Est-None',
              'est_all': 'Est-All',
              'est_sd_corr': 'Est-SD-Corr',
              'est_sd': 'Est-SD'}

    frame = df.copy()
    fig = go.Figure()
    for key, length in window_dict.items():
        # Sharpe ratios of this strategy for the simulations run with this window
        in_window = frame.window == key
        fig.add_trace(go.Box(y=frame.loc[in_window, method], boxmean='sd', name=length))
    fig.update_layout(title_text='Strategy is:\t' + pretty[method])
    fig.show()


In [41]:
# One figure per estimated strategy for the mns1 / sds1 / c1 scenario
mns1_scenario = sim_results[(sim_results.means=='mns1') & (sim_results.sds=='sds1') & (sim_results.corrs=='c1')]
for c in ('est_all', 'est_sd_corr', 'est_sd', 'est_none'):
    boxplots_time(mns1_scenario, c)