# Estimation error and the number of assets
Comparison across portfolios of 3, 5, and 10 assets

In [74]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from scipy.optimize import minimize
from cvxopt import matrix
from cvxopt.solvers import qp as Solver, options as SolverOptions
from scipy.stats import multivariate_normal as mvn


Functions for portfolios and simulation

In [75]:
def tangency(means, cov, rf, short_lb):
    '''
    Numerically solve for the fully-invested portfolio that maximizes the
    Sharpe ratio (w @ means - rf) / sqrt(w' cov w).

    means:    expected returns, one per asset
    cov:      covariance matrix of returns
    rf:       risk-free rate
    short_lb: lower bound on position weights
    examples: 0  = no short-selling
              -1 = no more than -100% in a given asset
              None=no restrictions on short-selling

    Returns the optimizer's weight vector (weights sum to 1).
    '''
    num_assets = len(means)
    ones = np.ones(num_assets)

    def neg_sharpe(weights):
        # minimize() minimizes, so hand it the negative Sharpe ratio
        excess = weights @ means - rf
        vol = np.sqrt(weights.T @ cov @ weights)
        return -excess / vol

    def budget_gap(weights):
        # Zero exactly when the weights sum to one (fully invested)
        return ones @ weights - 1

    # Start the search from the equal-weighted portfolio
    start = (1 / num_assets) * ones
    # Same lower bound on every asset, no upper bound
    position_bounds = [(short_lb, None)] * num_assets
    budget = [{"type": "eq", "fun": budget_gap}]

    result = minimize(neg_sharpe, start, bounds=position_bounds, constraints=budget)
    return result.x

In [76]:
def gmv(cov, short_lb):
    '''
    Global minimum-variance portfolio: minimize w' cov w subject to the
    weights summing to one and, optionally, a common lower bound per asset.

    cov:      covariance matrix of returns (n x n)
    short_lb: lower bound on position weights
    examples: 0  = no short-selling
              -1 = no more than -100% in a given asset
              None=no restrictions on short-selling

    Returns a length-n weight array, or an array of NaN when the solver
    does not report status "optimal".
    '''
    n = len(cov)
    # Quadratic objective with no linear term
    Q = matrix(cov, tc="d")
    p = matrix(np.zeros(n), (n, 1), tc="d")
    if short_lb is None:
        # No position limits: all-zero rows give the vacuous constraint 0 <= 0
        G = matrix(np.zeros((n, n)), tc="d")
        h = matrix(np.zeros(n), (n, 1), tc="d")
    else:
        # Position limits: -w_i <= -short_lb, i.e. w_i >= short_lb for every asset
        G = matrix(-np.identity(n), tc="d")
        h = matrix(-short_lb * np.ones(n), (n, 1), tc="d")
    # Fully-invested constraint: weights sum to one
    A = matrix(np.ones(n), (1, n), tc="d")
    b = matrix([1], (1, 1), tc="d")
    sol = Solver(Q, p, G, h, A, b, options={'show_progress': False})
    if sol["status"] != "optimal":
        # Signal failure with NaN weights so callers can detect/propagate it
        return np.full(n, np.nan)
    return np.array(sol["x"]).flatten()

In [77]:
# Simulation function
def simulation(means, cov, rf, short_lb, seed, window):
	'''
	Simulate returns and compare realized Sharpe ratios of five portfolio rules.

	Draws window+T return observations from a multivariate normal with the
	given (true) means and covariance, estimates rolling-window means,
	standard deviations and pairwise correlations, and for each of the last
	T rows builds portfolios from the estimates stored in the previous row.

	means, cov: true return moments used to generate the data
	rf:         risk-free rate
	short_lb:   lower bound on position weights, passed to tangency() and gmv()
	seed:       random_state handed to mvn.rvs
	window:     rolling estimation window length

	Models:
	  true        - tangency weights from the true means/cov
	  est_all     - tangency weights from estimated means, sds and correlations
	  est_sd_corr - gmv weights from the estimated covariance matrix
	  est_sd      - gmv weights with the estimated correlations set to zero
	  est_none    - equal weights

	Returns a DataFrame indexed by model name with a single column 'sr':
	(mean of port_ret - rf) / std of port_ret.

	NOTE(review): T and RAVER are not parameters; they are read from module
	scope and must be defined before this function is called.
	'''
	rets = mvn.rvs(means, cov, size=window+T, random_state = seed)
	n = len(means)
	# Column names: realized returns, rolling means, rolling sds, pairwise correlations
	return_list = ['r' + str(i) for i in range(n)]
	mean_list   = ['mn'+ str(i) for i in range(n)]
	sd_list     = ['sd'+ str(i) for i in range(n)]
	# NOTE(review): corr_list and wgt_list are built but not used below
	corr_list   = ['c' + str(i) + str(j) for i in np.arange(n) for j in np.arange(i+1,n)]
	wgt_list    = ['wgt' + str(i) for i in range(n)]
	df = pd.DataFrame(data=rets, columns=return_list)

	# Estimate rolling window historical inputs
	df[mean_list] = df[return_list].rolling(window).mean()
	df[sd_list]   = df[return_list].rolling(window).std()
	corrs = df[return_list].rolling(window, min_periods=window).corr()
	# corrs is indexed by (row, asset); pull out the (r_i, r_j) series for each pair i < j
	for i in np.arange(n):
		for j in np.arange(i+1,n):
			df['c'+str(i)+str(j)]=corrs.loc[(slice(None),'r'+str(i)),'r'+str(j)].values

	# Calculate (unobservable) true optimal risky and CAL weights
	wgts_true = tangency(means,cov,rf,short_lb)
	# Share in the risky portfolio: excess return / (risk aversion * variance); not floored at 0 here
	wgt_cal_true = (wgts_true @ means - rf) / (RAVER * (wgts_true @ cov @ wgts_true))

	# Initialize empty columns for risky portfolio return, CAL location, CAL portfolio return
	model_list = ['true', 'est_all', 'est_sd_corr', 'est_sd', 'est_none']
	for model in model_list:
		df['risky_ret_'+model] = np.nan  # risky_ret is the realized portfolio return of the 100% risky asset portfolio
		df['wgt_risky_'+model] = np.nan
		df['port_ret_' +model] = np.nan  # port_ret is the realized return of the CAL choice of the raver investor

	# Choose optimal portfolios each time period
	# Rows before `window` are never assigned and keep their NaN portfolio returns
	for i in np.arange(window,window+T):
		# Full estimation inputs at each point in time
		# From here on `means` and `cov` hold the ESTIMATES from row i-1, shadowing the
		# true-parameter arguments (which were already consumed above)
		means = df[mean_list].iloc[i-1].values
		sds   = df[sd_list].iloc[i-1].values
		C = np.identity(n)
		for i2 in np.arange(n):
			for j in np.arange(i2+1,n):
				C[i2,j] = C[j,i2] = df.loc[i-1,'c'+str(i2)+str(j)]
		cov = np.diag(sds) @ C @ np.diag(sds)

		# Pull current period returns
		rets = df.loc[i,return_list].values

		##### Note: all portfolio weights considered to be beginning of period weights
		##### (so multiply by contemporaneous realized returns)
		# Theoretical optimal weights
		# ('wgt_risky_true' is not written here, so that column stays NaN)
		model = 'true'
		df.loc[i,'risky_ret_'+model] = rets @ wgts_true
		df.loc[i,'port_ret_' +model] = rf + wgt_cal_true * (df.loc[i,'risky_ret_'+model] -rf)


		# Full estimation tangency portfolio
		model = 'est_all'
		wgts = tangency(means,cov,rf,short_lb)
		df.loc[i,'risky_ret_'+model] = rets @ wgts
		# Risky share is floored at 0
		df.loc[i,'wgt_risky_'+model] = max(0,(wgts @ means - rf) / (RAVER * (wgts @ cov @ wgts)))
		df.loc[i,'port_ret_' +model] = rf + df.loc[i,'wgt_risky_'+model]  * (df.loc[i,'risky_ret_'+model] -rf)


		# Estimate only covariance matrix
		model = 'est_sd_corr'
		wgts = gmv(cov,short_lb)
		df.loc[i,'risky_ret_'+model] = rets @ wgts
		# Expected return here is the average of the estimated means, not wgts @ means
		df.loc[i,'wgt_risky_'+model] = max(0,(means.mean() - rf) / (RAVER * (wgts @ cov @ wgts)))
		df.loc[i,'port_ret_' +model] = rf + df.loc[i,'wgt_risky_'+model]  * (df.loc[i,'risky_ret_'+model] -rf)


		# Estimate only standard deviations in covariance matrix
		model = 'est_sd'
		# Zero the off-diagonals of cov IN PLACE; the est_none step below relies on this
		for i2 in np.arange(n):
			for j in np.arange(i2+1,n):
				cov[i2,j] = cov[j,i2] =0.0
		wgts = gmv(cov,short_lb)
		df.loc[i,'risky_ret_'+model] = rets @ wgts
		df.loc[i,'wgt_risky_'+model] = max(0,(means.mean() - rf) / (RAVER * (wgts @ cov @ wgts)))
		df.loc[i,'port_ret_' +model] = rf + df.loc[i,'wgt_risky_'+model]  * (df.loc[i,'risky_ret_'+model] -rf)

		# Equal-weighted portfolio
		model = 'est_none'
		# cov is already diagonal; give every asset the same variance (squared average sd)
		for i2 in np.arange(n):
			cov[i2,i2] = (sds.mean())**2
		wgts = (1/n)*np.ones(n)
		df.loc[i,'risky_ret_'+model] = rets @ wgts
		df.loc[i,'wgt_risky_'+model] = max(0,(means.mean() - rf) / (RAVER * (wgts @ cov @ wgts)))
		df.loc[i,'port_ret_' +model] = rf + df.loc[i,'wgt_risky_'+model]  * (df.loc[i,'risky_ret_'+model] -rf)



	# Summary statistics of each model's realized CAL-portfolio return
	portret_list = ['port_ret_'+model for model in model_list]
	stats = df[portret_list].describe()

	# Realized Sharpe ratio per model from the 'mean' and 'std' rows of describe()
	sr_df = pd.DataFrame(dtype=float, columns = ['sr'], index = model_list)
	for model in model_list:
		sr_df.loc[model,'sr'] = (stats.loc['mean','port_ret_'+model] - rf)/stats.loc['std','port_ret_'+model]

	return sr_df


Parameters

In [78]:
# Risk-aversion coefficient; scales down the risky-portfolio share computed in simulation()
RAVER = 2

# Risk-free rate, passed as `rf` to tangency() and simulation()
RF = 0.02

# Estimation window: not set in this cell; WINDOW is assigned in the simulation-setup cell below

Adjust the covariance matrix so that the theoretical (true-parameter) tangency Sharpe ratio is the same for the 3-, 5-, and 10-asset cases

In [79]:
# True expected returns for the 3-, 5- and 10-asset universes
mns3 = np.array([0.06, 0.10, 0.14])
mns5 = np.array([0.06, 0.10, 0.14, 0.18, 0.22])
mns10= np.array([0.06, 0.10, 0.14, 0.18, 0.22, 0.06, 0.10, 0.14, 0.18, 0.22])

# True standard deviations (before any rescaling)
sds3 = np.array([0.16, 0.20, 0.24])
sds5 = np.array([0.16, 0.20, 0.24, 0.28, 0.32])
sds10= np.array([0.16, 0.20, 0.24, 0.28, 0.32, 0.16, 0.20, 0.24, 0.28, 0.32])

# Common pairwise correlation between any two assets
corr = 0.5

# Keyed by the number of assets (as a string)
mns_dict = {'3':mns3, '5':mns5, '10':mns10}
sds_dict = {'3':sds3, '5':sds5, '10':sds10}

# Check Sharpe ratios at current params
sharpes = np.zeros(len(mns_dict))
# Header printed once, before the loop (it used to be repeated for every asset count)
print('Checking current SRs')
for k, key in enumerate(mns_dict):
    print(f'\nNumber of assets: {key}')
    means = mns_dict[key]
    sds   = sds_dict[key]
    n = len(means)
    # Equicorrelation matrix: ones on the diagonal, corr everywhere else
    C = np.full((n, n), corr, dtype=float)
    np.fill_diagonal(C, 1.0)
    cov = np.diag(sds) @ C @ np.diag(sds)
    wgts_true = tangency(means,cov,RF,short_lb=None)
    print(wgts_true)
    sr_true = (wgts_true @ means - RF) / (np.sqrt(wgts_true @ cov @ wgts_true))
    print('Sharpe ratio with unadjusted covariance matrix:')
    print(sr_true)
    sharpes[k] = sr_true

# Adjust covariance matrix so theoretical sharpe ratio is same
print('\nTesting new covariance matrix produces same Sharpe ratios')
for k, key in enumerate(mns_dict):
    print(f'\nNumber of assets: {key}')
    means = mns_dict[key]
    sds   = sds_dict[key]
    n = len(means)
    C = np.full((n, n), corr, dtype=float)
    np.fill_diagonal(C, 1.0)
    cov = np.diag(sds) @ C @ np.diag(sds)
    # Scaling cov by (SR_k / SR_0)^2 multiplies every portfolio sd by SR_k / SR_0,
    # which brings the maximum Sharpe ratio down to that of the first (3-asset) case
    cov = cov * (sharpes[k]/sharpes[0])**2
    print('Standard Deviations:')
    print(np.sqrt(np.diag(cov)))
    wgts_true = tangency(means,cov,RF,short_lb=None)
    sr_true = (wgts_true @ means - RF) / (np.sqrt(wgts_true @ cov @ wgts_true))
    print('Sharpe ratio with adjusted covariance matrix:')
    print(sr_true)


Checking current SRs

Number of assets: 3
[-0.19521254  0.4643534   0.73085914]
Sharpe ratio with unadjusted covariance matrix:
0.5326815603694003
Checking current SRs

Number of assets: 5
[-0.88858366  0.04465473  0.45750009  0.64758459  0.73884424]
Sharpe ratio with unadjusted covariance matrix:
0.7376094416311172
Checking current SRs

Number of assets: 10
[-2.66388897 -0.31883859  0.74535036  1.23841602  1.4989613  -2.6638891
 -0.31883854  0.74535032  1.23841598  1.49896123]
Sharpe ratio with unadjusted covariance matrix:
0.8685902437905962

Testing new covariance matrix produces same Sharpe ratios

Number of assets: 3
Standard Deviations:
[0.16 0.2  0.24]
Sharpe ratio with adjusted covariance matrix:
0.5326815603694003

Number of assets: 5
Standard Deviations:
[0.22155359 0.27694198 0.33233038 0.38771878 0.44310717]
Sharpe ratio with adjusted covariance matrix:
0.5326807274473699

Number of assets: 10
Standard Deviations:
[0.26089591 0.32611988 0.39134386 0.45656784 0.52179181 0.26

## Run the simulation and save data

In [80]:
## Simulation design for a systematic sweep over the number of assets:
## Monte Carlo runs per asset count, rolling-estimation window length,
## and number of out-of-sample periods per run
NUM_SIMS = 500
WINDOW   = 20
T        = 50

# Results table: one row per (asset count, simulation), one column per strategy
iterables = [list(mns_dict), np.arange(NUM_SIMS)]
idx = pd.MultiIndex.from_product(iterables, names=["n_assets", "sim"])
sim_results = pd.DataFrame(
    index=idx,
    columns=['true', 'est_all', 'est_sd_corr', 'est_sd', 'est_none'],
    dtype='float',
)
                                                  


In [81]:
for k, key in enumerate(mns_dict):
    print(key)
    means = mns_dict[key]
    sds   = sds_dict[key]
    n = len(means)

    # Equicorrelation matrix: ones on the diagonal, corr everywhere else
    C = np.full((n, n), corr, dtype=float)
    np.fill_diagonal(C, 1.0)

    # Rescale so the true Sharpe ratio matches that of the first asset count
    cov = np.diag(sds) @ C @ np.diag(sds)
    cov = cov * (sharpes[k] / sharpes[0])**2

    # Run the simulations, using the simulation number as the random seed
    for sim in range(NUM_SIMS):
        if sim % 25 == 0:
            print('Simulation number: ' + str(sim))
        sr_by_model = simulation(means, cov, RF, short_lb=None, seed=sim, window=WINDOW)
        sim_results.loc[(key, sim)] = sr_by_model.T.values

# Persist the Sharpe ratios so the analysis cells can reload them
sim_results.to_csv('../data/sim_results_vary_nassets.csv')


3
Simulation number: 0
Simulation number: 25
Simulation number: 50
Simulation number: 75
Simulation number: 100
Simulation number: 125
Simulation number: 150
Simulation number: 175
Simulation number: 200
Simulation number: 225
Simulation number: 250
Simulation number: 275
Simulation number: 300
Simulation number: 325
Simulation number: 350
Simulation number: 375
Simulation number: 400
Simulation number: 425
Simulation number: 450
Simulation number: 475
5
Simulation number: 0
Simulation number: 25
Simulation number: 50
Simulation number: 75
Simulation number: 100
Simulation number: 125
Simulation number: 150
Simulation number: 175
Simulation number: 200
Simulation number: 225
Simulation number: 250
Simulation number: 275
Simulation number: 300
Simulation number: 325
Simulation number: 350
Simulation number: 375
Simulation number: 400
Simulation number: 425
Simulation number: 450
Simulation number: 475
10
Simulation number: 0
Simulation number: 25
Simulation number: 50
Simulation number:

### Read saved data and compare Sharpe ratios


In [82]:
# Reload the saved Sharpe ratios. No index_col is given, so n_assets and sim
# come back as ordinary columns rather than as a MultiIndex.
sim_results = pd.read_csv('../data/sim_results_vary_nassets.csv')
# Preview the first rows
sim_results.head()

Unnamed: 0,n_assets,sim,true,est_all,est_sd_corr,est_sd,est_none
0,3,0,0.351705,0.188265,0.079492,0.171911,0.196575
1,3,1,0.374888,0.230869,0.295605,0.347252,0.359619
2,3,2,0.258342,0.240074,0.187756,0.208892,0.151306
3,3,3,0.622437,0.438985,0.38709,0.488606,0.541386
4,3,4,0.478478,0.354124,0.388637,0.494816,0.500837


In [83]:
# Mean Sharpe ratio of every strategy for each asset count, ordered by the
# integer asset count held in 'num'
strategy_cols = ['true', 'est_all', 'est_sd_corr', 'est_sd', 'est_none']
stats = (
    sim_results.groupby(['n_assets']).mean()
    .reset_index()
    .assign(num=lambda frame: frame['n_assets'].apply(lambda x: int(x)))
    .sort_values('num')
    .loc[:, ['num'] + strategy_cols]
)

In [84]:
import plotly.express as px

# Reshape to long format: one row per (asset count, strategy) with its mean Sharpe ratio
newdf = stats.set_index('num').stack().reset_index()
newdf.columns=['n_assets','strategy','sr']

# Display names for the strategy column
label_dict = {'true':'True',
            'est_none': 'Est-None',
            'est_all': 'Est-All',
            'est_sd_corr': 'Est-SD-Corr',
            'est_sd': 'Est-SD'}
newdf['strategy'] = newdf['strategy'].apply(lambda y: label_dict[y])

# Grouped bars: strategies on the x axis, one bar per asset count.
# (The figure is created directly by px.histogram; no empty go.Figure() is needed first.)
fig = px.histogram(newdf, x="strategy", y="sr",
            color='n_assets', barmode='group', histfunc='avg',
            height=400)
fig.layout.yaxis["title"] = "Sharpe ratio"
fig.layout.xaxis["title"] = "Strategy"
fig.show()

### Plot distribution of SRs as a function of number of assets for a strategy

In [85]:
import plotly.graph_objects as go
def boxplots(df,method):
    '''
    Show box plots of the simulated Sharpe ratios of one strategy, with one
    box per asset count.

    df:     simulation results with an 'n_assets' column and one column per strategy
    method: strategy column to plot, e.g. 'est_all'

    The asset counts are taken from the keys of the module-level mns_dict.
    '''
    display_names = {'true': 'True',
                     'est_none': 'Est-None',
                     'est_all': 'Est-All',
                     'est_sd_corr': 'Est-SD-Corr',
                     'est_sd': 'Est-SD'}
    fig = go.Figure()
    # set_index returns a new frame, so the caller's df is left untouched
    by_assets = df.set_index('n_assets')
    for key in mns_dict:
        # Keys are strings; the n_assets index holds integers
        box = go.Box(y=by_assets.loc[int(key), method], boxmean='sd', name=key)
        fig.add_trace(box)
    fig.layout.yaxis["title"] = "Sharpe ratio"
    fig.layout.xaxis["title"] = "N(assets)"
    fig.update_layout(title_text='Strategy is:\t' + display_names[method])
    fig.show()

In [86]:
# One box-plot figure per estimated strategy (the 'true' benchmark is not plotted)
estimated_strategies = ('est_all', 'est_sd_corr', 'est_sd', 'est_none')
for c in estimated_strategies:
    boxplots(sim_results, c)