# Backtesting constrained strategies (industry portfolios): GMV

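We will compare the following four strategies.  Each month, the risky portfolio is either the global minimum variance (GMV) portfolio computed from the estimated inputs, with or without position limits, or the equal-weighted portfolio. A mean-variance investor with risk aversion of 3 then allocates between that risky portfolio and the risk-free asset.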
- No position limits
- No short sales
- No short sales and maximum positions
- Equal-weighted



In [1]:
import numpy as np
import pandas as pd
from pandas_datareader import DataReader as pdr
import plotly.graph_objects as go
from scipy.optimize import minimize
from cvxopt import matrix
from cvxopt.solvers import qp as Solver, options as SolverOptions

Pull data

In [2]:
# Download the 48 industry portfolios (first table returned by the reader)
ff48 = pdr("48_Industry_Portfolios", "famafrench", start=1900)[0]

# Missing observations are coded -99.99: blank them out, then convert percent to decimal
ff48 = ff48.mask(ff48 == -99.99) / 100

# Pull the factor file (scaled to decimals) and attach its risk-free rate column
ff3 = pdr('F-F_Research_Data_Factors','famafrench', start=1900)[0] / 100
df = ff48.join(ff3['RF'])

# Restrict the sample to January 1970 onward, since earlier months have missing data
df = df.loc['1970-01':].copy()

Create functions for the global minimum variance (GMV) portfolio (with upper and lower position limits) and capital allocation

In [14]:
def constrained_gmv(means, cov, min_wgt, max_wgt):
    """Solve for the fully-invested global minimum-variance (GMV) portfolio.

    Minimizes 0.5 * w' cov w subject to sum(w) == 1 and, when supplied,
    min_wgt <= w_i <= max_wgt for every asset, using the cvxopt QP solver.

    Parameters
    ----------
    means : sequence of length n
        Only its length is used (to get the number of assets); the objective
        has no linear term.
    cov : (n, n) array-like
        Covariance matrix passed to the solver as the quadratic term.
    min_wgt : float or None
        Lower bound applied to every weight; None means no lower bound.
    max_wgt : float or None
        Upper bound applied to every weight; None means no upper bound.

    Returns
    -------
    numpy.ndarray of length n
        The solver's weights when its status is "optimal"; otherwise an
        array of NaN.
    """
    n = len(means)
    Q = matrix(cov, tc="d")
    p = matrix(np.zeros(n), (n, 1), tc="d")

    # Inequality constraints are passed to the solver in the form G w <= h.
    G_parts, h_parts = [], []
    if min_wgt is not None:
        # w >= min_wgt  <=>  -w <= -min_wgt (note the sign flip on the bound)
        G_parts.append(-np.identity(n))
        h_parts.append(-min_wgt * np.ones(n))
    if max_wgt is not None:
        # w <= max_wgt
        G_parts.append(np.identity(n))
        h_parts.append(max_wgt * np.ones(n))
    if not G_parts:
        # No position limits: keep a trivially satisfied 0 <= 0 constraint
        G_parts.append(np.zeros((n, n)))
        h_parts.append(np.zeros(n))
    G_np = np.vstack(G_parts)
    h_np = np.concatenate(h_parts)
    G = matrix(G_np, tc="d")
    h = matrix(h_np, (len(h_np), 1), tc="d")

    # Constraint: fully-invested portfolio
    A = matrix(np.ones(n), (1, n), tc="d")
    b = matrix([1], (1, 1), tc="d")

    sol = Solver(Q, p, G, h, A, b)
    if sol["status"] == "optimal":
        return np.array(sol["x"]).flatten()
    # Signal a failed optimization with NaN weights rather than raising
    return np.array(n * [np.nan])
# Turn off the solver's progress output
SolverOptions['show_progress'] = False 

In [4]:
def capital_allocation(mean, sd, rf, risk_aversion):
    """Return the weight placed on the risky portfolio.

    Computed as the risky portfolio's excess return over `rf`, divided by
    `risk_aversion` times the risky portfolio's variance (`sd` squared).
    """
    excess_return = mean - rf
    variance = sd**2
    return excess_return / (risk_aversion * variance)

## Estimate inputs on expanding windows

Parameters

In [5]:
# Inspect the column labels: the industry portfolios followed by RF (some names are space-padded, e.g. 'Food ')
df.columns

Index(['Agric', 'Food ', 'Soda ', 'Beer ', 'Smoke', 'Toys ', 'Fun  ', 'Books',
       'Hshld', 'Clths', 'Hlth ', 'MedEq', 'Drugs', 'Chems', 'Rubbr', 'Txtls',
       'BldMt', 'Cnstr', 'Steel', 'FabPr', 'Mach ', 'ElcEq', 'Autos', 'Aero ',
       'Ships', 'Guns ', 'Gold ', 'Mines', 'Coal ', 'Oil  ', 'Util ', 'Telcm',
       'PerSv', 'BusSv', 'Comps', 'Chips', 'LabEq', 'Paper', 'Boxes', 'Trans',
       'Whlsl', 'Rtail', 'Meals', 'Banks', 'Insur', 'RlEst', 'Fin  ', 'Other',
       'RF'],
      dtype='object')

In [6]:
# Number of months required before the first estimate is produced
WINDOW = 180

# Every column except the trailing risk-free rate is a risky asset
RISKY_ASSETS = df.columns[:-1]

# Count of risky assets
n = RISKY_ASSETS.size

# Number of months left after the initial estimation window
T = df.shape[0] - WINDOW

Standardize column names

In [7]:
# Generic labels: assets are numbered 0..n-1, return columns are r0..r{n-1}, risk-free is rf
asset_list = list(map(str, range(n)))
ret_list   = [f'r{asset}' for asset in asset_list]
df.columns = [*ret_list, 'rf']
df.head(2)

Unnamed: 0_level_0,r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,...,r39,r40,r41,r42,r43,r44,r45,r46,r47,rf
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1970-01,0.0083,-0.0281,-0.0276,-0.0135,-0.0699,-0.0795,-0.0574,-0.1139,-0.069,-0.0509,...,-0.0762,-0.0768,-0.0573,-0.1186,-0.0753,-0.0888,-0.1094,-0.1082,-0.0359,0.006
1970-02,0.0948,0.0596,0.0386,0.0687,0.0028,0.0612,0.0822,0.0068,0.0028,0.0322,...,0.1048,0.0181,0.0579,0.0543,0.1554,0.1035,0.0024,0.0918,-0.0481,0.0062


Estimate expected returns, standard deviations, & correlations based on historical expanding windows

- Note: the expanding window includes the current month in the calculation

In [8]:
# One expanding view shared by all three estimators; no estimate is produced
# until WINDOW observations are available
expanding_rets = df[ret_list].expanding(WINDOW)
est_means = expanding_rets.mean()
est_sds   = expanding_rets.std()
est_corrs = expanding_rets.corr()

Subset data with estimates

In [9]:
# Drop the rows of each estimate table that contain missing values
est_means, est_sds, est_corrs = [est.dropna() for est in (est_means, est_sds, est_corrs)]

# Keep only the dates that have estimates, then move the date index into a
# column so the rows are labelled 0, 1, 2, ...
df = df.loc[est_means.index].reset_index()

In [10]:
# Keep a pristine copy that each strategy section reloads
data = df.copy()

# One row per strategy, one column per performance statistic (filled in later)
results = pd.DataFrame(
    index=['Unconstrained', 'No shorting', 'No shorting & max positions', 'Equal-weighted'],
    columns=['sr','avg_ret','sd_ret'],
    dtype=float,
)

## No constraints

In [11]:
# Set position limits
MIN_WGT = None
MAX_WGT = None
RAVER = 3

In [15]:
# Start from the saved estimation-ready data
df = data.copy()

# Names of the per-asset weight columns
wgt_list = [f'wgt{asset}' for asset in asset_list]

# Placeholder (NaN) columns: asset weights, risky-portfolio return,
# weight on the risky portfolio (CAL location), and overall portfolio return
for col in wgt_list + ['risky_ret', 'wgt_risky', 'port_ret']:
    df[col] = np.nan

# Walk forward through time. The first row is skipped because the inputs for
# row i are the estimates dated at row i-1 (estimates include their own month).
for i in df.index[1:]:
    # Estimation inputs as of the previous date
    est_pull_date = df.loc[i-1, 'Date']
    means = est_means.loc[est_pull_date].values
    sds   = est_sds.loc[est_pull_date].values
    C     = est_corrs.loc[(est_pull_date, slice(None))].to_numpy()
    D     = np.diag(sds)
    cov   = D @ C @ D                     # covariance = diag(sd) * corr * diag(sd)

    # Realized values for the current period
    rf   = df.loc[i, 'rf']
    rets = df.loc[i, ret_list].values

    # Weights are beginning-of-period weights, so they are applied to the
    # contemporaneous realized returns
    wgts = constrained_gmv(means, cov, MIN_WGT, MAX_WGT)
    risky_ret = wgts @ rets
    df.loc[i, wgt_list] = wgts
    df.loc[i, 'risky_ret'] = risky_ret

    # Capital allocation and overall portfolio return
    risky_expret = wgts @ means
    risky_sd     = np.sqrt(wgts @ cov @ wgts)
    wgt_risky    = capital_allocation(risky_expret, risky_sd, rf, RAVER)
    df.loc[i, 'wgt_risky'] = wgt_risky
    df.loc[i, 'port_ret']  = rf + wgt_risky * (risky_ret - rf)

In [16]:
# Discard rows with any missing value (the first row is never filled by the loop)
df = df.dropna()
# Keep this strategy's results under its own name
df_unconstrained = df.copy()
df[['Date', 'rf', 'risky_ret', 'wgt_risky', 'port_ret']].head()

Unnamed: 0,Date,rf,risky_ret,wgt_risky,port_ret
1,1985-01,0.0065,0.00934,1.634334,0.011142
2,1985-02,0.0058,0.032529,1.983793,0.058826
3,1985-03,0.0062,0.070052,1.801128,0.121205
4,1985-04,0.0072,0.004017,1.371442,0.002835
5,1985-05,0.0066,0.064387,1.65531,0.102256


In [17]:
# Performance statistics of the overall portfolio return
port_ret = df.port_ret
avg_ret, sd_ret, avg_rf = port_ret.mean(), port_ret.std(), df.rf.mean()
sr = (avg_ret - avg_rf) / sd_ret      # Sharpe ratio: mean excess return per unit of SD

print(f'Avg return of Unconstrained:\t {avg_ret: .4f}')
print(f'SD(return) of Unconstrained:\t {sd_ret: .4f}')
print(f'Sharpe ratio of Unconstrained:\t {sr: .4f}')

# Record this strategy's row in the comparison table
results.loc['Unconstrained', ['avg_ret', 'sd_ret', 'sr']] = [avg_ret, sd_ret, sr]

Avg return of Unconstrained:	  0.0129
SD(return) of Unconstrained:	  0.0942
Sharpe ratio of Unconstrained:	  0.1096


## Short-Sale Constraints 

In [18]:
# Set position limits
MIN_WGT = 0
MAX_WGT = None
RAVER = 3

In [19]:
# Start from the saved estimation-ready data
df = data.copy()

# Names of the per-asset weight columns
wgt_list = [f'wgt{asset}' for asset in asset_list]

# Placeholder (NaN) columns: asset weights, risky-portfolio return,
# weight on the risky portfolio (CAL location), and overall portfolio return
for col in wgt_list + ['risky_ret', 'wgt_risky', 'port_ret']:
    df[col] = np.nan

# Walk forward through time. The first row is skipped because the inputs for
# row i are the estimates dated at row i-1 (estimates include their own month).
for i in df.index[1:]:
    # Estimation inputs as of the previous date
    est_pull_date = df.loc[i-1, 'Date']
    means = est_means.loc[est_pull_date].values
    sds   = est_sds.loc[est_pull_date].values
    C     = est_corrs.loc[(est_pull_date, slice(None))].to_numpy()
    D     = np.diag(sds)
    cov   = D @ C @ D                     # covariance = diag(sd) * corr * diag(sd)

    # Realized values for the current period
    rf   = df.loc[i, 'rf']
    rets = df.loc[i, ret_list].values

    # Weights are beginning-of-period weights, so they are applied to the
    # contemporaneous realized returns
    wgts = constrained_gmv(means, cov, MIN_WGT, MAX_WGT)
    risky_ret = wgts @ rets
    df.loc[i, wgt_list] = wgts
    df.loc[i, 'risky_ret'] = risky_ret

    # Capital allocation and overall portfolio return
    risky_expret = wgts @ means
    risky_sd     = np.sqrt(wgts @ cov @ wgts)
    wgt_risky    = capital_allocation(risky_expret, risky_sd, rf, RAVER)
    df.loc[i, 'wgt_risky'] = wgt_risky
    df.loc[i, 'port_ret']  = rf + wgt_risky * (risky_ret - rf)

In [20]:
# Discard rows with any missing value (the first row is never filled by the loop)
df = df.dropna()
# Keep this strategy's results under its own name
df_noss = df.copy()
df[['Date', 'rf', 'risky_ret', 'wgt_risky', 'port_ret']].head()

Unnamed: 0,Date,rf,risky_ret,wgt_risky,port_ret
1,1985-01,0.0065,0.041685,1.015535,0.042232
2,1985-02,0.0058,0.042708,1.238702,0.051519
3,1985-03,0.0062,0.046096,1.17181,0.05295
4,1985-04,0.0072,-0.01647,0.972764,-0.015825
5,1985-05,0.0066,0.058232,1.078279,0.062274


In [21]:
# Performance statistics of the overall portfolio return
port_ret = df.port_ret
avg_ret, sd_ret, avg_rf = port_ret.mean(), port_ret.std(), df.rf.mean()
sr = (avg_ret - avg_rf) / sd_ret      # Sharpe ratio: mean excess return per unit of SD

print(f'Avg return of No shorting:\t {avg_ret: .4f}')
print(f'SD(return) of No shorting:\t {sd_ret: .4f}')
print(f'Sharpe ratio of No shorting:\t {sr: .4f}')

# Record this strategy's row in the comparison table
results.loc['No shorting', ['avg_ret', 'sd_ret', 'sr']] = [avg_ret, sd_ret, sr]

Avg return of No shorting:	  0.0157
SD(return) of No shorting:	  0.0734
Sharpe ratio of No shorting:	  0.1793


## Short-Sale Constraints and Max Position Limits

In [22]:
# Set position limits
MIN_WGT = 0
MAX_WGT = 0.1
RAVER = 3

In [23]:
# Start from the saved estimation-ready data
df = data.copy()

# Names of the per-asset weight columns
wgt_list = [f'wgt{asset}' for asset in asset_list]

# Placeholder (NaN) columns: asset weights, risky-portfolio return,
# weight on the risky portfolio (CAL location), and overall portfolio return
for col in wgt_list + ['risky_ret', 'wgt_risky', 'port_ret']:
    df[col] = np.nan

# Walk forward through time. The first row is skipped because the inputs for
# row i are the estimates dated at row i-1 (estimates include their own month).
for i in df.index[1:]:
    # Estimation inputs as of the previous date
    est_pull_date = df.loc[i-1, 'Date']
    means = est_means.loc[est_pull_date].values
    sds   = est_sds.loc[est_pull_date].values
    C     = est_corrs.loc[(est_pull_date, slice(None))].to_numpy()
    D     = np.diag(sds)
    cov   = D @ C @ D                     # covariance = diag(sd) * corr * diag(sd)

    # Realized values for the current period
    rf   = df.loc[i, 'rf']
    rets = df.loc[i, ret_list].values

    # Weights are beginning-of-period weights, so they are applied to the
    # contemporaneous realized returns
    wgts = constrained_gmv(means, cov, MIN_WGT, MAX_WGT)
    risky_ret = wgts @ rets
    df.loc[i, wgt_list] = wgts
    df.loc[i, 'risky_ret'] = risky_ret

    # Capital allocation and overall portfolio return
    risky_expret = wgts @ means
    risky_sd     = np.sqrt(wgts @ cov @ wgts)
    wgt_risky    = capital_allocation(risky_expret, risky_sd, rf, RAVER)
    df.loc[i, 'wgt_risky'] = wgt_risky
    df.loc[i, 'port_ret']  = rf + wgt_risky * (risky_ret - rf)

In [24]:
# Discard rows with any missing value (the first row is never filled by the loop)
df = df.dropna()
# Keep this strategy's results under its own name
df_noss_maxlim = df.copy()
df[['Date', 'rf', 'risky_ret', 'wgt_risky', 'port_ret']].head()

Unnamed: 0,Date,rf,risky_ret,wgt_risky,port_ret
1,1985-01,0.0065,0.050836,0.562722,0.031449
2,1985-02,0.0058,0.029929,0.757192,0.02407
3,1985-03,0.0062,0.033098,0.700216,0.025035
4,1985-04,0.0072,-0.014157,0.513723,-0.003772
5,1985-05,0.0066,0.060433,0.60893,0.03938


In [25]:
# Performance statistics of the overall portfolio return
port_ret = df.port_ret
avg_ret, sd_ret, avg_rf = port_ret.mean(), port_ret.std(), df.rf.mean()
sr = (avg_ret - avg_rf) / sd_ret      # Sharpe ratio: mean excess return per unit of SD

print(f'Avg return of No shorting & max positions:\t {avg_ret: .4f}')
print(f'SD(return) of No shorting & max positions:\t {sd_ret: .4f}')
print(f'Sharpe ratio of No shorting & max positions:\t {sr: .4f}')

# Record this strategy's row in the comparison table
results.loc['No shorting & max positions', ['avg_ret', 'sd_ret', 'sr']] = [avg_ret, sd_ret, sr]

Avg return of No shorting & max positions:	  0.0161
SD(return) of No shorting & max positions:	  0.0702
Sharpe ratio of No shorting & max positions:	  0.1930


## Equal-weighted

In [26]:
# Start from the saved estimation-ready data
df = data.copy()

# Names of the per-asset weight columns
wgt_list = [f'wgt{asset}' for asset in asset_list]

# Placeholder (NaN) columns: asset weights, risky-portfolio return,
# weight on the risky portfolio (CAL location), and overall portfolio return
for col in wgt_list + ['risky_ret', 'wgt_risky', 'port_ret']:
    df[col] = np.nan

# Walk forward through time. The first row is skipped because the inputs for
# row i are the estimates dated at row i-1 (estimates include their own month).
for i in df.index[1:]:
    # Estimation inputs as of the previous date
    est_pull_date = df.loc[i-1, 'Date']
    means = est_means.loc[est_pull_date].values
    sds   = est_sds.loc[est_pull_date].values
    C     = est_corrs.loc[(est_pull_date, slice(None))].to_numpy()
    D     = np.diag(sds)
    cov   = D @ C @ D                     # covariance = diag(sd) * corr * diag(sd)

    # Realized values for the current period
    rf   = df.loc[i, 'rf']
    rets = df.loc[i, ret_list].values

    # Equal weights (1/n in every asset), treated as beginning-of-period
    # weights and applied to the contemporaneous realized returns
    wgts = np.full(n, 1/n)
    risky_ret = wgts @ rets
    df.loc[i, wgt_list] = wgts
    df.loc[i, 'risky_ret'] = risky_ret

    # Capital allocation and overall portfolio return
    # NOTE(review): the weight on the risky portfolio is floored at 0 here, while
    # the other three strategy loops use capital_allocation unclamped -- confirm
    # this difference is intended.
    risky_expret = wgts @ means
    risky_sd     = np.sqrt(wgts @ cov @ wgts)
    wgt_risky    = max(0, capital_allocation(risky_expret, risky_sd, rf, RAVER))
    df.loc[i, 'wgt_risky'] = wgt_risky
    df.loc[i, 'port_ret']  = rf + wgt_risky * (risky_ret - rf)

In [27]:
# Discard rows with any missing value (the first row is never filled by the loop)
df = df.dropna()
# Keep this strategy's results under its own name
df_ew = df.copy()
df[['Date', 'rf', 'risky_ret', 'wgt_risky', 'port_ret']].head()

Unnamed: 0,Date,rf,risky_ret,wgt_risky,port_ret
1,1985-01,0.0065,0.097065,0.285082,0.032318
2,1985-02,0.0058,0.017535,0.414214,0.010661
3,1985-03,0.0062,-0.003635,0.376748,0.002495
4,1985-04,0.0072,-0.014421,0.258474,0.001612
5,1985-05,0.0066,0.057908,0.312556,0.022637


In [28]:
# Performance statistics of the overall portfolio return
port_ret = df.port_ret
avg_ret, sd_ret, avg_rf = port_ret.mean(), port_ret.std(), df.rf.mean()
sr = (avg_ret - avg_rf) / sd_ret      # Sharpe ratio: mean excess return per unit of SD

print(f'Avg return of Equal-weighted:\t {avg_ret: .4f}')
print(f'SD(return) of Equal-weighted:\t {sd_ret: .4f}')
print(f'Sharpe ratio of Equal-weighted:\t {sr: .4f}')

# Record this strategy's row in the comparison table
results.loc['Equal-weighted', ['avg_ret', 'sd_ret', 'sr']] = [avg_ret, sd_ret, sr]

Avg return of Equal-weighted:	  0.0105
SD(return) of Equal-weighted:	  0.0509
Sharpe ratio of Equal-weighted:	  0.1571


## Comparing the results

In [29]:
# Show the comparison table (Sharpe ratio, average return, SD of return per strategy) rounded to 4 decimals
results.round(4)

Unnamed: 0,sr,avg_ret,sd_ret
Unconstrained,0.1096,0.0129,0.0942
No shorting,0.1793,0.0157,0.0734
No shorting & max positions,0.193,0.0161,0.0702
Equal-weighted,0.1571,0.0105,0.0509


Plot the data

In [30]:
# Move the strategy names out of the index into an 'index' column for plotting
results = results.reset_index()

# Hover text: Sharpe ratio from the bar height, average return and SD from customdata
string = (
    "Sharpe ratio: %{y:0.3f}<br>"
    "Average return: %{customdata[0]:0.1%}<br>"
    "SD(return): %{customdata[1]:0.1%}<br>"
    "<extra></extra>"
)

# One bar per strategy, with bar height equal to the Sharpe ratio
trace = go.Bar(
    x=results['index'],
    y=results['sr'],
    customdata=results[['avg_ret','sd_ret']],
    hovertemplate=string,
)
fig = go.Figure(data=[trace])
fig.update_layout(xaxis_title="Strategy", yaxis_title="Sharpe ratio")
fig.show()