In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import statsmodels.api as sm
import statsmodels.formula.api as smf 
import warnings
warnings.simplefilter(action='ignore')
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
import scipy.optimize as solver
from functools import reduce
from numpy.linalg import inv,norm

In [2]:
Data_FF = pd.read_excel('Data_Project_2_2020-1.xlsx',header=None)
Data_FF.columns = ['Date','mktrf','smb','hml','RMW','CMA','rf','umd']# Specify the names of the columns
Data_FF = Data_FF.drop(0,axis=0) #drop first row
Data_FF.set_index('Date',inplace=True)
Data_FF = Data_FF.astype(float) / 100

#load the data for mutual fund, hedge fund and smart beta
xls = pd.ExcelFile('Data_Project_2_2020-1.xlsx')
MF = pd.read_excel(xls, 'Mutual Fund')
SB = pd.read_excel(xls, 'Smart Beta')
HF = pd.read_excel(xls, 'Hedge Fund Index')
for i in [MF,SB,HF]:
    i.set_index('Date',inplace=True)

#organize the index of all matrices
for i in [Data_FF,MF,SB,HF]:
    i.index = i.index.astype(str)

MF=MF['200101':]
HF=HF['200101':]
SB=SB['200101':]

pd.set_option("display.precision", 4)
np.set_printoptions(precision=4)

# **Part 1**

### **3.1 Mutual Fund**

In [3]:
#subtract risk free rate from mutual fund returns
for i in MF.columns:
    MF[i] = MF[i]-Data_FF['rf']['200101':'201912']
MF

Unnamed: 0_level_0,FSMEX,FSELX,INPIX,SLMCX,CSEIX,FSPHX,NASDX,JAENX,JENSX,DAPAX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
200101,-0.0276,0.4087,0.1086,0.2381,0.0415,-0.0080,0.0989,0.0453,-0.0369,0.1314
200102,0.0164,-0.2980,-0.5088,-0.2060,-0.0147,-0.0180,-0.2677,-0.2218,-0.0252,-0.1364
200103,-0.0907,-0.0759,-0.3975,-0.0739,0.0050,-0.0881,-0.1834,-0.1847,-0.0619,-0.0907
200104,0.0123,0.2535,0.3346,0.2351,0.0425,0.0257,0.1748,0.1427,0.0356,0.0833
200105,0.0660,-0.0742,-0.0241,-0.0257,0.0163,0.0256,-0.0327,-0.0146,0.0211,-0.0206
...,...,...,...,...,...,...,...,...,...,...
201908,-0.0134,-0.0382,-0.0952,-0.0308,0.0476,-0.0288,-0.0225,-0.0182,-0.0127,-0.0123
201909,-0.0163,0.0430,-0.0334,0.0192,0.0029,-0.0261,0.0041,0.0084,0.0102,-0.0041
201910,0.0195,0.0629,-0.0142,0.0444,0.0214,0.0655,0.0440,-0.0036,0.0151,0.0249
201911,0.0613,0.0558,0.0410,0.0511,-0.0372,0.0888,-0.0271,0.0384,0.0341,0.0450


In [4]:
#extract and name the sample period for mutual funds benchmark 
MF_FF = Data_FF.loc['200101':'201912']

#append the mutual fund data with the benchmark data to facilitate regression
MF_reg = pd.concat([MF,MF_FF],axis=1)

In [5]:
#create a dataframe to contain mutual fund regression data
MF_data = pd.DataFrame(index=MF.columns,columns=['alpha','mktrf', 'smb', 'hml', 'RMW', 'CMA', 'umd'])

##create a dataframe to contain t-statistics for the regression data
MF_tstat=pd.DataFrame(index=MF.columns,columns=['alpha','mktrf', 'smb', 'hml', 'RMW', 'CMA', 'umd'])

#regress the 10 mutual funds return 
for i in MF.columns:
    reg_MF = smf.ols(i+'~mktrf+smb+hml+RMW+CMA+umd', data=MF_reg).fit()
    MF_data.loc[i]=reg_MF.params.tolist()
    MF_tstat.loc[i]=reg_MF.tvalues.tolist()

In [6]:
#define functions to highlight funds with significant alphas 
def significant_tstat(company):
    if company.alpha > 2:
        return ['background-color: yellow']*7
    else:
        return ['background-color: white']*7

In [7]:
MF_data

Unnamed: 0,alpha,mktrf,smb,hml,RMW,CMA,umd
FSMEX,0.006234,0.682,0.2819,-0.2058,-0.1525,-0.02684,0.153
FSELX,0.004536,1.288,0.4999,-0.6214,-0.6503,-0.299,-0.2677
INPIX,0.008276,1.786,0.1312,-1.023,-1.62,-0.5765,-0.1412
SLMCX,0.00482,1.046,0.2806,-0.4623,-0.3214,-0.5088,-0.1252
CSEIX,0.002567,0.8491,0.3197,0.3296,0.4205,0.02162,-0.1416
FSPHX,0.08957,-1.273,0.2979,4.533,-0.9855,-3.53,-0.5839
NASDX,0.002857,1.128,-0.03676,-0.5498,-0.4205,-0.2456,-0.104
JAENX,0.0003458,1.018,0.2544,-0.2349,-0.05168,-0.3975,-0.03695
JENSX,-0.0005523,0.9275,-0.1202,-0.04086,0.287,0.1929,-0.01207
DAPAX,-0.0006866,1.004,-0.01838,-0.2655,0.02397,-0.2423,-0.01638


In [8]:
MF_tstat.style.apply(significant_tstat, axis=1)

Unnamed: 0,alpha,mktrf,smb,hml,RMW,CMA,umd
FSMEX,2.229,8.601,2.483,-1.61,-0.9779,-0.148,2.445
FSELX,1.319,13.21,3.579,-3.953,-3.391,-1.34,-3.479
INPIX,2.241,17.05,0.8752,-6.06,-7.869,-2.407,-1.71
SLMCX,1.878,14.38,2.693,-3.942,-2.247,-3.058,-2.181
CSEIX,0.6457,7.532,1.981,1.814,1.897,0.08387,-1.592
FSPHX,1.563,-0.7836,0.128,1.731,-0.3084,-0.9498,-0.4555
NASDX,1.863,25.95,-0.5905,-7.849,-4.92,-2.47,-3.034
JAENX,0.233,24.2,4.221,-3.463,-0.6246,-4.13,-1.113
JENSX,-0.3845,22.78,-2.061,-0.6227,3.584,2.072,-0.3756
DAPAX,-0.3459,17.84,-0.2281,-2.928,0.2166,-1.883,-0.3691


### 3.2 Hedge Fund

In [9]:
#subtract risk free rate from mutual fund returns
for i in HF.columns:
    HF[i] = HF[i]-Data_FF['rf']['200101':'201912']
HF

Unnamed: 0_level_0,HFRIDSI,HFRIMAI,HFRIEMNI,HFRIENHI,HFRIEM,HFRIEHI,HFRIEDI,HFRIMI,HFRIMTI,HFRIFOF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
200101,0.0225,0.0056,-0.0211,0.0746,0.0607,0.0234,0.0405,0.0161,-0.0021,0.0139
200102,0.0092,0.0006,0.0169,-0.0779,-0.0309,-0.0294,-0.0076,-0.0209,-0.0366,-0.0112
200103,-0.0104,-0.0117,0.0135,-0.0547,-0.0349,-0.0272,-0.0067,0.0041,-0.0246,-0.0086
200104,-0.0021,-0.0016,-0.0033,0.0532,0.0117,0.0188,0.0084,-0.0051,0.0341,0.0030
200105,0.0243,0.0137,-0.0004,0.0200,0.0230,0.0058,0.0162,-0.0042,0.0074,0.0058
...,...,...,...,...,...,...,...,...,...,...
201908,-0.0153,-0.0012,0.0014,-0.0146,-0.0310,-0.0190,-0.0147,0.0214,0.0353,-0.0093
201909,-0.0020,0.0024,-0.0044,0.0035,0.0079,-0.0014,0.0001,-0.0202,-0.0337,-0.0063
201910,-0.0060,0.0031,-0.0035,0.0136,0.0176,0.0125,0.0022,-0.0126,-0.0233,0.0035
201911,-0.0092,0.0080,0.0002,0.0134,-0.0055,0.0155,0.0058,0.0019,0.0063,0.0080


In [10]:
#extract and name the sample period for mutual funds benchmark 
HF_FF = Data_FF.loc['200101':'201912']

#append the mutual fund data with the benchmark data to facilitate regression
HF_reg = pd.concat([HF,HF_FF],axis=1)

In [11]:
#create a dataframe to contain mutual fund regression data
HF_data = pd.DataFrame(index=HF.columns,columns=['alpha','mktrf', 'smb', 'hml', 'RMW', 'CMA', 'umd'])

##create a dataframe to contain t-statistics for the regression data
HF_tstat=pd.DataFrame(index=HF.columns,columns=['alpha','mktrf', 'smb', 'hml', 'RMW', 'CMA', 'umd'])

#regress the 10 mutual funds return 
for i in HF.columns:
    reg_HF = smf.ols(i+'~mktrf+smb+hml+RMW+CMA+umd', data=HF_reg).fit()
    HF_data.loc[i]=reg_HF.params.tolist()
    HF_tstat.loc[i]=reg_HF.tvalues.tolist()

In [12]:
HF_data

Unnamed: 0,alpha,mktrf,smb,hml,RMW,CMA,umd
HFRIDSI,0.002954,0.2332,0.09725,0.08539,0.006676,-0.0007601,0.003584
HFRIMAI,0.001535,0.1297,0.0514,-0.01332,-0.002711,0.001171,0.0162
HFRIEMNI,0.0002971,0.1041,0.0166,0.02633,0.03366,0.02312,0.07489
HFRIENHI,0.001081,0.4935,0.1675,-0.1162,-0.1353,-0.03754,0.01307
HFRIEM,0.002547,0.4891,0.09449,0.02822,0.06711,-0.1289,-0.03311
HFRIEHI,0.0001905,0.4394,0.1447,-0.0154,0.007877,-0.06833,0.02586
HFRIEDI,0.002314,0.2827,0.1296,0.02611,-0.04392,0.003315,-0.004081
HFRIMI,0.001973,0.09635,0.02226,-0.0714,0.01054,0.008317,0.04441
HFRIMTI,0.002091,0.1636,-0.03781,-0.1982,-0.05203,0.08703,0.08369
HFRIFOF,0.0002141,0.2362,0.05585,-0.02808,0.002423,-0.04441,0.05351


In [13]:
HF_tstat.style.apply(significant_tstat, axis=1)

Unnamed: 0,alpha,mktrf,smb,hml,RMW,CMA,umd
HFRIDSI,3.213,8.945,2.605,2.032,0.1302,-0.01275,0.1742
HFRIMAI,3.472,10.34,2.863,-0.6594,-0.11,0.04083,1.638
HFRIEMNI,0.7348,9.083,1.011,1.426,1.494,0.8819,8.283
HFRIENHI,1.28,20.62,4.887,-3.012,-2.876,-0.6857,0.6923
HFRIEM,1.708,11.57,1.561,0.4142,0.8073,-1.333,-0.9929
HFRIEHI,0.2619,21.3,4.897,-0.4632,0.1942,-1.448,1.589
HFRIEDI,3.262,14.06,4.499,0.8057,-1.111,0.07209,-0.2573
HFRIMI,2.104,3.625,0.5848,-1.667,0.2016,0.1368,2.118
HFRIMTI,1.456,4.016,-0.6482,-3.019,-0.6496,0.9342,2.605
HFRIFOF,0.3325,12.94,2.136,-0.9541,0.06748,-1.063,3.715


### 3.3 Smart Betas

In [14]:
#subtract risk free rate from mutual fund returns
for i in SB.columns:
    SB[i] = SB[i]-Data_FF['rf']['200101':'201912']
SB

Unnamed: 0_level_0,EZM,DWAQ,FPX,PWC,DLN,PSL,RSP,NFO,SPHQ,CVY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
200101,,,,,,,,,,
200102,,,,,,,,,,
200103,,,,,,,,,,
200104,,,,,,,,,,
200105,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...
201908,-0.0504,-0.0462,-0.0074,-0.0478,-0.0207,0.0268,-0.0343,0.0062,-0.0138,-0.0490
201909,0.0307,-0.0876,-0.0425,0.0087,0.0324,-0.0459,0.0260,-0.0026,0.0127,0.0432
201910,0.0216,0.0100,0.0148,0.0077,0.0154,-0.0163,0.0157,0.0026,0.0278,0.0117
201911,0.0297,0.0865,0.0364,0.0243,0.0289,0.0245,0.0328,0.0198,0.0323,0.0238


In [15]:
#extract and name the sample period for mutual funds benchmark 
SB_FF = Data_FF.loc['200101':'201912']

#append the mutual fund data with the benchmark data to facilitate regression
SB_reg = pd.concat([SB,SB_FF],axis=1)

In [16]:
#create a dataframe to contain mutual fund regression data
SB_data = pd.DataFrame(index=SB.columns,columns=['alpha','mktrf', 'smb', 'hml', 'RMW', 'CMA', 'umd'])

##create a dataframe to contain t-statistics for the regression data
SB_tstat=pd.DataFrame(index=SB.columns,columns=['alpha','mktrf', 'smb', 'hml', 'RMW', 'CMA', 'umd'])

#regress the 10 mutual funds return 
for i in SB.columns:
    reg_SB = smf.ols(i+'~mktrf+smb+hml+RMW+CMA+umd', data=SB_reg).fit()
    SB_data.loc[i]=reg_SB.params.tolist()
    SB_tstat.loc[i]=reg_SB.tvalues.tolist()

In [17]:
SB_data

Unnamed: 0,alpha,mktrf,smb,hml,RMW,CMA,umd
EZM,-0.0003355,1.03,0.5157,-0.06112,0.2305,0.0252,-0.1707
DWAQ,-0.0004139,1.033,0.5086,-0.2273,-0.3599,-0.1909,0.1925
FPX,0.001348,1.08,0.1865,-0.3146,-0.1093,-0.2487,-0.0006754
PWC,-0.001409,1.002,0.1988,0.02029,0.07606,-0.05524,0.1308
DLN,-0.0004252,0.9244,-0.2628,0.1515,0.1448,0.164,-0.05857
PSL,0.001996,0.6965,0.06949,-0.1094,0.09647,0.04691,0.05105
RSP,6.458e-05,1.049,0.1127,-0.01782,0.04123,0.104,-0.1443
NFO,0.0001022,1.035,0.3317,-0.1817,-0.01207,-0.02565,-0.1857
SPHQ,-0.002313,1.017,0.1159,-0.1256,0.236,-0.01267,0.1919
CVY,-0.002831,0.9151,-0.09246,0.01016,0.2122,0.03934,-0.3851


In [18]:
SB_tstat.style.apply(significant_tstat, axis=1)

Unnamed: 0,alpha,mktrf,smb,hml,RMW,CMA,umd
EZM,-0.3298,38.15,11.24,-1.194,3.36,0.3036,-7.119
DWAQ,-0.282,24.18,7.499,-3.102,-3.657,-1.649,5.256
FPX,0.9493,28.08,2.881,-4.408,-1.123,-2.149,-0.01977
PWC,-1.525,37.25,4.654,0.4396,1.227,-0.758,5.671
DLN,-0.5483,44.22,-7.457,3.898,2.741,2.613,-3.154
PSL,1.135,14.8,0.8672,-1.221,0.8041,0.3246,1.215
RSP,0.1171,65.03,4.395,-0.6525,1.149,2.415,-10.61
NFO,0.07223,27.27,5.14,-2.515,-0.1247,-0.2201,-5.482
SPHQ,-1.623,26.06,1.81,-1.761,2.406,-0.1097,5.581
CVY,-1.653,19.94,-1.184,0.1163,1.813,0.2791,-9.399


## **Part 2**

In [19]:
#define the in-sample and out-of-sample dataset
insample_MF = MF['200101':'201212']
outsample_MF = MF['201301':'201912']

insample_HF = HF['200101':'201212']
outsample_HF = HF['201301':'201912']

insample_SB = SB['200101':'201212']
outsample_SB = SB['201301':'201912']

### **Portfolio optimization using Mutual Funds**

#### Global-Minimum Variance Portfolio

In [20]:
covariance_MFIS = insample_MF.cov()

In [21]:
#define the objective functions for the GMV optimization problems
def port_std_MFIS(w):
    return np.sqrt(reduce(np.dot, [w, covariance_MFIS, w.T]))  

In [22]:
#define the long only GMV optimization function given the objective function and number of assets
def GMVoptimize(func, num,init):
#     init = np.array([1.0 / num for x in range(num)])  
    bounds = tuple((0, 1) for x in range(num))  
    constraints = [{'type': 'eq', 'fun': lambda x: sum(x) - 1}]
    outcome = solver.minimize(func, x0=init, constraints=constraints, bounds=bounds)  
    return outcome.x

#define the function to calculate optimal portfolio weights for long short GMV portfolios
def GMVoptimize_extend(func, num,init):
#     init = np.array([1.0 / num for x in range(num)])  
    constraints = [{'type': 'eq', 'fun': lambda x: sum(x) - 1}]
    outcome = solver.minimize(func, x0=init, constraints=constraints)  
    return outcome.x

In [23]:
#calculate the portfolio return and standard deviation given the weights
def port_stats(op_weights,returns):
    port_mean = sum(op_weights*returns.mean())
    port_std = np.sqrt(reduce(np.dot, [op_weights, returns.cov(), op_weights.T]))  
    return [port_mean,port_std]

In [24]:
#we run the optimization with 100 initial guesses, and pick the best one - this reduces our chance to get stuck on a local minimum
first_guess = np.array([1.0 / 10 for x in range(10)])  
GMV_weights = GMVoptimize(port_std_MFIS,10,first_guess)
for _ in range(100):
    a = np.random.random(10)
    a /= a.sum()
    old =reduce(np.dot, [GMV_weights, covariance_MFIS, GMV_weights.T])
    new_w=GMVoptimize(port_std_MFIS,10,a)
    new=reduce(np.dot, [new_w, covariance_MFIS, new_w.T])
    if new<=old:
        GMV_weights=new_w
    else:
        GMV_weights=GMV_weights
        
#this gives us the final optimal weights that optimizes the long only GMV portfolios
print(GMV_weights)
#we also print out the resulting portfolio variance
print(reduce(np.dot, [GMV_weights, covariance_MFIS, GMV_weights.T]))

[2.9763e-01 0.0000e+00 1.5396e-17 0.0000e+00 7.1991e-17 4.6852e-02
 0.0000e+00 1.7347e-18 6.5551e-01 8.6736e-19]
0.0014766820646828316


In [25]:
#we run the optimization with 100 initial guesses, and pick the best one - this reduces our chance to get stuck on a local minimum
first_guess = np.array([1.0 / 10 for x in range(10)])  
GMV_weights2 = GMVoptimize_extend(port_std_MFIS,10,first_guess)
for _ in range(100):
    a = np.random.random(10)
    a /= a.sum()
    old =reduce(np.dot, [GMV_weights2, covariance_MFIS, GMV_weights2.T])
    new_w=GMVoptimize_extend(port_std_MFIS,10,a)
    new=reduce(np.dot, [new_w, covariance_MFIS, new_w.T])
    if new<=old:
        GMV_weights2=new_w
    else:
        GMV_weights2=GMV_weights2
        
#this gives us the final optimal weights that optimizes the long short GMV portfolios
print(GMV_weights2)
#we also print out the resulting portfolio variance
print(reduce(np.dot, [GMV_weights2, covariance_MFIS, GMV_weights2.T]))

[ 0.2663 -0.2454 -0.3553  0.3019 -0.0581 -0.0755  0.5404  0.0358  0.4762
  0.1137]
0.0008877339784774901


In [26]:
# #calculate the long only GMV optimal portfolio weights using the in-sample data
# GMV_weights = GMVoptimize(port_std_MFIS,10)
# GMV_weights

In [27]:
# #calculate the long short GMV optimal portfolio weights using the in-sample data
# GMV_weights2 = GMVoptimize_extend(port_std_MFIS,10)
# GMV_weights2

In [28]:
#just to verify our weights
ones=np.ones(10).reshape(10,1)
df=covariance_MFIS
df_inv = pd.DataFrame(np.linalg.pinv(df.values), df.columns, df.index)
xxy=df_inv.dot(ones) / (ones.T.dot(df_inv.dot(ones)))
xxy[0]
reduce(np.dot, [xxy[0], covariance_MFIS, xxy[0].T])

0.0008876436729811842

In [29]:
xxy

Unnamed: 0,0
FSMEX,0.2711
FSELX,-0.243
INPIX,-0.3567
SLMCX,0.2924
CSEIX,-0.0581
FSPHX,-0.0765
NASDX,0.5467
JAENX,0.0434
JENSX,0.4707
DAPAX,0.11


#### **Maximum Sharpe Ratio Portolios**

In [30]:
# we assume the risk aversion factor is 2 during our calculations (A=2)

#define the objective functions for the MVP optimization problems

# def sharpe_MFIS(w):
#     mu = np.dot(w,insample_MF.mean())
#     S = reduce(np.dot, [w, covariance_MFIS, w.T])
#     A = 2
#     return -(mu - (A/2)*S)

def sharpe_MFIS(w):
    mu = np.dot(w,insample_MF.mean())
    S = reduce(np.dot, [w, covariance_MFIS, w.T])
    sq=S**0.5
    return -(mu/sq)

In [31]:
#define the long only MVP optimization function given the objective function and number of assets
def MVPoptimize(func, num,init):
    bounds = tuple((0, 1) for x in range(num))  
    constraints = [{'type': 'eq', 'fun': lambda x: sum(x) - 1}]
    outcome = solver.minimize(func, x0=init, constraints=constraints, bounds=bounds)  
    return outcome.x

#define the long short MVP optimization function given the objective function and number of assets
def MVPoptimize_extend(func, num,init):
#     init = np.array([1.0 / num for x in range(num)])  
    constraints = [{'type': 'eq', 'fun': lambda x: sum(x) - 1}]
    outcome = solver.minimize(func, x0=init, constraints=constraints)  
    return outcome.x

In [32]:
#we run the optimization with 100 initial guesses, and pick the best one - this reduces our chance to get stuck on a local minimum
first_guess = np.array([1.0 / 10 for x in range(10)])  
MVP_weights = MVPoptimize(sharpe_MFIS,10,first_guess)
for _ in range(100):
    a = np.random.random(10)
    a /= a.sum()
    old = port_stats(MVP_weights,insample_MF)[0]/(reduce(np.dot, [MVP_weights, covariance_MFIS, MVP_weights.T])**0.5)
    new_w=MVPoptimize(sharpe_MFIS,10,a)
    new=port_stats(new_w,insample_MF)[0]/(reduce(np.dot, [new_w, covariance_MFIS, new_w.T])**0.5)    
    if new>=old:
        MVP_weights=new_w
    else:
        MVP_weights=MVP_weights
        
#this gives us the final optimal weights that optimizes the long only MVP portfolios
print(MVP_weights)
#we also print out the resulting portfolio sharpe ratio
print(port_stats(MVP_weights,insample_MF)[0]/(reduce(np.dot, [MVP_weights, covariance_MFIS, MVP_weights.T])**0.5))

[6.8825e-01 0.0000e+00 0.0000e+00 8.8362e-17 3.1175e-01 0.0000e+00
 0.0000e+00 0.0000e+00 5.1391e-17 0.0000e+00]
0.14668547629372528


In [33]:
#we run the optimization with multiple initial guesses, and pick the best one - this reduces our chance to get stuck on a local minimum
first_guess = np.array([1.0 / 10 for x in range(10)])  
MVP_weights2 = MVPoptimize_extend(sharpe_MFIS,10,first_guess)
for _ in range(100):
    a = np.random.random(10)
    a /= a.sum()
    old = port_stats(MVP_weights2,insample_MF)[0]/(reduce(np.dot, [MVP_weights2, covariance_MFIS, MVP_weights2.T])**0.5)
    new_w=MVPoptimize_extend(sharpe_MFIS,10,a)
    new=port_stats(new_w,insample_MF)[0]/(reduce(np.dot, [new_w, covariance_MFIS, new_w.T])**0.5)
    if new>old:
        MVP_weights2=new_w
    else:
        MVP_weights2=MVP_weights2
        
#this gives us the final optimal weights that optimizes the long short MVP portfolios
print(MVP_weights2)
#we also print out the resulting portfolio sharpe ratio
print(port_stats(MVP_weights2,insample_MF)[0]/(reduce(np.dot, [MVP_weights2, covariance_MFIS, MVP_weights2.T])**0.5))

[ 2.4508 -0.9422 -0.2328  2.856   1.2161 -1.2787  0.9371 -1.4827 -0.1202
 -2.4035]
0.2713761145000656


In [34]:
mu=insample_MF.mean()
mu=np.asarray(mu)
S=np.asarray(covariance_MFIS)
w=np.dot(inv(S),mu)

In [35]:
w

array([ 5.6623, -2.1921, -0.5427,  6.6352,  2.8262, -2.9411,  2.1898,
       -3.4506, -0.2891, -5.5795])

In [36]:
port_stats(w,insample_MF)

[0.07364557176177684, 0.2713771761990621]

In [37]:
# #calculate the long only MVP optimal portfolio weights using the in-sample data
# MVP_weights = MVPoptimize(sharpe_MFIS,10)
# MVP_weights

In [38]:
# #calculate the long short MVP optimal portfolio weights using the in-sample data
# MVP_weights2 = MVPoptimize_extend(sharpe_MFIS,10)
# MVP_weights2

#### MVP with tracking error constraint

In [39]:
#define the tracking error constraint
def constraint1(w):
    benchmark=np.array([1.0 / 10 for x in range(10)])
    x=w-benchmark
    return np.sqrt(reduce(np.dot, [x.T, covariance_MFIS, x]))-0.05

In [40]:
#define the MVP optimization with tracking error constraint function given the objective function and number of assets
def MF_TrackError(func, num,init):
#     init = np.array([1.0 / num for x in range(num)])  
    constraints = [{'type': 'eq', 'fun': lambda x: sum(x) - 1},
                  {'type': 'ineq', 'fun': constraint1}] 
    outcome = solver.minimize(func, x0=init, constraints=constraints)  
    return outcome.x

In [41]:
#we run the optimization with 100 initial guesses, and pick the best one - this reduces our chance to get stuck on a local minimum
first_guess = np.array([1.0 / 10 for x in range(10)])  
TE_weights = MF_TrackError(sharpe_MFIS,10,first_guess)
for _ in range(100):
    a = np.random.random(10)
    a /= a.sum()
    old = port_stats(TE_weights,insample_MF)[0]/(reduce(np.dot, [TE_weights, covariance_MFIS, TE_weights.T])**0.5)
    new_w=MF_TrackError(sharpe_MFIS,10,a)
    new=port_stats(new_w,insample_MF)[0]/(reduce(np.dot, [new_w, covariance_MFIS, new_w.T])**0.5)    
    if new>=old:
        TE_weights=new_w
    else:
        TE_weights=TE_weights
        
#this gives us the final optimal weights that optimizes the MVP portfolios with tracking error constraint
print(TE_weights)
#we also print out the resulting portfolio sharpe ratio
print(port_stats(TE_weights,insample_MF)[0]/(reduce(np.dot, [TE_weights, covariance_MFIS, TE_weights.T])**0.5))

[ 2.45   -0.9444 -0.2331  2.8603  1.2186 -1.2755  0.9414 -1.4873 -0.1262
 -2.4038]
0.2713768084113071


In [42]:
# TE_weights = MF_TrackError(sharpe_MFIS,10)
# TE_weights

#### MVP with factor neutral constraint

In [43]:
#define the factor neutral constraints for the fama french 5 factors
def constraint2_1(w):
    return MF_data['mktrf'].dot(w)-0
def constraint2_2(w):
    return MF_data['smb'].dot(w)-0
def constraint2_3(w):
    return MF_data['hml'].dot(w)-0
def constraint2_4(w):
    return MF_data['RMW'].dot(w)-0
def constraint2_5(w):
    return MF_data['CMA'].dot(w)-0

In [44]:
def MF_FactorNeutral(func, num,init):
#     init = np.array([1.0 / num for x in range(num)])  
    constraints = [{'type': 'eq', 'fun': lambda x: sum(x) - 1},
                  {'type': 'eq', 'fun': constraint2_1},
                  {'type': 'eq', 'fun': constraint2_2},
                  {'type': 'eq', 'fun': constraint2_3},
                  {'type': 'eq', 'fun': constraint2_4},
                  {'type': 'eq', 'fun': constraint2_5}]
    bounds = tuple((-2, 2) for x in range(num))  
    outcome = solver.minimize(func, x0=init, constraints=constraints,bounds=bounds)  
    return outcome.x

In [45]:
#we run the optimization with 100 initial guesses, and pick the best one - this reduces our chance to get stuck on a local minimum
first_guess = np.array([1.0 / 10 for x in range(10)])  
FN_weights = MF_FactorNeutral(sharpe_MFIS,10,first_guess)
for _ in range(100):
    a = np.random.random(10)
    a /= a.sum()
    old = port_stats(FN_weights,insample_MF)[0]/(reduce(np.dot, [FN_weights, covariance_MFIS, FN_weights.T])**0.5)
    new_w=MF_FactorNeutral(sharpe_MFIS,10,a)
    new=port_stats(new_w,insample_MF)[0]/(reduce(np.dot, [new_w, covariance_MFIS, new_w.T])**0.5)    
    if new>=old:
        FN_weights=new_w
    else:
        FN_weights=FN_weights
        
#this gives us the final optimal weights that optimizes the MVP portfolios with factor neutral constraint
print(FN_weights)
#we also print out the resulting portfolio sharpe ratio
print(port_stats(FN_weights,insample_MF)[0]/(reduce(np.dot, [FN_weights, covariance_MFIS, FN_weights.T])**0.5))

[ 1.5178 -1.2779 -0.4136  2.      0.599   0.0271  2.     -1.8802 -0.2676
 -1.3046]
0.22323384323534928


In [46]:
# FN_weights=MF_FactorNeutral(sharpe_MFIS,10)
# FN_weights

#### Mutual Fund Optimization Results

In [47]:
#Create table for output as required
MFIS_table = pd.DataFrame(index=['Long only GMV','Long only MVP','Long short GMV',
                                'Long short MVP','Tracking error <=5%','Factor Neutral'], columns = ['Mean',
                                                                                                   'volatility','sharpe','mktrf','smb','hml',
                                                                                                   'RMW','CMA','umd'])
MFOS_table = pd.DataFrame(index=['Long only GMV','Long only MVP','Long short GMV',
                                'Long short MVP','Tracking error <=5%','Factor Neutral'], columns = ['Mean',
                                                                                                   'volatility','sharpe','mktrf','smb','hml',
                                                                                                   'RMW','CMA','umd'])

In [48]:
#Calculate the portfolio statistics using insample data for mutual funds 
MFIS_table.loc['Long only GMV']['Mean']= port_stats(GMV_weights,insample_MF)[0]
MFIS_table.loc['Long only GMV']['volatility']= port_stats(GMV_weights,insample_MF)[1] ** 2

MFIS_table.loc['Long only MVP']['Mean']= port_stats(MVP_weights,insample_MF)[0]
MFIS_table.loc['Long only MVP']['volatility']= port_stats(MVP_weights,insample_MF)[1] ** 2

MFIS_table.loc['Long short GMV']['Mean']= port_stats(GMV_weights2,insample_MF)[0]
MFIS_table.loc['Long short GMV']['volatility']= port_stats(GMV_weights2,insample_MF)[1] ** 2

MFIS_table.loc['Long short MVP']['Mean']= port_stats(MVP_weights2,insample_MF)[0]
MFIS_table.loc['Long short MVP']['volatility']= port_stats(MVP_weights2,insample_MF)[1] ** 2

MFIS_table.loc['Tracking error <=5%']['Mean']= port_stats(TE_weights,insample_MF)[0]
MFIS_table.loc['Tracking error <=5%']['volatility']= port_stats(TE_weights,insample_MF)[1] ** 2

MFIS_table.loc['Factor Neutral']['Mean']= port_stats(FN_weights,insample_MF)[0]
MFIS_table.loc['Factor Neutral']['volatility']= port_stats(FN_weights,insample_MF)[1] ** 2

MFIS_table['Mean']=(1+MFIS_table['Mean'])**12-1
MFIS_table['volatility']=np.sqrt(12)*MFIS_table['volatility']

for i in MFIS_table.index:
    MFIS_table.loc[i]['sharpe']=(MFIS_table.loc[i]['Mean'])/(MFIS_table.loc[i]['volatility']**0.5)

for i in ['mktrf','smb','hml', 'RMW','CMA','umd']:
    MFIS_table.loc['Long only GMV'][i]=MF_data[i].dot(GMV_weights)
    MFIS_table.loc['Long only MVP'][i]=MF_data[i].dot(MVP_weights)
    MFIS_table.loc['Long short GMV'][i]=MF_data[i].dot(GMV_weights2)
    MFIS_table.loc['Long short MVP'][i]=MF_data[i].dot(MVP_weights2)
    MFIS_table.loc['Tracking error <=5%'][i]=MF_data[i].dot(TE_weights)
    MFIS_table.loc['Factor Neutral'][i]=MF_data[i].dot(FN_weights)

In [49]:
#Calculate the portfolio statistics using outsample data for mutual funds 
MFOS_table.loc['Long only GMV']['Mean']= port_stats(GMV_weights,outsample_MF)[0]
MFOS_table.loc['Long only GMV']['volatility']= port_stats(GMV_weights,outsample_MF)[1] ** 2

MFOS_table.loc['Long only MVP']['Mean']= port_stats(MVP_weights,outsample_MF)[0]
MFOS_table.loc['Long only MVP']['volatility']= port_stats(MVP_weights,outsample_MF)[1] ** 2

MFOS_table.loc['Long short GMV']['Mean']= port_stats(GMV_weights2,outsample_MF)[0]
MFOS_table.loc['Long short GMV']['volatility']= port_stats(GMV_weights2,outsample_MF)[1] ** 2

MFOS_table.loc['Long short MVP']['Mean']= port_stats(MVP_weights2,outsample_MF)[0]
MFOS_table.loc['Long short MVP']['volatility']= port_stats(MVP_weights2,outsample_MF)[1] ** 2

MFOS_table.loc['Tracking error <=5%']['Mean']= port_stats(TE_weights,outsample_MF)[0]
MFOS_table.loc['Tracking error <=5%']['volatility']= port_stats(TE_weights,outsample_MF)[1] ** 2

MFOS_table.loc['Factor Neutral']['Mean']= port_stats(FN_weights,outsample_MF)[0]
MFOS_table.loc['Factor Neutral']['volatility']= port_stats(FN_weights,outsample_MF)[1] ** 2

MFOS_table['Mean']=(1+MFOS_table['Mean'])**12-1
MFOS_table['volatility']=np.sqrt(12)*MFOS_table['volatility']

for i in MFOS_table.index:
    MFOS_table.loc[i]['sharpe']=(MFOS_table.loc[i]['Mean'])/(MFOS_table.loc[i]['volatility']**0.5)

for i in ['mktrf','smb','hml', 'RMW','CMA','umd']:
    MFOS_table.loc['Long only GMV'][i]=MF_data[i].dot(GMV_weights)
    MFOS_table.loc['Long only MVP'][i]=MF_data[i].dot(MVP_weights)
    MFOS_table.loc['Long short GMV'][i]=MF_data[i].dot(GMV_weights2)
    MFOS_table.loc['Long short MVP'][i]=MF_data[i].dot(MVP_weights2)
    MFOS_table.loc['Tracking error <=5%'][i]=MF_data[i].dot(TE_weights)
    MFOS_table.loc['Factor Neutral'][i]=MF_data[i].dot(FN_weights)

In [50]:
MFIS_table

Unnamed: 0,Mean,volatility,sharpe,mktrf,smb,hml,RMW,CMA,umd
Long only GMV,0.04132,0.005115,0.5778,0.7513,0.0191,0.1244,0.09655,-0.04692,0.01026
Long only MVP,0.08851,0.008098,0.9835,0.7341,0.2937,-0.03888,0.02616,-0.01173,0.06115
Long short GMV,0.025,0.003075,0.4509,0.7955,-0.1206,-0.395,0.5578,0.3,0.106
Long short MVP,0.455,0.04741,2.09,2.714,0.6458,-5.921,1.06,4.356,0.8751
Tracking error <=5%,0.4555,0.04749,2.09,2.706,0.6468,-5.906,1.055,4.343,0.8727
Factor Neutral,0.2557,0.02551,1.601,2.615e-10,2.375e-11,3.63e-11,1.538e-12,1.091e-10,0.1676


In [51]:
MFOS_table

Unnamed: 0,Mean,volatility,sharpe,mktrf,smb,hml,RMW,CMA,umd
Long only GMV,0.3035,0.01628,2.379,0.7513,0.0191,0.1244,0.09655,-0.04692,0.01026
Long only MVP,0.2017,0.008588,2.177,0.7341,0.2937,-0.03888,0.02616,-0.01173,0.06115
Long short GMV,-0.04898,0.04607,-0.2282,0.7955,-0.1206,-0.395,0.5578,0.3,0.106
Long short MVP,-0.9475,10.61,-0.2908,2.714,0.6458,-5.921,1.06,4.356,0.8751
Tracking error <=5%,-0.947,10.56,-0.2914,2.706,0.6468,-5.906,1.055,4.343,0.8727
Factor Neutral,0.2957,0.02558,1.849,2.615e-10,2.375e-11,3.63e-11,1.538e-12,1.091e-10,0.1676


In [52]:
#given the asset returns, graph the mean and variance for a set number of random portfolios
# def portfolios(returns,simulations):
#     means=returns.mean()
#     cov_matrix=returns.cov()
#     port_return = []
#     port_std = []
#     for _ in range(simulations):
#         w = np.random.rand(10)
#         w /=sum(w)
#         port_return.append(sum(means * w))
#         port_std.append(np.sqrt(reduce(np.dot, [w, cov_matrix, w.T])))
#     plt.plot(port_std, port_return,'bo')

In [53]:
# portfolios(insample_MF,10000)
# plt.plot(GMV_port(insample_weights,insample_MF)[1],
#          GMV_port(insample_weights,insample_MF)[0],'ro')

In [54]:
# portfolios(insample_MF,10000)
# plt.plot(GMV_port(insample_weights_extend,insample_MF)[1],
#          GMV_port(insample_weights_extend,insample_MF)[0],'ro')

### **Portfolio optimization using Hedge Funds**

In [55]:
covariance_HFIS = insample_HF.cov()

In [56]:
covariance_HFIS

Unnamed: 0,HFRIDSI,HFRIMAI,HFRIEMNI,HFRIENHI,HFRIEM,HFRIEHI,HFRIEDI,HFRIMI,HFRIMTI,HFRIFOF
HFRIDSI,0.00035023,0.00011517,8.4811e-05,0.00040513,0.0005,0.0004,0.00033597,9.5877e-05,5.393e-05,0.00023549
HFRIMAI,0.00011517,8.9898e-05,3.0836e-05,0.00023396,0.0002,0.0002,0.00015211,5.1627e-05,7.2525e-05,0.00010919
HFRIEMNI,8.4811e-05,3.0836e-05,6.349e-05,8.1595e-05,0.0001,0.0001,8.0205e-05,4.1782e-05,2.9691e-05,7.3211e-05
HFRIENHI,0.00040513,0.00023396,8.1595e-05,0.00105,0.001,0.0007,0.00055223,0.00020366,0.00033429,0.00037796
HFRIEM,0.00050847,0.00024166,0.00013161,0.00095006,0.0012,0.0008,0.00060295,0.00023906,0.00024222,0.00046047
HFRIEHI,0.00037218,0.00018553,0.00010407,0.00071937,0.0008,0.0006,0.00045084,0.00016617,0.00018732,0.00033369
HFRIEDI,0.00033597,0.00015211,8.0205e-05,0.00055223,0.0006,0.0005,0.00038906,0.00011544,0.00012235,0.0002627
HFRIMI,9.5877e-05,5.1627e-05,4.1782e-05,0.00020366,0.0002,0.0002,0.00011544,0.00021995,0.00024159,0.00012526
HFRIMTI,5.393e-05,7.2525e-05,2.9691e-05,0.00033429,0.0002,0.0002,0.00012235,0.00024159,0.00050998,0.00013194
HFRIFOF,0.00023549,0.00010919,7.3211e-05,0.00037796,0.0005,0.0003,0.0002627,0.00012526,0.00013194,0.00022012


In [57]:
#define the objective functions for the GMV optimization problems
def port_std_HFIS(w):
    return np.sqrt(reduce(np.dot, [w, covariance_HFIS, w.T]))  

In [58]:
#define the long only GMV optimization function given the objective function and number of assets
def GMVoptimize(func, num,init):
#     init = np.array([1.0 / num for x in range(num)])  
    bounds = tuple((0, 1) for x in range(num))  
    constraints = [{'type': 'eq', 'fun': lambda x: sum(x) - 1}]
    outcome = solver.minimize(func, x0=init, constraints=constraints, bounds=bounds)  
    return outcome.x

#define the function to calculate optimal portfolio weights for long short GMV portfolios
def GMVoptimize_extend(func, num,init):
#     init = np.array([1.0 / num for x in range(num)])  
    constraints = [{'type': 'eq', 'fun': lambda x: sum(x) - 1}]
    outcome = solver.minimize(func, x0=init, constraints=constraints)  
    return outcome.x

In [59]:
#we run the optimization with 100 initial guesses, and pick the best one - this reduces our chance to get stuck on a local minimum
first_guess = np.array([1.0 / 10 for x in range(10)])  
GMV_weights = GMVoptimize(port_std_HFIS,10,first_guess)
for _ in range(100):
    a = np.random.random(10)
    a /= a.sum()
    old =reduce(np.dot, [GMV_weights, covariance_HFIS, GMV_weights.T])
    new_w=GMVoptimize(port_std_HFIS,10,a)
    new=reduce(np.dot, [new_w, covariance_HFIS, new_w.T])
    if new<=old:
        GMV_weights=new_w
    else:
        GMV_weights=GMV_weights
        
#this gives us the final optimal weights that optimizes the long only GMV portfolios
print(GMV_weights)
#we also print out the resulting portfolio variance
print(reduce(np.dot, [GMV_weights, covariance_HFIS, GMV_weights.T]))

[1.0249e-17 3.3816e-01 6.2563e-01 3.7801e-18 2.3246e-17 0.0000e+00
 1.5964e-17 3.6211e-02 1.0408e-17 2.9159e-17]
5.162408874476759e-05


In [60]:
#we run the optimization with 100 initial guesses, and pick the best one - this reduces our chance to get stuck on a local minimum
first_guess = np.array([1.0 / 10 for x in range(10)])  
GMV_weights2 = GMVoptimize_extend(port_std_HFIS,10,first_guess)
for _ in range(100):
    a = np.random.random(10)
    a /= a.sum()
    old =reduce(np.dot, [GMV_weights2, covariance_HFIS, GMV_weights2.T])
    new_w=GMVoptimize_extend(port_std_HFIS,10,a)
    new=reduce(np.dot, [new_w, covariance_HFIS, new_w.T])
    if new<=old:
        GMV_weights2=new_w
    else:
        GMV_weights2=GMV_weights2
        
#this gives us the final optimal weights that optimizes the long short GMV portfolios
print(GMV_weights2)
#we also print out the resulting portfolio variance
print(reduce(np.dot, [GMV_weights2, covariance_HFIS, GMV_weights2.T]))

[ 0.0861  0.6544  0.5578  0.0945 -0.0392 -0.2648 -0.14    0.1141 -0.0377
 -0.0252]
2.8310297544986437e-05


In [61]:
#just to verify our weights
# ones=np.ones(10).reshape(10,1)
# df=covariance_HFIS
# df_inv = pd.DataFrame(np.linalg.pinv(df.values), df.columns, df.index)
# xxy=df_inv.dot(ones) / (ones.T.dot(df_inv.dot(ones)))
# xxy[0]
# reduce(np.dot, [xxy[0], covariance_HFIS, xxy[0].T])

In [77]:
covariance_HFIS = insample_HF.cov()

In [78]:
def port_std_HFIS(w):
    return np.sqrt(reduce(np.dot, [w, covariance_HFIS, w.T]))  

In [80]:
#we run the optimization with 100 initial guesses, and pick the best one - this reduces our chance to get stuck on a local minimum
first_guess = np.array([1.0 / 10 for x in range(10)])  
GMV_weights = GMVoptimize(port_std_HFIS,10,first_guess)
for _ in range(100):
    a = np.random.random(10)
    a /= a.sum()
    old =reduce(np.dot, [GMV_weights, covariance_HFIS, GMV_weights.T])
    new_w=GMVoptimize(port_std_HFIS,10,a)
    new=reduce(np.dot, [new_w, covariance_HFIS, new_w.T])
    if new<=old:
        GMV_weights=new_w
    else:
        GMV_weights=GMV_weights
        
#this gives us the final optimal weights that optimizes the long only GMV portfolios
print(GMV_weights)
#we also print out the resulting portfolio variance
print(reduce(np.dot, [GMV_weights, covariance_MFIS, GMV_weights.T]))

[0.0000e+00 3.3740e-01 6.2672e-01 0.0000e+00 0.0000e+00 2.2890e-17
 0.0000e+00 3.5879e-02 1.3319e-17 1.2764e-17]
0.014992226801741863


In [99]:
#we run the optimization with 100 initial guesses, and pick the best one - this reduces our chance to get stuck on a local minimum
first_guess = np.array([1.0 / 10 for x in range(10)])  
GMV_weights2 = GMVoptimize_extend(port_std_HFIS,10,first_guess)
for _ in range(100):
    a = np.random.random(10)
    a /= a.sum()
    old =reduce(np.dot, [GMV_weights2, covariance_HFIS, GMV_weights2.T])
    new_w=GMVoptimize_extend(port_std_HFIS,10,a)
    new=reduce(np.dot, [new_w, covariance_HFIS, new_w.T])
    if new<=old:
        GMV_weights2=new_w
    else:
        GMV_weights2=GMV_weights2
        
#this gives us the final optimal weights that optimizes the long short GMV portfolios
print(GMV_weights2)
#we also print out the resulting portfolio variance
print(reduce(np.dot, [GMV_weights2, covariance_MFIS, GMV_weights2.T]))

[ 0.0886  0.6505  0.5503  0.1058 -0.0473 -0.2662 -0.153   0.1361 -0.0521
 -0.0126]
0.018258589304647334


In [83]:
#just to verify our weights
ones=np.ones(10).reshape(10,1)
df=covariance_HFIS
df_inv = pd.DataFrame(np.linalg.pinv(df.values), df.columns, df.index)
zzy=df_inv.dot(ones) / (ones.T.dot(df_inv.dot(ones)))
zzy[0]
reduce(np.dot, [zzy[0], covariance_HFIS, zzy[0].T])

2.817418628058746e-05

In [84]:
zzy

Unnamed: 0,0
HFRIDSI,0.1327
HFRIMAI,0.679
HFRIEMNI,0.5492
HFRIENHI,0.1047
HFRIEM,-0.0448
HFRIEHI,-0.2307
HFRIEDI,-0.2242
HFRIMI,0.124
HFRIMTI,-0.0427
HFRIFOF,-0.0472


In [85]:
def sharpe_HFIS(w):
    mu = np.dot(w,insample_HF.mean())
    S = reduce(np.dot, [w, covariance_HFIS, w.T])
    sq=S**0.5
    return -(mu/sq)

In [86]:
#define the long only MVP optimization function given the objective function and number of assets
def MVPoptimize(func, num,init):
    bounds = tuple((0, 1) for x in range(num))  
    constraints = [{'type': 'eq', 'fun': lambda x: sum(x) - 1}]
    outcome = solver.minimize(func, x0=init, constraints=constraints, bounds=bounds)  
    return outcome.x

#define the long short MVP optimization function given the objective function and number of assets
def MVPoptimize_extend(func, num,init):
#     init = np.array([1.0 / num for x in range(num)])  
    constraints = [{'type': 'eq', 'fun': lambda x: sum(x) - 1}]
    outcome = solver.minimize(func, x0=init, constraints=constraints)  
    return outcome.x

In [87]:
#we run the optimization with 100 initial guesses, and pick the best one - this reduces our chance to get stuck on a local minimum
first_guess = np.array([1.0 / 10 for x in range(10)])  
MVP_weights = MVPoptimize(sharpe_HFIS,10,first_guess)
for _ in range(100):
    a = np.random.random(10)
    a /= a.sum()
    old = port_stats(MVP_weights,insample_HF)[0]/(reduce(np.dot, [MVP_weights, covariance_HFIS, MVP_weights.T])**0.5)
    new_w=MVPoptimize(sharpe_MFIS,10,a)
    new=port_stats(new_w,insample_HF)[0]/(reduce(np.dot, [new_w, covariance_HFIS, new_w.T])**0.5)    
    if new>=old:
        MVP_weights=new_w
    else:
        MVP_weights=MVP_weights
        
#this gives us the final optimal weights that optimizes the long only MVP portfolios
print(MVP_weights)
#we also print out the resulting portfolio sharpe ratio
print(port_stats(MVP_weights,insample_HF)[0]/(reduce(np.dot, [MVP_weights, covariance_HFIS, MVP_weights.T])**0.5))

[5.6093e-01 1.7827e-03 3.1984e-17 0.0000e+00 0.0000e+00 0.0000e+00
 1.9125e-16 3.2239e-01 1.1490e-01 1.1265e-16]
0.3556146505857898


In [88]:
#we run the optimization with multiple initial guesses, and pick the best one - this reduces our chance to get stuck on a local minimum
first_guess = np.array([1.0 / 10 for x in range(10)])  
MVP_weights2 = MVPoptimize_extend(sharpe_HFIS,10,first_guess)
for _ in range(100):
    a = np.random.random(10)
    a /= a.sum()
    old = port_stats(MVP_weights2,insample_HF)[0]/(reduce(np.dot, [MVP_weights2, covariance_HFIS, MVP_weights2.T])**0.5)
    new_w=MVPoptimize_extend(sharpe_MFIS,10,a)
    new=port_stats(new_w,insample_HF)[0]/(reduce(np.dot, [new_w, covariance_HFIS, new_w.T])**0.5)
    if new>old:
        MVP_weights2=new_w
    else:
        MVP_weights2=MVP_weights2
        
#this gives us the final optimal weights that optimizes the long short MVP portfolios
print(MVP_weights2)
#we also print out the resulting portfolio sharpe ratio
print(port_stats(MVP_weights2,insample_HF)[0]/(reduce(np.dot, [MVP_weights2, covariance_HFIS, MVP_weights2.T])**0.5))

[ 1.4181  1.4761  0.3211 -0.147   0.7701 -0.7679 -0.037   0.529   0.3012
 -2.8638]
0.8415933819207552


In [100]:
mu=insample_HF.mean()
mu=np.asarray(mu)
S=np.asarray(covariance_HFIS)
w=np.dot(inv(S),mu)

In [101]:
port_stats(w,insample_HF)

[0.708283371194843, 0.8415957290735515]

In [103]:
#define the tracking error constraint
def constraintHF(w):
    benchmark=np.array([1.0 / 10 for x in range(10)])
    x=w-benchmark
    return np.sqrt(reduce(np.dot, [x.T, covariance_HFIS, x]))-0.05

In [104]:
#define the MVP optimization with tracking error constraint function given the objective function and number of assets
def HF_TrackError(func, num,init):
#     init = np.array([1.0 / num for x in range(num)])  
    constraints = [{'type': 'eq', 'fun': lambda x: sum(x) - 1},
                  {'type': 'ineq', 'fun': constraintHF}] 
    outcome = solver.minimize(func, x0=init, constraints=constraints)  
    return outcome.x

In [105]:
#we run the optimization with 100 initial guesses, and pick the best one - this reduces our chance to get stuck on a local minimum
first_guess = np.array([1.0 / 10 for x in range(10)])  
TE_weights = HF_TrackError(sharpe_HFIS,10,first_guess)
for _ in range(100):
    a = np.random.random(10)
    a /= a.sum()
    old = port_stats(TE_weights,insample_HF)[0]/(reduce(np.dot, [TE_weights, covariance_HFIS, TE_weights.T])**0.5)
    new_w=HF_TrackError(sharpe_HFIS,10,a)
    new=port_stats(new_w,insample_HF)[0]/(reduce(np.dot, [new_w, covariance_HFIS, new_w.T])**0.5)    
    if new>=old:
        TE_weights=new_w
    else:
        TE_weights=TE_weights
        
#this gives us the final optimal weights that optimizes the MVP portfolios with tracking error constraint
print(TE_weights)
#we also print out the resulting portfolio sharpe ratio
print(port_stats(TE_weights,insample_HF)[0]/(reduce(np.dot, [TE_weights, covariance_HFIS, TE_weights.T])**0.5))

[ 4.0799  3.1441 -0.1417 -0.6689  2.4569 -1.893   0.3508  1.3731  1.0088
 -8.71  ]
0.822976644373562


In [106]:
#define the factor neutral constraints for the fama french 5 factors
def constraint2_1HF(w):
    return HF_data['mktrf'].dot(w)-0
def constraint2_2HF(w):
    return HF_data['smb'].dot(w)-0
def constraint2_3HF(w):
    return HF_data['hml'].dot(w)-0
def constraint2_4HF(w):
    return HF_data['RMW'].dot(w)-0
def constraint2_5HF(w):
    return HF_data['CMA'].dot(w)-0

In [107]:
def HF_FactorNeutral(func, num,init):
#     init = np.array([1.0 / num for x in range(num)])  
    constraints = [{'type': 'eq', 'fun': lambda x: sum(x) - 1},
                  {'type': 'eq', 'fun': constraint2_1HF},
                  {'type': 'eq', 'fun': constraint2_2HF},
                  {'type': 'eq', 'fun': constraint2_3HF},
                  {'type': 'eq', 'fun': constraint2_4HF},
                  {'type': 'eq', 'fun': constraint2_5HF}]
    bounds = tuple((-2, 2) for x in range(num))  
    outcome = solver.minimize(func, x0=init, constraints=constraints,bounds=bounds)  
    return outcome.x

In [108]:
#we run the optimization with 100 initial guesses, and pick the best one - this reduces our chance to get stuck on a local minimum
first_guess = np.array([1.0 / 10 for x in range(10)])  
FN_weights = HF_FactorNeutral(sharpe_HFIS,10,first_guess)
for _ in range(100):
    a = np.random.random(10)
    a /= a.sum()
    old = port_stats(FN_weights,insample_HF)[0]/(reduce(np.dot, [FN_weights, covariance_HFIS, FN_weights.T])**0.5)
    new_w=MF_FactorNeutral(sharpe_HFIS,10,a)
    new=port_stats(new_w,insample_HF)[0]/(reduce(np.dot, [new_w, covariance_HFIS, new_w.T])**0.5)    
    if new>=old:
        FN_weights=new_w
    else:
        FN_weights=FN_weights
        
#this gives us the final optimal weights that optimizes the MVP portfolios with factor neutral constraint
print(FN_weights)
#we also print out the resulting portfolio sharpe ratio
print(port_stats(FN_weights,insample_HF)[0]/(reduce(np.dot, [FN_weights, covariance_HFIS, FN_weights.T])**0.5))

[ 1.6807  2.     -1.406   0.4796  0.8734 -1.6174 -1.0732  1.2429 -0.0585
 -1.1215]
0.5924002127418379


In [109]:
#Create table for output as required
HFIS_table = pd.DataFrame(index=['Long only GMV','Long only MVP','Long short GMV',
                                'Long short MVP','Tracking error <=5%','Factor Neutral'], columns = ['Mean',
                                                                                                   'volatility','sharpe','mktrf','smb','hml',
                                                                                                   'RMW','CMA','umd'])
HFOS_table = pd.DataFrame(index=['Long only GMV','Long only MVP','Long short GMV',
                                'Long short MVP','Tracking error <=5%','Factor Neutral'], columns = ['Mean',
                                                                                                   'volatility','sharpe','mktrf','smb','hml',
                                                                                                   'RMW','CMA','umd'])

In [112]:
#Calculate the portfolio statistics using insample data for mutual funds 
HFIS_table.loc['Long only GMV']['Mean']= port_stats(GMV_weights,insample_HF)[0]
HFIS_table.loc['Long only GMV']['volatility']= port_stats(GMV_weights,insample_HF)[1] ** 2

HFIS_table.loc['Long only MVP']['Mean']= port_stats(MVP_weights,insample_HF)[0]
HFIS_table.loc['Long only MVP']['volatility']= port_stats(MVP_weights,insample_HF)[1] ** 2

HFIS_table.loc['Long short GMV']['Mean']= port_stats(GMV_weights2,insample_HF)[0]
HFIS_table.loc['Long short GMV']['volatility']= port_stats(GMV_weights2,insample_HF)[1] ** 2

HFIS_table.loc['Long short MVP']['Mean']= port_stats(MVP_weights2,insample_HF)[0]
HFIS_table.loc['Long short MVP']['volatility']= port_stats(MVP_weights2,insample_HF)[1] ** 2

HFIS_table.loc['Tracking error <=5%']['Mean']= port_stats(TE_weights,insample_HF)[0]
HFIS_table.loc['Tracking error <=5%']['volatility']= port_stats(TE_weights,insample_HF)[1] ** 2

HFIS_table.loc['Factor Neutral']['Mean']= port_stats(FN_weights,insample_HF)[0]
HFIS_table.loc['Factor Neutral']['volatility']= port_stats(FN_weights,insample_HF)[1] ** 2

HFIS_table['Mean']=(1+HFIS_table['Mean'])**12-1
HFIS_table['volatility']=np.sqrt(12)*HFIS_table['volatility']

for i in HFIS_table.index:
    HFIS_table.loc[i]['sharpe']=(HFIS_table.loc[i]['Mean'])/(HFIS_table.loc[i]['volatility']**0.5)

for i in ['mktrf','smb','hml', 'RMW','CMA','umd']:
    HFIS_table.loc['Long only GMV'][i]=HF_data[i].dot(GMV_weights)
    HFIS_table.loc['Long only MVP'][i]=HF_data[i].dot(MVP_weights)
    HFIS_table.loc['Long short GMV'][i]=HF_data[i].dot(GMV_weights2)
    HFIS_table.loc['Long short MVP'][i]=HF_data[i].dot(MVP_weights2)
    HFIS_table.loc['Tracking error <=5%'][i]=HF_data[i].dot(TE_weights)
    HFIS_table.loc['Factor Neutral'][i]=HF_data[i].dot(FN_weights)

In [116]:
#Calculate the portfolio statistics using outsample data for mutual funds 
HFOS_table.loc['Long only GMV']['Mean']= port_stats(GMV_weights,outsample_HF)[0]
HFOS_table.loc['Long only GMV']['volatility']= port_stats(GMV_weights,outsample_HF)[1] ** 2

HFOS_table.loc['Long only MVP']['Mean']= port_stats(MVP_weights,outsample_HF)[0]
HFOS_table.loc['Long only MVP']['volatility']= port_stats(MVP_weights,outsample_HF)[1] ** 2

HFOS_table.loc['Long short GMV']['Mean']= port_stats(GMV_weights2,outsample_HF)[0]
HFOS_table.loc['Long short GMV']['volatility']= port_stats(GMV_weights2,outsample_HF)[1] ** 2

HFOS_table.loc['Long short MVP']['Mean']= port_stats(MVP_weights2,outsample_HF)[0]
HFOS_table.loc['Long short MVP']['volatility']= port_stats(MVP_weights2,outsample_HF)[1] ** 2

HFOS_table.loc['Tracking error <=5%']['Mean']= port_stats(TE_weights,outsample_HF)[0]
HFOS_table.loc['Tracking error <=5%']['volatility']= port_stats(TE_weights,outsample_HF)[1] ** 2

HFOS_table.loc['Factor Neutral']['Mean']= port_stats(FN_weights,outsample_HF)[0]
HFOS_table.loc['Factor Neutral']['volatility']= port_stats(FN_weights,outsample_HF)[1] ** 2

HFOS_table['Mean']=(1+HFOS_table['Mean'])**12-1
HFOS_table['volatility']=np.sqrt(12)*HFOS_table['volatility']

for i in HFOS_table.index:
    HFOS_table.loc[i]['sharpe']=(HFOS_table.loc[i]['Mean'])/(HFOS_table.loc[i]['volatility']**0.5)

for i in ['mktrf','smb','hml', 'RMW','CMA','umd']:
    HFOS_table.loc['Long only GMV'][i]=HF_data[i].dot(GMV_weights)
    HFOS_table.loc['Long only MVP'][i]=HF_data[i].dot(MVP_weights)
    HFOS_table.loc['Long short GMV'][i]=HF_data[i].dot(GMV_weights2)
    HFOS_table.loc['Long short MVP'][i]=HF_data[i].dot(MVP_weights2)
    HFOS_table.loc['Tracking error <=5%'][i]=HF_data[i].dot(TE_weights)
    HFOS_table.loc['Factor Neutral'][i]=HF_data[i].dot(FN_weights)

In [117]:
HFIS_table

Unnamed: 0,Mean,volatility,sharpe,mktrf,smb,hml,RMW,CMA,umd
Long only GMV,0.01614,0.0001788,1.207,0.1124,0.02854,0.009446,0.02056,0.01518,0.05399
Long only MVP,0.06199,0.0006916,2.357,0.1809,0.05748,0.002087,0.00116,0.01226,0.02597
Long short GMV,0.0163,9.801e-05,1.646,0.03275,0.01039,0.0008332,0.008598,0.03038,0.04976
Long short MVP,0.1883,0.001025,5.88,-0.06443,-0.00818,0.1425,0.06639,0.1245,-0.09863
Tracking error <=5%,0.6305,0.008843,6.705,-0.2767,-0.04631,0.4347,0.1799,0.3223,-0.4062
Factor Neutral,0.2074,0.002472,4.17,5.549e-12,-1.966e-11,-5.641e-12,7.917e-13,-1.944e-12,-0.1367


In [118]:
HFOS_table

Unnamed: 0,Mean,volatility,sharpe,mktrf,smb,hml,RMW,CMA,umd
Long only GMV,0.02654,8.238e-05,2.924,0.1124,0.02854,0.009446,0.02056,0.01518,0.05399
Long only MVP,0.01967,0.0003451,1.059,0.1809,0.05748,0.002087,0.00116,0.01226,0.02597
Long short GMV,0.02181,7.667e-05,2.491,0.03275,0.01039,0.0008332,0.008598,0.03038,0.04976
Long short MVP,0.001652,0.001149,0.04873,-0.06443,-0.00818,0.1425,0.06639,0.1245,-0.09863
Tracking error <=5%,-0.03952,0.01057,-0.3844,-0.2767,-0.04631,0.4347,0.1799,0.3223,-0.4062
Factor Neutral,-0.01564,0.001351,-0.4254,5.549e-12,-1.966e-11,-5.641e-12,7.917e-13,-1.944e-12,-0.1367
