In [1]:
import numpy as np
from scipy.io import loadmat
import matplotlib as plt
import cvxpy

from operator import itemgetter

# Setup

#### Utility functions

In [2]:
def sample_data(getter, original_data, data_keys):
    """
    Build a sub-sampled copy of a data dictionary.

    For every key in `data_keys`, `getter` is applied to
    `original_data[key]` and the result is converted to a NumPy array.

    Parameters
    ----------
    getter : callable
        Selects entries from one array (the notebook passes
        `operator.itemgetter` objects built from stock indices).
    original_data : dict
        Maps each key to the full array.
    data_keys : iterable
        Keys to copy into the result.

    Returns
    -------
    dict
        Maps each requested key to `np.array(getter(original_data[key]))`.
    """
    return {
        name: np.array(getter(original_data[name]))
        for name in data_keys
    }

def sharpe_ratio(portfolio, values):
    """
    Empirical Sharpe ratio.

    Multiplies `portfolio` element-wise with `values`, ignores every
    position where the portfolio entry is exactly zero, and returns the
    mean of the remaining products divided by their standard deviation.

    Parameters
    ----------
    portfolio : array-like
        Portfolio entries; zeros mark positions that are excluded.
        The input is never modified.
    values : array-like
        Values broadcastable against `portfolio`.

    Returns
    -------
    float
        `nanmean(p * v) / sqrt(nanvar(p * v))` over the non-zero positions.
        The result is NaN/inf (with NumPy warnings) when no positions are
        left or when the products have zero variance.
    """
    # Work on a copy so the caller's array is untouched.  The copy must be
    # able to hold NaN: an integer-typed portfolio would otherwise raise
    # when the zero entries are masked out below.
    volumes = np.array(portfolio)
    if not np.issubdtype(volumes.dtype, np.inexact):
        volumes = volumes.astype(float)

    # NaN marks "not held"; the nan-aware reductions then skip those entries.
    volumes[volumes == 0] = np.nan

    weighted = volumes * values
    return np.nanmean(weighted) / np.sqrt(np.nanvar(weighted))

#### Load data & transform to data matrices with dims `[NUM_STOCKS, TIME]`

In [3]:
# Raw MATLAB workspace; the `train_*` entries are read out below.
data = loadmat('portfolio.mat')
keys = ['hi', 'lo', 'open', 'close', 'vol', 'stocks']

# Numeric series: transpose each matrix and let `nan_to_num` replace NaNs
# with 0 (infinities become large finite numbers).
all_data = {
    key: np.nan_to_num(data[field].T)
    for key, field in [
        ('hi', 'train_hi'),
        ('lo', 'train_lo'),
        ('open', 'train_op'),
        ('close', 'train_cl'),
        ('vol', 'train_vol'),
    ]
}

# Stock identifiers: first element of every entry in the first row of
# `train_stocks`.
all_data['stocks'] = np.array([entry[0] for entry in data['train_stocks'][0]])

# Dataset dimensions used throughout the notebook.
NUM_STOCKS = len(all_data['stocks'])
TIME = all_data['open'].shape[1]

#### Split the dataset

In [4]:
# Number of stocks assigned to each split.
NUM_TRAIN = 300
NUM_VALIDATION = 100
# Whatever remains after the training and validation stocks goes to the
# test split.  NOTE(review): this is <= 0 when NUM_STOCKS <= 400 -- confirm
# the dataset is large enough.
NUM_TEST = NUM_STOCKS - NUM_TRAIN - NUM_VALIDATION

In [5]:
# Shuffle the stock indices with a dedicated, seeded generator so that the
# train/validation/test split is the same on every "Restart & Run All".
# A private RandomState is used so NumPy's global random state is untouched.
SPLIT_SEED = 0
permuted_stocks = np.random.RandomState(SPLIT_SEED).permutation(NUM_STOCKS)

# Consecutive slices of the permutation define the three disjoint splits.
# NOTE: `itemgetter` needs at least two indices per split to return a tuple
# of rows (one index returns a bare row, zero indices raise TypeError).
train_end = NUM_TRAIN
validation_end = NUM_TRAIN + NUM_VALIDATION
get_train = itemgetter(*permuted_stocks[:train_end])
get_validation = itemgetter(*permuted_stocks[train_end:validation_end])
get_test = itemgetter(*permuted_stocks[validation_end:NUM_STOCKS])

# Apply each selector to every array in `all_data`.
train_data = sample_data(get_train, all_data, keys)
validation_data = sample_data(get_validation, all_data, keys)
test_data = sample_data(get_test, all_data, keys)

In [6]:
# Linearly interpolate missing closing prices of the training stocks.
# A value of exactly 0 is treated as missing.  Time steps before the first
# or after the last known price take that nearest known price (np.interp
# clamps at the ends).  Rows without any non-zero price are left unchanged.
time_axis = np.arange(TIME)
for i in range(NUM_TRAIN):
    row = train_data['close'][i, :]
    known_t = np.flatnonzero(row != 0)
    if known_t.size:
        train_data['close'][i, :] = np.interp(time_axis, known_t, row[known_t])

# Framework

In [7]:
def simulate(init_portfolio, strategy, dataset, verbose=True,
             trans_fee=.0005, trade_tol=10e-5):
    """
    Run a portfolio strategy over the closing prices in `dataset`.

    Parameters
    ----------
    init_portfolio : array-like
        Portfolio held at time 0, one entry per stock.
    strategy : callable
        Called as `strategy(b, x, t)` with the full portfolio and
        price-relative matrices and the current time index; must return
        the portfolio for time `t` (one entry per stock).
    dataset : dict
        Must contain `'close'`, a 2-D array indexed `[stock, time]`.
        The number of stocks and time steps is taken from this array, so
        any split (train / validation / test) can be simulated.
    verbose : bool, optional
        When true (default), print the transaction count of every step
        that has at least one transaction.
    trans_fee : float, optional
        Amount subtracted from the wealth per counted transaction.
    trade_tol : float, optional
        A position only counts as "increased" when it grows by more than
        this amount.

    Returns
    -------
    b_train : ndarray, shape (stocks, time)
        Portfolios.  Every column except the last one has been overwritten
        with the holdings after the following market move (renormalised),
        not the portfolio the strategy originally chose.
    x_train : ndarray, shape (stocks, time)
        Price relatives `close[:, t] / close[:, t-1]`; column 0 is all ones.
    S : ndarray, shape (time,)
        Wealth over time, starting at 1.
    """
    close = np.asarray(dataset['close'], dtype=float)
    num_stocks, num_steps = close.shape

    # initialize
    b_train = np.zeros([num_stocks, num_steps])
    x_train = np.zeros([num_stocks, num_steps])
    S = np.zeros(num_steps)

    b_train[:, 0] = init_portfolio
    S[0] = 1
    x_train[:, 0] = 1.0

    # main loop
    for t in range(1, num_steps):

        # price relatives for this step; nan_to_num maps 0/0 to 0 and
        # division by a zero price to a very large finite number
        x_train[:, t] = np.nan_to_num(close[:, t] / close[:, t-1])

        # update wealth according to market changes
        S[t] = S[t-1] * np.dot(x_train[:, t], b_train[:, t-1])

        # let the previous portfolio drift with the market and renormalise;
        # this overwrites column t-1 in place, and the strategy below sees
        # the drifted holdings
        drifted = b_train[:, t-1] * x_train[:, t]
        b_train[:, t-1] = drifted / np.dot(b_train[:, t-1], x_train[:, t])

        # compute new portfolio based on previous time-step information
        b_train[:, t] = strategy(b_train, x_train, t)

        # count transactions: positions increased by more than the
        # tolerance, plus negative positions that were pushed further down
        current = b_train[:, t]
        previous = b_train[:, t-1]
        increased = current > previous + trade_tol
        shorted_more = (current < 0) & (current < previous)
        num_trans = int(increased.sum() + shorted_more.sum())
        if verbose and num_trans != 0:
            print(num_trans)

        # update wealth according to transaction costs
        S[t] = S[t] - num_trans * trans_fee

    return b_train, x_train, S

In [8]:
def rebalanced(b, x, t):
    """
    Strategy that always returns the first column of the portfolio matrix.

    `x` and `t` are accepted for the common strategy signature but unused.

    NOTE(review): `simulate` overwrites `b[:, 0]` with the market-drifted,
    renormalised holdings during its first step, so the column returned
    here is not necessarily the initial portfolio that was passed in --
    confirm this is the intended rebalancing target.
    """
    first_column = b[:, 0]
    return first_column

In [9]:
def BAH(b,x,t):
    """
    Strategy that keeps the previous column of the portfolio matrix.

    Returns `b[:, t-1]` unchanged; `x` is accepted for the common strategy
    signature but unused.
    """
    previous_column = b[:, t-1]
    return previous_column

In [10]:
def FTL(b,x,t):
    """
    Strategy that picks the next portfolio by solving a convex problem.

    Minimises, over a new weight vector of length NUM_TRAIN,

        ||b_new - b[:, t-1]||_1  -  eta * log(sum(x[:, t-1] * b_new))

    subject to the weights summing to 1 and every weight lying in
    [-0.1, 0.1].  The problem is solved with the SCS solver and the
    transposed solution value is returned.

    NOTE(review): `x[:, t-1] * b_new` is presumably the inner product of
    the previous price relatives with the new weights under the cvxpy
    version that provides `sum_entries` -- confirm before upgrading cvxpy.
    """
    eta = 1
    prev_weights = b[:, t-1]
    prev_relatives = x[:, t-1]

    b_new = cvxpy.Variable(NUM_TRAIN)

    # L1 distance to the previous portfolio
    turnover = cvxpy.pnorm(b_new - prev_weights, 1)
    # log of the return the new weights would have earned on the last step
    log_growth = cvxpy.log(cvxpy.sum_entries(prev_relatives*b_new))
    obj = cvxpy.Minimize(turnover - eta*log_growth)

    constraints = [cvxpy.sum_entries(b_new)==1. , b_new <= 0.1, b_new >= -0.1]

    prob = cvxpy.Problem(obj, constraints)
    prob.solve(verbose=False,solver='SCS')

    solution = b_new.value
    return np.transpose(solution)

In [11]:
# Start from equal weights on every training stock, run the FTL strategy
# on the training data and report the final wealth.
uniform_init_port = np.ones(NUM_TRAIN) / NUM_TRAIN
b, x, S = simulate(uniform_init_port, FTL, train_data)
print(S[-1])



1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
