In [27]:
import numpy as np
from scipy.io import loadmat
import matplotlib.pyplot as plt

from operator import itemgetter

# Setup

#### Utility functions

In [28]:
def sample_data(getter, original_data, data_keys):
    """
    Build a per-key subset of a dataset.

    For every key in `data_keys`, `getter` is applied to `original_data[key]`
    and whatever it returns is wrapped in a NumPy array.

    Returns a new dict mapping each requested key to its sampled array;
    `original_data` is only read, never modified.
    """
    return {name: np.array(getter(original_data[name])) for name in data_keys}

def sharpe_ratio(portfolio, values):
    """
    Empirical Sharpe ratio.

    Computes mean / standard deviation of the elementwise product
    `portfolio * values`, ignoring every position where `portfolio` is
    exactly zero (those entries are masked with NaN before the statistics
    are taken). The variance is NumPy's default population variance.

    `portfolio` may be of any numeric dtype (including integers) and is not
    modified; `values` must broadcast against it.
    """
    # Work on a float copy: NaN cannot be stored in an integer array, and the
    # caller's portfolio must stay untouched.
    volumes = np.array(portfolio, dtype=float)
    volumes[volumes == 0] = np.nan

    weighted = volumes * values
    return np.nanmean(weighted) / np.sqrt(np.nanvar(weighted))

#### Load data & transform to data matrices with dims `[NUM_STOCKS, TIME]`

In [29]:
# Raw contents of the MATLAB file, keyed by variable name.
data = loadmat('portfolio.mat')
keys = ['hi', 'lo', 'open', 'close', 'vol', 'stocks']

# Numeric series: transpose each matrix and replace NaNs with zeros.
all_data = {
    field: np.nan_to_num(data[mat_name].T)
    for field, mat_name in [
        ('hi', 'train_hi'),
        ('lo', 'train_lo'),
        ('open', 'train_op'),
        ('close', 'train_cl'),
        ('vol', 'train_vol'),
    ]
}
# Stock identifiers: first element of each entry in the first row.
all_data['stocks'] = np.array([entry[0] for entry in data['train_stocks'][0]])

# Dataset dimensions: one identifier per stock, one 'open' column per time step.
NUM_STOCKS, TIME = all_data['stocks'].shape[0], all_data['open'].shape[1]

#### Split the dataset

In [30]:
# Number of stocks per subset; the test set takes whatever is left after the
# training and validation stocks have been set aside.
NUM_TRAIN, NUM_VALIDATION = 300, 100
NUM_TEST = NUM_STOCKS - (NUM_TRAIN + NUM_VALIDATION)

In [31]:
# Fixed seed so the train/validation/test split -- and every number derived
# from it -- is the same after a kernel restart. A local RandomState is used
# so the global NumPy random state is left alone.
SPLIT_SEED = 0
permuted_stocks = np.random.RandomState(SPLIT_SEED).permutation(NUM_STOCKS)

# Consecutive, non-overlapping slices of the shuffled stock indices.
get_train = itemgetter(*permuted_stocks[:NUM_TRAIN])
get_validation = itemgetter(*permuted_stocks[NUM_TRAIN:NUM_TRAIN+NUM_VALIDATION])
get_test = itemgetter(*permuted_stocks[NUM_TRAIN+NUM_VALIDATION:NUM_STOCKS])

# Apply each index selection to every field listed in `keys`.
train_data = sample_data(get_train, all_data, keys)
validation_data = sample_data(get_validation, all_data, keys)
test_data = sample_data(get_test, all_data, keys)

# Framework

In [46]:
def simulate(init_portfolio, strategy, dataset):
    """
    Run a portfolio strategy over a price matrix and accumulate log-wealth.

    Parameters
    ----------
    init_portfolio : array-like, length num_assets
        Portfolio stored in column 0 of the returned portfolio matrix.
    strategy : callable
        Invoked as `strategy(portfolios, price_relatives, t)` for
        t = 1 .. T-1; its return value is stored as the portfolio for step t.
    dataset : array-like, shape [num_assets, T]
        Price matrix. The array sizes are taken from this argument, so any
        subset (train, validation, test) can be simulated.

    Returns
    -------
    (portfolios, price_relatives, log_wealth)
        portfolios      : shape [num_assets, T+1]; columns 0 .. T-1 are
                          filled, the final column is left at zero.
        price_relatives : shape [num_assets, T]; column 0 is left at zero.
        log_wealth      : shape [T]; cumulative sum of
                          log(price_relatives[:, t] . portfolios[:, t]).
    """
    # Float coercion keeps the price ratio a true division for integer input.
    prices = np.asarray(dataset, dtype=float)
    num_assets, num_steps = prices.shape

    portfolios = np.zeros([num_assets, num_steps + 1])
    price_relatives = np.zeros([num_assets, num_steps])
    log_wealth = np.zeros(num_steps)

    portfolios[:, 0] = init_portfolio  # have no knowledge for first portfolio

    for t in range(1, num_steps):
        previous = prices[:, t - 1]
        has_previous = previous != 0

        # No usable previous price: treat the asset as unchanged.
        price_relatives[~has_previous, t] = 1
        # NOTE(review): a zero price at step t with a non-zero previous price
        # yields a relative of 0 (total loss); if zeros stand for missing
        # data this may be unintended -- confirm against the data source.
        price_relatives[has_previous, t] = np.nan_to_num(
            prices[has_previous, t] / previous[has_previous])

        # NOTE(review): the strategy is called after column t of
        # price_relatives has been filled, so it is able to read the very
        # returns it is about to be scored on; strategies should restrict
        # themselves to columns < t -- confirm this is the intended contract.
        portfolios[:, t] = strategy(portfolios, price_relatives, t)

        log_wealth[t] = log_wealth[t - 1] + np.log(
            np.dot(price_relatives[:, t], portfolios[:, t]))

    return portfolios, price_relatives, log_wealth

In [33]:
def strategy(portfolio, dataset, timestep):
    """
    Template for a strategy callback; intentionally does nothing.

    `simulate` invokes its strategy argument as
    `strategy(b_train, x_train, t)` and stores the result as the portfolio
    column for step `t`. This stub ignores all arguments and returns None.
    """
    return None

# Baselines

In [48]:
# Uniform starting portfolio: weight 1/NUM_TRAIN on each of NUM_TRAIN entries.
unif_init = np.ones(NUM_TRAIN) / NUM_TRAIN

def CRP(portfolio, dataset, timestep):
    """
    Baseline strategy: reuse the previous step's portfolio unchanged.

    Returns column `timestep - 1` of `portfolio`; `dataset` is ignored.
    (Name presumably stands for "constant rebalanced portfolio".)
    """
    previous_step = timestep - 1
    return portfolio[:, previous_step]

# Run the CRP baseline from the uniform portfolio on the training closing prices.
b, x, logS = simulate(unif_init, CRP, train_data['close'])

# Log-wealth at the first, second and last step. The single pre-formatted
# argument prints the same text under Python 2 and Python 3.
print('%s %s %s' % (logS[0], logS[1], logS[-1]))

# Final wealth (exponential of the last log-wealth entry).
print(np.exp(logS[-1]))

0.0 -0.0144760105027 0.505014258754
1.65700914718


In [1]:
# Scratch check of len() on a 1-D array.
# NOTE: this rebinds `x`, which the baseline cell also assigns from simulate().
x = np.arange(1, 5)
len(x)

NameError: name 'np' is not defined

In [14]:
# Scratch experiment: replace zero entries by the mean of their left and right
# neighbours within the same row.
x = np.array([[1., 0, 2], [4, 0, 5], [6, 0, 7]])
print(x)
# Only interior columns have both neighbours. Column 0 is skipped because
# index t-1 == -1 would silently wrap around to the last column.
for t in range(1, x.shape[1] - 1):
    zero_locs = np.where(x[:,t] == 0)[0]
    x[zero_locs,t] = (x[zero_locs,t-1]+x[zero_locs,t+1])/2
print(x)

[[ 1.  0.  2.]
 [ 4.  0.  5.]
 [ 6.  0.  7.]]
[[ 1.   1.5  2. ]
 [ 4.   4.5  5. ]
 [ 6.   6.5  7. ]]
