#### Imports

In [1]:
import numpy as np
from scipy.io import loadmat
import matplotlib.pyplot as plt

from operator import itemgetter

#### Util Functions

In [2]:
def sample_data(getter, original_data, data_keys):
    sampled_data = {}
    for key in data_keys:
        sampled_data[key] = np.array(getter(original_data[key]))
    return sampled_data

def sharpe_ratio(portfolio, values):
    """
    Not real Sharpe ratio, but sufficient for our optimization
    """
    volumes = np.empty_like(portfolio)
    volumes[:] = portfolio
    volumes[np.where(volumes == 0)] = np.nan
    return np.nanmean(volumes*values)/np.sqrt(np.nanvar(volumes*values))

#### Learning Algorithm Functions

#### Load data & transform to data matrices that are of dim num_stocks X time

In [3]:
data = loadmat('portfolio.mat')
keys = ['hi', 'lo', 'open', 'close', 'vol', 'stocks']

all_data = {}
all_data['hi'] = np.nan_to_num(data['train_hi'].T)
all_data['lo'] = np.nan_to_num(data['train_lo'].T)
all_data['open'] = np.nan_to_num(data['train_op'].T)
all_data['close'] = np.nan_to_num(data['train_cl'].T)
all_data['vol'] = np.nan_to_num(data['train_vol'].T)
all_data['stocks'] = np.array([entry[0] for entry in data['train_stocks'][0]])
NUM_STOCKS = all_data['stocks'].shape[0]
TIME = all_data['open'].shape[1]

In [4]:
print all_data['open']
print all_data['close']

[[  36.1    36.15   35.11 ...,   46.6    47.37   48.82]
 [  32.58   32.16   29.68 ...,   16.05   16.59   16.42]
 [  70.79   67.85   67.37 ...,  333.1   343.51  355.  ]
 ..., 
 [  23.78   23.55   23.77 ...,   49.44   50.38   53.62]
 [  74.9    73.61   73.66 ...,   24.49   24.05   23.52]
 [  62.14   62.62   61.96 ...,   61.34   62.59   61.69]]
[[  36.35   35.02   35.7  ...,   46.63   48.53   49.23]
 [  32.16   31.08   29.88 ...,   16.28   16.48   16.81]
 [  68.15   67.7    67.79 ...,  337.86  342.41  350.7 ]
 ..., 
 [  23.66   23.66   23.89 ...,   49.93   50.58   52.64]
 [  73.27   73.52   73.74 ...,   23.8    23.42   23.61]
 [  62.45   62.06   62.03 ...,   63.14   61.37   62.07]]


#### Set amt of data you want for training, testing, and hold out

Right now, we are partitioning the set of stocks

In [44]:
NUM_TRAIN = 300
NUM_VALIDATION = 100
NUM_TEST = NUM_STOCKS - NUM_TRAIN - NUM_VALIDATION

#### Pull out your training, testing and validation sets

In [45]:
permuted_stocks = np.random.permutation(NUM_STOCKS)
get_train = itemgetter(*permuted_stocks[:NUM_TRAIN])
get_validation = itemgetter(*permuted_stocks[NUM_TRAIN:NUM_TRAIN+NUM_VALIDATION])
get_test = itemgetter(*permuted_stocks[NUM_TRAIN+NUM_VALIDATION:NUM_STOCKS])

train_data = sample_data(get_train, all_data, keys)
validation_data = sample_data(get_validation, all_data, keys)
test_data = sample_data(get_test, all_data, keys)

#### Run on training data

Initialize

In [46]:
b_train = np.zeros([NUM_TRAIN, TIME])
S = np.zeros(TIME)
x_train = np.zeros([NUM_TRAIN, TIME])

b_train[:,0] = 1./NUM_TRAIN * np.ones(NUM_TRAIN)
S[0] = 1

Update each time step

In [61]:
for t in range(1,TIME):
    b_train[:,t] = b_train[:,0] #some algo
    x_train[:,t] = np.nan_to_num(train_data['close'][:,t]/train_data['close'][:,t-1])
    S[t] = S[t-1]*np.dot(x_train[:,t], b_train[:,t])
    
    b_train[:,t] = (b_train[:,t]*train_data['close'][:,t])/(np.dot(b_train[:,t],train_data['close'][:,t]))
    #update portfolio selection rules

sharpe_ratio(S[t]*b_train[:,t],train_data['close'][:,t])

inf


  app.launch_new_instance()
  app.launch_new_instance()


nan