In [17]:
import numpy as np
import glob
import datetime

In [18]:
dataset = []
for idx in glob.glob("data/*.csv"):
    if idx == 'data/S&P500.csv': continue
    stock = np.genfromtxt(idx,skip_header=1,usecols=(0,1),delimiter=',',dtype=["S10","f8"])
    data = np.zeros((stock.shape[0],2))
    data[:,0] = np.array([datetime.datetime.strptime(entry[0], "%Y-%m-%d").date().timetuple().tm_yday for entry in stock])
    data[:,1] = np.array([entry[1] for entry in stock])
    dataset.append((idx,data))

In [19]:
num_stocks = 4
data_batches = []
idxs = []

for stock1 in range(0,len(dataset)):
    for stock2 in range(stock1+1,len(dataset)):
        for stock3 in range(stock2+1,len(dataset)):
            for stock4 in range(stock3+1,len(dataset)):
                size = np.min([dataset[stock1][1].shape[0],dataset[stock2][1].shape[0],dataset[stock3][1].shape[0],dataset[stock4][1].shape[0]])
                data_batch = np.hstack((dataset[stock1][1][0:size,:],dataset[stock2][1][0:size,:],dataset[stock3][1][0:size,:],dataset[stock4][1][0:size,:]))
                data_batches.append(data_batch)
                if not idxs:
                    idxs.append(data_batch.shape[0]-2)
                else:
                    idxs.append(idxs[-1] + data_batch.shape[0] - 1)

In [22]:
idxs

[1128,
 4209,
 9116,
 14023,
 18930,
 20536,
 21665,
 22794,
 23923,
 25052,
 26181,
 29262,
 32343,
 35424,
 37030,
 41937,
 46844,
 48450,
 53357,
 54963,
 56569,
 57698,
 58827,
 59956,
 61085,
 62214,
 65295,
 68376,
 71457,
 73063,
 79801,
 86539,
 88145,
 94883,
 96489,
 98095,
 99224,
 100353,
 101482,
 102611,
 103740,
 104869,
 105998,
 107127,
 108256,
 109385,
 112466,
 115547,
 117153,
 120234,
 121840,
 123446,
 131179,
 132785,
 134391,
 135997,
 137126,
 138255,
 139384,
 140513,
 141642,
 144723,
 147804,
 150885,
 152491,
 157398,
 162305,
 163911,
 168818,
 170424,
 172030,
 173159,
 174288,
 175417,
 176546,
 177675,
 178804,
 179933,
 181062,
 182191,
 183320,
 186401,
 189482,
 191088,
 194169,
 195775,
 197381,
 202288,
 203894,
 205500,
 207106,
 208235,
 209364,
 210493,
 211622,
 212751,
 213880,
 215009,
 216138,
 217267,
 218396,
 221477,
 224558,
 226164,
 229245,
 230851,
 232457,
 239195,
 240801,
 242407,
 244013,
 245142,
 246271,
 247400,
 248529,
 2496

In [4]:
def label(current, future):
    label = np.zeros(5) # [stock1, stock2, stock3, stock4, Cash]
    profit = np.true_divide(future - current, current)
    idx = np.argmax(profit)
    if profit[idx] <= 0: idx = -1
    label[idx] = 1
    return label

In [5]:
label_batches = []

for batch in data_batches:
    label_batch = np.zeros((batch.shape[0]-1,5))
    for idx in range(batch.shape[0]-1):
        current, future = batch[idx+1][[1,3,5,7]], batch[idx][[1,3,5,7]]
        label_batch[idx] = label(current,future)
    label_batches.append(label_batch)

In [6]:
X_train = np.vstack((batch[1:,:] for batch in data_batches))

In [7]:
X_train.shape

(264809, 8)

In [8]:
Y_train = np.vstack(np.vstack((label for label in label_batch)) for label_batch in label_batches)

In [9]:
Y_train.shape

(264809, 5)

In [10]:
np.save('X_train',X_train)
np.save('Y_train',Y_train)

In [1]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers import LSTM

Using TensorFlow backend.


In [13]:
X_train = np.load('X_train.npy')
Y_train = np.load('Y_train.npy')

mean = X_train.mean(axis=0)
std = X_train.std(axis=0)

X_train_norm = (X_train - mean)/std

In [20]:
input_dim = X_train.shape[1]
output_dim = Y_train.shape[1]
T = 100
X_train_seq = []
Y_train_seq = []
for i in range(len(idxs)):
    if i == 0:
        start_idx = 0
    else:
        start_idx = idxs[i-1] + 1
    end_idx = idxs[i]
    for j in range(start_idx + T - 1, end_idx+1):
        tX = X_train_norm[j - (T - 1): j + 1]
        tY = Y_train[j]
        X_train_seq.append(tX[None,:,:])
        Y_train_seq.append(tY[None,:])
        del tX
        del tY

X_train_seq = np.concatenate(X_train_seq, axis=0)
Y_train_seq = np.concatenate(Y_train_seq, axis=0)

In [27]:
data_dim = 8
timesteps = 100
nb_classes = 5

model = Sequential()
#model.add(Dense(42, input_dim=8))
model.add(LSTM(100, return_sequences=False, input_shape=(timesteps, data_dim)))
#model.add(Dropout(0.5))
model.add(Dense(5,activation='softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['categorical_crossentropy'])

model.fit(X_train_seq[:300], Y_train_seq[:300], batch_size=50, nb_epoch=20)
#score = model.evaluate(X_test, Y_test, batch_size=16)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x17e56ef50>

In [105]:
model.fit(X_train_seq[:50], Y_train_seq[:50], batch_size=25, nb_epoch=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x117053610>

In [108]:
predictions = np.argmax(model.predict_proba(X_train_seq[:50,:,:]),axis=1)
truth = np.argmax(Y_train_seq[:50],axis=1)
np.mean(predictions==truth)



0.78000000000000003