In [57]:
import datetime
import glob
import itertools

import numpy as np

In [58]:
# Load every per-stock CSV under data/ into a (path, array) pair.
# Each array has one row per CSV record: column 0 is the day of the year
# parsed from the date in CSV column 0, column 1 is the value in CSV column 1.
dataset = []
for csv_path in glob.glob("data/*.csv"):
    # This one file is deliberately excluded from the stock list.
    if csv_path == 'data/S&P500.csv':
        continue
    stock = np.genfromtxt(csv_path, skip_header=1, usecols=(0, 1),
                          delimiter=',', dtype=["S10", "f8"])
    data = np.zeros((stock.shape[0], 2))
    for row, entry in enumerate(stock):
        trade_date = datetime.datetime.strptime(entry[0], "%Y-%m-%d").date()
        data[row, 0] = trade_date.timetuple().tm_yday
        data[row, 1] = entry[1]
    dataset.append((csv_path, data))

In [59]:
# Number of stocks combined into one batch. The labelling cell further down
# hard-codes four price columns ([1, 3, 5, 7]) and five classes, so this must
# stay at 4 unless those are updated as well.
num_stocks = 4
data_batches = []
# idxs[k] is the index of the last row contributed by batch k once every
# batch, minus its first row, has been stacked into one training matrix.
idxs = []

# Visit every unordered combination of `num_stocks` distinct stocks.
# itertools.combinations yields index tuples in the same lexicographic order
# as the equivalent hand-nested loops.
for combo in itertools.combinations(range(len(dataset)), num_stocks):
    series = [dataset[stock_idx][1] for stock_idx in combo]
    # Truncate to the shortest series so the arrays can be placed side by side.
    # NOTE(review): rows are matched by position, not by date; this presumably
    # assumes all CSVs share the same calendar starting at row 0 -- confirm.
    size = min(one_series.shape[0] for one_series in series)
    data_batch = np.hstack([one_series[0:size, :] for one_series in series])
    data_batches.append(data_batch)
    # Each batch contributes (rows - 1) training rows; keep a running end index.
    previous_end = idxs[-1] if idxs else -1
    idxs.append(previous_end + data_batch.shape[0] - 1)

In [60]:
def label(current, future):
    """Return a one-hot vector naming the most profitable position.

    Parameters
    ----------
    current : array-like of float, shape (n,)
        Prices of the n stocks at the decision time.
    future : array-like of float, shape (n,)
        Prices of the same stocks at the later time.

    Returns
    -------
    numpy.ndarray, shape (n + 1,)
        One-hot vector laid out as [stock1, ..., stockN, Cash]. The stock with
        the largest strictly positive relative return is selected; if no stock
        has a positive return, the last slot (Cash) is selected.
    """
    current = np.asarray(current, dtype=float)
    future = np.asarray(future, dtype=float)

    # Relative return per stock. A zero current price produces inf/NaN here.
    with np.errstate(divide='ignore', invalid='ignore'):
        profit = np.true_divide(future - current, current)
    # An undefined or infinite return must never win the argmax: NaN would be
    # picked by argmax and then slip past the `<= 0` comparison.
    profit = np.where(np.isfinite(profit), profit, -np.inf)

    one_hot = np.zeros(profit.shape[0] + 1)
    chosen = -1  # default to the trailing Cash slot
    if profit.size:
        best = int(np.argmax(profit))
        if profit[best] > 0:
            chosen = best
    one_hot[chosen] = 1
    return one_hot

In [61]:
# Build one label matrix per data batch. Row r of a label matrix is computed
# from the prices in batch rows r+1 (passed as "current") and r (passed as
# "future"), so each batch of n rows yields n-1 labels.
price_cols = [1, 3, 5, 7]  # the price column of each of the four stocks
label_batches = []

for batch in data_batches:
    prices = batch[:, price_cols]
    n_labels = batch.shape[0] - 1
    label_batch = np.zeros((n_labels, 5))
    for row in range(n_labels):
        label_batch[row] = label(prices[row + 1], prices[row])
    label_batches.append(label_batch)

In [62]:
# Stack all batches into one feature matrix, dropping each batch's first row
# (that row only serves as the "future" price for the first label).
# np.vstack needs a real sequence; a bare generator is deprecated/rejected
# by newer NumPy, so build a list.
X_train = np.vstack([batch[1:, :] for batch in data_batches])

In [63]:
# Display the (rows, columns) shape of the stacked feature matrix.
X_train.shape

(264809, 8)

In [64]:
# Stack the per-batch label matrices into one label matrix. Each entry of
# label_batches is already a 2-D array, so no per-row re-stacking is needed,
# and np.vstack is given a list rather than a generator.
Y_train = np.vstack(label_batches)

In [65]:
# Display the (rows, columns) shape of the stacked label matrix.
Y_train.shape

(264809, 5)

In [66]:
# Persist both training arrays to disk under their own names.
for out_name, array in (('X_train', X_train), ('Y_train', Y_train)):
    np.save(out_name, array)

In [92]:
import numpy as np
from keras.models import Sequential
from keras.optimizers import RMSprop
from keras.layers import Dense, Dropout, Activation
from keras.layers import LSTM, TimeDistributed

In [69]:
# Reload the arrays saved earlier in the notebook.
X_train = np.load('X_train.npy')
Y_train = np.load('Y_train.npy')

# Per-column statistics used to standardise the features.
mean = X_train.mean(axis=0)
std = X_train.std(axis=0)
# A constant column has zero standard deviation; dividing by it would fill
# that feature with NaN/inf. Use 1 instead so the column becomes all zeros.
std = np.where(std == 0, 1.0, std)

X_train_norm = (X_train - mean) / std

In [70]:
input_dim = X_train.shape[1]
output_dim = Y_train.shape[1]
T = 100  # number of time steps in each training window

# Cut the stacked matrix into windows of T consecutive rows that never cross
# a batch boundary. `idxs` comes from the batch-construction cell and holds
# the last row index of each batch inside X_train, so that cell must have
# been run in this kernel.
#
# Row ordering: Y_train[j] is computed with X row j as "current" and the row
# just before it in the batch (X row j-1) as "future". Lower row indices are
# therefore later in time under the labelling convention. A window ending at
# row j and reaching back to row j-T+1 would contain the very prices the label
# is derived from. The window instead covers rows j .. j+T-1 (the current row
# and the T-1 rows the labelling treats as older) and is reversed so the
# current row is the last time step.
# NOTE(review): this relies on the CSV rows being newest-first, as the
# current/future naming in the labelling cell implies -- confirm against data.
X_train_seq = []
Y_train_seq = []
start_idx = 0
for end_idx in idxs:
    # Rows start_idx..end_idx (inclusive) belong to one batch; the last valid
    # window starts at end_idx - T + 1.
    for j in range(start_idx, end_idx - T + 2):
        window = X_train_norm[j:j + T][::-1]
        X_train_seq.append(window[None, :, :])
        Y_train_seq.append(Y_train[j][None, :])
    start_idx = end_idx + 1

X_train_seq = np.concatenate(X_train_seq, axis=0)
Y_train_seq = np.concatenate(Y_train_seq, axis=0)

In [109]:
data_dim = 8      # features per time step
timesteps = 100   # time steps per input window
nb_classes = 5    # size of the softmax output

# A per-time-step dense projection, two stacked LSTMs (the second returns only
# its final state), and a softmax classifier over the classes.
model = Sequential([
    TimeDistributed(Dense(42, activation='relu'),
                    input_shape=(timesteps, data_dim)),
    LSTM(42, return_sequences=True),
    LSTM(42, return_sequences=False),
    Dense(nb_classes, activation='softmax'),
])

model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['categorical_accuracy'])

# model.fit(X_train_seq[:20], Y_train_seq[:20], batch_size=20, nb_epoch=20)
#score = model.evaluate(X_test, Y_test, batch_size=16)

In [None]:
# Train on every window in mini-batches of 50.
# NOTE(review): nb_epoch=10000 is not a feasible run given the ~2477s ETA the
# log below reports for the first epoch, and no validation data is passed --
# consider far fewer epochs plus a validation split / early stopping.
model.fit(X_train_seq, Y_train_seq, batch_size=50, nb_epoch=10000)

Epoch 1/10000
  3900/252335 [..............................] - ETA: 2477s - loss: 1.5969 - categorical_accuracy: 0.2508

In [114]:
# Accuracy on samples 1000..1499 of the training windows: the fraction whose
# predicted class matches the labelled class. These samples are drawn from the
# same arrays passed to model.fit, so this is not a held-out score.
eval_slice = slice(1000, 1500)
class_probs = model.predict_proba(X_train_seq[eval_slice])
predictions = class_probs.argmax(axis=1)
truth = Y_train_seq[eval_slice].argmax(axis=1)
(predictions == truth).mean()



0.17999999999999999

In [104]:
# Print the layer-by-layer summary of the model.
# NOTE(review): the recorded output below lists a single LSTM layer, whereas
# the model-definition cell stacks two; the output looks stale (presumably
# from an earlier definition) -- re-run to refresh.
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
timedistributed_7 (TimeDistribute(None, 100, 42)       378         timedistributed_input_5[0][0]    
____________________________________________________________________________________________________
lstm_6 (LSTM)                    (None, 42)            14280       timedistributed_7[0][0]          
____________________________________________________________________________________________________
dense_13 (Dense)                 (None, 5)             215         lstm_6[0][0]                     
Total params: 14873
____________________________________________________________________________________________________


In [105]:
# Show the model's configuration (the recorded output below is cut off).
model.get_config()

[{'class_name': 'TimeDistributed',
  'config': {'batch_input_shape': (None, 100, 8),
   'input_dtype': 'float32',
   'layer': {'class_name': 'Dense',
    'config': {'W_constraint': None,
     'W_regularizer': None,
     'activation': 'relu',
     'activity_regularizer': None,
     'b_constraint': None,
     'b_regularizer': None,
     'bias': True,
     'init': 'glorot_uniform',
     'input_dim': None,
     'name': 'dense_12',
     'output_dim': 42,
     'trainable': True}},
   'name': 'timedistributed_7',
   'trainable': True}},
 {'class_name': 'LSTM',
  'config': {'U_regularizer': None,
   'W_regularizer': None,
   'activation': 'tanh',
   'b_regularizer': None,
   'consume_less': 'cpu',
   'dropout_U': 0.0,
   'dropout_W': 0.0,
   'forget_bias_init': 'one',
   'go_backwards': False,
   'init': 'glorot_uniform',
   'inner_activation': 'hard_sigmoid',
   'inner_init': 'orthogonal',
   'input_dim': 42,
   'input_length': None,
   'name': 'lstm_6',
   'output_dim': 42,
   'return_sequen

In [106]:
# Write the model's weights to the file "weights".
# NOTE(review): the recorded run failed with "ImportError: No module named
# h5py"; h5py presumably has to be installed in the kernel's environment
# before this cell can succeed -- confirm against the Keras documentation.
model.save_weights("weights")

ImportError: No module named h5py

In [None]:
# Read weights back from the file "weights" into the current model. This
# needs the save cell to have succeeded first.
model.load_weights("weights", by_name=False)