In [1]:
# Modules
import numpy as np
from keras.layers import Dense, Dropout, GaussianNoise, GRU, LSTM, Conv1D
from keras.layers.pooling import MaxPooling1D, GlobalAveragePooling1D
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.layers.wrappers import Bidirectional
import keras
import os
import pandas as pd

# Source data
data_file_path = ''
data_filename = 'spx_history.csv'

# Model
model_file = 'model.hd5'
num_epochs = 10000
validation_frac = 0.2
batch_size = 128
optimizer = 'adam'
loss = 'binary_crossentropy'
metrics = ['accuracy']

Using TensorFlow backend.


In [2]:
def get_samples(time_series, outcomes, duration, stride):
    x, y = zip(*[(time_series[i-duration:i], outcomes[i]) for i in range(duration, len(time_series), stride)])
    
    return np.array(x), np.array(y)

In [3]:
full_filename = os.path.join(data_file_path, data_filename)
raw_data = pd.read_csv(full_filename, delimiter=',', parse_dates=[1], dayfirst=True)
returns = raw_data.iloc[:,1].values
decisions = raw_data.iloc[:,2].values

x, y = get_samples(returns, decisions, 2500, 1)

In [None]:
x = np.expand_dims(x, -1)

In [17]:
y = np.expand_dims(y, -1)

In [18]:
print(x.shape)
print(y.shape)

(19751, 2500, 1)
(19751, 1, 1)


In [11]:
def get_model():
    dropout = 0.5
    model = keras.models.Sequential()
    
    # Convolutions
    model.add(Conv1D(32, 2, padding='same', activation='relu', input_shape=(None,1)))
    model.add(MaxPooling1D(2, padding='same'))
    model.add(Dropout(dropout))
    model.add(Conv1D(64, 2, padding='same', activation='relu'))
    model.add(MaxPooling1D(2, padding='same'))
    model.add(Dropout(dropout))
    model.add(Conv1D(128, 2, padding='same', activation='relu'))
    model.add(MaxPooling1D(2, padding='same'))
    model.add(Dropout(dropout))
    model.add(Conv1D(128, 2, padding='same', activation='relu'))
    model.add(MaxPooling1D(2, padding='same'))
    model.add(Dropout(dropout))
#     model.add(Conv1D(128, 2, padding='same', activation='relu'))
#     model.add(MaxPooling1D(2, padding='same'))
#     model.add(Dropout(dropout))
#     model.add(Conv1D(128, 2, padding='same', activation='relu'))
#     model.add(MaxPooling1D(2, padding='same'))
#     model.add(Dropout(dropout))
#     model.add(Conv1D(128, 2, padding='same', activation='relu'))
#     model.add(MaxPooling1D(2, padding='same'))
#     model.add(Dropout(dropout))
    
#     # Recurrents
#     model.add(GRU(128, return_sequences=True, go_backwards=True))
#     model.add(Dropout(dropout))
#     model.add(GRU(128, return_sequences=False, go_backwards=False))
#     model.add(Dropout(dropout))
    
    # Dense for final prediction
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(dropout))
    model.add(Dense(32, activation='relu'))
    model.add(Dropout(dropout))
    model.add(Dense(16, activation='relu'))
    model.add(Dropout(dropout))
    model.add(Dense(8, activation='relu'))
    model.add(Dropout(dropout))
    model.add(Dense(4, activation='relu'))
    model.add(Dropout(dropout))
    model.add(Dense(2, activation='relu'))
    model.add(Dropout(dropout))
    model.add(Dense(1, activation='sigmoid'))
    
    return model

In [12]:
model = get_model()
model.summary()
model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_15 (Conv1D)           (None, None, 32)          96        
_________________________________________________________________
max_pooling1d_15 (MaxPooling (None, None, 32)          0         
_________________________________________________________________
dropout_29 (Dropout)         (None, None, 32)          0         
_________________________________________________________________
conv1d_16 (Conv1D)           (None, None, 64)          4160      
_________________________________________________________________
max_pooling1d_16 (MaxPooling (None, None, 64)          0         
_________________________________________________________________
dropout_30 (Dropout)         (None, None, 64)          0         
_________________________________________________________________
conv1d_17 (Conv1D)           (None, None, 128)         16512     
__________

In [19]:
checkpointer = ModelCheckpoint(filepath=model_file, monitor='val_loss', save_best_only=True, verbose=1)
earlystopper = EarlyStopping(monitor='val_loss', patience=10)

training_history = model.fit(x, y, batch_size=batch_size, epochs=num_epochs, verbose=1,
          callbacks=[checkpointer, earlystopper], validation_split=validation_frac)

Train on 15800 samples, validate on 3951 samples
Epoch 1/10000
Epoch 2/10000
Epoch 3/10000
Epoch 4/10000
Epoch 5/10000
Epoch 6/10000
Epoch 7/10000
Epoch 8/10000
Epoch 9/10000
Epoch 10/10000
Epoch 11/10000
Epoch 12/10000
Epoch 13/10000
Epoch 14/10000
Epoch 15/10000
Epoch 16/10000
Epoch 17/10000
Epoch 18/10000
Epoch 19/10000
Epoch 20/10000
Epoch 21/10000
Epoch 22/10000
Epoch 23/10000
