## Multivariate time series prediction from https://github.com/Rachnog/Deep-Trading/tree/master/multivariate

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Keras imports are kept in their original relative order because the
# star imports below can rebind names introduced by earlier lines.
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.recurrent import LSTM, GRU
from keras.layers import Convolution1D, MaxPooling1D, AtrousConvolution1D, RepeatVector
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, CSVLogger
from keras.layers.wrappers import Bidirectional
from keras import regularizers
from keras.layers.normalization import BatchNormalization
from keras.layers.advanced_activations import *
from keras.optimizers import RMSprop, Adam, SGD, Nadam
from keras.initializers import *
sns.despine()

# This is a bit of magic to make matplotlib figures appear inline in the
# notebook rather than in a new window.
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# Some more magic so that the notebook will reload external python modules;
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# Data Preprocessing

In [29]:
import transaction_reader as t_r

# Data preprocessing: cut every batch returned by the reader into windows of
# WINDOW consecutive rows, and pair each window with the label row located
# immediately after the window (index i + WINDOW).

# Path of the preprocessed CSV handed to Transaction_Reader.
file = ('../Dataset/preprocess_outputs/port_calc_processed.csv')

WINDOW = 30             # rows per input window
EMB_SIZE = 5            # size of the last axis the inputs are reshaped to
STEP = 5                # stride between consecutive window start indices
FORECAST = 1            # not referenced anywhere in this cell
train_percentage = 0.9  # scales total_transactions into the train/test cut-off

reader = t_r.Transaction_Reader(file)

X_train, X_test, Y_train, Y_test = [], [], [], []
x_train_count = int(reader.total_transactions * train_percentage)
for _ in range(reader.total_transactions):
    data, labels = reader.next_batch()
    # NOTE(review): assumes reader.current_batch still identifies the batch
    # just returned by next_batch() -- confirm against Transaction_Reader.
    for i in range(0, reader.trans_freqs[reader.current_batch], STEP):
        try:
            x_i = data[i:i + WINDOW]
            y_i = labels[i + WINDOW]
        except (IndexError, KeyError):
            # No label exists past the end of this batch: stop windowing it.
            # Any other exception is a real error and is allowed to surface.
            break
        # Batches up to the cut-off feed the training split, the rest the test split.
        if reader.current_batch <= x_train_count:
            X_train.append(x_i)
            Y_train.append(y_i)
        else:
            X_test.append(x_i)
            Y_test.append(y_i)

# The splits were accumulated as Python lists, which have no `.shape`;
# convert them to arrays before inspecting or reshaping them.
X_train, X_test = np.array(X_train), np.array(X_test)
Y_train, Y_test = np.array(Y_train), np.array(Y_test)

print(X_train.shape, X_test.shape, Y_train.shape, Y_test.shape)

# Force the (samples, WINDOW, EMB_SIZE) layout; -1 lets NumPy infer the sample
# count, so an empty split becomes (0, WINDOW, EMB_SIZE) instead of failing.
X_train = np.reshape(X_train, (-1, WINDOW, EMB_SIZE))
X_test = np.reshape(X_test, (-1, WINDOW, EMB_SIZE))

((92805, 30, 5), (10100, 30, 5), (92805, 25), (10100, 25))


# Create and run model

In [30]:
# Build, train and run a small 1-D convolutional classifier on the windows.
# Layer and fit arguments use the Keras 2 keyword names (filters, kernel_size,
# padding, epochs) instead of the deprecated Keras 1 spellings.

# One softmax output per column of a label row.
no_class = Y_train.shape[1]

model = Sequential()

# Convolutional stage 1: 16 filters of width 4 over the input window.
model.add(Convolution1D(input_shape=(WINDOW, EMB_SIZE),
                        filters=16,
                        kernel_size=4,
                        padding='same'))
model.add(BatchNormalization())
model.add(LeakyReLU())
model.add(Dropout(0.5))

# Convolutional stage 2: 8 filters of width 4.
model.add(Convolution1D(filters=8,
                        kernel_size=4,
                        padding='same'))
model.add(BatchNormalization())
model.add(LeakyReLU())
model.add(Dropout(0.5))

model.add(Flatten())

# Fully connected head.
model.add(Dense(64))
model.add(BatchNormalization())
model.add(LeakyReLU())

model.add(Dense(no_class))
model.add(Activation('softmax'))

opt = Nadam(lr=0.002)

# Multiply the learning rate by 0.9 after 30 epochs without val_acc improvement.
reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.9, patience=30,
                              min_lr=0.000001, verbose=1)
# Keep only the best weights; 'val_loss' is the ModelCheckpoint default,
# spelled out here so the selection criterion is visible.
checkpointer = ModelCheckpoint(filepath="multivariate.hdf5", monitor='val_loss',
                               verbose=1, save_best_only=True)

model.compile(optimizer=opt,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

history = model.fit(X_train, Y_train,
                    epochs=100,
                    batch_size=128,
                    verbose=1,
                    validation_data=(X_test, Y_test),
                    callbacks=[reduce_lr, checkpointer],
                    shuffle=True)

# Predict with the checkpointed weights rather than the last-epoch weights.
model.load_weights("multivariate.hdf5")
pred = model.predict(np.array(X_test))

  import sys
  


Train on 92805 samples, validate on 10100 samples
Epoch 1/100

Epoch 00001: val_loss did not improve
Epoch 2/100

Epoch 00002: val_loss did not improve
Epoch 3/100

Epoch 00003: val_loss did not improve
Epoch 4/100

Epoch 00004: val_loss did not improve
Epoch 5/100

KeyboardInterrupt: 

# Results

In [None]:
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

# Results: row-normalised confusion matrix, then the training curves.

# Reduce label rows and predicted score rows to class indices.
y_true = np.argmax(Y_test, axis=1)
y_pred = np.argmax(pred, axis=1)
C = confusion_matrix(y_true, y_pred)

# Divide every row by its own total. keepdims=True keeps the totals as a
# column vector; without it the totals broadcast across columns and column j
# ends up divided by row j's total. Rows with a zero total stay all-zero
# instead of dividing by zero.
row_totals = C.sum(axis=1, keepdims=True)
C_normalized = np.divide(C, row_totals,
                         out=np.zeros(C.shape, dtype=float),
                         where=row_totals != 0)
print(C_normalized)

# Older Keras logs accuracy under 'acc', newer releases under 'accuracy'.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

# One figure per metric, training curve against validation curve.
for metric, label in (('loss', 'loss'), (acc_key, 'accuracy')):
    plt.figure()
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title('model ' + label)
    plt.ylabel(label)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='best')
    plt.show()