In [77]:
import numpy as np
import keras
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import *
from keras.wrappers.scikit_learn import KerasRegressor
from keras.datasets import mnist
from sklearn.model_selection import GridSearchCV
import pprint
pp = pprint.PrettyPrinter(indent=4)

## Model Definition

In [78]:

# def build_model(optimizer, learning_rate, activation, dropout_rate,
#                 initilizer,num_unit):
#     keras.backend.clear_session()
#     model = Sequential()
#     model.add(Dense(num_unit, kernel_initializer=initilizer,
#                     activation=activation, input_shape=(784,)))
#     model.add(Dropout(dropout_rate))
#     model.add(Dense(num_unit, kernel_initializer=initilizer,
#                     activation=activation))
#     model.add(Dropout(dropout_rate)) 
#     model.add(Dense(10, activation='softmax'))
#     model.compile(loss='categorical_crossentropy',
#                   optimizer=optimizer(lr=learning_rate),
#                   metrics=['accuracy'])
#     return model

In [90]:
# %load models/lstm_model.py
# import the relevant Keras modules
from keras.models import Sequential
from keras.layers import Activation, Dense
from keras.layers import LSTM
from keras.layers import GRU
from keras.layers import Dropout
from keras import optimizers
"""
    If network is overfitting => decrease batch size; the contrary is true for underfitting
"""
def lstm_model(inputs, output_size, neurons, optimizer, loss, activ_func="linear",
                dropout=0.25): 
    
    model = Sequential()
    model.add(LSTM(neurons, input_shape=(inputs.shape[1], inputs.shape[2])))
    model.add(Dropout(dropout))
    model.add(Dense(units=output_size))
    model.add(Activation(activ_func))

    model.compile(loss=loss, optimizer=optimizer)
    return model

## Load Dataset

In [123]:
split_date = '2018-01-01' 

btc_final = pd.read_csv("btc_final.csv")

# Get rid of 'date' columns
training_set, test_set = btc_final[btc_final['date']<split_date], btc_final[btc_final['date']>=split_date]
training_set = training_set.drop('date', 1)
test_set = test_set.drop('date', 1)

window_len = 10
norm_cols = ['bt_close','bt_volume']

LSTM_training_inputs = []
for i in range(len(training_set)-window_len):
    temp_set = training_set[i:(i+window_len)].copy()
    for col in norm_cols:
        temp_set.loc[:, col] = temp_set[col]/temp_set[col].iloc[0] - 1
    LSTM_training_inputs.append(temp_set)
    
LSTM_training_outputs = (training_set['bt_close'][window_len:].values/training_set['bt_close'][:-window_len].values)-1

pred_range=10


LSTM_test_inputs = []
for i in range(len(test_set)-window_len):
    temp_set = test_set[i:(i+window_len)].copy()
    for col in norm_cols:
        temp_set.loc[:, col] = temp_set[col]/temp_set[col].iloc[0] - 1
    LSTM_test_inputs.append(temp_set)
LSTM_test_outputs = (test_set['bt_close'][window_len:].values/test_set['bt_close'][:-window_len].values)-1


LSTM_training_inputs = [np.array(LSTM_training_input) for LSTM_training_input in LSTM_training_inputs]
LSTM_training_inputs = np.array(LSTM_training_inputs)

print(LSTM_training_inputs[:-pred_range].shape)

LSTM_test_inputs = [np.array(LSTM_test_inputs) for LSTM_test_inputs in LSTM_test_inputs]
LSTM_test_inputs = np.array(LSTM_test_inputs)

(1446, 10, 22)


In [112]:
# training_set, test_set = btc_final[btc_final['date']<split_date], btc_final[btc_final['date']>=split_date]

LSTM_training_outputs=LSTM_training_outputs[0:LSTM_training_outputs.size-10]
print(LSTM_training_outputs)
# LSTM_training_outputs = LSTM_training_outputs.reshape(LSTM_training_outputs.shape[1:])

[ 0.32734309  0.11248104  0.14434986 ...  0.17197482  0.07598637
 -0.06722048]


## Define the Parameters

In [113]:
# [:1] is for testing
# batch_size = [1, 20, 50][:1]
# epochs = [1, 20, 50][:1]
# learning_rate = [0.1, 0.001, 0.02][:1]
batch_size = [20, 50, 100][:1]
epochs = [1, 20, 50][:1]
neurons = [10, 20][:1]
optimizer = [SGD, RMSprop, Adagrad, Adadelta, Adam][:1]
loss=['mae','mean_squared_error']
activation = ['relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear'][:1]
dropout_rate = [0.3, 0.25, 0.8][:1]

## Model Wrapper and GridSearchCV

In [114]:
# Create the wrapper and pass params to GridSearchCV
params = dict(batch_size = batch_size,
              epochs = epochs,
              neurons = neurons,
              optimizer = optimizer,
              loss = loss,
              activ_func = activation,
              dropout = dropout_rate)

model = KerasRegressor(build_fn=lstm_model,  verbose=2, shuffle=True)

models = GridSearchCV(estimator = model, param_grid=params, n_jobs=1)

## Train the Models

In [117]:
pred_range = 10
# (1446, 10, 22)

LSTM_training_inputs = LSTM_training_inputs[:-pred_range]
# LSTM_training_inputs.shape(1446,22)
print(LSTM_training_inputs[:-pred_range]) # 1446
# best_model = models.fit(LSTM_training_inputs[:-pred_range], LSTM_training_outputs)
# print('Best model :')
# pp.pprint(best_model.best_params_)

[[[0.96127011 0.60103549 0.98414411 ... 0.37250528 0.56810036 0.87350468]
  [0.96127011 0.60103549 0.98414411 ... 0.37250528 0.56810036 0.87350468]
  [0.96127011 0.60103549 0.98414411 ... 0.37250528 0.56810036 0.87350468]
  ...
  [0.91111685 0.5870407  0.98559501 ... 0.42650857 0.59139785 0.79912353]
  [0.91111685 0.5870407  0.98559501 ... 0.42650857 0.59139785 0.79912353]
  [0.91111685 0.5870407  0.98559501 ... 0.42650857 0.59139785 0.79912353]]

 [[0.96127011 0.60103549 0.98414411 ... 0.37250528 0.56810036 0.87350468]
  [0.96127011 0.60103549 0.98414411 ... 0.37250528 0.56810036 0.87350468]
  [0.95263217 0.5870407  0.98757471 ... 0.35430852 0.54390681 0.86118678]
  ...
  [0.91111685 0.5870407  0.98559501 ... 0.42650857 0.59139785 0.79912353]
  [0.91111685 0.5870407  0.98559501 ... 0.42650857 0.59139785 0.79912353]
  [0.9022009  0.60842538 0.98295761 ... 0.44000939 0.57885305 0.7927277 ]]

 [[0.96127011 0.60103549 0.98414411 ... 0.37250528 0.56810036 0.87350468]
  [0.95263217 0.587040