In [2]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, LSTM
from keras.optimizers import *
from keras.datasets import mnist
from sklearn.model_selection import GridSearchCV

from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline


import pprint
pp = pprint.PrettyPrinter(indent=4)

## Load Dataset

In [58]:
pred_range=30

In [98]:
split_date='2018-04-25'
# Get normalized market_info
model_data = pd.read_csv('model_data.csv').iloc[:, 1:]
# Fillimisht i kthej ne rend zbrites te dhenat
model_data = model_data.assign(date=pd.to_datetime(model_data['date']))
model_data = model_data.sort_values(by='date')

model_data = model_data[model_data['date']>="2016-01-01"]

training_set, test_set = model_data[model_data['date']<split_date], model_data[model_data['date']>=split_date]
training_set = training_set.drop('date', 1)
test_set = test_set.drop('date', 1)

window_len=31 # sepse len(training_set) pjestohet me :window_len
pred_range=30 # sepse len(test_set) pjestohet me :pred_range


LSTM_training_inputs = []
for i in range(len(training_set)-window_len):
    temp_set = training_set[i:(i+window_len)].copy()
    LSTM_training_inputs.append(temp_set)   

LSTM_test_inputs = []
for i in range(len(test_set)-window_len):
    temp_set = test_set[i:(i+window_len)].copy()
    LSTM_test_inputs.append(temp_set)
    
LSTM_training_outputs = []

for i in range(window_len, len(training_set['btc_close'])-pred_range):
    LSTM_training_outputs.append(training_set['btc_close'][i:i+pred_range].values)
    
LSTM_training_outputs = np.array(LSTM_training_outputs)


LSTM_training_inputs = [np.array(LSTM_training_input) for LSTM_training_input in LSTM_training_inputs]
LSTM_training_inputs = np.array(LSTM_training_inputs)

LSTM_test_inputs = [np.array(LSTM_test_inputs) for LSTM_test_inputs in LSTM_test_inputs]
LSTM_test_inputs = np.array(LSTM_test_inputs)

In [99]:
training_set.columns

Index(['btc_high', 'btc_close', 'btc_volume', 'btc_market_cap',
       'bch_avg_block_size', 'bch_transactions', 'bch_mining_revenue',
       'bch_accounts', 'sp_close', 'dj_close', 'google_trends_bitcoin'],
      dtype='object')

In [94]:
np.array_equal(training_set.btc_close, training_set.btc_high.astype(str))

False

## Model Definition

In [96]:
import keras
keras.backend.clear_session()
# %load models/lstm_model.py
"""
    If network is overfitting => decrease batch size; the contrary is true for underfitting
"""
def lstm_model(neurons=100, activ_func="relu",
                dropout=0.25, loss="mae", optimizer="adam"): 
    model = Sequential()
    # input_shape(window_len, number_of_features)
    # neurons is the number of neurons in hidden layers
    model.add(LSTM(neurons=100, input_shape=(31,11)))
    model.add(Dropout(0.25))
    # Units is the length of output vector, which in turn is the pred_range
    model.add(Dense(30))
    model.add(Activation(activ_func="relu"))

    model.compile(loss="mae", optimizer="adam")
    return model

In [62]:
import numpy as np
import matplotlib.pylab as plt


def step(x):
    return np.array(x > 0, dtype=np.int)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def relu(x):
    return np.maximum(0, x)

fig1 = plt.figure()
x = np.arange(-5.0, 5.0, 0.1)
y_step = step(x)
y_sigmoid = sigmoid(x)
y_relu = relu(x)
ax1 = fig1.add_subplot(111)
ax1.grid(True)
# plt.plot(x, y_step, label='Step', color='k', lw=1, linestyle=None)
plt.plot(x, y_sigmoid, label='Funksioni Sigmoid', color='b', lw=1, ls='--')
# plt.plot(x, y_relu, label='ReLU', color='k', lw=1, linestyle='-.')
plt.ylim(-0.1, 1.1)
plt.legend()
plt.show()

"""
tanh
~~~~
Plots a graph of the tanh function."""

import numpy as np
import matplotlib.pyplot as plt

z = np.arange(-5, 5, .1)
t = np.tanh(z)

fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(z, t, label='Funksioni Tanh',  color='b',lw=1, ls='--')
ax.set_ylim([-1.0, 1.0])
ax.set_xlim([-5,5])
ax.grid(True)
ax.set_xlabel('z')
plt.legend()

plt.show()

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

In [6]:
from keras import optimizers
print(optimizers.SGD)

<class 'keras.optimizers.SGD'>


## Define the Parameters

In [70]:
batch_size = [20, 50, 100][:1]
epochs = [20, 50, 100][:1]
neurons = [10, 20, 30, 50][:1]
optimizer = ['adagrad','adam', 'adadelta', 'rmsprop'][:1]
loss=['mae','mean_squared_error']
activation = ['relu', 'tanh', 'sigmoid', 'hard_sigmoid'][:1]
dropout_rate = [0.3, 0.25, 0.8][:1]

## Model Wrapper and GridSearchCV

In [78]:
LSTM_training_outputs

array([[0.01009146, 0.01016341, 0.00993048, ..., 0.01321994, 0.01343734,
        0.01330379],
       [0.01016341, 0.00993048, 0.01094708, ..., 0.01343734, 0.01330379,
        0.01272769],
       [0.00993048, 0.01094708, 0.01078973, ..., 0.01330379, 0.01272769,
        0.01260656],
       ...,
       [0.45040555, 0.43945795, 0.43053734, ..., 0.42010891, 0.44865652,
        0.45123167],
       [0.43945795, 0.43053734, 0.41571382, ..., 0.44865652, 0.45123167,
        0.44641162],
       [0.43053734, 0.41571382, 0.39623278, ..., 0.45123167, 0.44641162,
        0.45305886]])

In [100]:
# Create the wrapper and pass params to GridSearchCV
params = dict(neurons=neurons,
              activ_func = activation,
              dropout = dropout_rate,
              loss = loss,
              optimizer = optimizer)


# inputs=100, output_size=pred_range, neurons=100, activ_func="relu",
#                 dropout=0.25, loss="mae", optimizer="adam"
seed = 7

# models = GridSearchCV(estimator = lstm_model, param_grid=params,scoring = 'neg_mean_squared_error', n_jobs=1)

NN_grid = KerasRegressor(build_fn=lstm_model, verbose = 1, batch_size = 31, epochs = 100)

validator = GridSearchCV(estimator = NN_grid, param_grid = params,scoring = 'neg_mean_squared_error', n_jobs = 1)
             
#  estimator = KerasRegressor(build_fn=lstm_model,  verbose=1, shuffle=True, batch_size = 100,
#                         epochs = 50)   
    
grid_result = validator.fit(LSTM_training_inputs[:-pred_range], LSTM_training_outputs)

# estimator.fit(LSTM_training_inputs[:-pred_range], LSTM_training_outputs)

# prediction = estimator.predict(LSTM_test_inputs[:-pred_range])

# models = GridSearchCV(estimator = model, param_grid=params,scoring = 'neg_mean_squared_error', n_jobs=1)

TypeError: __init__() missing 1 required positional argument: 'units'

In [83]:
best_model = validator.fit(LSTM_training_inputs[:-pred_range], LSTM_training_outputs)
print('Best model :')
pp.pprint(best_model.best_params_)

TypeError: __init__() missing 1 required positional argument: 'units'

## Train the Models

In [12]:
best_model = models.fit(LSTM_training_inputs[:-pred_range], LSTM_training_outputs)
print('Best model :')
pp.pprint(best_model.best_params_)


# estimator.fit(LSTM_training_inputs[:-pred_range], LSTM_training_outputs)

# prediction = estimator.predict(LSTM_test_inputs[:-pred_range])

NameError: name 'models' is not defined

In [19]:
pp.pprint(best_model)

GridSearchCV(cv=None, error_score='raise',
       estimator=<keras.wrappers.scikit_learn.KerasRegressor object at 0x7f55f92a3518>,
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'neurons': [10], 'optimizer': ['adagrad'], 'loss': ['mae', 'mean_squared_error'], 'activ_func': ['relu'], 'dropout': [0.3]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='neg_mean_squared_error', verbose=0)
