In this notebook we will explore using a recurrent neural network (RNN) and long short term memory applications (LSTM) of the keras library to predict prices.

In [3]:
# import libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, ConfusionMatrixDisplay, r2_score, mean_squared_error, mean_absolute_error, matthews_corrcoef, classification_report, roc_auc_score
from xgboost import XGBRegressor, XGBClassifier
from sklearn.model_selection import RepeatedKFold, cross_val_score, GridSearchCV
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest
import keras
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout

In [4]:
from google.colab import files
uploaded = files.upload()


Saving data_new_features.csv to data_new_features.csv


In [5]:
# import our data 

data = pd.read_csv('data_new_features.csv')



In [6]:
data.head()

Unnamed: 0,date,close,Volume BTC,Volume USDT,7day_MA,50day_MA,200day_MA,24h_vBTC,24h_vUSDT,Label,7day_wma,50day_wma,200day_wma
0,2018-09-26 12:00:00,6452.115,1543.675,9997567.94,6577.944583,6561.698358,7390.488019,16023.49,103770900.0,1.0,6527.313964,6732.691616,8070.491508
1,2018-09-27 01:00:00,6503.8,1407.06,9169317.6,6579.006964,6561.6673,7389.493567,16072.69,104177000.0,1.0,6527.336299,6732.291142,8069.810691
2,2018-09-27 02:00:00,6512.935,1391.14,9050773.03,6580.345119,6561.588092,7388.48115,16143.985,104695500.0,0.0,6527.463245,6731.906202,8069.133475
3,2018-09-27 03:00:00,6503.72,1063.475,6920079.985,6581.489345,6561.542342,7387.513317,15713.075,101942800.0,0.0,6527.476457,6731.506188,8068.452214
4,2018-09-27 04:00:00,6496.275,927.785,6033148.635,6582.556012,6561.741625,7386.502323,15352.69,99626350.0,0.0,6527.396652,6731.094064,8067.767648


In [7]:
# we are going to continue with this as a classification problem, as we are most concerned with correctly predicting direction of bitcoin price movement

data.data = data.date.apply(lambda x: pd.to_datetime(x))

data.set_index('date', inplace=True)

# set up train and testing sets

test_size = 90 * 24    # 24 hourly periods per day
train_size = len(data.index) - test_size

train = data[:train_size]
test = data[train_size:]

X_train = train.drop('Label', axis=1)
X_test = test.drop('Label', axis=1)
y_train = train.Label
y_test = test.Label

  This is separate from the ipykernel package so we can avoid doing imports until


In [8]:
# we are going to scale our data in this notebook prior to initializing our model
from sklearn.preprocessing import MinMaxScaler


scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [9]:
X_train_scaled.shape

(17372, 11)

In [10]:
# in order to pass our data into an LSTM model we have to reshape it into a 3D array, in this first instance, we are going to use the previous 24 time steps to predict the following hour

num_steps = 24

# set up a list and convert to array taking previous observations timesteps into account
X_ = []
y_ = []
for i in range(24, X_train_scaled.shape[0]):
  X_.append(X_train_scaled[i-num_steps:i, 0])
  y_.append(y_train[i])

X_train_final, y_train_final = np.array(X_), np.array(y_)

X_t = []
y_t = []
for j in range(24, X_test_scaled.shape[0]):
  X_t.append(X_test_scaled[i-num_steps:j, 0])
  y_t.append(y_test[j])

X_test_final, y_test_final = np.array(X_t), np.array(y_t)

X_train_rs = np.reshape(X_train_final, (X_train_final.shape[0], X_train_final.shape[1], 1))

X_test_rs = np.reshape(X_test_final, (X_test_final.shape[0], X_test_final.shape[1], 1))


In [11]:
# initializing our model

model = Sequential()
model.add(LSTM(units=30, activation='relu', return_sequences=True, input_shape=(X_train_rs.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=60, activation='relu', return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(units=90, activation='relu', return_sequences=True))
model.add(Dropout(0.4))
model.add(LSTM(units=120, activation='relu', return_sequences=True))
model.add(Dropout(0.5))
model.add(Dense(units=1))

model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 24, 30)            3840      
                                                                 
 dropout (Dropout)           (None, 24, 30)            0         
                                                                 
 lstm_1 (LSTM)               (None, 24, 60)            21840     
                                                                 
 dropout_1 (Dropout)         (None, 24, 60)            0         
                                                                 
 lstm_2 (LSTM)               (None, 24, 90)            54360     
                                                                 
 dropout_2 (Dropout)         (None, 24, 90)            0         
                                                                 
 lstm_3 (LSTM)               (None, 24, 120)           1

In [12]:
# now we will compile our model

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [13]:
# set our history of model training

history = model.fit(X_train_rs, y_train_final, epochs=500, batch_size=32, validation_split=0.15)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

In [1]:
# Epoch 500/500
# 461/461 [==============================] - 39s 84ms/step - loss: 0.6926 - accuracy: 0.5189 - val_loss: 0.6937 - val_accuracy: 0.5017 

# results from LSTM model. This is not a very encouraging sign, as this is a similar accuracy we were achieving out of our xgboost classification models.

# given the time and deadline provided, we are going to go with our previous results having over 60% confidence, as this gave us our desired minimum viable product accuracy of 55%
# after finishing presentation, will go back and attempt to achieve better results using different techniques. 