In [1]:
import math
import keras
import pandas as pd
import numpy as np
import requests
import matplotlib.pyplot as plt

from datetime import datetime, timedelta
from keras.models import Sequential, model_from_json
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import *
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping

from google.colab import files

plt.style.use('default')

In [2]:
market = 'BTC-GBP'
granularity = 3600

In [3]:
def getHistoricalData(market: str='BTC-GBP', granularity: int=3600, iso8601start: str='', iso8601end: str='') -> pd.DataFrame:
  if iso8601start != '' and iso8601end == '':
    resp = requests.get(f'https://api.pro.coinbase.com/products/{market}/candles?granularity={granularity}&start={iso8601start}')
  elif iso8601start != '' and iso8601end != '':
    resp = requests.get(f'https://api.pro.coinbase.com/products/{market}/candles?granularity={granularity}&start={iso8601start}&end={iso8601end}')
  else:
    resp = requests.get(f'https://api.pro.coinbase.com/products/{market}/candles?granularity={granularity}')

  df = pd.DataFrame.from_dict(resp.json())
  df.columns = [ 'epoch','low','high','open','close','volume' ]

  tsidx = pd.DatetimeIndex(pd.to_datetime(df['epoch'], unit='s'), dtype='datetime64[ns]')
  df.set_index(tsidx, inplace=True)
  df = df.drop(columns=['epoch'])
  df.index.names = ['date']

  return df.iloc[::-1].reset_index()

In [4]:
def getHistoricalDataChained(market: str='BTC-GBP', granularity: int=3600, max_interations: int=1) -> pd.DataFrame:
  df1 = getHistoricalData(market, granularity)

  if max_interations == 1:
    return df1

  def getPreviousDateRange(df: pd.DataFrame=None) -> tuple:
    end_date = df['date'].min() - timedelta(seconds=(granularity/60))
    new_start = df['date'].min() - timedelta(hours=300*(granularity/60/60))
    return (str(new_start).replace(' ', 'T'), str(end_date).replace(' ', 'T'))

  iterations = 0
  result_df = pd.DataFrame()
  while iterations < (max_interations - 1):
    start_date, end_date = getPreviousDateRange(df1)
    df2 = getHistoricalData(market, granularity, start_date, end_date)
    result_df = pd.concat([df2, df1]).drop_duplicates()
    df1 = result_df
    iterations = iterations + 1

  if 'date'in result_df:
    result_df.sort_values(by=['date'], ascending=True, inplace=True)
        
  return result_df

In [5]:
iterations = math.ceil((525600/(granularity/60))/300) # minutes in a year divided by 300 data size
df = getHistoricalDataChained(market, granularity, iterations)
iterations, df.shape, df['date'].min(), df['date'].max()

(30,
 (8998, 6),
 Timestamp('2020-06-11 23:00:00'),
 Timestamp('2021-06-21 22:00:00'))

In [6]:
df.head()

Unnamed: 0,date,low,high,open,close,volume
0,2020-06-11 23:00:00,7370.0,7430.0,7427.57,7378.15,37.822718
1,2020-06-12 00:00:00,7356.03,7434.94,7379.36,7420.39,20.963595
2,2020-06-12 01:00:00,7414.26,7452.0,7418.78,7439.05,14.138312
3,2020-06-12 02:00:00,7433.06,7452.0,7439.05,7452.0,11.655047
4,2020-06-12 03:00:00,7441.81,7482.0,7452.0,7452.61,14.175504


In [7]:
df['ema12'] = df['close'].ewm(span=12, adjust=False).mean()
df['ema26'] = df['close'].ewm(span=26, adjust=False).mean()

df['macd'] = df['ema12'] - df['ema26']
df['signal'] = df['macd'].ewm(span=9, adjust=False).mean() 

df['next close'] = df['close'].shift(-1)
df.dropna(inplace=True)
df

Unnamed: 0,date,low,high,open,close,volume,ema12,ema26,macd,signal,next close
0,2020-06-11 23:00:00,7370.00,7430.00,7427.57,7378.15,37.822718,7378.150000,7378.150000,0.000000,0.000000,7420.39
1,2020-06-12 00:00:00,7356.03,7434.94,7379.36,7420.39,20.963595,7384.648462,7381.278889,3.369573,0.673915,7439.05
2,2020-06-12 01:00:00,7414.26,7452.00,7418.78,7439.05,14.138312,7393.017929,7385.558230,7.459699,2.031071,7452.00
3,2020-06-12 02:00:00,7433.06,7452.00,7439.05,7452.00,11.655047,7402.092094,7390.479843,11.612251,3.947307,7452.61
4,2020-06-12 03:00:00,7441.81,7482.00,7452.00,7452.61,14.175504,7409.864079,7395.082077,14.782002,6.114246,7452.34
...,...,...,...,...,...,...,...,...,...,...,...
294,2021-06-21 17:00:00,23074.06,23459.99,23115.53,23167.65,136.392808,23703.962287,24278.951435,-574.989148,-499.411260,23553.20
295,2021-06-21 18:00:00,23150.00,23636.00,23168.26,23553.20,111.539745,23680.768089,24225.192070,-544.423981,-508.413804,23396.58
296,2021-06-21 19:00:00,23186.01,23590.00,23553.19,23396.58,62.804421,23637.046845,24163.813398,-526.766553,-512.084354,23390.00
297,2021-06-21 20:00:00,23250.00,23540.48,23394.36,23390.00,44.555913,23599.039638,24106.493887,-507.454249,-511.158333,22623.43


In [8]:
X = df[['low','high','open','close','volume','ema12','ema26','macd','signal']]
y = df[['next close']]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=False, stratify=None)

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((6297, 9), (2700, 9), (6297, 1), (2700, 1))

In [9]:
X_scaler = MinMaxScaler(feature_range=(0, 1))
X_scaled = X_scaler.fit_transform(X_train)
X_train = pd.DataFrame(X_scaled, columns=[X_train.columns])

print ('X_train inputs:', X_train.columns)

if granularity == 60:
  window = 1440 # num minutes in 1 day
elif granularity == 300:
  window = 288 # num 5 minutes in 1 day
elif granularity == 900:
  window = 94 # num 15 minutes in 1 day
elif granularity == 3600:
  window = 24 # num hours in 1 day
elif granularity == 21600:
  window = 4 # num 6 hours in 1 day
else:
  window = 1 # 1 day

X_train_window=[]
y_train_window=[]
for i in range(window, len(X_train)):
  X_train_window.append(X_scaled[i-window:i, :])
  y_train_window.append(X_scaled[i, 0])

X_train_window, y_train_window = np.array(X_train_window), np.array(y_train_window)
X_train_window = np.reshape(X_train_window, (X_train_window.shape[0], X_train_window.shape[1], len(X_train.columns)))

print ('X_train_window.shape', X_train_window.shape)
print ('y_train_window.shape', y_train_window.shape)

X_train inputs: ('low',)
X_train_window.shape (6273, 24, 9)
y_train_window.shape (6273,)


In [None]:
def create_model():
  model = Sequential()
  model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train_window.shape[1], len(X_train.columns)))) # layer 1 lstm
  model.add(Dropout(0.2)) # layer 1 dropout regularisation
  model.add(LSTM(units=50, return_sequences=True)) # layer 2 lstm
  model.add(Dropout(0.2)) # layer 2 dropout regularisation
  model.add(LSTM(units=50, return_sequences=True)) # layer 3 lstm
  model.add(Dropout(0.2)) # layer 3 dropout regularisation
  model.add(LSTM(units=50)) # layer 4 lstm
  model.add(Dropout(0.2)) # layer 4 dropout regularisation
  model.add(Dense(units=1)) # output layer
  model.compile(optimizer='adam', loss='mean_squared_error') # compile the rnn
  return model

model = create_model()
model.fit(X_train_window, y_train_window, epochs=500, batch_size=32, verbose=2)
model.summary()

Epoch 1/500
197/197 - 23s - loss: 0.0036
Epoch 2/500
197/197 - 2s - loss: 0.0017
Epoch 3/500
197/197 - 2s - loss: 0.0013
Epoch 4/500
197/197 - 2s - loss: 0.0011
Epoch 5/500
197/197 - 2s - loss: 0.0010
Epoch 6/500
197/197 - 2s - loss: 9.4322e-04
Epoch 7/500
197/197 - 2s - loss: 8.8697e-04
Epoch 8/500
197/197 - 2s - loss: 9.3259e-04
Epoch 9/500
197/197 - 2s - loss: 9.2008e-04
Epoch 10/500
197/197 - 2s - loss: 0.0010
Epoch 11/500
197/197 - 2s - loss: 9.7128e-04
Epoch 12/500
197/197 - 2s - loss: 7.9636e-04
Epoch 13/500
197/197 - 2s - loss: 8.3827e-04
Epoch 14/500
197/197 - 2s - loss: 0.0011
Epoch 15/500
197/197 - 2s - loss: 6.9461e-04
Epoch 16/500
197/197 - 2s - loss: 7.1690e-04
Epoch 17/500
197/197 - 2s - loss: 7.8100e-04
Epoch 18/500
197/197 - 2s - loss: 7.0805e-04
Epoch 19/500
197/197 - 2s - loss: 7.3643e-04
Epoch 20/500
197/197 - 2s - loss: 8.0639e-04
Epoch 21/500
197/197 - 2s - loss: 7.0114e-04
Epoch 22/500
197/197 - 2s - loss: 7.1824e-04
Epoch 23/500
197/197 - 2s - loss: 7.1098e-04
E

In [None]:
'''Optionally save model, model.json and weights.h5'''

model_filename = f'model_{market}_{granularity}_{len(X_train.columns)}-inputs.json'
weights_filename = f'weights_{market}_{granularity}_{len(X_train.columns)}-inputs.h5'

!ls /content

# save structure to json
model_json = model.to_json()
with open(model_filename, 'w') as json_file:
  json_file.write(model_json)

# save weights to hdf5
model.save_weights(weights_filename)

files.download(f'/content/{model_filename}')
files.download(f'/content/{weights_filename}')

model_BTC-GBP_3600_1-inputs.json  sample_data  weights_BTC-GBP_3600_1-inputs.h5


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
'''Optionally load model, model.json and weights.h5'''

model_filename = f'model_{market}_{granularity}_{len(X_train.columns)}-inputs.json'
weights_filename = f'weights_{market}_{granularity}_{len(X_train.columns)}-inputs.h5'

try:
  files.upload()
  !ls /content

  # read structure from json
  model = open(model_filename, 'r')
  json = model.read()
  model.close()
  model = model_from_json(json)

  # read weights from hdf5
  model.load_weights(f'/content/{weights_filename}')
except Exception as e:
  print (e)

Saving model_BTC-GBP_3600_1-inputs.json to model_BTC-GBP_3600_1-inputs (1).json
Saving weights_BTC-GBP_3600_1-inputs.h5 to weights_BTC-GBP_3600_1-inputs (1).h5
'model_BTC-GBP_3600_1-inputs (1).json'	'weights_BTC-GBP_3600_1-inputs (1).h5'
 model_BTC-GBP_3600_1-inputs.json	 weights_BTC-GBP_3600_1-inputs.h5
 sample_data


In [None]:
X_scaler = MinMaxScaler(feature_range=(0, 1))
X_scaled = X_scaler.fit_transform(X_test)
X_test = pd.DataFrame(X_scaled, columns=[X_test.columns])

print ('X_test inputs:', X_test.columns)

y_scaler = MinMaxScaler(feature_range=(0, 1))
y_scaled = y_scaler.fit_transform(y)
y_test = pd.DataFrame(y_scaled, columns=[y.columns])

print ('y_test inputs:', y_test.columns[0])

X_test_window=[]
y_test_window=[]
for i in range(window, len(X_test)):
  X_test_window.append(X_scaled[i-window:i, :])
  y_test_window.append(X_scaled[i, 0])

X_test_window, y_test_window = np.array(X_test_window), np.array(y_test_window)
X_test_window = np.reshape(X_test_window, (X_test_window.shape[0], X_test_window.shape[1], len(X_test.columns)))

print ('X_test_window.shape:', X_test_window.shape)
print ('y_test_window.shape:', y_test_window.shape)

y_pred = model.predict(X_test_window)
y_pred = y_scaler.inverse_transform(y_pred)

print ('y_pred.shape:', y_pred.shape)

In [None]:
plt.figure(figsize=(30,10))
plt.plot(df['date'].tail(len(y_pred)), df['close'][-len(y_pred):].values, color='red', label=f'Actual {market} {granularity} Daily {y.columns.values[0].title()}')
plt.plot(df['date'].tail(len(y_pred)), y_pred, color='blue', label=f'Predicted {market} {granularity} Daily {y.columns.values[0].title()}')
plt.title(f'{market} {granularity} Daily {y.columns.values[0].title()} Prediction')
plt.xlabel('Time')
plt.ylabel(f'{y.columns.values[0].title()}')
plt.xticks(rotation=90)
plt.legend()
plt.show()