<a href="https://colab.research.google.com/github/sd3ntato/ISPR_Project/blob/main/btc.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This project consists in using DeepESN and GruESN to predict bitcoin stock market price.

I use a dataset from kaggle (https://www.kaggle.com/mczielinski/bitcoin-historical-data) that reports price minute by minute.

#preliminary stuff 

## imports
import necessary libraries, functions and classes

In [1]:
# import mainstream libraries
import numpy as np
import pandas as pd
import math
import plotly.graph_objects as go
from IPython.display import clear_output

# import models from my implementations
from ESN_module import ESN
from DeepESN_module import DeepESN
from utils import * # auxiliary functions

## reading and visualization of data
take the data out of the csv file, plot candlestick chart

In [2]:
# read the dataset with data minute by minute.
# we takethe last 10 000 (approximately 10 weeks) and the same amount for testing
data = pd.read_csv('data.csv')
#data.index = pd.RangeIndex(len(data.index))

print(f'total number of samples given: {len(data)}')
data

total number of samples given: 13000


Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price,Date
0,1616366520,57747.16,57798.07,57743.95,57757.46,1.542429,89089.344726,57759.129449,2021-03-21 22:42:00
1,1616366580,57776.54,57776.54,57703.00,57753.17,4.202042,242611.491410,57736.569842,2021-03-21 22:43:00
2,1616366640,57764.47,57775.78,57758.41,57758.41,0.585944,33852.501910,57774.318362,2021-03-21 22:44:00
3,1616366700,57748.37,57817.76,57748.06,57779.23,3.488369,201491.642080,57760.980994,2021-03-21 22:45:00
4,1616366760,57792.07,57806.24,57789.99,57806.24,0.369271,21344.217682,57800.905108,2021-03-21 22:46:00
...,...,...,...,...,...,...,...,...,...
12995,1617148560,58714.31,58714.31,58686.00,58686.00,1.384487,81259.372187,58692.753339,2021-03-30 23:56:00
12996,1617148620,58683.97,58693.43,58683.97,58685.81,7.294848,428158.146640,58693.226508,2021-03-30 23:57:00
12997,1617148680,58693.43,58723.84,58693.43,58723.84,1.705682,100117.070370,58696.198496,2021-03-30 23:58:00
12998,1617148740,58742.18,58770.38,58742.18,58760.59,0.720415,42332.958633,58761.866202,2021-03-30 23:59:00


In [3]:
# visualize and work on some of the data:
plot_candlesticks(data)

##Preprocessing
shift the data separating input from target output, than normalize to 0 mean and 1 varicance and separate training, validation and test data.

In [3]:
# temporal shift: we try and predict the price r minutes ahead.
r = 3

# put data inside a matrix, only ohlc values
dataset_x = data[['Open','High','Low','Close']].to_numpy()

# normalize data to 0 mean and 1 variance
dataset_x = normalize(dataset_x)

# separate input data from desired_output data: the data is shifted to the left by r positions,
# so that it goes ahead by r temporal steps w.r.t. dataset_x . 
# data_x | 1 | 2 | 3 | 4 | 1 | 2 | 3 | 4 |
#        | <-  <-  <-  <-  <-  <-  <-  <-| 
#    | / | 2 | 3 | 4 | 1 | 2 | 3 | 4 | / |
dataset_y = np.roll(dataset_x,-r,axis=0)

# discard first and last r data-points because they are not meaningfull anymore
dataset_x = dataset_x[r:np.size(dataset_x)-r]
dataset_y = dataset_y[r:np.size(dataset_y)-r]

########### check that data is properly re-arranged ###########
# y[t] = x[t+r]  
# y[t] is r steps in the future w.r.t. x[t]
for t in range(np.size(dataset_x,axis=0)-r):
  assert np.linalg.norm(dataset_x[t+r] - dataset_y[t])==0.0

# separate training data from validation and test.
train_x = dataset_x[:4000]
train_y = dataset_y[:4000]

valid_x = dataset_x[4000:5000]
valid_y = dataset_y[4000:5000]

test_x = dataset_x[5000:]
test_y = dataset_y[5000:]

print(f'\nDATA SHAPES \ntraining data: {np.shape(train_x)},{np.shape(train_y)},\nvalid data: {np.shape(valid_x)},{np.shape(valid_y)}, \ntest data: {np.shape(test_x)}, {np.shape(test_y)}')


DATA SHAPES 
training data: (4000, 4),(4000, 4),
valid data: (1000, 4),(1000, 4), 
test data: (7997, 4), (7997, 4)


# LOCAL IMPLEMENTATIONS
ESN and DeepESN

In [4]:
# When working with my implementation of ESN and DeepESN, the convention is that every data point is a column vector
# reshape data so that each data point is a column vector, we have a tensor containing datapoints in the form of column vector
train_x = train_x.reshape(-1,4,1)
train_y = train_y.reshape(-1,4,1)
valid_x = valid_x.reshape(-1,4,1)
valid_y = valid_y.reshape(-1,4,1)
test_x = test_x.reshape(-1,4,1)
test_y = test_y.reshape(-1,4,1)

## ESN shallow

In [5]:
# now that we got the data ready, we can go ahead and train a network on it, then asses the results. 
# start with an initial very simple trial using a shallow ESN, just to see if the framework is working.

# instantiate the network object
n = ESN(Nu=4, Ny=4, rho=1, Nr=100, r_density =0.1, i_density =1)

# train the network on training data.
#        train_x       train_y      washout sequence
n.train(train_x[501:],train_y[501:],train_x[:500])

# run the network on the test set and compute mse. (save output sequence to plot it togheter with real data)
err , out = n.score(test_x,test_y)
print(f'MSE on test dataset: {err}')

MSE on test dataset: 0.0018043557936802538


In [6]:
# arrange the obtained data in dataframes so that we can pretty-plot them
real = ohlc_matrix_to_dataframe(test_y.reshape(-1,4)) 
predicted = ohlc_matrix_to_dataframe(out.reshape(-1,4),r=r)

# then actually plot the results:
# plot candlestick graph with an orange indicating the predicted Low value.
fig = plot_comparison_candlesticks_with_predicted_low(real, predicted)
fig.show()

## DeepESN readout concat

In [None]:
# for DeepESN I have two versions: in the first version the readout is connected the recurrent units in all reservoirs

# instantiate the network object
n = DeepESN(Nu=4, Ny=4,N=4,Nr=10,rho=0.3)

# train the network
#             train_x       train_y      washout sequence
n.train_concat(train_x[501:],train_y[501:],train_x[:500])

# run the network on the test set and compute mse. (save output sequence to plot it togheter with real data)
err , out = n.score_concat(test_x,test_y)
print(f'MSE on test dataset: {err}')

MSE on test dataset: 0.0017160756439112681


In [None]:
# we arrange the obtained data in dataframes so that we can pretty-plot them
real = ohlc_matrix_to_dataframe(test_y.reshape(-1,4)) 
predicted = ohlc_matrix_to_dataframe(out.reshape(-1,4),r=r)

# then actually plot the results:
# we plot candlestick graph with a bar indicating the predicted Low value.
fig = plot_comparison_candlesticks_with_predicted_low(real, predicted)
fig.show()

##DeepESN readout ultimo layer

In [None]:
# in the second variation the readout is connected only to hidden units of the last layer in the stack

Nl=3 # number of recurrent layers
#instantiate the network
n = DeepESN(Nu=4, Ny=4,N=Nl,Nr=30, rho=0.2)

# train the network
#        train_x       train_y      washout sequence
n.train(train_x[501:],train_y[501:],train_x[:500],Nl-1)

# run the network on the test set and compute mse. (save output sequence to plot it togheter with real data)
err , out = n.score(test_x,test_y,Nl-1)
print(f'MSE on test dataset: {err}')

MSE on test dataset: 0.0018053697670817723


In [None]:
# we arrange the obtained data in dataframes so that we can pretty-plot them
real = ohlc_matrix_to_dataframe(test_y.reshape(-1,4)) 
predicted = ohlc_matrix_to_dataframe(out.reshape(-1,4),r=r)

# then actually plot the results:
# we plot candlestick graph with a bar indicating the predicted Low value.
fig = plot_comparison_candlesticks_with_predicted_low(real, predicted)
fig.show()

# KERAS

In [11]:
# when working with keras:
# - each point in time is a row-vector
# - I divide the sequence into sub-sequences of lenght 80
tx = train_x.reshape(-1,80,4)
ty = train_y.reshape(-1,80,4)

vx = valid_x.reshape(1,-1,4)
vy = valid_y.reshape(1,-1,4)

ttx = test_x.reshape(1,-1,4)
tty = test_y.reshape(1,-1,4)

## GruESN
Create the GruESN RZ layer, then plug it into a keras model and test

In [7]:
import tensorflow as tf
import tensorflow.keras as keras
from scipy.stats import uniform
import scipy.sparse as s

# custom initializer to build recurrent weight matrix with given spectral radius
class Recurrent_sparse_initializer(tf.keras.initializers.Initializer):
    def __call__(self, shape, dtype=None):
      # create sparse matrix with values in [-1,1]
      wrandom = s.random(shape[0],shape[1],density = 0.1, data_rvs=uniform(loc=-1,scale=2).rvs ).todense() # matrice sparsa con valori in distribuzione uniforme tra -1 e 1
      # rescale it to set spectral radius
      w = wrandom * ( 0.9 / max(np.abs(np.linalg.eigvals(wrandom))) )
      return np.array(w)

# custom cell to plug into keras.layers.RNN
class GruESNRZCell(keras.layers.Layer):

  def __init__(self, units, **kwargs):
    self.state_size = units
    super().__init__(**kwargs)

  def build(self, input_shape):
    """
      instantiate all the necessary weight matricesx
    """
    #parameters for reset gate
    self.Wrin = self.add_weight( shape=(input_shape[-1], self.state_size), initializer="uniform", trainable =True)
    self.Wr = self.add_weight(shape=(self.state_size, self.state_size), initializer="uniform", trainable =True)

    # parameters for update gate
    self.Wzin = self.add_weight(shape=(input_shape[-1], self.state_size), initializer="uniform", trainable =True)
    self.Wz = self.add_weight(shape=(self.state_size, self.state_size), initializer="uniform", trainable =True)

    # non-trainable parameters
    self.Win = self.add_weight(shape=(input_shape[-1], self.state_size), initializer="uniform", trainable =False)
    self.W = self.add_weight(shape=(self.state_size, self.state_size), initializer=Recurrent_sparse_initializer(), trainable =False)

    self.built = True
    
  def call(self, inputs, states):
    """
      implement equation of GRU neuron
    """
    u = inputs
    x_tm1 = states[0]

    r = tf.math.sigmoid( tf.matmul(inputs, self.Wrin ) + tf.matmul( x_tm1, self.Wr) )
    z = tf.math.sigmoid( tf.matmul(inputs, self.Wzin ) + tf.matmul( x_tm1, self.Wz) )
    h = tf.math.tanh( tf.matmul(inputs, self.Win) + tf.matmul( tf.math.multiply(r, x_tm1), self.W ) )
    x = tf.math.multiply( z, x_tm1) + tf.math.multiply( (tf.ones(self.state_size) - z), h  )

    return x, [x]


In [12]:
# instantiate the keras model and compile compile it
model = keras.models.Sequential([
      #keras.layers.GRU(100, stateful=True, return_sequences=True, batch_input_shape=(1, 10000, 4)), # stateful one
      keras.layers.RNN( GruESNRZCell(100),return_sequences=True, input_shape=(None, 4)),
      #keras.layers.GRU( 100,return_sequences=True, input_shape=(None, 4)),
      #keras.layers.Dense(10),  
      keras.layers.Dense(4), 
])
model.compile(optimizer="rmsprop", loss="mse", sample_weight_mode="temporal",)
print(model.summary())

# train the model
model.fit(tx,ty,epochs=20, batch_size=1, shuffle=True)
clear_output()

# run the model on the test set to get the predictions
out = model.predict(ttx)

# arrange the obtained data in dataframes so that we can pretty-plot them
real = ohlc_matrix_to_dataframe(test_y.reshape(-1,4)) 
predicted = ohlc_matrix_to_dataframe(out.reshape(-1,4),r=r)

# then actually plot the results:
# plot candlestick graph with a bar indicating the predicted Low value.
fig = plot_comparison_candlesticks_with_predicted_low(real, predicted)
fig.show()

err = MSE( out.reshape(-1,4), tty.reshape(-1,4), 500)
print(f'MSE on test dataset: {err}')

MSE on test dataset: 0.0019775625199651837


#tabular data

## calculations

In [None]:
# to assess performances I try various initializations

n_trials = 50 # number of initializations

# collect results on the test set on arrays

In [None]:
## shallow ESN
errs_ESN = [None]*n_trials
for i in range(n_trials):
  n = ESN(Nu=4, Ny=4,rho=1, Nr=100, r_density =0.1, i_density =1)
  n.train(train_x[501:],train_y[501:],train_x[:500])
  err , _ = n.score(test_x,test_y)
  errs_ESN[i] = err

In [None]:
# deepESN Concat
errs_DeepESN_concat = [None]*n_trials
for i in range(n_trials):
  n = DeepESN(Nu=4, Ny=4,N=4,Nr=10,rho=0.3)
  n.train_concat(train_x[501:],train_y[501:],train_x[:500])
  err , out = n.score_concat(test_x,test_y)
  errs_DeepESN_concat[i] = err

In [None]:
# deepESN last layer
errs_DeepESN_last = [None]*n_trials
for i in range(n_trials):
  Nl=3
  n = DeepESN(Nu=4, Ny=4,N=Nl,Nr=30, rho=0.2)
  n.train(train_x[501:],train_y[501:],train_x[:500],Nl-1)
  err , out = n.score(test_x,test_y,Nl-1)
  errs_DeepESN_last[i] = err

In [None]:
# gruESN
errs_GruESN = [None]*n_trials
for i in range(n_trials):
  model = keras.models.Sequential([
      keras.layers.RNN( GruESNRZCell(100),return_sequences=True, input_shape=(None, 4)),
      keras.layers.Dense(4), 
  ])
  model.compile(optimizer="rmsprop", loss="mse", sample_weight_mode="temporal",)
  model.fit(tx,ty,epochs=15, batch_size=1, shuffle=True,verbose=False)
  out = model.predict(ttx)
  err = MSE( out.reshape(-1,4), tty.reshape(-1,4), 500)
  print(f'MSE on test dataset: {err}')
  clear_output(wait=True)
  errs_GruESN[i] = err

In [None]:
# GRU
errs_Gru = [None]*n_trials
for i in range(n_trials):
  model = keras.models.Sequential([
      keras.layers.GRU( 100, return_sequences=True, input_shape=(None, 4)),
      keras.layers.Dense(4), 
  ])
  model.compile(optimizer="adam", loss="mse", sample_weight_mode="temporal",)
  model.fit(tx,ty,epochs=20, batch_size=1, shuffle=True)
  out = model.predict(ttx)
  err = MSE( out.reshape(-1,4), tty.reshape(-1,4), 500)
  print(f'MSE on test dataset: {err}')
  clear_output(wait=True)
  errs_Gru[i] = err

In [None]:
errs = {
    'ESN':errs_ESN,
    'DeepESN concat':errs_DeepESN_concat,
    'DeepESN last':errs_DeepESN_last,
    'GruESN': errs_GruESN,
    'GRU': errs_Gru
}
errs = pd.DataFrame(errs)

## Plotting

In [None]:
errs.plot(kind='box', rot='30')

In [None]:
e1 = errs.drop(errs['DeepESN concat'].idxmax()).drop(errs['ESN'].idxmax())
e1 = e1.drop(e1['ESN'].idxmax())
e1 = e1.drop(e1['ESN'].idxmax())
e1.plot(kind='box',rot=30)

In [None]:
errs.describe()

In [None]:
errs.to_numpy()