# Imports + Notebook Setup

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.utils import class_weight

import keras
from keras.utils.vis_utils import plot_model
from keras.models import Model, Sequential
from keras.optimizers import Adam, SGD
from keras.models import load_model
from keras.callbacks import ModelCheckpoint
from keras.callbacks import EarlyStopping
from keras.wrappers.scikit_learn import KerasClassifier
from keras.layers import Input, Dense, Bidirectional, Dropout, GlobalAveragePooling1D
from keras.layers.recurrent import LSTM
from keras.models import Model
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.convolutional import Conv1D, MaxPooling1D

from sklearn.metrics import accuracy_score, confusion_matrix, precision_recall_fscore_support
from sklearn.metrics import f1_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score 

from sklearn.decomposition import PCA

from bioinfokit.visuz import cluster

ModuleNotFoundError: ignored

# Data Processing

In [None]:
# Load the Bitcoin table and discard the identifier/date columns in one chain.
btc_meta = pd.read_csv('Bitcoin-Metadata.csv').drop(
    columns = ['SNo', 'Name', 'Symbol', 'Date'])
# Independent copy that still carries 'Close2'; the regression sections read
# their target from it.
btc_meta_r = btc_meta.copy()

def f(x):
  """Return 1 when the row's 'Close2' is greater than its 'Close', else 0."""
  return 1 if x['Close2'] > x['Close'] else 0

# Classification frame: a copy of the cleaned table with a binary 'Direction'
# label (1 when 'Close2' is greater than 'Close', otherwise 0).
btc_meta_c = btc_meta.copy()
# Column-wise comparison yields the same 0/1 labels as applying f row by row.
btc_meta_c['Direction'] = (btc_meta_c['Close2'] > btc_meta_c['Close']).astype(int)
# Remove the raw target by name (as the Ethereum section does) instead of by
# position, so a different column order in the CSV cannot leave 'Close2' among
# the model inputs.
btc_meta_c = btc_meta_c.drop(columns = ['Close2'])

# LSTM - Classification

## Structuring

In [None]:
# Price-derived columns used as inputs for the direction classifier.
features = btc_meta_c[['High', 'Low', 'Open', 'Close',
                       'Rel_Close', 'HL_Ratio', 'Rel_High', 'Rel_Low',
                       'SMA7', 'SMA30', 'SMA60', 'SMA90', 'SMA200']].values
X_temp = np.array(features)
y_temp = np.array(btc_meta_c['Direction'])

# Stack overlapping look-back windows so that X is 3-dimensional
# (windows, n_lookback, n_features), matching the LSTM input_shape used below.
n_instances, n_features = X_temp.shape
n_lookback = 90

# The window starting at row s spans rows s .. s + n_lookback - 1 and takes its
# label from the row immediately after the window.
window_starts = range(n_instances - n_lookback)
X = np.array([X_temp[s: s + n_lookback, :] for s in window_starts])
y = np.array([y_temp[s + n_lookback] for s in window_starts])

## Create Model

In [None]:
# Five stacked LSTM layers narrowing from 512 to 16 units, topped by a single
# sigmoid unit for the binary direction label.
model = Sequential([
    LSTM(512, input_shape = (n_lookback, n_features), return_sequences = True),
    LSTM(256, dropout = 0.2, recurrent_dropout = 0.2, return_sequences = True),
    LSTM(128, dropout = 0.2, recurrent_dropout = 0.2, return_sequences = True),
    LSTM(64, dropout = 0.2, recurrent_dropout = 0.2, return_sequences = True),
    # The last recurrent layer emits only its final output for the dense head.
    LSTM(16, dropout = 0.1, recurrent_dropout = 0.1, return_sequences = False),
    Dense(1, activation = 'sigmoid'),
])
model.compile(optimizer = 'adam',
              loss = 'binary_crossentropy',
              metrics = ['accuracy'])

In [None]:
# Chronological hold-out: consecutive windows share n_lookback - 1 rows, so a
# shuffled split would place near-identical windows in both train and test and
# inflate the test scores. shuffle = False keeps the last 25% of windows (the
# most recent ones) for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25,
                                                    shuffle = False)

In [None]:
# Train for at most 5 epochs with 5% of the training arrays set aside for
# validation; EarlyStopping is configured with a patience of 2 epochs.
model.fit(
    X_train,
    y_train,
    epochs = 5,
    batch_size = 64,
    validation_split = 0.05,
    callbacks = [EarlyStopping(patience = 2)],
)

## Evaluation

In [None]:
# Score the classifier on the held-out split; the model was compiled with
# binary cross-entropy loss and an accuracy metric.
model.evaluate(X_test, y_test)

In [None]:
# Threshold the sigmoid outputs at 0.5 to get hard 0/1 labels (kept as a plain
# list of ints), then report (accuracy, binary F1) on the held-out split.
y_pred = (model.predict(X_test).ravel() > 0.5).astype(int).tolist()
lstm_metrics = (
    accuracy_score(y_test, y_pred),
    f1_score(y_test, y_pred, average = 'binary'),
)
lstm_metrics

# LSTM - Regression

## Structuring

In [None]:
# Inputs: every column of the classification frame except its last one
# ('Direction'); target: 'Close2' from the untouched regression copy.
X_temp = np.array(btc_meta_c.iloc[:, :-1])
y_temp = np.array(btc_meta_r['Close2'])

# Stack overlapping look-back windows so that X is 3-dimensional
# (windows, n_lookback, n_features), matching the LSTM input_shape used below.
n_instances, n_features = X_temp.shape
n_lookback = 90

# The window starting at row s spans rows s .. s + n_lookback - 1 and takes its
# target from the row immediately after the window.
window_starts = range(n_instances - n_lookback)
X = np.array([X_temp[s: s + n_lookback, :] for s in window_starts])
y = np.array([y_temp[s + n_lookback] for s in window_starts])

## Create Model

In [None]:
# Same five-layer LSTM stack as the classifier, but with a linear single-unit
# head trained on mean absolute error.
model = Sequential([
    LSTM(512, input_shape = (n_lookback, n_features), return_sequences = True),
    LSTM(256, dropout = 0.2, recurrent_dropout = 0.2, return_sequences = True),
    LSTM(128, dropout = 0.2, recurrent_dropout = 0.2, return_sequences = True),
    LSTM(64, dropout = 0.2, recurrent_dropout = 0.2, return_sequences = True),
    # The last recurrent layer emits only its final output for the dense head.
    LSTM(16, dropout = 0.1, recurrent_dropout = 0.1, return_sequences = False),
    Dense(1),
])
model.compile(optimizer = 'adam',
              loss = 'mean_absolute_error')

In [None]:
# Chronological hold-out: consecutive windows share n_lookback - 1 rows, so a
# shuffled split would place near-identical windows in both train and test and
# flatter the test error. shuffle = False keeps the last 25% of windows (the
# most recent ones) for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25,
                                                    shuffle = False)

In [None]:
# Train for at most 8 epochs with 5% of the training arrays set aside for
# validation; EarlyStopping is configured with a patience of 2 epochs.
model.fit(
    X_train,
    y_train,
    epochs = 8,
    batch_size = 128,
    validation_split = 0.05,
    callbacks = [EarlyStopping(patience = 2)],
)

## Evaluation

In [None]:
# Score the regressor on the held-out split; the compiled loss is mean
# absolute error and no extra metrics were configured.
model.evaluate(X_test, y_test)

In [None]:
# The network ends in Dense(1), so predict() yields one column per sample.
# Flatten it to a 1-D vector aligned with y_test instead of copying it into a
# list of one-element arrays.
y_pred = model.predict(X_test).ravel()
# (mean absolute error, R^2) on the held-out split.
lstm_metrics = (mean_absolute_error(y_test, y_pred),
                r2_score(y_test, y_pred))
lstm_metrics

# CNN - Classification

## Structuring

In [None]:
# Price-derived columns used as inputs for the direction classifier.
features = btc_meta_c[['High', 'Low', 'Open', 'Close',
                       'Rel_Close', 'HL_Ratio', 'Rel_High', 'Rel_Low',
                       'SMA7', 'SMA30', 'SMA60', 'SMA90', 'SMA200']].values
X_temp = np.array(features)
y_temp = np.array(btc_meta_c['Direction'])

# Stack overlapping look-back windows so that X is 3-dimensional
# (windows, n_lookback, n_features), matching the Conv1D input_shape used below.
n_instances, n_features = X_temp.shape
n_lookback = 90

# The window starting at row s spans rows s .. s + n_lookback - 1 and takes its
# label from the row immediately after the window.
window_starts = range(n_instances - n_lookback)
X = np.array([X_temp[s: s + n_lookback, :] for s in window_starts])
y = np.array([y_temp[s + n_lookback] for s in window_starts])

## Create Model

In [None]:
# Layer stack: a single-filter convolution with kernel size 50, then eight
# causal dilated convolutions (dilation rates 1, 2, 4, 8 repeated twice), each
# followed by 10% dropout, then a 1x1 convolution, global average pooling and
# a sigmoid unit for the binary direction label.
cnn_layers = [Conv1D(1, 50, activation = 'relu',
                     input_shape = (n_lookback, n_features))]
for dilation in (1, 2, 4, 8, 1, 2, 4, 8):
    cnn_layers.append(Conv1D(filters = 20, kernel_size = 2, padding = 'causal',
                             activation = 'relu', dilation_rate = dilation))
    cnn_layers.append(Dropout(0.1))
cnn_layers.append(Conv1D(filters = 10, kernel_size = 1))
cnn_layers.append(GlobalAveragePooling1D())
cnn_layers.append(Dense(1, activation = 'sigmoid'))

model = Sequential(cnn_layers)
model.compile(optimizer = 'adam',
              loss = 'binary_crossentropy',
              metrics = ['accuracy'])

In [None]:
# Chronological hold-out: consecutive windows share n_lookback - 1 rows, so a
# shuffled split would place near-identical windows in both train and test and
# inflate the test scores. shuffle = False keeps the last 25% of windows (the
# most recent ones) for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25,
                                                    shuffle = False)

In [None]:
# Train for at most 20 epochs with 5% of the training arrays set aside for
# validation; EarlyStopping is configured with a patience of 5 epochs.
model.fit(
    X_train,
    y_train,
    epochs = 20,
    batch_size = 64,
    validation_split = 0.05,
    callbacks = [EarlyStopping(patience = 5)],
)

## Evaluation

In [None]:
# Score the classifier on the held-out split; the model was compiled with
# binary cross-entropy loss and an accuracy metric.
model.evaluate(X_test, y_test)

In [None]:
# Threshold the sigmoid outputs at 0.5 to get hard 0/1 labels (kept as a plain
# list of ints), then report (accuracy, binary F1) on the held-out split.
y_pred = (model.predict(X_test).ravel() > 0.5).astype(int).tolist()
cnn_metrics = (
    accuracy_score(y_test, y_pred),
    f1_score(y_test, y_pred, average = 'binary'),
)
cnn_metrics

# CNN - Regression

## Structuring

In [None]:
# Inputs: every column of the classification frame except its last one
# ('Direction'); target: 'Close2' from the untouched regression copy.
X_temp = np.array(btc_meta_c.iloc[:, :-1])
y_temp = np.array(btc_meta_r['Close2'])

# Stack overlapping look-back windows so that X is 3-dimensional
# (windows, n_lookback, n_features), matching the Conv1D input_shape used below.
n_instances, n_features = X_temp.shape
n_lookback = 90

# The window starting at row s spans rows s .. s + n_lookback - 1 and takes its
# target from the row immediately after the window.
window_starts = range(n_instances - n_lookback)
X = np.array([X_temp[s: s + n_lookback, :] for s in window_starts])
y = np.array([y_temp[s + n_lookback] for s in window_starts])

## Create Model

In [None]:
# Layer stack: a single-filter convolution with kernel size 50, then eight
# causal dilated convolutions (dilation rates 1, 2, 4, 8 repeated twice), each
# followed by 10% dropout, then a 1x1 convolution and global average pooling.
# The regression head is Dense(10) -> Dropout(0.2) -> Dense(1), all linear.
cnn_layers = [Conv1D(1, 50, activation = 'relu',
                     input_shape = (n_lookback, n_features))]
for dilation in (1, 2, 4, 8, 1, 2, 4, 8):
    cnn_layers.append(Conv1D(filters = 20, kernel_size = 2, padding = 'causal',
                             activation = 'relu', dilation_rate = dilation))
    cnn_layers.append(Dropout(0.1))
cnn_layers.append(Conv1D(filters = 10, kernel_size = 1))
cnn_layers.append(GlobalAveragePooling1D())
cnn_layers.append(Dense(10))
cnn_layers.append(Dropout(0.2))
cnn_layers.append(Dense(1))

model = Sequential(cnn_layers)
model.compile(optimizer = 'adagrad',
              loss = 'mean_absolute_error')

In [None]:
# Chronological hold-out: consecutive windows share n_lookback - 1 rows, so a
# shuffled split would place near-identical windows in both train and test and
# flatter the test error. shuffle = False keeps the last 25% of windows (the
# most recent ones) for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25,
                                                    shuffle = False)

In [None]:
# Train for at most 100 epochs with 5% of the training arrays set aside for
# validation; EarlyStopping is configured with a patience of 20 epochs.
model.fit(
    X_train,
    y_train,
    epochs = 100,
    batch_size = 64,
    validation_split = 0.05,
    callbacks = [EarlyStopping(patience = 20)],
)

## Evaluation

In [None]:
# Score the regressor on the held-out split; the compiled loss is mean
# absolute error and no extra metrics were configured.
model.evaluate(X_test, y_test)

In [None]:
# The network ends in Dense(1), so predict() yields one column per sample.
# Flatten it to a 1-D vector aligned with y_test instead of copying it into a
# list of one-element arrays.
y_pred = model.predict(X_test).ravel()
# (mean absolute error, R^2) on the held-out split.
cnn_metrics = (mean_absolute_error(y_test, y_pred),
               r2_score(y_test, y_pred))
cnn_metrics

# Further Experiments

## Downscaling the Model 

Our current price prediction model uses 90 days of historical prices to predict the next-day price, which is a memory-intensive operation given the number of both rows and columns in the training dataset. Furthermore, we know that days further in the past are likely to be less significant than days closer to the day being predicted. As a result, we can apply some dimensionality reduction techniques and see whether or not our optimised regression model still maintains its performance.

In [None]:
# getting the price-related features from the dataframe
features = btc_meta_c[['High', 'Low', 'Open', 'Close', 'Rel_Close', 'HL_Ratio', 
                       'Rel_High', 'Rel_Low', 'SMA7', 'SMA30', 'SMA60', 
                       'SMA90', 'SMA200']].values
X_temp = np.array(features)
y_temp = np.array(btc_meta_c['Direction'])

# Standardise each column before PCA so no single price scale dominates.
# NOTE(review): the scaler and the PCA are fitted on the full table, i.e.
# before the train/test split performed further down - confirm this is intended.
sc = StandardScaler()
X_temp = sc.fit_transform(X_temp)

# Full PCA (all components kept) purely to inspect the variance spectrum.
pca = PCA()
pca = pca.fit(X_temp)
var = pca.explained_variance_
# One label per fitted component. Taking the count from the ratio vector keeps
# labels and bar heights the same length and avoids PCA.n_features_, which is
# no longer available in current scikit-learn releases.
num = len(pca.explained_variance_ratio_)
names = [str(i) for i in range(1, num + 1)]
cluster.screeplot(obj = [names, pca.explained_variance_ratio_], dim = (18, 5), show = True)

In [None]:
# Refit with 6 components and project the current X_temp onto them.
pca = PCA(n_components = 6).fit(X_temp)
X_temp = pca.transform(X_temp)

In [None]:
# Stack overlapping look-back windows over the PCA-reduced features so that X
# is 3-dimensional (windows, n_lookback, n_features).
n_instances, n_features = X_temp.shape
n_lookback = 90

# The window starting at row s spans rows s .. s + n_lookback - 1 and takes its
# label from the row immediately after the window.
window_starts = range(n_instances - n_lookback)
X = np.array([X_temp[s: s + n_lookback, :] for s in window_starts])
y = np.array([y_temp[s + n_lookback] for s in window_starts])
# Chronological hold-out: consecutive windows share n_lookback - 1 rows, so a
# shuffled split would place near-identical windows in both train and test and
# inflate the test scores. shuffle = False keeps the last 25% of windows (the
# most recent ones) for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25,
                                                    shuffle = False)

In [None]:
# Layer stack: a single-filter convolution with kernel size 50, then eight
# causal dilated convolutions (dilation rates 1, 2, 4, 8 repeated twice), each
# followed by 10% dropout, then a 1x1 convolution, global average pooling and
# a sigmoid unit for the binary direction label.
cnn_layers = [Conv1D(1, 50, activation = 'relu',
                     input_shape = (n_lookback, n_features))]
for dilation in (1, 2, 4, 8, 1, 2, 4, 8):
    cnn_layers.append(Conv1D(filters = 20, kernel_size = 2, padding = 'causal',
                             activation = 'relu', dilation_rate = dilation))
    cnn_layers.append(Dropout(0.1))
cnn_layers.append(Conv1D(filters = 10, kernel_size = 1))
cnn_layers.append(GlobalAveragePooling1D())
cnn_layers.append(Dense(1, activation = 'sigmoid'))

model = Sequential(cnn_layers)
model.compile(optimizer = 'adam',
              loss = 'binary_crossentropy',
              metrics = ['accuracy'])
# Train for at most 20 epochs with 5% of the training arrays set aside for
# validation; EarlyStopping is configured with a patience of 5 epochs.
model.fit(
    X_train,
    y_train,
    epochs = 20,
    batch_size = 64,
    validation_split = 0.05,
    callbacks = [EarlyStopping(patience = 5)],
)

In [None]:
# Threshold the sigmoid outputs at 0.5 to get hard 0/1 labels (kept as a plain
# list of ints), then report (accuracy, binary F1) on the held-out split.
y_pred = (model.predict(X_test).ravel() > 0.5).astype(int).tolist()
cnn_metrics = (
    accuracy_score(y_test, y_pred),
    f1_score(y_test, y_pred, average = 'binary'),
)
cnn_metrics

## Extending to Other Cryptocurrencies

In [None]:
# Load the Ethereum table and discard the identifier/date columns in one chain.
eth_meta = pd.read_csv('Ethereum-Metadata.csv').drop(
    columns = ['SNo', 'Name', 'Symbol', 'Date'])

In [None]:
def f(x):
  """Return 1 when the row's 'Close2' is greater than its 'Close', else 0."""
  return 1 if x['Close2'] > x['Close'] else 0

# Attach the 0/1 'Direction' label computed row by row with f, then remove the
# raw 'Close2' column it was derived from.
eth_meta = (eth_meta
            .assign(Direction = eth_meta.apply(f, axis = 1))
            .drop(columns = ['Close2']))

eth_meta.head()

In [None]:
# Price-derived columns used as inputs for the Ethereum direction classifier.
features = eth_meta[['High', 'Low', 'Open', 'Close',
                     'Rel_Close', 'HL_Ratio', 'Rel_High', 'Rel_Low',
                     'SMA7', 'SMA30', 'SMA60', 'SMA90', 'SMA200']].values
X_temp = np.array(features)
y_temp = np.array(eth_meta['Direction'])

# Standardise each feature column.
# NOTE(review): the scaler is fitted on the full table, i.e. before the
# train/test split performed further down - confirm this is intended.
sc = StandardScaler()
X_temp = sc.fit_transform(X_temp)

# Stack overlapping look-back windows so that X is 3-dimensional
# (windows, n_lookback, n_features).
n_instances, n_features = X_temp.shape
n_lookback = 90

# The window starting at row s spans rows s .. s + n_lookback - 1 and takes its
# label from the row immediately after the window.
window_starts = range(n_instances - n_lookback)
X = np.array([X_temp[s: s + n_lookback, :] for s in window_starts])
y = np.array([y_temp[s + n_lookback] for s in window_starts])

In [None]:
# Chronological hold-out: consecutive windows share n_lookback - 1 rows, so a
# shuffled split would place near-identical windows in both train and test and
# inflate the test scores. shuffle = False keeps the last 25% of windows (the
# most recent ones) for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25,
                                                    shuffle = False)

In [None]:
# Layer stack: a single-filter convolution with kernel size 50, then eight
# causal dilated convolutions (dilation rates 1, 2, 4, 8 repeated twice), each
# followed by 10% dropout, then a 1x1 convolution, global average pooling and
# a sigmoid unit for the binary direction label.
cnn_layers = [Conv1D(1, 50, activation = 'relu',
                     input_shape = (n_lookback, n_features))]
for dilation in (1, 2, 4, 8, 1, 2, 4, 8):
    cnn_layers.append(Conv1D(filters = 20, kernel_size = 2, padding = 'causal',
                             activation = 'relu', dilation_rate = dilation))
    cnn_layers.append(Dropout(0.1))
cnn_layers.append(Conv1D(filters = 10, kernel_size = 1))
cnn_layers.append(GlobalAveragePooling1D())
cnn_layers.append(Dense(1, activation = 'sigmoid'))

model = Sequential(cnn_layers)
model.compile(optimizer = 'adam',
              loss = 'binary_crossentropy',
              metrics = ['accuracy'])
# Train for at most 20 epochs with 5% of the training arrays set aside for
# validation; EarlyStopping is configured with a patience of 5 epochs.
model.fit(
    X_train,
    y_train,
    epochs = 20,
    batch_size = 64,
    validation_split = 0.05,
    callbacks = [EarlyStopping(patience = 5)],
)

In [None]:
# Threshold the sigmoid outputs at 0.5 to get hard 0/1 labels (kept as a plain
# list of ints), then report (accuracy, binary F1) on the held-out split.
y_pred = (model.predict(X_test).ravel() > 0.5).astype(int).tolist()
cnn_metrics = (
    accuracy_score(y_test, y_pred),
    f1_score(y_test, y_pred, average = 'binary'),
)
cnn_metrics