In [None]:
# Bitcoin Price Prediction with LSTM, without dropout and with a Dense layer after every LSTM layer

In [None]:
#Importing Libraries

import os
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.layers import Bidirectional
from keras.models import Sequential
from sklearn.metrics import mean_squared_error, confusion_matrix
import math
import seaborn as sns


In [None]:
def load_data(filename):
    """
    Read the bitcoin price prediction dataset from a CSV file.

    Inputs:
    filename -> Location of the CSV file (anything pandas.read_csv accepts)

    Outputs:
    data -> DataFrame holding the contents of the file
    """
    return pd.read_csv(filename)

In [None]:
def data_prepocessing(data, d, train_data_size, price_column=20):
    """
    Preprocess the data and split it chronologically for training and testing.

    Steps: treat zeros as missing values and fill them, cut the series into
    sliding windows of d consecutive days, normalize every window relative to
    its first day (value / first_value - 1), then split the windows.

    Input:
    data -> Dataframe of bitcoin price prediction data (every column must be numeric)
    d -> number of days in each window; the first d-1 days are the model input, the last day is the label
    train_data_size -> fraction of the windows (taken from the start) used for training
    price_column -> positional index of the bitcoin price column (default 20)

    Output:
    x_train -> x values of training data, shape (n_train, d-1, n_features)
    y_train -> labels of training data (normalized price on the last day of each window)
    x_test -> x values of testing data
    y_test -> labels of testing data
    y_daybefore -> normalized price on the day before the label for each test window
    unnormalized_labels -> unnormalized first-day price of every test window, shape (n_test, 1);
                           real price = (normalized value + 1) * unnormalized_labels
    window_size -> number of days the model looks at at once (d - 1)

    Raises:
    ValueError -> if d < 2 or the data has fewer than d rows
    """

    if d < 2:
        raise ValueError("d must be at least 2 (d-1 input days plus 1 label day)")

    # Work on a float copy: the caller's DataFrame stays untouched, and
    # integer-only data is not truncated when normalized ratios are stored.
    values = np.array(data.values, dtype=float)
    if values.shape[0] < d:
        raise ValueError(
            "data has %d rows, fewer than the window length d=%d" % (values.shape[0], d)
        )

    # Replace every zero with the closest earlier non-zero value of its column.
    # Zeros that precede the first non-zero value have nothing before them, so
    # they take that first non-zero value (never a value from the end of the series).
    for j in range(values.shape[1]):
        column = values[:, j]  # view: writes go straight into `values`
        nonzero_rows = np.flatnonzero(column != 0)
        if nonzero_rows.size == 0:
            continue  # all-zero column: nothing to fill from
        first = nonzero_rows[0]
        column[:first] = column[first]
        for i in range(first + 1, column.shape[0]):
            if column[i] == 0:
                column[i] = column[i - 1]

    # Build the 3D array (a x b x c): a = number of windows (n-d+1),
    # b = window length (d), c = number of features in the data file.
    n_windows = values.shape[0] - d + 1
    d1 = np.stack([values[i:i + d] for i in range(n_windows)])

    # Normalize each window by its first day because feature ranges vary widely:
    # every value is divided by the first value in the window, then 1 is subtracted.
    # The first day of each window therefore stays 0.
    dr = np.zeros_like(d1)
    dr[:, 1:, :] = (d1[:, 1:, :] / d1[:, 0:1, :]) - 1

    # Chronological split (e.g. train_data_size = 0.90 -> first 90% of the
    # windows are training data, the last 10% are testing data).
    split = int(round(train_data_size * dr.shape[0]))

    # Unnormalized first-day price of each *test* window, aligned row by row
    # with x_test / y_test so normalized prices can be converted back.
    unnormalized_labels = d1[split:, 0:1, price_column]

    # Training data: all days but the last are inputs, the last day's price is the label
    x_train = dr[:split, :-1]
    y_train = dr[:split, -1, price_column]

    # Testing data
    x_test = dr[split:, :-1]
    y_test = dr[split:, d - 1, price_column]

    # Normalized price on the day before the label for each test window
    y_daybefore = dr[split:, d - 2, price_column]

    # The last value of every window is reserved as the y value
    window_size = d - 1

    return x_train, y_train, x_test, y_test, y_daybefore, unnormalized_labels, window_size

In [None]:
def model(window_size, dropout_value, activation_function, loss_function, optimizer, n_features=None):

    """
    To initialize and create model

    Input:
    window_size -> Represents how many days of data the model can look at at once
    dropout_value -> Accepted for interface compatibility only; this architecture adds no dropout layers
    activation_function -> Activation function used by every Dense layer (including the output layer)
    loss_function -> Loss function
    optimizer -> Optimizer
    n_features -> Number of features per day in the input. When omitted, it is read from
                  the notebook-level N_x_train (last axis), which must then already exist.

    Output:
    model -> A compiled Sequential network: three stacked LSTM layers, a Dense layer after
             each of the first two, and a single-unit Dense output layer
    """

    if n_features is None:
        # Backward-compatible fallback for callers that do not pass n_features:
        # relies on the notebook global created by the preprocessing cell.
        n_features = N_x_train.shape[-1]

    #Create a Sequential model using Keras
    network = Sequential()

    #First recurrent layer (returns the full sequence) followed by a Dense layer
    network.add(LSTM(window_size, return_sequences=True, input_shape=(window_size, n_features)))
    network.add(Dense(units=64, activation=activation_function))

    #Second recurrent layer (returns the full sequence) followed by a Dense layer
    network.add(LSTM(window_size * 2, return_sequences=True))
    network.add(Dense(units=32, activation=activation_function))

    #Third recurrent layer (returns only the last step)
    network.add(LSTM(window_size, return_sequences=False))

    #Output layer (returns the predicted value)
    network.add(Dense(units=1, activation=activation_function))

    #Set loss function and optimizer
    network.compile(loss=loss_function, optimizer=optimizer)

    return network
  

In [None]:
def test_model(model, N_x_test, N_y_test, unnormalized_labels):
  
    """
    For model Testing
    
    Input:
    model -> LSTM
    N_x_test -> Normalized x values of the testing data
    N_y_test -> Normalized labels of the testing data
    unnormalized_labels -> unnormalized label data will be used to get the true prices from the normalized ones
    
    Output:
    N_y_predict -> Normalized predicted values on x_test
    y_test -> Unnormalized actual prices of bitcoin throughout the testing period
    y_predict -> Unnormalized predicted prices of bitcoin
    fig -> graph of the predicted prices vs the actual prices of bitcoin
    """
    
    #Test the model on Normalized x_test
    N_y_predict = model.predict(N_x_test)
    
    #Retrieve unnormalized values
    y_test = np.zeros_like(N_y_test)
    y_predict = np.zeros_like(N_y_predict)
    
    for i in range(N_y_test.shape[0]):
        y = N_y_test[i]
        predict = N_y_predict[i]
        y_test[i] = (y+1)*unnormalized_labels[i]
        y_predict[i] = (predict+1)*unnormalized_labels[i]

    #Plot of the predicted prices versus the actual prices
    fig = plt.figure(figsize=(10,5))
    ax = fig.add_subplot(111)
    ax.set_title("Bitcoin Price Over Time")
    plt.plot(y_predict, color = 'green', label = 'Predicted Price')
    plt.plot(y_test, color = 'red', label = 'Actual Price')
    ax.set_ylabel("Price (USD)")
    ax.set_xlabel("Time (Days)")
    ax.legend()
    plt.show()
    
    return N_y_predict, y_test, y_predict, fig

In [None]:
def price_change(N_y_daybefore, N_y_test, N_y_predict):

    """
    Compute the relative change of the actual and predicted price versus the day before, and plot both

    Input:
    N_y_daybefore -> Normalized price of bitcoin the day before for each x_test
    N_y_test -> Normalized labels of the testing data
    N_y_predict -> Normalized predicted values on x_test

    Output:
    delta_predict -> (N_y_predict - N_y_daybefore) / (1 + N_y_daybefore)
    delta_test -> (N_y_test - N_y_daybefore) / (1 + N_y_daybefore)
    fig -> A plot of both change series over the test period

    """

    #Turn the day-before prices and the labels into column vectors
    previous = np.reshape(N_y_daybefore, (-1, 1))
    actual = np.reshape(N_y_test, (-1, 1))

    #Both changes are measured relative to the same day-before level
    reference = 1 + previous
    delta_predict = (N_y_predict - previous) / reference
    delta_test = (actual - previous) / reference

    #Draw the predicted change against the actual change
    fig = plt.figure(figsize=(10, 6))
    ax = fig.add_subplot(111)
    ax.set_title("Percent Change in Bitcoin Price Per Day")
    ax.plot(delta_predict, color='green', label = 'Predicted Percent Change')
    ax.plot(delta_test, color='red', label = 'Actual Percent Change')
    ax.set_ylabel("Percent Change")
    ax.set_xlabel("Time (Days)")
    ax.legend()
    plt.show()

    return delta_predict, delta_test, fig

In [None]:
def binary_price(delta_predict, delta_test):
    """
    Converts percent change to a binary 1 or 0, where 1 is an increase and 0 is a decrease/no change in bitcoin price

    Input:
    delta_predict -> Represents the predicted percent change in price
    delta_test -> Represents the actual percent change in price

    Output:
    delta_predict_binary -> float array, same shape as delta_predict: 1.0 where the predicted change is > 0, else 0.0
    delta_test_binary -> float array, same shape as delta_test: 1.0 where the actual change is > 0, else 0.0
    """

    #Vectorized comparison: every element gets a defined value (np.empty plus a
    #loop over column 0 left any other column uninitialized). NaN compares as
    #"not greater than zero" and therefore maps to 0.0.
    delta_predict_binary = (np.asarray(delta_predict) > 0).astype(float)
    delta_test_binary = (np.asarray(delta_test) > 0).astype(float)

    return delta_predict_binary, delta_test_binary

In [None]:
def find_positives_negatives(delta_predict_binary, delta_test_binary):
    """
    Tally the confusion counts of the predicted price direction against the actual one

    Input:
    delta_predict_binary -> 1 where the predicted price rose wrt the day before, 0 otherwise (read from column 0)
    delta_test_binary -> 1 where the actual price rose wrt the day before, 0 otherwise (read from column 0)

    Output:
    true_pos -> Number of rows where actual is 1 and predicted is 1
    false_pos -> Number of rows where actual is 0 and predicted is not 0
    true_neg -> Number of rows where actual is 0 and predicted is 0
    false_neg -> Number of rows where actual is 1 and predicted is not 1
    """
    tally = {"tp": 0, "fp": 0, "tn": 0, "fn": 0}

    for row in range(delta_test_binary.shape[0]):
        actual_up = delta_test_binary[row][0]
        predicted_up = delta_predict_binary[row][0]

        #Rows whose actual value is neither 1 nor 0 are not counted at all
        if actual_up == 1:
            tally["tp" if predicted_up == 1 else "fn"] += 1
        elif actual_up == 0:
            tally["tn" if predicted_up == 0 else "fp"] += 1

    return tally["tp"], tally["fp"], tally["tn"], tally["fn"]

In [None]:
def calculate_statistics(true_pos, false_pos, true_neg, false_neg, N_y_predict, N_y_test):
    """
    Calculate various statistics to assess performance

    Input:
    true_pos -> Represents the number of true positives achieved by the model
    false_pos -> Represents the number of false positives achieved by the model
    true_neg -> Represents the number of true negatives achieved by the model (not used by any statistic here)
    false_neg -> Represents the number of false negatives achieved by the model
    N_y_predict -> Normalized predicted values on x_test
    N_y_test -> Normalized labels of the testing data

    Output:
    precision -> How many selected items are relevant? TP / (TP + FP); 0.0 when nothing was predicted positive
    recall -> How many relevant items are selected? TP / (TP + FN); 0.0 when there are no actual positives
    F1 -> The harmonic mean of precision and recall; 0.0 when both are 0
    MSE -> The average of the squares of the differences between predicted and actual normalized price
    RMSE -> Square root of MSE

    """

    predicted_positives = true_pos + false_pos
    actual_positives = true_pos + false_neg

    #An empty denominator means the ratio is undefined; report 0.0 instead of
    #raising ZeroDivisionError so a degenerate model can still be evaluated.
    precision = float(true_pos) / predicted_positives if predicted_positives else 0.0
    recall = float(true_pos) / actual_positives if actual_positives else 0.0

    if precision + recall:
        F1 = float(2 * precision * recall) / (precision + recall)
    else:
        F1 = 0.0

    #mean_squared_error expects (y_true, y_pred)
    MSE = mean_squared_error(N_y_test.flatten(), N_y_predict.flatten())
    RMSE = math.sqrt(MSE)

    return precision, recall, F1, MSE, RMSE

In [None]:
# Read the raw dataset from the CSV file
dataset_path = "Bitcoin_Price_Prediction_data.csv"
data = load_data(dataset_path)

# Exploratory data analysis: column overview first, then the first rows
data.info()
data.head()

In [None]:
# Summary statistics of the dataset as computed by pandas' describe()
data.describe()

In [None]:
# Number of missing values in every column

data.isnull().sum()

#no null value found

In [None]:
# Correlation matrix of the dataset, drawn as a heatmap

corr = data.corr()
ax = sns.heatmap(
    corr,
    vmin=-1,
    vmax=1,
    center=0,
    cmap=sns.diverging_palette(20, 220, n=200),
    square=True,
    linewidths=0.005,
)

# Tilt the column names so they stay readable
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, horizontalalignment='right');

# Write the heatmap to an image file
plt.savefig('corr.png', bbox_inches='tight', pad_inches=0.0)

# Market Capitalization and Output Value are highly correlated with Bitcoin Price

In [None]:
# How Bitcoin Price changes with Market Capitalization

x = data['Market Capitalization']
y = data['Bitcoin Price']
fig, ax = plt.subplots()
ax.plot(x, y)
ax.set_title("Bitcoin Price change with Market Capitalization")
ax.set_xlabel("Market Capitalization")
ax.set_ylabel("Bitcoin Price")
plt.show()  # render explicitly so the cell does not echo the Line2D list repr


In [None]:
# How Bitcoin Price changes with the dollar value of all outputs sent over the network

x = data['Output Value']
y = data['Bitcoin Price']
fig, ax = plt.subplots()
ax.plot(x, y)
ax.set_title("Bitcoin Price change with Output value")
ax.set_xlabel("Output Value ")
ax.set_ylabel("Bitcoin Price")
plt.show()  # render explicitly so the cell does not echo the Line2D list repr

In [None]:
# Bitcoin price over the rows of the dataset
ax = data['Bitcoin Price'].plot()
ax.set_ylabel("Daily Bitcoin price")
ax.set_title("Bitcoin Price")
plt.show()  # render explicitly so the cell does not echo a Text repr

In [None]:
# One figure per network metric: (column name, figure title).
# The column name doubles as the legend entry and the y-axis label.
network_metrics = [
    # Growth in the total network computations over the past 365 days
    ('Annual Hash Growth', "Growth in the total network computations over the past 365 days"),
    # The amount of dollars earned by the mining network
    ('Miner Revenue Value', "The amount of dollars earned by the mining network"),
    # Average amount of time between blocks
    ('Block Interval', "Average amount of time between blocks"),
]

for column, title in network_metrics:
    fig, ax = plt.subplots(figsize=(7, 5))
    ax.set_title(title)
    ax.plot(data[column], color='b', label=column)
    ax.set_ylabel(column)
    ax.legend()
    plt.show()
    

In [None]:

# How Bitcoin Price changes with unspent transaction dollar value

x = data['Avg. UTXO Value']
y = data['Bitcoin Price']
fig, ax = plt.subplots()
ax.plot(x, y)
ax.set_title("Bitcoin Price change with unspent transaction dollar value")
ax.set_xlabel("Average UTXO Value")
ax.set_ylabel("Bitcoin Price")
plt.show()  # render explicitly so the cell does not echo the Line2D list repr

In [None]:
# How Difficulty changes with total number of transactions

x = data['Total Transactions']
y = data['Difficulty']
fig, ax = plt.subplots()
ax.plot(x, y)
ax.set_title("Difficulty changes with total number of transactions")
ax.set_xlabel("Total Transactions")
ax.set_ylabel("Difficulty")
plt.show()  # render explicitly so the cell does not echo the Line2D list repr

In [None]:
# How Difficulty changes with Market Capitalization

x = data['Market Capitalization']
y = data['Difficulty']
fig, ax = plt.subplots()
ax.plot(x, y)
ax.set_title("Difficulty changes with Market Capitalization")
ax.set_xlabel("Market Capitalization")
ax.set_ylabel("Difficulty")
plt.show()  # render explicitly so the cell does not echo the Line2D list repr

In [None]:

# How Bitcoin Price changes with money supply

x = data['Money Supply']
y = data['Bitcoin Price']
fig, ax = plt.subplots()
ax.plot(x, y)
ax.set_title("Bitcoin Price change with money supply")
ax.set_xlabel("Money Supply")
ax.set_ylabel("Bitcoin Price")
plt.show()  # render explicitly so the cell does not echo the Line2D list repr

In [None]:
# How the amount of bitcoin sent over the network (Output Volume, y-axis) varies with Bitcoin Price (x-axis)

y = data['Output Volume']
x = data['Bitcoin Price']
fig, ax = plt.subplots()
ax.plot(x, y)
ax.set_title("Bitcoin Price change with amount of bitcoin sent over network")
ax.set_ylabel("Output Volume")
ax.set_xlabel("Bitcoin Price")
plt.show()  # render explicitly so the cell does not echo the Line2D list repr

In [None]:
# How the proportion of the money supply transacted each day (Velocity - Daily, y-axis) varies with Bitcoin Price (x-axis)

y = data['Velocity - Daily']
x = data['Bitcoin Price']
fig, ax = plt.subplots()
ax.plot(x, y)
ax.set_title("Bitcoin Price change with proportion of the money supply transacted each day")
ax.set_ylabel("Velocity-Daily")
ax.set_xlabel("Bitcoin Price")
plt.show()  # render explicitly so the cell does not echo the Line2D list repr

In [None]:
# How the proportion of money supply transacted each day (Velocity - Daily, y-axis) varies with Output Value (x-axis)

y = data['Velocity - Daily']
x = data['Output Value']
fig, ax = plt.subplots()
ax.plot(x, y)
ax.set_title("Amount of bitcoin transacted each day changes with proportion of money supply transacted each day")
ax.set_ylabel("Velocity-Daily")
ax.set_xlabel("Output Value")
plt.show()  # render explicitly so the cell does not echo the Line2D list repr

In [None]:
# How Bitcoin Price changes with number of blocks

x = data['Block Height']
y = data['Bitcoin Price']
fig, ax = plt.subplots()
ax.plot(x, y)
ax.set_title("Bitcoin Price change with Block Height")
ax.set_xlabel("Block Height")
ax.set_ylabel("Bitcoin Price")
plt.show()  # render explicitly so the cell does not echo the Line2D list repr

In [None]:
# How Bitcoin Price changes with Transaction Fees

x = data['Transaction Fees']
y = data['Bitcoin Price']
fig, ax = plt.subplots()
ax.plot(x, y)
ax.set_title("Bitcoin Price change with Transaction Fees")
ax.set_xlabel("Transaction Fees")
ax.set_ylabel("Bitcoin Price")
plt.show()  # render explicitly so the cell does not echo the Line2D list repr

In [None]:
#Build the normalized training/testing windows: 50-day windows, first 90% of the windows for training

(N_x_train, N_y_train,
 N_x_test, N_y_test,
 N_y_daybefore, unnormalized_labels,
 window_size) = data_prepocessing(data, 50, 0.90)

#Report the shape of every array, then the window size
for prepared in (N_x_train, N_y_train, N_x_test, N_y_test, N_y_daybefore, unnormalized_labels):
    print(prepared.shape)
print(window_size)

In [None]:
#Model Initialization

model = model(window_size, 0.2, 'linear', 'mse', 'adam')
print (model.summary())

In [None]:
start = time.time()

#Train the model on training data 
model.fit(N_x_train, N_y_train, batch_size= 1024, epochs=50, validation_split= 0.1)

#Get the time it took to train the model (in seconds)
training_time = int(math.floor(time.time() - start))

print(training_time)

In [None]:
# Model testing with testing data
N_y_predict, y_test, y_predict, fig1 = test_model(model, N_x_test, N_y_test, unnormalized_labels)

#Show the plot
plt.show(fig1)

In [None]:
# Change of the actual and of the predicted price relative to the day-before price of bitcoin
delta_predict, delta_test, fig2 = price_change(N_y_daybefore, N_y_test, N_y_predict)

#price_change() already renders the figure; pyplot's show() is documented as
#show(*, block=None) and does not take a figure, so none is passed here
plt.show()

In [None]:
# Flag, for the predicted and the actual series, whether the price increased (1) or not (0) wrt the day-before price
delta_predict_binary, delta_test_binary = binary_price(
    delta_predict=delta_predict,
    delta_test=delta_test,
)

In [None]:
# Count true/false positives and negatives of the predicted price direction
true_pos, false_pos, true_neg, false_neg = find_positives_negatives(delta_predict_binary, delta_test_binary)

for caption, count in (
    ("True positives:", true_pos),
    ("False positives:", false_pos),
    ("True negatives:", true_neg),
    ("False negatives:", false_neg),
):
    print(caption, count)

# Same comparison as a confusion matrix (actual values first, predictions second)
cm = confusion_matrix(delta_test_binary, delta_predict_binary)
print(cm)

In [None]:
# Precision, recall and F1 of the predicted direction, plus MSE / RMSE of the normalized predictions

precision, recall, F1, MSE, RMSE = calculate_statistics(
    true_pos, false_pos, true_neg, false_neg, N_y_predict, N_y_test
)

for caption, value in (
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1 score:", F1),
    ("Mean Squared Error:", MSE),
    ("Root Mean Squared Error:", RMSE),
):
    print(caption, value)