# Bitcoin Closing Price Prediction (using LSTM)

* Dataset: [coin_Bitcoin.csv](https://www.kaggle.com/sudalairajkumar/cryptocurrencypricehistory?select=coin_Bitcoin.csv)
* Author: Laxman Desai

## Initialization

In [1]:
%matplotlib ipympl
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Read the raw CSV and index the rows by calendar date (time of day is dropped).
df = pd.read_csv('./dataset/coin_Bitcoin.csv')
df.index = pd.to_datetime(df['Date']).dt.date

# Discard every column that is not one of the daily price columns.
unused_columns = ['Date', 'SNo', 'Name', 'Volume', 'Symbol', 'Marketcap']
df = df.drop(columns=unused_columns)

# Name of the column the model is trained to predict.
feature = 'Close'

display(df.head())

Unnamed: 0_level_0,High,Low,Open,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2013-04-29,147.488007,134.0,134.444,144.539993
2013-04-30,146.929993,134.050003,144.0,139.0
2013-05-01,139.889999,107.720001,139.0,116.989998
2013-05-02,125.599998,92.281898,116.379997,105.209999
2013-05-03,108.127998,79.099998,106.25,97.75


In [3]:
print(f'Dataset Shape: {df.shape}\n')
# DataFrame.info() prints its summary itself and returns None, so wrapping it in
# display() only appended a stray "None" to the cell output.
df.info()

Dataset Shape: (2862, 4)

<class 'pandas.core.frame.DataFrame'>
Index: 2862 entries, 2013-04-29 to 2021-02-27
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   High    2862 non-null   float64
 1   Low     2862 non-null   float64
 2   Open    2862 non-null   float64
 3   Close   2862 non-null   float64
dtypes: float64(4)
memory usage: 111.8+ KB


None

## Train-Test Split

In [4]:
from sklearn.model_selection import train_test_split

# Chronological hold-out: shuffle=False preserves row order, so the training
# rows all come before the test rows and 20% of the data is held out.
df_train, df_test = train_test_split(
    df,
    shuffle=False,
    test_size=0.2,
)

## Plotting Closing Prices

In [5]:
def line_plot(train, test, xlabel, ylabel, title):
    """Draw ``train`` and ``test`` as two labelled lines on one new figure.

    Args:
        train: Values plotted first; shown in the legend as 'Train'.
        test: Values plotted second; shown in the legend as 'Test'.
        xlabel: Text of the x-axis label.
        ylabel: Text of the y-axis label.
        title: Text of the figure title.

    Returns:
        None. The figure is created as a side effect.
    """
    fig, ax = plt.subplots(figsize=(12, 5))

    # Plot both series in a fixed order so the legend reads Train, then Test.
    for series, legend_name in ((train, 'Train'), (test, 'Test')):
        ax.plot(series, label=legend_name)

    for set_axis_label, text in ((ax.set_xlabel, xlabel), (ax.set_ylabel, ylabel)):
        set_axis_label(text, fontsize=12)

    ax.set_title(title, fontsize=16)
    ax.legend(fontsize=12)

# Show where the chronological split falls on the target price series.
line_plot(
    train=df_train[feature],
    test=df_test[feature],
    xlabel='Year',
    ylabel='Bitcoin Price (USD)',
    title='Closing Price of Bitcoin',
)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Data Preprocessing

### Imputation

* Only needed if null values are present (the check below finds none).

In [6]:
# Count the missing values in each column.
df.isna().sum()

High     0
Low      0
Open     0
Close    0
dtype: int64

### Feature Scaling

In [7]:
# from sklearn.preprocessing import MinMaxScaler

# scaler = MinMaxScaler(feature_range=(0, 1))
# scaler.fit(train)
# train_scaled = pd.DataFrame(scaler.transform(train), index=train.index)
# train_scaled.head()

def scale(df):
    """Express ``df`` as its relative change from its own first row.

    Args:
        df: A pandas object supporting ``.iloc`` (a DataFrame or a Series).

    Returns:
        An object of the same shape holding ``value / first_value - 1``, so the
        first row is all zeros. For a DataFrame each column is divided by its
        own first value.
    """
    baseline = df.iloc[0]
    ratio = df / baseline
    return ratio - 1

def scale_feature(data, window_len):
    """Compute the relative change of ``data`` over a lag of ``window_len`` rows.

    Args:
        data: A sliceable pandas object exposing ``.values`` (e.g. a Series).
        window_len: Lag in rows. Expected to be a positive integer; with 0 the
            denominator slice ``data[:-0]`` is empty.

    Returns:
        Array whose element ``i`` equals ``data[i + window_len] / data[i] - 1``.
    """
    later_values = data[window_len:].values
    earlier_values = data[:-window_len].values
    return later_values / earlier_values - 1

In [8]:
def to_time_windows(data, window_len):
    """Cut ``data`` into overlapping windows, each scaled by ``scale``.

    A window starts at every row index from 0 up to, but not including,
    ``len(data) - window_len``, so the window that would end on the very last
    row is not produced.

    Args:
        data: Sliceable pandas object holding the rows to window.
        window_len: Number of consecutive rows in each window.

    Returns:
        ``np.ndarray`` built from the ``.values`` of every scaled window, in
        start-index order.
    """
    n_windows = len(data) - window_len
    return np.array([
        scale(data[start:start + window_len]).values
        for start in range(n_windows)
    ])

# Number of consecutive rows fed to the model for each prediction.
window_len = 5

# Inputs: scaled windows over all columns, built separately for each split.
X_train, X_test = (
    to_time_windows(split, window_len) for split in (df_train, df_test)
)

# Targets: relative change of the target column over `window_len` rows.
y_train, y_test = (
    scale_feature(split[feature], window_len) for split in (df_train, df_test)
)

## Building the Recurrent Neural Network

### Setup Keras

In [9]:
import os

# Set before TensorFlow is imported in the next cell; presumably this quiets
# TensorFlow's native logging (level '2').
os.environ.update({'TF_CPP_MIN_LOG_LEVEL': '2'})

In [10]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Activation, Dense, Dropout, LSTM

# No longer used in this cell; the import is retained so the names stay available.
from tensorflow.compat.v1 import ConfigProto, InteractiveSession

# Ask TensorFlow to grow GPU memory on demand instead of reserving it all up
# front. This uses the TF2 configuration API rather than opening a TF1
# InteractiveSession that was never closed.
gpus = tf.config.list_physical_devices('GPU')
try:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as err:
    # Memory growth must be set before the GPUs are initialised, so re-running
    # this cell on a live kernel can raise. Report it and carry on.
    print(err)

INFO:tensorflow:Enabling eager execution
INFO:tensorflow:Enabling v2 tensorshape
INFO:tensorflow:Enabling resource variables
INFO:tensorflow:Enabling tensor equality
INFO:tensorflow:Enabling control flow v2


### Initialising the RNN

In [11]:
def build_model(data, window_len):
    """Create and compile the stacked-LSTM regression model.

    The network is four LSTM layers of 50 units, each followed by 20% dropout,
    then a single-unit dense layer with a linear activation. It is compiled
    with the Adam optimizer and mean-squared-error loss.

    Args:
        data: Windowed input samples; ``len(data[0][0])`` is used as the number
            of features per time step.
        window_len: Number of time steps in each input window.

    Returns:
        The compiled ``Sequential`` model.
    """
    n_features = len(data[0][0])
    model = Sequential()

    # Layer 1 declares the input shape = (window length, no of features)
    model.add(LSTM(units=50, return_sequences=True, input_shape=(window_len, n_features)))
    model.add(Dropout(0.2))

    # Layers 2-4: only the last LSTM collapses the sequence to a single vector
    for emit_sequence in (True, True, False):
        model.add(LSTM(units=50, return_sequences=emit_sequence))
        model.add(Dropout(0.2))

    # Output layer: one linear unit for the regression target
    model.add(Dense(units=1))
    model.add(Activation('linear'))

    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

# Seed NumPy and TensorFlow before the weights are created so that weight
# initialisation and dropout are as repeatable as the backend allows between
# runs of the notebook.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

model = build_model(X_train, window_len)

### Fitting the RNN to the training set

In [12]:
# Keep the History object: it holds the per-epoch loss for later inspection,
# and assigning it stops its repr from being printed as the cell output.
# shuffle=False feeds the windows to the model in chronological order.
history = model.fit(X_train, y_train, epochs=20, batch_size=32, shuffle=False)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x229350f9b50>

## Testing the model

In [13]:
# Flatten the model output to 1-D. reshape(-1) is used instead of squeeze()
# because squeeze() would turn a single-row prediction into a 0-d array.
y_pred_scaled = model.predict(X_test).reshape(-1)

### Mean Absolute Error

In [14]:
from sklearn.metrics import mean_absolute_error

# scikit-learn expects (y_true, y_pred). Both arrays hold relative changes over
# one window, so the error is reported as a percentage.
mae = mean_absolute_error(y_test, y_pred_scaled)
print(f'Mean Absolute Error: {round(100 * mae, 2)}%')

Mean Error: 2.45%


### Plotting Prediction vs Reality

In [15]:
# Undo the scaling: each prediction is a relative change from the target value
# `window_len` rows earlier, so multiply it back onto that value.
test_target = df_test[feature]
base_values = test_target.values[:-window_len]

# Index every prediction by the date of the row it forecasts.
y_pred = pd.Series(
    data=base_values * (y_pred_scaled + 1),
    index=test_target[window_len:].index,
)

In [16]:
fig, ax = plt.subplots(figsize=(12, 5))

# The first `window_len` test rows have no prediction, so they are skipped.
ax.plot(df_test[feature][window_len:], label='Actual')
ax.plot(y_pred, label='Prediction')

ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('Bitcoin Price (USD)', fontsize=12)
ax.set_title('Closing Price of Bitcoin', fontsize=16)
# The trailing semicolon keeps the Legend object's repr out of the cell output.
ax.legend(fontsize=12);

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.legend.Legend at 0x22987604370>

In [17]:
# y_pred_train = regressor.predict(X_train)
# y_pred_train = scaler.inverse_transform(y_pred_train)

# # y_pred_train = pd.DataFrame(y_pred_train, index=train.index)
# display(y_pred_train)
# len(y_pred_train)

In [18]:
# y_pred_test = regressor.predict(X_test)
# y_pred_test = scaler.inverse_transform(y_pred_test)

# # y_pred_train = pd.DataFrame(y_pred_train, index=train.index)
# display(y_pred_test)

In [19]:
# # Visualising the results

# plt.plot(y_pred_test, color = 'blue', label = 'Predicted Stock Price')
# plt.title('Stock Price Prediction')
# plt.xlabel('Time')
# plt.ylabel('TSLA Stock Price')
# plt.legend()
