# Bitcoin price prediction using LSTM


In [1]:
import numpy as np 
import pandas as pd 

import matplotlib.pyplot as plt
import plotly.graph_objects as go

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error

from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout,GRU
from tensorflow.keras.optimizers import Adam


2022-01-28 19:17:39.554831: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-01-28 19:17:39.554868: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


## Read Data

In [3]:
df = pd.read_csv("data/btc.csv")

df = df.set_index(pd.DatetimeIndex(df.iloc[:,0].values)).iloc[:,1:]

df

Unnamed: 0,timestamp,open,close,high,low,volume,amount
2017-10-04,1507075200,4307.0,4307.0,4307.0,4307.0,0.029360,1.264520e+02
2017-10-05,1507161600,3500.0,3500.0,3500.0,3500.0,0.010368,3.628769e+01
2017-10-06,1507248000,4860.0,4860.0,4860.0,4860.0,0.015000,7.290000e+01
2017-10-07,1507334400,3850.0,4858.0,4858.0,3850.0,0.013568,6.231645e+01
2017-10-09,1507507200,4500.0,4850.0,4850.0,3900.0,0.280437,1.292948e+03
...,...,...,...,...,...,...,...
2022-01-14,1642118400,42569.6,43067.3,43459.9,41713.0,7377.521045,3.150763e+08
2022-01-15,1642204800,43067.3,43074.0,43800.9,42564.1,4595.816660,1.982395e+08
2022-01-16,1642291200,43074.1,43072.6,43477.0,42600.0,4614.782988,1.987266e+08
2022-01-17,1642377600,43072.6,42202.2,43176.9,41500.0,5972.716996,2.533701e+08


In [14]:
fig = go.Figure(
data = [
    go.Candlestick(
    x = df.index,
        low = df['low'],
        high = df['high'],
        close = df['close'],
        open = df['open'],
        increasing_line_color = "green",
        decreasing_line_color = "red")])

fig.update_layout(
    yaxis_title = "Price",
    xaxis_title = "Date"
)

fig.show()

## Data Preparation

### Normalization

In [15]:
data = df[['close','volume','open','low','high']]
num_features = data.shape[1]

In [16]:
scaler = MinMaxScaler()
norm_data = scaler.fit_transform(data.values)
print("Real: {}\nNormalized: {}".format(data.values[0],norm_data[0]))

Real: [4.307000e+03 2.935964e-02 4.307000e+03 4.307000e+03 4.307000e+03]
Normalized: [1.69958641e-02 1.28317550e-06 1.69991604e-02 1.82029975e-02
 1.58985405e-02]


### Data split

In [17]:
past_history = 10
future_target = 0
split_rate = int(len(norm_data) * 0.8) # 80 percent

In [18]:
input_data = []
output_data = []

for i in range(past_history, len(norm_data)+1): 
    indices = range(i-past_history, i)

    input_data.append(np.reshape(norm_data[indices], (past_history, num_features)))
    try:
        output_data.append(norm_data[i+future_target][0])
    except:
        output_data.append(np.nan)

input_data,output_data = np.array(input_data), np.array(output_data)

In [19]:
x_train, x_test = input_data[:split_rate], input_data[split_rate:]
y_train, y_test = output_data[:split_rate], output_data[split_rate:]

## Build the model

In [20]:
num_units = 100
activation_function = 'relu'
loss_function = 'mean_absolute_error'
batch_size = 32
num_epochs = 100

model = Sequential()

model.add(LSTM(units = num_units,return_sequences=True,activation=activation_function, input_shape=(None, num_features)))
model.add(Dropout(0.1))

model.add(LSTM(units=num_units,activation=activation_function))
model.add(Dropout(0.1))

model.add(Dense(units = 1))

model.compile(optimizer=Adam(learning_rate=0.0001), loss=loss_function)

In [21]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, None, 100)         42400     
_________________________________________________________________
dropout_2 (Dropout)          (None, None, 100)         0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 100)               80400     
_________________________________________________________________
dropout_3 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 101       
Total params: 122,901
Trainable params: 122,901
Non-trainable params: 0
_________________________________________________________________


## Train the model

In [None]:
history = model.fit(
    x_train,
    y_train,
    validation_split=0.2,
    batch_size=batch_size,
    epochs=num_epochs,
    shuffle=False
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100

In [None]:
loss = history.history['loss']
val_loss = history.history['val_loss']

plt.figure()

plt.plot(range(len(loss)), loss, 'blue', label='Training loss')
plt.plot(range(len(loss)), val_loss, 'red', label='Validation loss')
plt.title("Training and Validation Loss")
plt.xlabel("Epoch")
plt.legend()

plt.show()

In [None]:
original_train = y_train - scaler.min_[0]
original_train /= scaler.scale_[0]
original_train = pd.DataFrame((original_train))

predictions_train = model.predict(x_train) - scaler.min_[0]
predictions_train /= scaler.scale_[0]
predictions_train = pd.DataFrame((predictions_train))

plt.figure()

plt.plot(original_train, 'blue', label='Train Data')
plt.plot(predictions_train, 'red', label='Prediction')
plt.title("Bitcoin price")
plt.xlabel("Days")
plt.ylabel("Price (USD)")
plt.legend()
plt.show()
print("Train error:")
print(mean_absolute_error(y_train, model.predict(x_train)))

## Prediction

In [None]:
original_test = y_test - scaler.min_[0]
original_test /= scaler.scale_[0]
original_test = pd.DataFrame((original_test))

predictions_test = model.predict(x_test) - scaler.min_[0]
predictions_test /= scaler.scale_[0]
predictions_test = pd.DataFrame((predictions_test))

plt.figure()

plt.plot(original_test[:-1], 'blue', label='Test Data')
plt.plot(predictions_test[:-1], 'red', label='Prediction')
plt.title("Bitcoin price")
plt.xlabel("Days")
plt.ylabel("Price (USD)")
plt.legend()
plt.show()
print("Test error:")
print(mean_absolute_error(y_test[:-(future_target+1)], model.predict(x_test)[:-(future_target+1)]))

In [70]:
prediction = pd.DataFrame({"actual price":original_test[0],"predicted":predictions_test[0]})

with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
    print(prediction)


     actual price     predicted
0         54096.1  53754.695312
1         54348.4  53186.695312
2         52307.4  51559.832031
3         51301.7  49794.070312
4         55032.0  47346.472656
5         55820.4  45865.414062
6         55772.9  46514.832031
7         57628.9  48530.488281
8         58754.6  51091.078125
9         58745.9  53960.179688
10        58735.7  55972.402344
11        58963.6  56848.246094
12        57058.3  57096.621094
13        58201.4  56473.425781
14        59116.2  54923.050781
15        57988.3  53518.343750
16        55958.2  52504.050781
17        58076.7  51137.941406
18        58131.6  50265.316406
19        59770.2  50331.113281
20        60007.6  50916.628906
21        59863.4  52203.582031
22        63578.7  53747.890625
23        62958.7  55574.894531
24        63152.6  57193.839844
25        61342.6  58704.867188
26        59995.2  59173.250000
27        56150.6  58475.226562
28        55618.8  55617.609375
29        56427.8  52449.906250
30      