# Stock Price BTC-USD with Stacked-Bidirectional LSTM-RNN
- Percobaan 1 = Vanilla LSTM-RNN
- Percobaan 2 = Stacked LSTM-RNN
- Percobaan 3 = Bidirectional LSTM-RNN
- Percobaan 4 = Stacked-Bidirectional LSTM-RNN

## 1. Deklarasi Pustaka

In [None]:
# pustaka untuk manipulasi data-frame
import pandas as pd
from pandas import concat
from pandas import read_csv
from pandas import read_excel
from pandas_datareader import DataReader

# pustaka untuk manipulasi data-array
import numpy as np
from numpy import concatenate
from numpy import array

# pustaka untuk waktu komputasi
import time
from datetime import datetime

# pustaka untuk visualisasi data
import seaborn as sns
from matplotlib import pyplot
from matplotlib import pyplot as plt

# pustaka untuk visualisasi acf dan pacf
import scipy.stats as sc
import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.graphics.tsaplots import plot_acf
        
# pustaka untuk membuat data latih dan data uji.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder

# pustaka untuk membuat model prediksi LSTM-RNN
import itertools
import tensorflow as tf
from keras.utils import Sequence
from keras.models import Sequential
from keras.layers import SimpleRNN
from keras.layers import LSTM
from keras.layers import GRU
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import TimeDistributed
from keras.layers import Bidirectional
from keras.layers import BatchNormalization
from keras.optimizers import Adam, Adamax, RMSprop, SGD

# early stopping
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint

# pustaka untuk  evaluasi model prediksi
import math
from math import sqrt
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

## 2. Akuisisi Data

In [None]:
# Set waktu komputasi
start = time.time()

In [None]:
# Fix the random seeds so repeated runs are as reproducible as possible.
# NumPy's seed alone does not reach TensorFlow/Keras weight initialisation,
# so TensorFlow's global seed is set as well.
# NOTE(review): some GPU kernels may still be non-deterministic — confirm if
# bit-exact reproducibility is required.
np.random.seed(1234)
tf.random.set_seed(1234)

In [None]:
# Read the dataset from the CSV file, parsing the 'Date' column as dates
dataset = read_csv("dataset/BTC-USD.csv", parse_dates=['Date']);

In [None]:
# set index tanggal
dataset = dataset.set_index("Date");

In [None]:
# menampilkan metadata dataset
dataset.info();

In [None]:
dataset

## 3. Eksplorasi Data Analisis

In [None]:
# Create a 2x2 grid of axes, one panel per price column
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2, facecolor="#f0f0f0", figsize=(20, 10))

# (axes, column, line colour, legend label, panel title) for every panel
price_panels = (
    (ax1, "Open", "tab:blue", "Open Price", "Open Price Bitcoin"),
    (ax2, "Close", "tab:green", "Close Price", "Close Price Bitcoin"),
    (ax3, "High", "tab:orange", "High Price", "High Price Bitcoin"),
    (ax4, "Low", "tab:red", "Low Price", "Low Price Bitcoin"),
)

for panel_ax, column, colour, legend_label, panel_title in price_panels:
    # Time series of the selected price column
    panel_ax.plot(dataset.index.values, dataset[column], color=colour, label=legend_label, linewidth=2)

    # Title, axis labels, legend and grid
    panel_ax.set_title(panel_title, fontsize=14)
    panel_ax.set_xlabel("Date", fontsize=12)
    panel_ax.set_ylabel("Stock Price", fontsize=12)
    panel_ax.legend(loc='upper left')
    panel_ax.grid(True)

# Set the spacing between subplots
plt.subplots_adjust(wspace=0.15, hspace=0.25)

# Show the figure
plt.show()

## 4. Praproses Dataset

- Seleksi Fitur

In [None]:
# memilih fitur close price
dataset = dataset.filter(['Close']);

In [None]:
# convert dataframe to series close price
data = dataset.values

In [None]:
np.round(data[:5],7)

In [None]:
data.shape

- Normalisasi Data

In [None]:
# Scale the values into the range [0, 1]; the same scaler is reused later to
# invert the model predictions back to price units.
# NOTE(review): the scaler is fitted on the full series before the train/test
# split, so test-set min/max influence the scaling — confirm this is intended.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(np.array(data).reshape(-1,1))

In [None]:
np.round(scaled_data[:5],7)

In [None]:
scaled_data.shape

In [None]:
# Create the figure
fig, ax = plt.subplots(figsize = (10,5))

# Plot the normalised close price as a time series
# (legend label typo "CLose Price" corrected)
ax.plot(dataset.index.values, scaled_data, color="tab:blue", label="Close Price", linewidth=2)

# Title, axis labels, legend and grid
ax.set_title("Close Price Bitcoin", fontsize=12)
ax.set_xlabel("Date", fontsize=10)
ax.set_ylabel("Stock Price", fontsize=10)
ax.legend(loc='upper left')
ax.grid(True)

# Show the plot
plt.show()

- split dataset

In [None]:
# Split into 80% train / 20% test; shuffle=False keeps the rows in their
# original (chronological) order
train_data, test_data = train_test_split(scaled_data, train_size=0.80, test_size=0.20, shuffle=False)

In [None]:
# data train
np.round(train_data[:5],7)

In [None]:
train_data.shape

In [None]:
# data test
np.round(test_data[:5],7)

In [None]:
test_data.shape

- Supervised Learning

In [None]:
# convert an array of values into a dataset matrix

# function for supervised learning
def create_dataset(look_back, dataset):
    """Turn a series into supervised-learning samples with a sliding window.

    Each sample's input is ``look_back`` consecutive values taken from
    column 0 of ``dataset``; its target is the value that follows them.

    Args:
        look_back: Number of past time steps per input window (>= 0).
        dataset: 2-D array-like of shape (n_steps, n_features); only
            column 0 is used.

    Returns:
        Tuple ``(dataX, dataY)`` of NumPy arrays with shapes
        ``(n_samples, look_back)`` and ``(n_samples,)``, where
        ``n_samples = max(len(dataset) - look_back, 0)``.

    Raises:
        ValueError: If ``look_back`` is negative.
    """
    if look_back < 0:
        raise ValueError("look_back must be non-negative")

    values = np.asarray(dataset)
    n_samples = len(values) - look_back

    # Too few rows for even one window: return correctly shaped empty arrays
    # so callers can still read dataX.shape[1] without an IndexError.
    if n_samples <= 0:
        return (np.empty((0, look_back), dtype=values.dtype),
                np.empty((0,), dtype=values.dtype))

    series = values[:, 0]

    # One window of `look_back` values per target position
    dataX = np.array([series[i - look_back:i] for i in range(look_back, len(series))])

    # The target of each window is the value immediately after it
    dataY = series[look_back:].copy()

    return dataX, dataY

In [None]:
# Build the supervised-learning samples: every input is a window of
# `look_back` consecutive values and the target is the value right after it.
# Train and test sets are windowed separately.
look_back = 60
x_train, y_train = create_dataset(look_back, train_data)
x_test, y_test = create_dataset(look_back, test_data)

In [None]:
print(x_train.shape, y_train.shape)

In [None]:
print(x_test.shape, y_test.shape)

In [None]:
# reshape input to be [samples, time steps, features]
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

In [None]:
print(x_train.shape, x_test.shape)

## 5. Building LSTM-RNN Models

### Percobaan 1 - Vanilla LSTM-RNN

In [None]:
# The vanilla LSTM architecture: one LSTM layer followed by a dense output
model_1 = tf.keras.Sequential([
    
    # Single LSTM layer with 50 units; input_shape is (time steps, 1 feature)
    tf.keras.layers.LSTM(units=50, input_shape=(x_train.shape[1], 1)),
    
    # The output layer: a single unit producing the predicted value
    tf.keras.layers.Dense(1)
])

In [None]:
# Compile the model LSTM
model_1.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
# Train the network; shuffle=False keeps the samples in their original order.
# `use_multiprocessing` is intentionally not passed: it only applies to
# generator / keras.utils.Sequence inputs (the inputs here are NumPy arrays)
# and Keras 3 no longer accepts it in fit().
history_1 = model_1.fit(x_train, y_train, epochs=50, batch_size=8, verbose=1, shuffle=False)

In [None]:
model_1.summary()

In [None]:
testPredictions_1 = model_1.predict(x_test)
print(testPredictions_1[:7])

In [None]:
# membuat frame
fig, ax = plt.subplots(figsize = (10,5))

# membuat time series plot
ax.plot(history_1.history['loss'], color="tab:blue", label="loss function", linewidth=2)

# membuat label-label
ax.set_xlabel("epoch", fontsize=10)
ax.set_ylabel("loss function", fontsize=10)
ax.legend(loc='upper right')
ax.grid(True)

# menampilkan plot
plt.show()

#### Evaluasi Model - Percobaan 1 Vanilla LSTM-RNN

In [None]:
scores_1 = model_1.evaluate(x_train, y_train)
scores_1

In [None]:
scores_1 = model_1.evaluate(x_test, y_test)
scores_1

- MAE (Mean Absolute Error)

In [None]:
mae_1 = mean_absolute_error(y_test, testPredictions_1)
print('Test MAE : %.4f' % mae_1)

- MSE (Mean Squared Error)

In [None]:
mse_1 = mean_squared_error(y_test, testPredictions_1)
print('Test MSE: %.4f' % mse_1)

- RMSE (Root Mean Squared Error)

In [None]:
# RMSE is the square root of the test MSE. It is stored in its own variable
# so that mse_1 keeps holding the MSE and re-running this cell does not take
# the square root repeatedly.
rmse_1 = sqrt(mse_1)
print('Test RMSE: %.4f' % rmse_1)

- Pearson Correlation

In [None]:
hasil_1 = np.stack((y_test.reshape(-1), testPredictions_1.reshape(-1)), axis=1)
hasil_1 = pd.DataFrame(hasil_1, columns = ['data_aktual','prediksi'])

In [None]:
hasil_1

In [None]:
# Pearson correlation between actual and predicted values, with its p-value
# (message typo "akual" corrected to "aktual")
r1, p1 = sc.pearsonr(hasil_1["data_aktual"], hasil_1["prediksi"])
print("korelasi data aktual dengan hasil prediksi" +" {:.4f} ".format(r1)+ "dengan signifikansi" +" {:.4f} ".format(p1))

#### Visualisasi Data - Percobaan 1 Vanilla LSTM-RNN

In [None]:
# inverse value test predictions
testPredictions_1 = scaler.inverse_transform(testPredictions_1)

In [None]:
# Build a NaN-filled array with the same shape as the full series and write
# the test predictions into its last rows, so they can be plotted against the
# full date index (the NaN rows are simply not drawn)
testPredictionsPlot_1 = np.empty_like(scaled_data)
testPredictionsPlot_1[:, :] = np.nan
testPredictionsPlot_1[(len(dataset) - testPredictions_1.shape[0]):len(dataset), :] = testPredictions_1

In [None]:
# Create the figure
fig, ax = plt.subplots(figsize = (15,5))

# Plot the actual series (inverse-scaled back to price units) and the test predictions
ax.plot(dataset.index.values, scaler.inverse_transform(scaled_data), color="tab:blue", label="data aktual", linewidth=2)
ax.plot(dataset.index.values, testPredictionsPlot_1, color="tab:red", label="data prediksi", linewidth=2)

# Title, legend and grid
ax.set_title("Percobaan 1 - Model The Vanilla LSTM-RNN", fontsize=14)
ax.legend(loc="best")
ax.grid(True)

# Show the plot
plt.show()

### Percobaan 2 - Stacked LSTM-RNN

In [None]:
# The stacked LSTM architecture: three LSTM layers followed by a dense output
# (no Dropout layers are used in this model)
model_2 = tf.keras.Sequential([
    
    # First LSTM layer; return_sequences=True passes the full sequence to the next layer
    tf.keras.layers.LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)),
    
    # Second LSTM layer, also returning the full sequence
    tf.keras.layers.LSTM(units=50, return_sequences=True),
    
    # Third LSTM layer; return_sequences=False returns only the last output
    tf.keras.layers.LSTM(units=50, return_sequences=False),
    
    # The output layer: a single unit producing the predicted value
    tf.keras.layers.Dense(1)
])

In [None]:
# Compile the model LSTM
model_2.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
# Train the network; shuffle=False keeps the samples in their original order.
# `use_multiprocessing` is intentionally not passed: it only applies to
# generator / keras.utils.Sequence inputs (the inputs here are NumPy arrays)
# and Keras 3 no longer accepts it in fit().
history_2 = model_2.fit(x_train, y_train, epochs=50, batch_size=8, verbose=1, shuffle=False)

In [None]:
model_2.summary()

In [None]:
testPredictions_2 = model_2.predict(x_test)
print(testPredictions_2[:7])

In [None]:
# membuat frame
fig, ax = plt.subplots(figsize = (10,5))

# membuat time series plot
ax.plot(history_2.history['loss'], color="tab:blue", label="loss function", linewidth=2)

# membuat label-label
ax.set_xlabel("epoch", fontsize=10)
ax.set_ylabel("loss function", fontsize=10)
ax.legend(loc='upper right')
ax.grid(True)

# menampilkan plot
plt.show()

#### 6.1 Evaluasi Model Stacked LSTM-RNN

In [None]:
# Evaluate the stacked model (model_2, not model_1) on the training data
scores_2 = model_2.evaluate(x_train, y_train)
scores_2

In [None]:
# Evaluate the stacked model (model_2, not model_1) on the test data
scores_2 = model_2.evaluate(x_test, y_test)
scores_2

- MAE (Mean Absolute Error)

In [None]:
mae_2 = mean_absolute_error(y_test, testPredictions_2)
print('Test MAE : %.4f' % mae_2)

- MSE (Mean Squared Error)

In [None]:
mse_2 = mean_squared_error(y_test, testPredictions_2)
print('Test MSE: %.4f' % mse_2)

- RMSE (Root Mean Squared Error)

In [None]:
rmse_2 = sqrt(mse_2)
print('Test RMSE: %.4f' % rmse_2)

- Pearson Correlation

In [None]:
hasil_2 = np.stack((y_test.reshape(-1), testPredictions_2.reshape(-1)), axis=1)
hasil_2 = pd.DataFrame(hasil_2, columns = ['data_aktual','prediksi'])

In [None]:
hasil_2

In [None]:
# Pearson correlation between actual and predicted values, with its p-value
# (message typo "akual" corrected to "aktual")
r2, p2 = sc.pearsonr(hasil_2["data_aktual"], hasil_2["prediksi"])
print("korelasi data aktual dengan hasil prediksi" +" {:.4f} ".format(r2)+ "dengan signifikansi" +" {:.4f} ".format(p2))

#### 6.2 Visualisasi Data Stacked LSTM-RNN

In [None]:
# inverse value test predictions
testPredictions_2 = scaler.inverse_transform(testPredictions_2)

In [None]:
# shift test predictions for plotting
testPredictionsPlot_2 = np.empty_like(scaled_data)
testPredictionsPlot_2[:, :] = np.nan
testPredictionsPlot_2[(len(dataset) - testPredictions_2.shape[0]):len(dataset), :] = testPredictions_2

In [None]:
# Create the figure
fig, ax = plt.subplots(figsize = (15,5))

# Plot the actual series (inverse-scaled back to price units) and the test predictions
ax.plot(dataset.index.values, scaler.inverse_transform(scaled_data), color="tab:blue", label="data aktual", linewidth=2)
ax.plot(dataset.index.values, testPredictionsPlot_2, color="tab:red", label="data prediksi", linewidth=2)

# Title, legend and grid
ax.set_title("Percobaan 2 - Model Stacked LSTM-RNN", fontsize=14)
ax.legend(loc="best")
ax.grid(True)

# Show the plot
plt.show()

### 7 Bidirectional LSTM-RNN 

In [None]:
# The bidirectional LSTM architecture: one bidirectional LSTM layer followed
# by a dense output
model_3 = tf.keras.Sequential([
    
    # Bidirectional wrapper around a 50-unit LSTM. The inner layer is taken
    # from tf.keras like every other layer in this model, so layers from the
    # standalone `keras` package are not mixed into a tf.keras model.
    tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(units=50, input_shape=(x_train.shape[1], 1))
    ),
    
    # The output layer: a single unit producing the predicted value
    tf.keras.layers.Dense(1)
])

In [None]:
# Compile the bidirectional LSTM model with the Adam optimizer and MSE loss
model_3.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
# Train the network; shuffle=False keeps the samples in their original order.
# `use_multiprocessing` is intentionally not passed: it only applies to
# generator / keras.utils.Sequence inputs (the inputs here are NumPy arrays)
# and Keras 3 no longer accepts it in fit().
history_3 = model_3.fit(x_train, y_train, epochs=50, batch_size=8, verbose=1, shuffle=False)

In [None]:
model_3.summary()

In [None]:
testPredictions_3 = model_3.predict(x_test)
print(testPredictions_3[:7])

In [None]:
# membuat frame
fig, ax = plt.subplots(figsize = (10,5))

# membuat time series plot
ax.plot(history_3.history['loss'], color="tab:blue", label="loss function", linewidth=2)

# membuat label-label
ax.set_xlabel("epoch", fontsize=10)
ax.set_ylabel("loss function", fontsize=10)
ax.legend(loc='upper right')
ax.grid(True)

# menampilkan plot
plt.show()

#### 7.1 Evaluasi Model Bidirectional LSTM-RNN

In [None]:
scores_3 = model_3.evaluate(x_train, y_train)
scores_3

In [None]:
scores_3 = model_3.evaluate(x_test, y_test)
scores_3

- MAE (Mean Absolute Error)

In [None]:
mae_3 = mean_absolute_error(y_test, testPredictions_3)
print('Test MAE : %.4f' % mae_3)

- MSE (Mean Squared Error)

In [None]:
mse_3 = mean_squared_error(y_test, testPredictions_3)
print('Test MSE: %.4f' % mse_3)

- RMSE (Root Mean Squared Error)

In [None]:
rmse_3 = sqrt(mse_3)
print('Test RMSE: %.4f' % rmse_3)

- Pearson Correlation

In [None]:
hasil_3 = np.stack((y_test.reshape(-1), testPredictions_3.reshape(-1)), axis=1)
hasil_3 = pd.DataFrame(hasil_3, columns = ['data_aktual','prediksi'])

In [None]:
hasil_3

In [None]:
# Pearson correlation between actual and predicted values, with its p-value
# (message typo "akual" corrected to "aktual")
r3, p3 = sc.pearsonr(hasil_3["data_aktual"], hasil_3["prediksi"])
print("korelasi data aktual dengan hasil prediksi" +" {:.4f} ".format(r3)+ "dengan signifikansi" +" {:.4f} ".format(p3))

#### 7.2 Visualisasi Data Bidirectional LSTM-RNN

In [None]:
# inverse value test predictions
testPredictions_3 = scaler.inverse_transform(testPredictions_3)

In [None]:
# shift test predictions for plotting
testPredictionsPlot_3 = np.empty_like(scaled_data)
testPredictionsPlot_3[:, :] = np.nan
testPredictionsPlot_3[(len(dataset) - testPredictions_3.shape[0]):len(dataset), :] = testPredictions_3

In [None]:
# Create the figure
fig, ax = plt.subplots(figsize = (15,5))

# Plot the actual series (inverse-scaled back to price units) and the test predictions
ax.plot(dataset.index.values, scaler.inverse_transform(scaled_data), color="tab:blue", label="data aktual", linewidth=2)
ax.plot(dataset.index.values, testPredictionsPlot_3, color="tab:red", label="data prediksi", linewidth=2)

# Title, legend and grid
ax.set_title("Percobaan 3 - Model Bidirectional LSTM-RNN", fontsize=14)
ax.legend(loc="best")
ax.grid(True)

# Show the plot
plt.show()

### 8 Stacked-Bidirectional LSTM-RNN

In [None]:
# The stacked-bidirectional LSTM architecture: two bidirectional LSTM layers
# followed by a dense output (no Dropout layers are used in this model)
model_4 = Sequential()

# First bidirectional LSTM layer; return_sequences=True passes the full
# sequence to the next layer
model_4.add(
    Bidirectional(
        LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1))
    )
)

# Second bidirectional LSTM layer; return_sequences=False returns only the
# last output
model_4.add(
    Bidirectional(
        LSTM(units=50, return_sequences=False)
    )
)


# The output layer: a single unit producing the predicted value
model_4.add(Dense(1))

# Compile the model with the Adam optimizer and MSE loss
model_4.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
# Train the network; shuffle=False keeps the samples in their original order.
# `use_multiprocessing` is intentionally not passed: it only applies to
# generator / keras.utils.Sequence inputs (the inputs here are NumPy arrays)
# and Keras 3 no longer accepts it in fit().
history_4 = model_4.fit(x_train, y_train, epochs=50, batch_size=8, verbose=1, shuffle=False)

In [None]:
model_4.summary()

In [None]:
testPredictions_4 = model_4.predict(x_test)
print(testPredictions_4[:7])

In [None]:
# membuat frame
fig, ax = plt.subplots(figsize = (10,5))

# membuat time series plot
ax.plot(history_4.history['loss'], color="tab:blue", label="loss function", linewidth=2)

# membuat label-label
ax.set_xlabel("epoch", fontsize=10)
ax.set_ylabel("loss function", fontsize=10)
ax.legend(loc='upper right')
ax.grid(True)

# menampilkan plot
plt.show()

#### 8.1 Evaluasi Model Stacked-Bidirectional LSTM-RNN

In [None]:
scores_4 = model_4.evaluate(x_train, y_train)
scores_4

In [None]:
scores_4 = model_4.evaluate(x_test, y_test)
scores_4

- MAE (Mean Absolute Error)

In [None]:
mae_4 = mean_absolute_error(y_test, testPredictions_4)
print('Test MAE : %.4f' % mae_4)

- MSE (Mean Squared Error)

In [None]:
mse_4 = mean_squared_error(y_test, testPredictions_4)
print('Test MSE: %.4f' % mse_4)

- RMSE (Root Mean Squared Error)

In [None]:
rmse_4 = sqrt(mse_4)
print('Test RMSE: %.4f' % rmse_4)

- Pearson Correlation

In [None]:
hasil_4 = np.stack((y_test.reshape(-1), testPredictions_4.reshape(-1)), axis=1)
hasil_4 = pd.DataFrame(hasil_4, columns = ['data_aktual','prediksi'])

In [None]:
hasil_4

In [None]:
# Pearson correlation between actual and predicted values, with its p-value
# (message typo "akual" corrected to "aktual")
r4, p4 = sc.pearsonr(hasil_4["data_aktual"], hasil_4["prediksi"])
print("korelasi data aktual dengan hasil prediksi" +" {:.4f} ".format(r4)+ "dengan signifikansi" +" {:.4f} ".format(p4))

#### 8.2 Visualisasi Data Stacked-Bidirectional LSTM-RNN

In [None]:
# inverse value test predictions
testPredictions_4 = scaler.inverse_transform(testPredictions_4)

In [None]:
# shift test predictions for plotting
testPredictionsPlot_4 = np.empty_like(scaled_data)
testPredictionsPlot_4[:, :] = np.nan
testPredictionsPlot_4[(len(dataset) - testPredictions_4.shape[0]):len(dataset), :] = testPredictions_4

In [None]:
# membuat frame
fig, ax = plt.subplots(figsize = (15,5))

# membuat time series plot
ax.plot(dataset.index.values, scaler.inverse_transform(scaled_data), color="tab:blue", label="data aktual", linewidth=2)
ax.plot(dataset.index.values, testPredictionsPlot_4, color="tab:red", label="data prediksi", linewidth=2)

# membuat label-label
ax.set_title("Percobaan 4 - Model Stacked-Bidirectional LSTM-RNN", fontsize=14)
ax.legend(loc="best")
ax.grid(True)

# menampilkan plot
plt.show()

## 9. Computation Time

In [None]:
# Set akhir waktu komputasi 
end = time.time()

In [None]:
# Proses menghitung waktu komputasi
hours, rem = divmod(end-start, 3600)
minutes, seconds = divmod(rem, 60)

In [None]:
# Hasil waktu komputasi
print("{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))