## RNN - LSTM - GRU - XGBoost - Prophet: Bitcoin Time Series Prediction

### 1. Deklarasi Pustaka

In [None]:
# pustaka untuk manipulasi data-frame
import pandas as pd
from pandas import concat
from pandas import read_csv
from pandas import read_excel
from pandas_datareader import DataReader

# pustaka untuk manipulasi data-array
import numpy as np
from numpy import concatenate
from numpy import array

# pustaka untuk waktu komputasi
import time
from datetime import datetime

# pustaka untuk visualisasi data
import seaborn as sns
from matplotlib import pyplot
from matplotlib import pyplot as plt

# pustaka untuk visualisasi acf dan pacf
import scipy.stats as sc
import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.graphics.tsaplots import plot_acf
        
# pustaka untuk membuat data latih dan data uji.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder

# pustaka untuk membuat model prediksi LSTM-RNN
import itertools
import tensorflow as tf
from keras.utils import Sequence
from keras.models import Sequential
from keras.layers import SimpleRNN
from keras.layers import LSTM
from keras.layers import GRU
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import TimeDistributed
from keras.layers import Bidirectional
from keras.optimizers import Adam, Adamax, RMSprop, SGD

# early stopping
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint

# pustaka untuk  evaluasi model prediksi
import math
from math import sqrt
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

### 2. Akuisisi Data

In [None]:
# Record the wall-clock start time; the elapsed run time is derived from it later.
start = time.time()

In [None]:
# Fix the random seeds for reproducibility.
# Seeding NumPy alone is not enough: Keras weight initialisation and Dropout draw
# from TensorFlow's own random generator, so that generator is seeded as well.
np.random.seed(1234)
tf.random.set_seed(1234)

In [None]:
# Load the BTC-USD price history from the CSV file; the "Date" column is parsed as datetimes.
dataset = read_csv("dataset/BTC-USD.csv", parse_dates=["Date"])

In [None]:
# Use the "Date" column as the row index.
dataset = dataset.set_index(keys="Date")

In [None]:
# Print a summary of the dataset's metadata.
dataset.info()

In [None]:
dataset

### 3. Eksplorasi Data Analisis

In [None]:
# Build a 2x2 grid of axes, one panel per price column.
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2, facecolor="#f0f0f0", figsize=(20, 10))

# (axes, price column, line colour) for every panel, in drawing order.
panels = (
    (ax1, "Open", "tab:blue"),
    (ax2, "Close", "tab:green"),
    (ax3, "High", "tab:orange"),
    (ax4, "Low", "tab:red"),
)

for panel_ax, column, colour in panels:
    # Time-series line of this price column against the date index.
    panel_ax.plot(dataset.index.values, dataset[column], color=colour, label=f"{column} Price", linewidth=2)

    # Title, axis labels, legend and grid.
    panel_ax.set_title(f"{column} Price Bitcoin", fontsize=14)
    panel_ax.set_xlabel("Date", fontsize=12)
    panel_ax.set_ylabel("Stock Price", fontsize=12)
    panel_ax.legend(loc="upper left")
    panel_ax.grid(True)

# Set the spacing between subplots.
plt.subplots_adjust(wspace=0.15, hspace=0.25)

# Render the figure.
plt.show()

### 4. Praproses Dataset

- Seleksi Fitur

In [None]:
# Keep only the closing-price column as the model feature.
dataset = dataset.filter(items=["Close"])

In [None]:
# Extract the close prices from the DataFrame as a NumPy array (one row per date).
data = dataset.values

In [None]:
np.round(data[:5],7)

In [None]:
data.shape

- Normalisasi Data

In [None]:
# Normalise the close prices with a min-max scaler.
# The scaler is fitted on the training portion only: fitting it on the full series
# would leak the test period's minimum/maximum into the training data.
values = np.array(data).reshape(-1, 1)
n_fit = int(len(values) * 0.80)  # must match train_size used by the train/test split
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(values[:n_fit])
# Training values land in [0, 1]; later values may fall outside that range.
scaled_data = scaler.transform(values)

In [None]:
np.round(scaled_data[:5],7)

In [None]:
scaled_data.shape

In [None]:
# Plot the normalised close-price series against the date index.
fig, ax = plt.subplots(figsize=(8, 4), facecolor="#f0f0f0")
ax.plot(
    dataset.index.values,
    scaled_data,
    label="CLose Price",
    color="tab:blue",
    linewidth=2,
)

# Title, axis labels, grid and legend.
ax.set_title("Close Price Bitcoin", fontsize=10)
ax.set_xlabel("Date", fontsize=9)
ax.set_ylabel("Stock Price", fontsize=9)
ax.grid(True)
ax.legend(loc="upper left")

# Render the figure.
plt.show()

- split data

In [None]:
# # set data train
# train_size = math.ceil(len(dataset)*0.8)
# train_data = scaled_data[0:train_size , :]

In [None]:
# np.round(train_data[:5],7)

In [None]:
# train_data.shape

In [None]:
# # set data test
# test_size = len(dataset) - train_size
# test_data = scaled_data[train_size:len(dataset) , :]

In [None]:
# np.round(test_data[:5],7)

In [None]:
# test_data.shape

In [None]:
# Chronological 80/20 split: shuffling is disabled, so the test set is the latest data.
train_data, test_data = train_test_split(
    scaled_data,
    test_size=0.20,
    train_size=0.80,
    shuffle=False,
)

In [None]:
# data train
np.round(train_data[:5],7)

In [None]:
train_data.shape

In [None]:
# data test
np.round(test_data[:5],7)

In [None]:
test_data.shape

- Supervised Learning

In [None]:
def create_dataset(look_back, dataset):
    """Convert a series into sliding-window samples for supervised learning.

    Each sample holds ``look_back`` consecutive values taken from the first
    column of ``dataset``; its target is the value that immediately follows
    the window.

    Args:
        look_back: Number of past time steps per sample (positive integer).
        dataset: 2-D array-like of shape (n_rows, n_columns). Only column 0
            is read.

    Returns:
        Tuple ``(dataX, dataY)`` of NumPy arrays with shapes
        ``(n_samples, look_back)`` and ``(n_samples,)``, where
        ``n_samples = max(len(dataset) - look_back, 0)``.

    Raises:
        ValueError: If ``look_back`` is smaller than 1.
    """
    if look_back < 1:
        raise ValueError("look_back must be a positive integer")

    # Too few rows for a single window: return correctly shaped empty arrays so
    # callers can still rely on dataX.shape[1] == look_back.
    if len(dataset) <= look_back:
        return np.empty((0, look_back)), np.empty((0,))

    series = np.asarray(dataset)[:, 0]

    # One window per target position; the target is the value right after it.
    dataX = np.array([series[i - look_back:i] for i in range(look_back, len(series))])
    dataY = series[look_back:].copy()

    return dataX, dataY

In [None]:
# Build the supervised-learning samples: every sample is a window of the previous
# `look_back` values, created separately for the training and the test set.
look_back = 60
x_train, y_train = create_dataset(look_back, train_data)
x_test, y_test = create_dataset(look_back, test_data)

In [None]:
print(x_train.shape, y_train.shape)

In [None]:
print(x_test.shape, y_test.shape)

In [None]:
# Add a trailing feature axis so the inputs are [samples, time steps, features].
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)

In [None]:
print(x_train.shape, x_test.shape)

### 5. Modeling LSTM-RNN

In [None]:
# Stacked LSTM regressor, declared as a single list of layers.
model = Sequential([
    # First LSTM layer: returns the full sequence for the next LSTM layer,
    # followed by Dropout regularisation.
    LSTM(50, return_sequences=True, input_shape=(x_train.shape[1], 1)),
    Dropout(0.05),
    # Second LSTM layer: returns only its last output (no Dropout after it).
    LSTM(50, return_sequences=False),
    # Output layer with a single unit.
    Dense(1),
])

# Compile the LSTM model with the Adam optimiser and mean-squared-error loss.
model.compile(loss="mean_squared_error", optimizer="adam")

In [None]:
# Train the network; shuffle=False keeps the samples in chronological order.
# `use_multiprocessing` was removed: it only applies to generator / Sequence inputs,
# has no effect on in-memory NumPy arrays, and newer Keras releases reject it.
history = model.fit(x_train, y_train, epochs=50, batch_size=16, verbose=1, shuffle=False)

In [None]:
model.summary()

In [None]:
# Run the trained model on every test window.
predictions = model.predict(x_test)

In [None]:
print(predictions[:7])

In [None]:
# Plot the training loss recorded for each epoch.
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(
    history.history["loss"],
    label="loss func",
    color="tab:blue",
    linewidth=2,
)

# Axis labels, grid and legend.
ax.set_xlabel("epoch", fontsize=10)
ax.set_ylabel("loss function", fontsize=10)
ax.grid(True)
ax.legend(loc="upper right")

# Render the figure.
plt.show()

### 6. Evaluasi Model LSTM-RNN

In [None]:
# Loss of the fitted model on the training samples (the model's loss is MSE).
scores = model.evaluate(x_train, y_train)
scores

In [None]:
# Loss of the fitted model on the test samples (the model's loss is MSE).
scores = model.evaluate(x_test, y_test)
scores

- MAE (Mean Absolute Error)

In [None]:
# Mean absolute error on the test samples (in scaled units, since y_test is scaled).
mae = mean_absolute_error(y_test, predictions)
print(f"Test MAE : {mae:.4f}")

- MSE (Mean Squared Error)

In [None]:
# Mean squared error on the test samples (in scaled units, since y_test is scaled).
mse = mean_squared_error(y_test, predictions)
print(f"Test MSE: {mse:.4f}")

- RMSE (Root Mean Squared Error)

In [None]:
# Root mean squared error: the square root of the test MSE.
rmse = math.sqrt(mse)
print(f"Test RMSE: {rmse:.4f}")

- Pearson correlation

In [None]:
# Two-column table of actual vs. predicted values for the test samples.
hasil = pd.DataFrame(
    np.column_stack((y_test.ravel(), predictions.ravel())),
    columns=["data_aktual", "prediksi"],
)

In [None]:
hasil

In [None]:
# Pearson correlation (r) between actual and predicted values, with its p-value (p).
r, p = sc.pearsonr(hasil["data_aktual"], hasil["prediksi"])
print(f"korelasi data akual dengan hasil prediksi {r:.4f} dengan signifikansi {p:.4f} ")

- Computation time

In [None]:
# Record the wall-clock end time of the computation.
end = time.time()

In [None]:
# Break the elapsed seconds into whole hours, whole minutes and remaining seconds.
hours, rem = divmod(end-start, 3600)
minutes, seconds = divmod(rem, 60)

In [None]:
# Print the elapsed computation time as HH:MM:SS.ss.
print(f"{int(hours):0>2}:{int(minutes):0>2}:{seconds:05.2f}")

### 7. Visualisasi Data

In [None]:
# Plot actual vs. predicted close prices (scaled) for the test period.
fig, ax = plt.subplots(figsize=(10, 5))

# y_test holds the targets for the last len(y_test) rows of the series, so the
# matching dates are the tail of the dataset index. Slicing from an explicit
# start position also behaves correctly when y_test is empty.
test_dates = dataset.index[len(dataset) - len(y_test):]

# Actual and predicted series on the real date axis.
ax.plot(test_dates, y_test, color="tab:blue", label="data aktual", linewidth=2.5)
ax.plot(test_dates, predictions, color="tab:red", label="hasil prediksi", linewidth=2.5)

# Axis labels: the previous y-label ("Jumlah Hostpot") and the commented-out
# hotspot title were left over from a different dataset.
ax.set_xlabel("Tanggal", fontsize=12)
ax.set_ylabel("Harga Penutupan Bitcoin (ternormalisasi)", fontsize=12)
ax.legend(loc='upper right')
ax.grid(True)

# Render the figure.
plt.show()