In [14]:
import numpy as np
import pandas as pd
import hvplot.pandas
import tensorflow as tf
from pathlib import Path

# API import
import requests
import json
import pickle as pk

In [15]:
# API Call to Kucoin Exchange 
ticker = "BTC"
frequency = "1hour"
epoch = "1620360000"
response_data = requests.get(f'https://api.kucoin.com/api/v1/market/candles?type={frequency}&symbol={ticker}-USDT&startAt={epoch}&endAt=0&limit=10000').json()


# Data Cleaning
df = pd.DataFrame(response_data['data'])

df[0] = pd.to_datetime(df[0], unit='s')
df.columns =['Date', 'Opening price',  'Closing price' , 'Highest price' , 'Lowest price' , 'Transaction volume' ,  'Transaction amount']
df.sort_values(by=['Date'], inplace=True)
df.set_index('Date', inplace=True)
df = df.astype(float)

# Export all data
df.to_csv("./Export/Test-data/DataSet.csv")

In [16]:
# This function accepts the column number for the features (X) and the target (y)
# It chunks the data up with a rolling window of Xt-n to predict Xt
# It returns a numpy array of X any y
def window_data(df, window, feature_col_number, target_col_number):
    X = []
    y = []
    for i in range(len(df) - window):
        features = df.iloc[i:(i + window), feature_col_number]
        target = df.iloc[(i + window), target_col_number]
        X.append(features)
        y.append(target)
    return np.array(X), np.array(y).reshape(-1, 1)

In [17]:
# Initiate dictionary with the model variables
model_parameters = {
    "window_size" : 24 ,
    "split_train" : 0.7,
    "dropout_fraction" : 0.2,
    "epoch" : 20 ,
    "batch" : 3,
    "split_val" : 0.2
    }

# Predict Closing Prices using a day window of previous closing prices
window_size = model_parameters["window_size"]

# Use Column index 1, is the `Closing Price` column
feature_column = 1
target_column = 1
X, y = window_data(df, window_size, feature_column, target_column)

# Use 70% of the data for training and the remaineder for testing
split = int(model_parameters["split_train"] * len(X))
X_train = X[: split]
X_test = X[split:]
y_train = y[: split]
y_test = y[split:]

In [18]:
# Export X_test and y_test before scaler, this data will be used to compare the different models in order to compare results with same data
X_test_df = pd.DataFrame(X_test)
X_test_df.to_csv("./Export/Test-data/X_test.csv", index=False)


y_test_df = pd.DataFrame(y_test)
y_test_df.to_csv("./Export/Test-data/y_test.csv", index=False)

In [19]:
# Use the MinMaxScaler to scale data between 0 and 1.
from sklearn.preprocessing import MinMaxScaler

    # Create a MinMaxScaler object
X_scaler = MinMaxScaler()
Y_scaler = MinMaxScaler()

    # Fit the MinMaxScaler object with the training feature data X_train
X_scaler.fit(X_train)
    # Scale the features training and testing sets
X_train = X_scaler.transform(X_train)
X_test = X_scaler.transform(X_test)

    # # Fit the MinMaxScaler object with the training target data y_train
Y_scaler.fit(y_train)
    # Scale the target training and testing sets
y_train = Y_scaler.transform(y_train)
y_test = Y_scaler.transform(y_test)


# Reshape the features for the model
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))


In [20]:
# Export scaler model
# Save X_scaler model in pickle
filename_x_scaler = "./Export/X_scaler_rnn"
outfile = open(filename_x_scaler, "wb")
pk.dump(X_scaler,outfile)
outfile.close()

# Save y_scaler model in pickle
filename_y_scaler = "./Export/Y_scaler_rnn"
outfile = open(filename_y_scaler, "wb")
pk.dump(Y_scaler,outfile)
outfile.close()

---

## Build and Train the LSTM RNN
In this section, we design a custom LSTM RNN and fit (train) it using the training data.

We do the following things:
1. Define the model architecture
2. Compile the model
3. Fit the model to the training data


In [21]:
# Build the LSTM model. 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional

model = Sequential()

number_units = window_size
dropout_fraction = model_parameters["dropout_fraction"]

# Layer 1
model.add(Bidirectional(LSTM(
    units=number_units,
    return_sequences=True,
    input_shape=(X_train.shape[1], 1))
    ))

# Layer 2
model.add(Bidirectional(LSTM(
    units=number_units,
    return_sequences=False))
    )
model.add(Dropout(
    dropout_fraction))

# Output layer
model.add(Dense(1))

# Compile the model
model.compile(optimizer="adam", loss="mean_squared_error", metrics = ["mse", "accuracy"])



# Train the model
batch = model_parameters["batch"]
epoch = model_parameters["epoch"]
validation = model_parameters["split_val"]

model.fit(X_train, y_train , validation_split=validation, epochs=epoch, shuffle=False, batch_size=batch, verbose=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1d2f2f05b88>

In [22]:
# Summarize the model
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional_2 (Bidirectio  (None, 24, 48)           4992      
 nal)                                                            
                                                                 
 bidirectional_3 (Bidirectio  (None, 48)               14016     
 nal)                                                            
                                                                 
 dropout_1 (Dropout)         (None, 48)                0         
                                                                 
 dense_1 (Dense)             (None, 1)                 49        
                                                                 
Total params: 19,057
Trainable params: 19,057
Non-trainable params: 0
_________________________________________________________________


---

## Model Performance
In this section we evaluate the model using the test data. 

1. Evaluate the model using the `X_test` and `y_test` data.
2. We eport our model to use for the predictions and the backtesting

In [23]:
# Evaluate the model
model.evaluate(X_test, y_test, return_dict=True)



{'loss': 0.023410310968756676, 'mse': 0.023410310968756676, 'accuracy': 0.0}

In [24]:
# Save model as JSON
rnn_json = model.to_json()

file_path = Path("./Export/rnn_model.json")
with open(file_path, "w") as json_file:
    json_file.write(rnn_json)

# Save weights
file_path = "../Export/rnn_model.h5"
model.save_weights("./Export/rnn_model.h5")

In [25]:
# Make some predictions
prediction =  model.predict(X_test)

# Recover the original prices instead of the scaled version
predicted_prices = Y_scaler.inverse_transform(prediction)
real_prices = Y_scaler.inverse_transform(y_test.reshape(-1, 1))


# Create a DataFrame of Real and Predicted values
stocks = pd.DataFrame({
    "Real": real_prices.ravel(),
    "Predicted": predicted_prices.ravel()
}, index = df.index[-len(real_prices): ]) 

stocks.head()
# Plot the real vs predicted values as a line chart
display(stocks.head(),
        model_parameters,
        stocks.hvplot(title=f"Actual Vs. Predicted {ticker} Prices"))

Unnamed: 0_level_0,Real,Predicted
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-01-07 13:00:00,41494.4,44132.238281
2022-01-07 14:00:00,41898.5,43942.414062
2022-01-07 15:00:00,41317.1,43870.890625
2022-01-07 16:00:00,42143.9,43651.699219
2022-01-07 17:00:00,41995.1,43708.984375


{'window_size': 24,
 'split_train': 0.7,
 'dropout_fraction': 0.2,
 'epoch': 20,
 'batch': 3,
 'split_val': 0.2}