In [None]:
# libraries
import pandas as pd
import pandas_datareader as pdr
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline

import os
import datetime
from IPython import display

In [None]:
# Show the installed TensorFlow version so the run can be reproduced later.
tf.__version__

In [None]:
import ssl

# SECURITY NOTE(review): this swaps the default HTTPS context factory for the
# unverified one, so HTTPS requests that rely on the default context in this
# kernel no longer validate server certificates. Presumably a workaround for
# certificate errors during the data download — TODO confirm, and prefer fixing
# the local CA bundle instead of disabling verification globally.
ssl._create_default_https_context = ssl._create_unverified_context

In [None]:
# Building a complex and powerful Recurrent Neural Network (RNN).
# It will be able to predict profitable trading signals.

In [None]:
# Using a Recurrent Neural Network (RNN). Unlike a plain feed-forward network, an RNN has feedback (recurrent) connections that carry a hidden state from one time step to the next, which lets it model sequences. Like every neural network it is trained with backpropagation: after predicting a value it compares the prediction with the actual value, computes the loss (e.g. MSE or RMSE), and adjusts its neurons' weights and biases to decrease that loss.
# An example of an RNN architecture is the LSTM, which is well suited to time series analysis.

In [None]:
# Build stock/crypto price prediction for next day, next 3 days (72 hours), next 7 days (daily prices), next month (daily prices).

In [None]:
# Data acquisition: start with Bitcoin (ticker BTC-USD) via the Yahoo reader.
# The requested range runs from 2015-12-30 up to the moment this cell executes.
btc_data = pdr.get_data_yahoo(
    ["BTC-USD"],
    start=datetime.datetime(2015, 12, 30),
    end=datetime.datetime.now(),
)

In [None]:
# Quick look at the downloaded frame.
btc_data

In [None]:
# Flatten the multi-level column index down to its first level, which holds the
# plain column names (e.g. 'Close') used later on.
# Guarded so that re-running this cell is a no-op: without the check, a second
# run would index into the already-flat string labels and keep only their first
# character ('Close' -> 'C').
if isinstance(btc_data.columns, pd.MultiIndex):
    btc_data.columns = [col[0] for col in btc_data.columns]

In [None]:
# Time Series data
# Work on a copy so that nothing done to ts_copy can alter btc_data.
ts_copy = btc_data.copy()

In [None]:
# Keep only the row index of the price frame, as a Series.
# Read it from btc_data rather than from ts_copy itself: ts_copy is re-bound by
# this very cell, so the self-referential form returns the positional index
# (0, 1, 2, ...) instead of the dates when the cell is executed a second time.
ts_copy = pd.Series(btc_data.index)

In [None]:
# Wrap the Series in a single-column DataFrame.
ts_copy = pd.DataFrame(ts_copy)

In [None]:
# Explore data: list the rows that contain missing or empty values.
# DataFrame.any() skips NaN by default and treats 0, False and "" as falsy, so
# ~any(axis=1) on its own only matches rows in which *every* column is empty.
# Rows where just some cells are NaN are caught by the explicit isna() test.
has_missing = btc_data.isna().any(axis=1)
all_empty = ~btc_data.any(axis=1)
btc_data[has_missing | all_empty]

In [None]:
# Let's see the data distribution: draw histograms of the btc_data columns
# on a 20x15 inch figure.
btc_data.hist(figsize=(20, 15))
plt.show()

In [None]:
# Data windowing: cut the series into (input window, label window) pairs.
# The model makes a set of predictions based on a window of consecutive samples from the price data.
# A window is described by:
#   - the width (number of time steps) of the input and label windows,
#   - the time offset between them,
#   - which features are used as inputs, labels, or both.
# One time step is 1 day.

# Window widths, in days: rows fed to the model, then rows it has to predict.
input_width, label_width = 3, 3

def sliding_window(input_width, label_width, df, label_columns=('Date', 'Close')):
    """Cut ``df`` into consecutive, non-overlapping (input, label) windows.

    Each window spans ``input_width + label_width`` rows: the first
    ``input_width`` rows (all columns) form the input, the following
    ``label_width`` rows (``label_columns`` only) form the label. The next
    window starts right after the previous one ends.

    Only complete windows are returned. Trailing rows that cannot fill a whole
    window are dropped; keeping them would produce sub-arrays of different
    lengths, which ``np.array`` cannot stack into a regular 3-D array.

    Parameters
    ----------
    input_width : int
        Number of rows in every input window (must be >= 1).
    label_width : int
        Number of rows in every label window (must be >= 1).
    df : pandas.DataFrame
        Source rows in chronological order; must contain ``label_columns``.
    label_columns : sequence of str, optional
        Columns copied into the label windows. Defaults to ``('Date', 'Close')``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(inputs, labels)`` with shapes
        ``(n_windows, input_width, len(df.columns))`` and
        ``(n_windows, label_width, len(label_columns))``. Both arrays are empty
        when ``df`` is shorter than one full window.

    Raises
    ------
    ValueError
        If ``input_width`` or ``label_width`` is smaller than 1.
    """
    if input_width < 1 or label_width < 1:
        raise ValueError("input_width and label_width must both be >= 1")

    window_size = input_width + label_width
    labels_df = df[list(label_columns)]

    inputs = []
    labels = []

    # The upper bound stops at the last start position that still leaves room
    # for a full window, so no truncated slice is ever appended.
    for start in range(0, len(df) - window_size + 1, window_size):
        split = start + input_width
        # .iloc keeps the slicing positional regardless of the index type.
        inputs.append(df.iloc[start:split].values)
        labels.append(labels_df.iloc[split:start + window_size].values)

    return np.array(inputs), np.array(labels)


In [None]:
# Independent copy of the price frame to build the model inputs from.
input_df = btc_data.copy()

In [None]:
# Move the index into a regular column so it is carried inside every window.
# NOTE(review): sliding_window selects a 'Date' column, so the index is
# presumably named 'Date' — confirm.
input_df_reset = input_df.reset_index()

In [None]:
# Build the input windows (all columns) and label windows (Date and Close).
X_window_data, y_window_data = sliding_window(input_width, label_width, input_df_reset)

In [None]:
# Stacked input windows; expected (n_windows, input_width, n_columns) — TODO confirm.
X_window_data.shape

In [None]:
# Stacked label windows; expected (n_windows, label_width, 2) for Date and Close — TODO confirm.
y_window_data.shape

In [None]:
# Train Test Split
# 20% of the windows are held out for testing; shuffle=False keeps the windows
# in their original order instead of drawing them at random.
X_train_full, X_test, y_train_full, y_test = train_test_split(X_window_data, y_window_data, test_size=0.2, shuffle=False)

In [None]:
# Splitting further into training and validation sets, again without shuffling.
# No test_size is given, so scikit-learn's default split fraction applies
# (NOTE(review): presumably 25% — confirm for the installed version).
X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, y_train_full, shuffle=False)

In [None]:
# Shape of the training inputs after both splits.
X_train.shape

In [None]:
# Shape of the training labels after both splits.
y_train.shape

In [None]:
# Keep the first column of every window (the timestamps) for each split, so the
# dates stay available once that column is removed from the model inputs.
X_train_time_index = X_train[:, :, 0]
X_valid_time_index = X_valid[:, :, 0]
X_test_time_index = X_test[:, :, 0]

In [None]:
# Drop the leading Date column from every input window so only the feature
# columns remain.
# The arrays are re-bound in place, so an unguarded second run of this cell would
# silently strip one more (real) feature column each time. The check compares
# against the number of non-Date columns of the windowed frame and makes the
# cell a no-op once the Date column is gone.
n_feature_columns = input_df_reset.shape[1] - 1
if X_train.shape[-1] > n_feature_columns:
    X_train = X_train[:, :, 1:]
    X_valid = X_valid[:, :, 1:]
    X_test = X_test[:, :, 1:]

In [None]:
# Drop the leading Date column from every label window, leaving only Close.
# Label windows are built from two columns (Date, Close). Once the last axis is
# down to a single column the Date has already been removed, so re-running this
# cell must not slice again (that would leave zero-width labels).
if y_train.shape[-1] > 1:
    y_train = y_train[:, :, 1:]
    y_valid = y_valid[:, :, 1:]
    y_test = y_test[:, :, 1:]

In [None]:
# Flatten every (time_steps, features) window into one row so the scikit-learn
# transformers, which expect 2-D input, can process it.
# The row width is derived from the window shape rather than hard-coded as 18,
# so the cell keeps working if input_width or the number of columns changes.
flat_width = X_train.shape[1] * X_train.shape[2]
X_train_reshaped = X_train.reshape(-1, flat_width)
X_valid_reshaped = X_valid.reshape(-1, flat_width)
X_test_reshaped = X_test.reshape(-1, flat_width)

In [None]:
# Scaling the data by standardizing it, because RNNs are sensitive to outliers
# and need their inputs on comparable scales; missing values are filled with the
# column median.
# NOTE(review): these two standalone objects are not referenced by the pipeline
# built in the following cell, which creates its own instances — confirm whether
# they are still needed.
scaler = StandardScaler()
imputer = SimpleImputer(strategy="median")

In [None]:
# Preprocessing chain: median imputation first, standardization second.
preprocessing_steps = [
    ("imputer", SimpleImputer(strategy="median")),
    ("std_scaler", StandardScaler()),
]
num_pipeline = Pipeline(preprocessing_steps)

In [None]:
# Fit the imputer and scaler on the training rows only, then apply the same
# fitted transform to the validation and test rows.
X_train_scaled = num_pipeline.fit_transform(X_train_reshaped)
X_valid_scaled = num_pipeline.transform(X_valid_reshaped)
X_test_scaled = num_pipeline.transform(X_test_reshaped)

In [None]:
# Reshape the scaled inputs back into 3-D arrays of (windows, time_steps, features).
# The per-window shape is taken from the unscaled arrays instead of the literals
# 3 and 6, so it follows input_width and the column count automatically.
window_shape = X_train.shape[1:]
X_train_3d = X_train_scaled.reshape(-1, *window_shape)
X_valid_3d = X_valid_scaled.reshape(-1, *window_shape)
X_test_3d = X_test_scaled.reshape(-1, *window_shape)

In [None]:
# The network must emit one value per label time step and per label feature, so
# both sizes are read from the label array. Taking the step count from the input
# array only works while input_width happens to equal label_width.
OUT_STEPS = y_train.shape[1]
num_features = y_train.shape[2]

multi_lstm_model = tf.keras.Sequential([
    # Shape [batch, time_steps, features] => [batch, lstm_units]
    # return_sequences=False: only the state after the last input step is passed on.
    tf.keras.layers.LSTM(32, return_sequences=False),
    # Shape => [batch, out_steps*features]
    tf.keras.layers.Dense(OUT_STEPS * num_features, kernel_initializer=tf.initializers.zeros()),
    # Shape => [batch, out_steps, features]
    tf.keras.layers.Reshape([OUT_STEPS, num_features]),
])

In [None]:
# Train against mean squared error and report mean absolute error as an extra metric.
# No optimizer is passed, so Keras falls back to its default
# (NOTE(review): presumably "rmsprop" — confirm for the installed version).
multi_lstm_model.compile(loss=tf.losses.MeanSquaredError(),
                        metrics=[tf.metrics.MeanAbsoluteError()])

In [None]:
# Cast the label arrays to float before they are handed to Keras.
# y_test is converted together with the other two splits so that all three
# label arrays share the same dtype when the model is later evaluated.
y_train = y_train.astype("float")
y_valid = y_valid.astype("float")
y_test = y_test.astype("float")

In [None]:
# Train for 20 epochs. The validation split prepared earlier is passed in so that
# every epoch also reports loss/MAE on unseen windows and `history` records them;
# validation data is only evaluated, never used to update the weights.
history = multi_lstm_model.fit(
    X_train_3d,
    y_train,
    epochs=20,
    validation_data=(X_valid_3d, y_valid),
)