# Deep Learning

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Default size (width, height in inches) for every figure drawn in this notebook.
plt.rc("figure", figsize=[10, 6])

In [2]:
# Load the data
def _read_timeseries_csv(path: str) -> pd.DataFrame:
    """Read a CSV keyed by a ``timestamp`` column into a time-sorted frame.

    The ``timestamp`` column is parsed as dates and used as the index, rows
    are ordered by that index, and ``convert_dtypes()`` is applied to the
    result.
    """
    frame = pd.read_csv(path, parse_dates=["timestamp"], index_col="timestamp")
    return frame.sort_index().convert_dtypes()


merged_df = _read_timeseries_csv("../data/merged_df.csv")
arima_predictions_T = _read_timeseries_csv('../data/arima_predictions_T.csv')
arima_predictions_V = _read_timeseries_csv('../data/arima_predictions_VZ.csv')

In [3]:
# Preview the first ten rows of the merged frame.
merged_df.head(n=10)

Unnamed: 0_level_0,close_t,close_v,t_diff,v_diff
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-01-02,29.54,56.02,0.0,0.0
2019-01-03,29.58,56.22,0.04,0.2
2019-01-04,30.34,56.36,0.76,0.14
2019-01-05,30.34,56.36,0.0,0.0
2019-01-06,30.34,56.36,0.0,0.0
2019-01-07,30.89,56.72,0.55,0.36
2019-01-08,31.28,58.38,0.39,1.66
2019-01-09,30.1,57.05,-1.18,-1.33
2019-01-10,30.4,57.6,0.3,0.55
2019-01-11,30.87,58.02,0.47,0.42


Before running the RNN and LSTM models, you need to create the independent (X) and dependent (y) variables. Here a 10-step lag window is used: each sample's X holds 10 consecutive values of the series, and its y is the value that immediately follows them. Alternatively, you can choose a different step size. Then, in order to feed the RNN model, you need to reshape X into three-dimensional data whose dimensions are samples, time steps, and features. The number of features is 1, as the model is univariate.

In [5]:
# Pull each closing-price column out of the merged frame as its own series.
t_dataset, v_dataset = merged_df['close_t'], merged_df['close_v']

# Both series must have the same length so one split point serves both.
assert len(t_dataset) == len(v_dataset)

# Chronological split (no shuffling): the first 95% of rows are the training
# set and the remaining rows are the test set.
split_point = int(len(t_dataset) * 0.95)

t_nn_train, t_nn_test = t_dataset.iloc[:split_point], t_dataset.iloc[split_point:]
v_nn_train, v_nn_test = v_dataset.iloc[:split_point], v_dataset.iloc[split_point:]

In [7]:
# Create a function to split the dataset into sub-sequences
def create_dataset(data: "pd.Series | np.ndarray", window_size: int) -> tuple:
    """Slice a sequence into overlapping look-back windows and next-step targets.

    Sample ``i`` pairs the window ``data[i : i + window_size]`` with the
    target ``data[i + window_size]``, i.e. the value that immediately follows
    the window.

    Parameters
    ----------
    data
        One-dimensional sequence of observations in time order. A pandas
        Series, a NumPy array or a plain list is accepted; only the values
        are used, any index is ignored.
    window_size
        Number of consecutive observations in each input window. Must be at
        least 1.

    Returns
    -------
    tuple
        ``(X, y)`` where ``X`` has shape ``(n_samples, window_size)`` and
        ``y`` has shape ``(n_samples,)``, with
        ``n_samples = max(len(data) - window_size, 0)``. When ``data`` is too
        short to form a single sample, ``X`` is still two-dimensional
        (shape ``(0, window_size)``) so callers can rely on ``X.shape[1]``.

    Raises
    ------
    ValueError
        If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    # Work on a plain array so every lookup below is positional. Integer
    # indexing directly on a Series is label-based and would depend on what
    # the index happens to contain.
    values = np.asarray(data)
    n_samples = len(values) - window_size

    if n_samples <= 0:
        # Not enough observations for even one (window, target) pair: return
        # empty arrays that keep the documented rank.
        trailing = values.shape[1:]
        return (
            np.empty((0, window_size) + trailing, dtype=values.dtype),
            np.empty((0,) + trailing, dtype=values.dtype),
        )

    X = np.stack([values[i:i + window_size] for i in range(n_samples)])
    # The target of window i is element i + window_size, so the targets are
    # simply the sequence with its first `window_size` elements dropped.
    y = values[window_size:].copy()
    return X, y

In [8]:
# Split the dataset
n_steps = 10     # look-back window: number of past observations per sample
n_features = 1   # univariate model: one value per time step


def _to_supervised(series: pd.Series) -> tuple:
    """Window one series and reshape X to (samples, time steps, features)."""
    X, y = create_dataset(series.values, n_steps)
    return X.reshape(X.shape[0], X.shape[1], n_features), y


t_X_train, t_y_train = _to_supervised(t_nn_train)
t_X_test, t_y_test = _to_supervised(t_nn_test)

# The v_* training windows are built from v_nn_train. They were previously
# built from t_nn_train, so the v_* training data came from the wrong series.
v_X_train, v_y_train = _to_supervised(v_nn_train)
v_X_test, v_y_test = _to_supervised(v_nn_test)