### Import Libraries

In [21]:
import numpy as np 
import pandas as pd 
import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, Dropout, LSTM
from sklearn.metrics import mean_absolute_error
from datetime import datetime

import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=FutureWarning)

### Import data

In [22]:
crypto_df = pd.read_csv("../input/g-research-crypto-forecasting/train.csv") 

In [23]:
crypto_df.head()

Unnamed: 0,timestamp,Asset_ID,Count,Open,High,Low,Close,Volume,VWAP,Target
0,1514764860,2,40.0,2376.58,2399.5,2357.14,2374.59,19.233005,2373.116392,-0.004218
1,1514764860,0,5.0,8.53,8.53,8.53,8.53,78.38,8.53,-0.014399
2,1514764860,1,229.0,13835.194,14013.8,13666.11,13850.176,31.550062,13827.062093,-0.014643
3,1514764860,5,32.0,7.6596,7.6596,7.6567,7.6576,6626.71337,7.657713,-0.013922
4,1514764860,7,5.0,25.92,25.92,25.874,25.877,121.08731,25.891363,-0.008264


In [24]:
asset_details = pd.read_csv('../input/g-research-crypto-forecasting/asset_details.csv')
asset_details

Unnamed: 0,Asset_ID,Weight,Asset_Name
0,2,2.397895,Bitcoin Cash
1,0,4.304065,Binance Coin
2,1,6.779922,Bitcoin
3,5,1.386294,EOS.IO
4,7,2.079442,Ethereum Classic
5,6,5.894403,Ethereum
6,9,2.397895,Litecoin
7,11,1.609438,Monero
8,13,1.791759,TRON
9,12,2.079442,Stellar


In [25]:
# Select Asset_ID = 6 for Ethereum
crypto_df = crypto_df[crypto_df["Asset_ID"]==6] 
crypto_df.info(show_counts =True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1956200 entries, 5 to 24236799
Data columns (total 10 columns):
 #   Column     Non-Null Count    Dtype  
---  ------     --------------    -----  
 0   timestamp  1956200 non-null  int64  
 1   Asset_ID   1956200 non-null  int64  
 2   Count      1956200 non-null  float64
 3   Open       1956200 non-null  float64
 4   High       1956200 non-null  float64
 5   Low        1956200 non-null  float64
 6   Close      1956200 non-null  float64
 7   Volume     1956200 non-null  float64
 8   VWAP       1956200 non-null  float64
 9   Target     1955860 non-null  float64
dtypes: float64(8), int64(2)
memory usage: 164.2 MB


###  Preprocessing

In [26]:
df = crypto_df.copy()

In [27]:
# fill missing values 
df = df.reindex(range(df.index[0],df.index[-1]+60,60),method='pad')
df = df.fillna(0)

In [28]:
# rename column timestamp to Date
df.rename({'timestamp': 'Date'}, axis=1, inplace=True)
df.rename(columns={'Close': 'Price'}, inplace=True)

In [29]:
# set index
df.set_index('Date', inplace=True)

In [30]:
# Convert to date array
timesteps = df.index.to_numpy()
prices = df['Price'].to_numpy()

timesteps[:10], prices[:10]

(array([1514764860, 1514765280, 1514765760, 1514766240, 1514766660,
        1514767140, 1514767620, 1514768100, 1514768580, 1514769060]),
 array([738.5075, 735.09  , 734.8025, 731.82  , 732.9325, 732.3425,
        731.8225, 732.1325, 732.605 , 729.415 ]))

### Modeling Dense model

In [31]:
# Create Window dataset
HORIZON = 1      # predict 1 step at a time
WINDOW_SIZE = 7  # use a week worth of timesteps to predict the horizon

In [32]:
# Create function to label windowed data
def get_labelled_windows(x, horizon=1):
  """
  Creates labels for windowed dataset.
  E.g. if horizon=1 (default)
  Input: [1, 2, 3, 4, 5, 6] -> Output: ([1, 2, 3, 4, 5], [6])
  """
  return x[:, :-horizon], x[:, -horizon:]

In [48]:
# Test the window labelling function
test_window, test_label = get_labelled_windows(tf.expand_dims(tf.range(8)+1, axis=0), horizon=HORIZON)
print(f"Window: {tf.squeeze(test_window).numpy()} -> Label: {tf.squeeze(test_label).numpy()}")

Window: [1 2 3 4 5 6 7] -> Label: 8


In [34]:
# Create function to view NumPy arrays as windows
def make_windows(x, window_size=7, horizon=1):
  """
  Turns a 1D array into a 2D array of sequential windows of window_size.
  """
  window_step = np.expand_dims(np.arange(window_size+horizon), axis=0)
  window_indexes = window_step + np.expand_dims(np.arange(len(x)-(window_size+horizon-1)), axis=0).T 
  windowed_array = x[window_indexes]
  windows, labels = get_labelled_windows(windowed_array, horizon=horizon)

  return windows, labels

In [35]:
full_windows, full_labels = make_windows(prices, window_size=WINDOW_SIZE, horizon=HORIZON)
len(full_windows), len(full_labels)

(403941, 403941)

In [36]:
# Create function for train-test-split
def make_train_test_splits(windows, labels, test_split=0.2):
  """
  Splits matching pairs of windows and labels into train and test splits.
  """
  split_size = int(len(windows) * (1-test_split)) 
  train_windows = windows[:split_size]
  train_labels = labels[:split_size]
  test_windows = windows[split_size:]
  test_labels = labels[split_size:]
  return train_windows, test_windows, train_labels, test_labels

In [37]:
train_windows, test_windows, train_labels, test_labels = make_train_test_splits(full_windows, full_labels)
len(train_windows), len(test_windows), len(train_labels), len(test_labels)

(323152, 80789, 323152, 80789)

In [38]:
train_windows[:5], train_labels[:5]

(array([[738.5075, 735.09  , 734.8025, 731.82  , 732.9325, 732.3425,
         731.8225],
        [735.09  , 734.8025, 731.82  , 732.9325, 732.3425, 731.8225,
         732.1325],
        [734.8025, 731.82  , 732.9325, 732.3425, 731.8225, 732.1325,
         732.605 ],
        [731.82  , 732.9325, 732.3425, 731.8225, 732.1325, 732.605 ,
         729.415 ],
        [732.9325, 732.3425, 731.8225, 732.1325, 732.605 , 729.415 ,
         731.32  ]]),
 array([[732.1325],
        [732.605 ],
        [729.415 ],
        [731.32  ],
        [733.5625]]))

In [39]:
# Create model callbacks
import os

# Create a function to implement a ModelCheckpoint callback with a specific filename 
def create_model_checkpoint(model_name, save_path="model_experiments"):
  return tf.keras.callbacks.ModelCheckpoint(filepath=os.path.join(save_path, model_name), # create filepath to save model
                                            verbose=0,                                    # only output a limited amount of text
                                            save_best_only=True)                          # save only the best model to file

#### Dense model - window = 7 horizon = 1

In [55]:
import tensorflow as tf
from tensorflow.keras import layers

# Set random seed for reproducible results 
tf.random.set_seed(42)

# Construct the model
dense_model = tf.keras.Sequential(
[
  layers.Dense(128, activation="relu"),
  layers.Dense(HORIZON, activation="linear") # linear activation is the same as having no activation                        
], name="dense_model_1")                     # name of the model to save

# Compile the model
dense_model.compile(loss="mae",
                optimizer=tf.keras.optimizers.Adam(),
                metrics=["mae"])             

# Fit the model
dense_model.fit(x=train_windows,             # train windows of 7 timesteps of Ethereum prices
            y=train_labels,                  # horizon value of 1 (using the previous 7 timesteps to predict next day)
            epochs=100,
            verbose=1,
            batch_size=128,
            validation_data=(test_windows, test_labels),
            callbacks=[create_model_checkpoint(model_name=dense_model.name)]) # create ModelCheckpoint callback 
                                                                              # to save best model

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7f10661e4710>

In [56]:
# Evaluate model on the test data
dense_model.evaluate(test_windows, test_labels)



[5.480630397796631, 5.480630397796631]

In [57]:
# Load in saved best performing model and evaluate on the test data
dense_model = tf.keras.models.load_model("model_experiments/dense_model_1")
dense_model.evaluate(test_windows, test_labels)



[5.37371826171875, 5.37371826171875]

In [58]:
# Function for forecasting on the test dataset
def make_preds(model, input_data):
  """
  Uses model to make predictions on input_data.

  Parameters
  ----------
       model: trained model 
  input_data: windowed input data (same kind of data model was trained on)

  Returns model predictions on input_data.
  """
  forecast = model.predict(input_data)
  # return 1D array of predictions  
  return tf.squeeze(forecast)         

In [59]:
# Make predictions using dense_model on the test dataset and view the results
dense_model_preds = make_preds(dense_model, test_windows)
len(dense_model_preds), dense_model_preds[:10]

(80789,
 <tf.Tensor: shape=(10,), dtype=float32, numpy=
 array([1225.3612, 1231.0071, 1229.0295, 1228.6338, 1227.2433, 1227.8109,
        1228.2229, 1221.4147, 1225.6765, 1227.5248], dtype=float32)>)

In [60]:
# Function to evaluate prediction
def evaluate_preds(y_true, y_pred):
  # Make sure float32 (for metric calculations)
  y_true = tf.cast(y_true, dtype=tf.float32)
  y_pred = tf.cast(y_pred, dtype=tf.float32)

  # Calculate various metrics
  mae = tf.keras.metrics.mean_absolute_error(y_true, y_pred)
  mse = tf.keras.metrics.mean_squared_error(y_true, y_pred)
  rmse = tf.sqrt(mse)
  mape = tf.keras.metrics.mean_absolute_percentage_error(y_true, y_pred)
  
  return {"mae": mae.numpy(),
          "mse": mse.numpy(),
          "rmse": rmse.numpy(),
          "mape": mape.numpy()}

In [61]:
# Evaluate prediction
dense_model_results = evaluate_preds(y_true=tf.squeeze(test_labels), # reduce to right shape
                                     y_pred=dense_model_preds)
dense_model_results

{'mae': 5.373713, 'mse': 78.89699, 'rmse': 8.882398, 'mape': 0.22432236}