### Import Libraries

In [21]:
import warnings
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import mean_absolute_error
from tensorflow.keras import layers
from tensorflow.keras.layers import Activation, Dense, Dropout, LSTM
from tensorflow.keras.models import Sequential

warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=FutureWarning)

### Import data

In [22]:
# Read train.csv from the g-research-crypto-forecasting input folder into a DataFrame
crypto_df = pd.read_csv("../input/g-research-crypto-forecasting/train.csv")

In [23]:
# Preview the first rows of the loaded table
crypto_df.head()

In [24]:
# Read the asset lookup table (asset_details.csv) and display it in full
asset_details = pd.read_csv('../input/g-research-crypto-forecasting/asset_details.csv')
asset_details

In [25]:
# Keep only the Ethereum rows (Asset_ID 6), then summarise columns and non-null counts
crypto_df = crypto_df.loc[crypto_df["Asset_ID"].eq(6)]
crypto_df.info(show_counts=True)

### Preprocessing

In [26]:
# Work on a copy so preprocessing does not modify the filtered crypto_df
df = crypto_df.copy()

In [27]:
# fill missing values
# Index by timestamp BEFORE reindexing: the 60-step grid below is meant to walk
# through timestamps (assumed to be in seconds, i.e. one row per minute — confirm),
# not through the leftover CSV row labels that remain after filtering by Asset_ID.
df = df.set_index('timestamp').sort_index()  # 'pad' requires a monotonic index
# Any timestamp missing from the regular grid is forward-filled from the last known row
df = df.reindex(range(df.index[0], df.index[-1]+60, 60), method='pad')
# Remaining NaNs (values that were already missing in the source rows) become 0
df = df.fillna(0)
# Put `timestamp` back as an ordinary first column so later cells can rename / index on it
df = df.rename_axis('timestamp').reset_index()

In [28]:
# Rename both columns in one pass: timestamp -> Date, Close -> Price
df = df.rename(columns={'timestamp': 'Date', 'Close': 'Price'})

In [29]:
# Use the Date column as the row index
df = df.set_index('Date')

In [30]:
# Extract plain NumPy arrays: the Price column and the index values
# (the index holds the renamed `timestamp` column, not parsed datetimes)
prices = df['Price'].to_numpy()
timesteps = df.index.to_numpy()

# Show the first ten entries of each array
timesteps[:10], prices[:10]

### Modeling: Dense model

In [31]:
# Windowing hyperparameters used to build the dataset
HORIZON = 1      # number of future timesteps to predict per window
WINDOW_SIZE = 7  # number of past timesteps fed to the model (7 consecutive rows of `prices`, not 7 days — NOTE(review): confirm the sampling interval)

In [32]:
# Create function to label windowed data
def get_labelled_windows(x, horizon=1):
  """
  Splits every row of a 2D batch of windows into (window, label) parts.

  The last `horizon` columns of each row become the labels and all columns
  before them form the input window.
  E.g. if horizon=1 (default)
  Input: [[1, 2, 3, 4, 5, 6]] -> Output: ([[1, 2, 3, 4, 5]], [[6]])

  Parameters
  ----------
        x: 2D array or tensor supporting `x[:, a:b]` slicing, one window per row
  horizon: number of trailing columns to use as labels, must be >= 1

  Returns a tuple (windows, labels).

  Raises ValueError if horizon < 1.
  """
  # `x[:, :-0]` is `x[:, :0]`, so horizon=0 would silently return empty windows
  if horizon < 1:
    raise ValueError(f"horizon must be >= 1, got {horizon}")
  return x[:, :-horizon], x[:, -horizon:]

In [48]:
# Sanity check: label a single row holding 1..8 and print the resulting split
test_window, test_label = get_labelled_windows(
    tf.expand_dims(tf.range(8)+1, axis=0),
    horizon=HORIZON,
)
print(f"Window: {tf.squeeze(test_window).numpy()} -> Label: {tf.squeeze(test_label).numpy()}")

In [34]:
# Create function to view NumPy arrays as windows
def make_windows(x, window_size=7, horizon=1):
  """
  Slices a 1D NumPy array into overlapping rows of length
  window_size + horizon (each row starts one element after the previous
  one) and splits every row into (window, label) with get_labelled_windows.

  Parameters
  ----------
            x: 1D NumPy array to slice
  window_size: number of elements in each input window
      horizon: number of elements in each label

  Returns a tuple (windows, labels).
  """
  span = window_size + horizon
  num_windows = len(x) - (span - 1)

  # Column of start positions + row of in-window offsets -> 2D index grid
  start_positions = np.arange(num_windows)[:, np.newaxis]
  offsets = np.arange(span)[np.newaxis, :]
  windowed_array = x[start_positions + offsets]

  return get_labelled_windows(windowed_array, horizon=horizon)

In [35]:
# Window the whole price series, then show how many windows and labels were produced
full_windows, full_labels = make_windows(prices, window_size=WINDOW_SIZE, horizon=HORIZON)
len(full_windows), len(full_labels)

In [36]:
# Create function for train-test-split
def make_train_test_splits(windows, labels, test_split=0.2):
  """
  Cuts windows and labels at the same position, without shuffling: the
  leading (1 - test_split) share becomes the train set and the remainder
  the test set.

  Returns (train_windows, test_windows, train_labels, test_labels).
  """
  cutoff = int(len(windows) * (1-test_split))
  train_windows, test_windows = windows[:cutoff], windows[cutoff:]
  train_labels, test_labels = labels[:cutoff], labels[cutoff:]
  return train_windows, test_windows, train_labels, test_labels

In [37]:
# Split with the default test_split (0.2), then show the size of each part
train_windows, test_windows, train_labels, test_labels = make_train_test_splits(full_windows, full_labels)
len(train_windows), len(test_windows), len(train_labels), len(test_labels)

In [38]:
# Inspect the first five training windows next to their labels
train_windows[:5], train_labels[:5]

In [39]:
# Create model callbacks
import os

# Create a function to implement a ModelCheckpoint callback with a specific filename 
def create_model_checkpoint(model_name, save_path="model_experiments"):
  """
  Builds a Keras ModelCheckpoint callback that writes to <save_path>/<model_name>.

  Parameters
  ----------
  model_name: last path component of the checkpoint location
   save_path: directory the checkpoint is placed under

  Returns the configured tf.keras.callbacks.ModelCheckpoint.
  """
  checkpoint_path = os.path.join(save_path, model_name)
  return tf.keras.callbacks.ModelCheckpoint(
      filepath=checkpoint_path,  # where the model is saved
      verbose=0,                 # keep the callback quiet
      save_best_only=True)       # keep only the best model on disk

### Dense model (window = 7, horizon = 1)

In [55]:
# Set random seed for reproducible results 
tf.random.set_seed(42)

# Construct the model: one hidden ReLU layer followed by a linear output of size HORIZON
dense_model = tf.keras.Sequential(
[
  layers.Dense(128, activation="relu"),
  layers.Dense(HORIZON, activation="linear") # linear activation is the same as having no activation                        
], name="dense_model_1")                     # the name is reused as the checkpoint filename

# Compile the model
dense_model.compile(loss="mae",
                optimizer=tf.keras.optimizers.Adam(),
                metrics=["mae"])             

# Fit the model
# Validation uses the tail of the TRAINING windows (validation_split takes the
# last samples before shuffling), so the checkpoint chosen by save_best_only is
# never selected on the test set; test_windows stay untouched for the final evaluation.
dense_model.fit(x=train_windows,             # train windows of WINDOW_SIZE timesteps of Ethereum prices
            y=train_labels,                  # the HORIZON timestep(s) that follow each window
            epochs=100,
            verbose=1,
            batch_size=128,
            validation_split=0.1,
            callbacks=[create_model_checkpoint(model_name=dense_model.name)]) # create ModelCheckpoint callback 
                                                                              # to save best model

In [56]:
# Evaluate the model as it stands after the final training epoch on the test data
dense_model.evaluate(test_windows, test_labels)

In [57]:
# Load in saved best performing model and evaluate on the test data
# The path is the checkpoint location: default save_path "model_experiments" + model name "dense_model_1".
# Note: this rebinds `dense_model` to the reloaded checkpoint.
dense_model = tf.keras.models.load_model("model_experiments/dense_model_1")
dense_model.evaluate(test_windows, test_labels)

In [58]:
# Function for forecasting on the test dataset
def make_preds(model, input_data):
  """
  Runs model.predict on input_data and squeezes the result.

  Parameters
  ----------
       model: trained model exposing a `predict` method
  input_data: windowed input data (same kind of data model was trained on)

  Returns the predictions with all size-1 dimensions removed (tf.squeeze).
  """
  return tf.squeeze(model.predict(input_data))

In [59]:
# Make predictions using dense_model on the test dataset and view the results
# (number of predictions and the first ten values)
dense_model_preds = make_preds(dense_model, test_windows)
len(dense_model_preds), dense_model_preds[:10]

In [60]:
# Function to evaluate prediction
def evaluate_preds(y_true, y_pred):
  """
  Computes MAE, MSE, RMSE and MAPE between y_true and y_pred.

  Both inputs are cast to float32 before the metrics are calculated.

  Returns a dict with keys "mae", "mse", "rmse" and "mape" holding the
  metric values converted with `.numpy()`.
  """
  # Make sure float32 (for metric calculations)
  y_true, y_pred = (tf.cast(values, dtype=tf.float32) for values in (y_true, y_pred))

  # RMSE is derived from MSE, so compute MSE once up front
  mse = tf.keras.metrics.mean_squared_error(y_true, y_pred)
  metric_tensors = {
      "mae": tf.keras.metrics.mean_absolute_error(y_true, y_pred),
      "mse": mse,
      "rmse": tf.sqrt(mse),
      "mape": tf.keras.metrics.mean_absolute_percentage_error(y_true, y_pred),
  }

  return {name: tensor.numpy() for name, tensor in metric_tensors.items()}

In [61]:
# Evaluate prediction: compare the squeezed test labels with the dense model's predictions
dense_model_results = evaluate_preds(y_true=tf.squeeze(test_labels), # drop size-1 dims, as make_preds does for the predictions
                                     y_pred=dense_model_preds)
dense_model_results