### Import Libraries

In [45]:
import numpy as np  
import pandas as pd  

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, Dropout, LSTM

import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error
from datetime import datetime

import warnings
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=FutureWarning)

In [46]:
# Load the raw training data as-is (no date parsing or index is set here;
# the `timestamp` column is converted in a later cell)
crypto_df = pd.read_csv("../input/g-research-crypto-forecasting/train.csv") 

In [47]:
# Preview the first rows of the raw training data
crypto_df.head()

In [48]:
# Load the asset details table and display it in full
asset_details = pd.read_csv('../input/g-research-crypto-forecasting/asset_details.csv')
asset_details

### Preprocessing

In [49]:
# Work on a copy so the preprocessing below never modifies the raw frame
df = crypto_df.copy()

In [50]:
# Rename the `timestamp` column to `Date` (in place, on the working copy)
df.rename({'timestamp': 'Date'}, axis=1, inplace=True)

In [51]:
# Convert epoch seconds to a 'YYYY-MM-DD' date string, interpreted in UTC.
# pd.to_datetime(unit="s") is vectorised and gives the same date on every machine,
# whereas a per-row datetime.fromtimestamp() uses the host's local timezone.
# NOTE(review): formatting at day resolution gives all rows of the same day an
# identical Date label - confirm that is intended if the data is intraday.
df["Date"] = pd.to_datetime(df["Date"].astype("int64"), unit="s").dt.strftime("%Y-%m-%d")

In [52]:
# Keep only the Ethereum rows (Asset_ID = 6) and index them by Date
df = df[df["Asset_ID"]==6].set_index("Date")
# Summarise column dtypes together with their non-null counts
df.info(show_counts =True)

In [53]:
# Replace every missing value in the frame with 0.
# NOTE(review): a 0 written into a price column is indistinguishable from a real
# price downstream - confirm the columns used later contain no missing values.
df = df.fillna(0)

In [54]:
# Rename the `Close` column to `Price`; it is the series used from here on
df.rename(columns={'Close': 'Price'}, inplace=True)

In [55]:
# Pull the Date index and the Price column out as NumPy arrays
timesteps = df.index.to_numpy()
prices = df['Price'].to_numpy()

In [56]:
# How many samples (rows) are left after the preprocessing above?
len(df)

In [57]:
# Chronological split for time series data (no shuffling):
# the first 80% of samples form the train set, the remaining 20% the test set
split_size = int(0.8 * len(prices))

# Cut both arrays at the same index so timesteps and prices stay aligned
X_train, X_test = np.split(timesteps, [split_size])
y_train, y_test = np.split(prices, [split_size])

# Sizes of (X_train, X_test, y_train, y_test)
tuple(len(part) for part in (X_train, X_test, y_train, y_test))

### Create a plotting function

In [58]:
# Create a function to plot time series data
def plot_time_series(timesteps, values, format='.', start=0, end=None, label=None):
  """
  Draw `values` against `timesteps` using the pyplot interface.

  Parameters
  ---------
  timesteps : sequence of x-axis positions (points in time)
  values : sequence of y-axis values, one per timestep
  format : format string handed to plt.plot, default "."
  start : index of the first point to draw, default 0
  end : index at which drawing stops (exclusive); None draws through the last point
  label : legend entry for this series; a legend is only drawn when this is truthy
  """
  # A single slice object keeps the x and y windows aligned
  window = slice(start, end)
  plt.plot(timesteps[window], values[window], format, label=label)

  # Axis cosmetics (the y-axis label is fixed to the ETH price)
  plt.xlabel("Time")
  plt.ylabel("ETH Price")
  plt.grid(False)

  # Only add a legend when the series was given a label
  if label:
    plt.legend(fontsize=14) # larger font so the label is easy to read

In [59]:
# Plot the train and test portions of the price series on one figure
plt.figure(figsize=(15, 7))
plot_time_series(timesteps=X_train, values=y_train, label="Train data")
plot_time_series(timesteps=X_test, values=y_test, label="Test data")

### Modeling: Naive forecast 

In [60]:
# Naive forecast: every test value except the last one. It is paired with
# X_test[1:] / y_test[1:] below, so each prediction is the previous observed price.
naive_forecast = y_test[:-1]  
# Peek at the first and last 10 forecast values
naive_forecast[:10], naive_forecast[-10:] 

In [61]:
# Plot the naive forecast on top of the train and test data
plt.figure(figsize=(15, 7))
plot_time_series(timesteps=X_train, values=y_train, label="Train data")
plot_time_series(timesteps=X_test, values=y_test, label="Test data")
# The forecast has one value fewer than y_test, so it is drawn against X_test[1:]
plot_time_series(timesteps=X_test[1:], values=naive_forecast, format="-", label="Naive forecast");

In [62]:
# Zoomed-in view: test data against the naive forecast, skipping the first points
plt.figure(figsize=(15, 7))
offset = 300 # index of the first plotted point; passed as `start` to both calls below
plot_time_series(timesteps=X_test, values=y_test, start=offset, label="Test data")
plot_time_series(timesteps=X_test[1:], values=naive_forecast, format="-", start=offset, label="Naive forecast");

### Evaluating a time series model

In [63]:
# MASE implemented courtesy of sktime - https://github.com/alan-turing-institute/sktime

def mean_absolute_scaled_error(y_true, y_pred):
  """
  Compute MASE (mean absolute scaled error) with no seasonality.

  The MAE between `y_true` and `y_pred` is divided by the MAE of a one-step
  naive forecast, i.e. the mean absolute difference between consecutive
  values of `y_true`.

  NOTE(review): the scaling term is computed from `y_true` itself rather than
  from a separate training series - confirm this is the intended variant.
  """
  # Error of the forecast under evaluation
  forecast_mae = tf.reduce_mean(tf.abs(y_true - y_pred))

  # Error of predicting each value with the one before it (no seasonality)
  one_step_change = y_true[1:] - y_true[:-1]
  naive_mae = tf.reduce_mean(tf.abs(one_step_change))

  return forecast_mae / naive_mae

In [64]:
def evaluate_preds(y_true, y_pred):
  """
  Compute a set of regression metrics for a forecast.

  Both inputs are cast to float32 tensors first. Returns a dict with the keys
  "mae", "mse", "rmse", "mape" and "mase", each holding the `.numpy()` value
  of the corresponding metric tensor.
  """
  # Cast both inputs to float32 for the metric calculations
  y_true, y_pred = (tf.cast(series, dtype=tf.float32) for series in (y_true, y_pred))

  # MSE is computed once because RMSE is derived from it
  mse = tf.keras.metrics.mean_squared_error(y_true, y_pred)
  metric_tensors = {
      "mae": tf.keras.metrics.mean_absolute_error(y_true, y_pred),
      "mse": mse,
      "rmse": tf.sqrt(mse),
      "mape": tf.keras.metrics.mean_absolute_percentage_error(y_true, y_pred),
      "mase": mean_absolute_scaled_error(y_true, y_pred),
  }

  # Convert every tensor to its NumPy value for easy display
  return {name: tensor.numpy() for name, tensor in metric_tensors.items()}

In [65]:
# Score the naive forecast against the test values it predicts (y_test from the second point on)
naive_results = evaluate_preds(y_true=y_test[1:], y_pred=naive_forecast)

In [66]:
# Show the metric dictionary computed for the naive forecast
print(naive_results)