In [1]:
# MASE implemented courtesy of sktime - https://github.com/alan-turing-institute/sktime/blob/ee7a06843a44f4aaec7582d847e36073a9ab0566/sktime/performance_metrics/forecasting/_functions.py#L16
def mean_absolute_scaled_error(y_true, y_pred):
  """
  Implement MASE (assuming no seasonality of data).

  MASE is the mean absolute error of the predictions divided by the mean
  absolute error of the naive one-step forecast on `y_true` (every value
  predicted by the value immediately before it).

  Parameters
  ----------
  y_true : ground-truth values, ordered in time along the first axis
  y_pred : predicted values, same shape as y_true

  Returns the ratio as a tensor: below 1 beats the naive forecast, above 1 is
  worse. The result is not finite when y_true has fewer than two values or is
  constant (the naive error is then empty or zero).
  """
  mae = tf.reduce_mean(tf.abs(y_true - y_pred))

  # MAE of the naive forecast: no seasonality, so the shift is a single step
  mae_naive_no_season = tf.reduce_mean(tf.abs(y_true[1:] - y_true[:-1]))

  return mae / mae_naive_no_season


In [2]:
def evaluate_preds(y_true, y_pred):
  """
  Scores predictions against ground truth with several regression metrics.

  Parameters
  ----------
  y_true : ground-truth values
  y_pred : predicted values (same shape as y_true)

  Returns a dict of NumPy values keyed by "mse", "rmse", "nrmse" and "nse".
  """
  # All metric math is done in float32, so cast both inputs up front
  truth = tf.cast(y_true, dtype=tf.float32)
  preds = tf.cast(y_pred, dtype=tf.float32)

  # Squared-error family: puts an emphasis on outliers (all errors get squared)
  mse = tf.keras.metrics.mean_squared_error(truth, preds)
  rmse = tf.sqrt(mse)

  # RMSE expressed relative to the mean of the observed values
  nrmse = rmse / tf.reduce_mean(truth)

  # 1 - (sum of squared residuals / sum of squared deviations of truth from its mean)
  residual_ss = tf.reduce_sum(tf.square(truth - preds))
  total_ss = tf.reduce_sum(tf.square(truth - tf.reduce_mean(truth)))
  nse = 1 - (residual_ss / total_ss)

  # Convert every tensor to its NumPy value for display
  named_metrics = (("mse", mse), ("rmse", rmse), ("nrmse", nrmse), ("nse", nse))
  return {name: value.numpy() for name, value in named_metrics}


In [3]:

def make_preds(model, input_data):
  """
  Runs `model.predict` on input_data and squeezes the result.

  Parameters
  ----------
  model: trained model exposing a `predict` method
  input_data: windowed input data (same kind of data model was trained on)

  Returns the predictions with every size-1 dimension removed.
  """
  # tf.squeeze drops all length-1 axes, e.g. (n, 1) predictions become (n,)
  return tf.squeeze(model.predict(input_data))


In [4]:
# Create a function to plot time series data
def plot_time_series(timesteps, values, format='.', start=0, end=None, label=None,
                     xlabel="Time", ylabel="BTC Price"):
  """
  Plots a timesteps (a series of points in time) against values (a series of values across timesteps).

  Draws onto the current matplotlib figure/axes; nothing is returned.

  Parameters
  ---------
  timesteps : array of timesteps
  values : array of values across time
  format : style of plot, default "."
  start : slice start applied to both timesteps & values, default 0
  end : slice stop applied to both timesteps & values (None plots through the last point)
  label : label to show on plot of values (a legend is drawn only when this is set)
  xlabel : x-axis label, default "Time"
  ylabel : y-axis label, default "BTC Price" (kept for backward compatibility;
           pass the real quantity name when plotting other series)
  """
  # NOTE(review): relies on `plt` (matplotlib.pyplot) already being in scope;
  # no such import is visible in this part of the notebook - confirm.
  # Plot the series
  plt.plot(timesteps[start:end], values[start:end], format, label=label)
  plt.xlabel(xlabel)
  plt.ylabel(ylabel)
  if label:
    plt.legend(fontsize=14) # make label bigger
  plt.grid(True)


In [5]:

import pandas as pd
# Read the CSV with its first column as a date-parsed index, then discard rows that contain missing values
df = pd.read_csv("2328522.csv", index_col=0, parse_dates=True).dropna()
df



  df=pd.read_csv("2328522.csv",parse_dates=True,index_col=0)


Unnamed: 0_level_0,Discharge
Daily Date,Unnamed: 1_level_1
2001-01-01,720.0
2001-01-02,799.0
2001-01-03,871.0
2001-01-04,889.0
2001-01-05,845.0
...,...
2023-05-27,1680.0
2023-05-28,1460.0
2023-05-29,1250.0
2023-05-30,1090.0


In [6]:
import numpy as np
import tensorflow as tf

# Discharge column as a plain NumPy array
well = df["Discharge"].to_numpy()
well

# The frame's index (the parsed first CSV column) supplies the timesteps
timesteps = df.index.to_numpy()
timesteps[:10]

# Time series are split chronologically rather than shuffled:
# the first 80% of points is train, the remaining 20% is test
split_size = int(0.8 * len(well))

# Everything before the split point
X_train = timesteps[:split_size]
y_train = well[:split_size]

# Everything from the split point onwards
X_test = timesteps[split_size:]
y_test = well[split_size:]

len(X_train), len(X_test), len(y_train), len(y_test)


# Create function to label windowed data
def get_labelled_windows(x, horizon=1):
  """
  Splits every row of a 2D array of windows into (inputs, labels).

  The last `horizon` columns of each row become the labels; the columns
  before them become the inputs.

  E.g. if horizon=1 (default)
  Row: [1, 2, 3, 4, 5, 6] -> Output: ([1, 2, 3, 4, 5], [6])
  """
  window_part = x[:, :-horizon]  # all columns except the trailing `horizon`
  label_part = x[:, -horizon:]   # only the trailing `horizon` columns
  return window_part, label_part


# Number of past timesteps in each input window, and number of future timesteps to predict
WINDOW_SIZE, HORIZON = 1, 1
def make_windows(x, window_size=1, horizon=1):
  """
  Slides a window of length window_size + horizon over a 1D array, one step
  at a time, and returns the resulting (windows, labels) pair of 2D arrays.

  `x` must support NumPy integer-array indexing (e.g. a NumPy array).
  """
  # Each row holds the inputs plus the labels that follow them
  span = window_size + horizon

  # Offsets inside a single window, as a row vector: [[0, 1, ..., span - 1]]
  offsets = np.arange(span)[np.newaxis, :]

  # Start position of every complete window, as a column vector
  starts = np.arange(len(x) - (span - 1))[:, np.newaxis]

  # Broadcasting column + row yields one row of absolute indexes per window
  index_grid = offsets + starts

  # Index the series with the grid, then peel the trailing `horizon` columns off as labels
  return get_labelled_windows(x[index_grid], horizon=horizon)

# Window the whole discharge series with the notebook-level settings
full_windows, full_labels = make_windows(
    well,
    window_size=WINDOW_SIZE,
    horizon=HORIZON,
)
len(full_windows), len(full_labels)

# Print the first three window/label pairs as a sanity check
for i in range(3):
  print(f"Window: {full_windows[i]} -> Label: {full_labels[i]}")

def make_train_test_splits(windows, labels, test_split=0.2):
  """
  Cuts windows and labels at the same position into a leading train part and
  a trailing test part (order is kept, nothing is shuffled).

  Parameters
  ----------
  windows : sequence of input windows
  labels : sequence of labels aligned with windows
  test_split : fraction of the data placed in the test part, default 0.2

  Returns (train_windows, test_windows, train_labels, test_labels).
  """
  cut = int(len(windows) * (1-test_split))  # int() truncates, e.g. 80% train by default

  # Apply the identical cut to both sequences so pairs stay aligned
  (train_windows, test_windows), (train_labels, test_labels) = (
      (seq[:cut], seq[cut:]) for seq in (windows, labels)
  )
  return train_windows, test_windows, train_labels, test_labels

# Chronological split using the function's default test fraction
(train_windows, test_windows,
 train_labels, test_labels) = make_train_test_splits(windows=full_windows, labels=full_labels)
len(train_windows), len(test_windows), len(train_labels), len(test_labels)

# Peek at the first five training windows and their labels
train_windows[:5], train_labels[:5]

Window: [720.] -> Label: [799.]
Window: [799.] -> Label: [871.]
Window: [871.] -> Label: [889.]


(array([[720.],
        [799.],
        [871.],
        [889.],
        [845.]]),
 array([[799.],
        [871.],
        [889.],
        [845.],
        [771.]]))

In [7]:

import os

# Create a function to implement a ModelCheckpoint callback with a specific filename
def create_model_checkpoint(model_name, save_path="model_experiments"):
  """
  Builds a ModelCheckpoint callback that writes to <save_path>/<model_name>.

  Parameters
  ----------
  model_name : name of the model, used as the last path component
  save_path : directory the checkpoint is placed under, default "model_experiments"

  Returns a tf.keras.callbacks.ModelCheckpoint with verbose=0 and save_best_only=True.
  """
  checkpoint_path = os.path.join(save_path, model_name)
  return tf.keras.callbacks.ModelCheckpoint(
      filepath=checkpoint_path,
      save_best_only=True,  # save only the best model to file
      verbose=0,            # only output a limited amount of text
  )

In [8]:
import tensorflow as tf
from tensorflow.keras import layers

In [9]:
tf.random.set_seed(42)

# Let's build an LSTM model with the Functional API
# `shape` must be a tuple: the trailing comma matters, (WINDOW_SIZE) is just an int
inputs = layers.Input(shape=(WINDOW_SIZE,))
# LSTM needs 3D input, so add a length-1 axis: (batch, WINDOW_SIZE) -> (batch, 1, WINDOW_SIZE)
x = layers.Lambda(lambda x: tf.expand_dims(x, axis=1))(inputs)
# relu is passed explicitly here; author's note: using the tanh activation function results in a massive error
x = layers.LSTM(50, activation="relu")(x)
# Add another optional dense layer (you could add more of these to see if they improve model performance)
x = layers.Dense(50, activation="relu")(x)
x = layers.Dense(50, activation="relu")(x)
output = layers.Dense(HORIZON)(x) # one output unit per forecast step
model_4 = tf.keras.Model(inputs=inputs, outputs=output, name="model_4_lstm")

# Compile model
model_4.compile(loss="mae",
                optimizer=tf.keras.optimizers.Adam())

# Seems when saving the model several warnings are appearing: https://github.com/tensorflow/tensorflow/issues/47554
# NOTE(review): the test split doubles as validation data, so the "best" checkpoint
# is selected on the same data it is later evaluated on - consider a separate validation split.
model_4.fit(train_windows,
            train_labels,
            epochs=100,
            verbose=1,
            batch_size=128,
            validation_data=(test_windows, test_labels),
            callbacks=[create_model_checkpoint(model_name=model_4.name)])

Epoch 1/100


INFO:tensorflow:Assets written to: model_experiments/model_4_lstm/assets


Epoch 2/100


INFO:tensorflow:Assets written to: model_experiments/model_4_lstm/assets


Epoch 3/100


INFO:tensorflow:Assets written to: model_experiments/model_4_lstm/assets


Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100


INFO:tensorflow:Assets written to: model_experiments/model_4_lstm/assets


Epoch 11/100
Epoch 12/100


INFO:tensorflow:Assets written to: model_experiments/model_4_lstm/assets


Epoch 13/100


INFO:tensorflow:Assets written to: model_experiments/model_4_lstm/assets


Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100


INFO:tensorflow:Assets written to: model_experiments/model_4_lstm/assets


Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100


INFO:tensorflow:Assets written to: model_experiments/model_4_lstm/assets


Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100


INFO:tensorflow:Assets written to: model_experiments/model_4_lstm/assets


Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100


INFO:tensorflow:Assets written to: model_experiments/model_4_lstm/assets


Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100


INFO:tensorflow:Assets written to: model_experiments/model_4_lstm/assets


Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100


INFO:tensorflow:Assets written to: model_experiments/model_4_lstm/assets


Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100


INFO:tensorflow:Assets written to: model_experiments/model_4_lstm/assets


Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100


INFO:tensorflow:Assets written to: model_experiments/model_4_lstm/assets


Epoch 95/100
Epoch 96/100
Epoch 97/100


INFO:tensorflow:Assets written to: model_experiments/model_4_lstm/assets


Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.src.callbacks.History at 0x28ea0a350>

In [10]:
# Reload the model saved under model_experiments/ and score it on the test windows
model_4 = tf.keras.models.load_model(filepath="model_experiments/model_4_lstm/")
model_4.evaluate(x=test_windows, y=test_labels)



128.90579223632812

In [11]:
# Predict on the test windows with the LSTM model and preview the first ten values
model_4_preds = make_preds(model=model_4, input_data=test_windows)
model_4_preds[:10]



<tf.Tensor: shape=(10,), dtype=float32, numpy=
array([12126.826 ,  9644.6045,  7200.5684,  5625.3115,  4622.8755,
        3906.8494,  3314.9346,  2828.037 ,  2694.3792,  3544.063 ],
      dtype=float32)>

In [12]:
# Evaluate model 4 (LSTM) preds against the squeezed test labels
model_4_results = evaluate_preds(
    y_true=tf.squeeze(test_labels),
    y_pred=model_4_preds,
)
model_4_results


{'mse': 100051.14, 'rmse': 316.30862, 'nrmse': 0.3237986, 'nse': 0.93930715}