In [None]:
# Install third-party packages imported by this notebook (%pip targets the running kernel's environment).
# NOTE(review): the saved output below reports a `wget` install, so it is stale relative to this command.
%pip install zstandard pandas plotly scikit-learn

Collecting wget
  Downloading wget-3.2.zip (10 kB)
Building wheels for collected packages: wget
  Building wheel for wget (setup.py) ... [?25l- done
[?25h  Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9656 sha256=fd1eba0c287e5a6cc7306f1e890d203e432d40496ec2c92445083569d89cd4c1
  Stored in directory: /root/.cache/pip/wheels/bd/a8/c3/3cf2c14a1837a4e04bd98631724e81f33f462d86a1d895fae0
Successfully built wget
Installing collected packages: wget
Successfully installed wget-3.2
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [1]:
import pandas as pd
import numpy as np
import os
import requests
from pathlib import Path
from plotly.subplots import make_subplots
import plotly.graph_objs as go
from zstandard import ZstdCompressionWriter, ZstdDecompressor
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from numpy import array2string
from datetime import datetime

In [2]:
# Local instance
import plotly.io as pio
# Make "colab" the default plotly renderer for every figure shown in this notebook
pio.renderers.default = "colab"
# Make /tf/work the working directory; later cells use relative paths
# (models-1_18.zip, model/, journal_time_data_hk.csv) that resolve against it.
%cd "/tf/work"

/tf/work


In [None]:
# Download all model versions
print(f'Downloading fyp-forecasting-models-1_18.zip')
r = requests.get('https://files.nekoul.com/pub/fyp-forecasting-models-1_18.zip')
if not r.ok:
  print('Unable to download the archieve')
  exit(128)

with open('models-1_18.zip', 'wb') as f:
    f.write(r.content)

!unzip -nq models-1_18.zip
print(f'Extracted all models')

Downloading fyp-forecasting-models-1_18.zip
Extracted all models


In [3]:
# List the contents of the model/ directory (one sub-directory per saved model version)
!ls -al model

total 100
drwxr-xr-x 25 root root 4096 Mar 29 11:04 .
drwxrwxr-x  3 1000 1000 4096 Mar 29 11:35 ..
drwxr-xr-x  4 root root 4096 Mar 21 00:13 v1
drwxr-xr-x  4 root root 4096 Mar 20 16:37 v10
drwxr-xr-x  4 root root 4096 Mar 20 16:37 v11
drwxr-xr-x  4 root root 4096 Mar 20 16:37 v12
drwxr-xr-x  4 root root 4096 Mar 20 16:37 v13
drwxr-xr-x  4 root root 4096 Mar 20 16:37 v14
drwxr-xr-x  4 root root 4096 Mar 20 16:37 v15
drwxrwxr-x  4 1000 1000 4096 Mar 20 09:11 v16
drwxrwxr-x  4 1000 1000 4096 Mar 20 09:11 v17
drwxr-xr-x  4 root root 4096 Mar 20 15:24 v18
drwxr-xr-x  4 root root 4096 Mar 29 07:04 v19
drwxr-xr-x  4 root root 4096 Mar 21 00:13 v2
drwxr-xr-x  4 root root 4096 Mar 29 07:40 v20
drwxr-xr-x  4 root root 4096 Mar 29 09:21 v21
drwxr-xr-x  4 root root 4096 Mar 29 10:51 v22
drwxr-xr-x  4 root root 4096 Mar 29 11:05 v23
drwxr-xr-x  4 root root 4096 Mar 21 00:13 v3
drwxr-xr-x  4 root root 4096 Mar 21 00:13 v4
drwxr-xr-x  4 root root 4096 Mar 21 00:13 v5
drwxr-xr-x

In [None]:
# Google Colab instance
# Mount Google Drive at /content/drive
from google.colab import drive
drive.mount('/content/drive')
# Create the working folder on Drive (-p: no error if it already exists) and cd into it.
!mkdir -p "/content/drive/MyDrive/Courses/EIE/Year 4/FYP"
%cd "/content/drive/MyDrive/Courses/EIE/Year 4/FYP"

Mounted at /content/drive
/content/drive/MyDrive/Courses/EIE/Year 4/FYP


In [108]:
# Show the GPU devices visible to TensorFlow (an empty list means none were found)
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [422]:
# Delete the local dataset copy; the download cell below only fetches the file
# when it is missing, so this forces a fresh download on the next run.
!rm journal_time_data_hk.csv

In [5]:
# Fetch the zstd-compressed dataset once and keep it decompressed as a local CSV.
DATASET_FILE_PATH = 'journal_time_data_hk.csv'
if not Path(DATASET_FILE_PATH).exists():
  print(f'Downloading journal_time_data_hk.csv.zst')
  # The timeout keeps an unreachable or stalled server from hanging the cell forever.
  r = requests.get('https://files.nekoul.com/pub/journal_time_data_hk.csv.zst', timeout=60)
  if not r.ok:
    # Raise instead of calling exit(): an exception fails this cell (and stops a
    # "Run All") without acting on the kernel session itself.
    raise RuntimeError(f'Unable to download the datasets (HTTP {r.status_code})')

  try:
    with open(DATASET_FILE_PATH, 'wb+') as f:
      print(f'Decompressing {DATASET_FILE_PATH}')
      dctx = ZstdDecompressor()
      # The stream writer decompresses what it is given and writes the result to f.
      decompressor = dctx.stream_writer(f)
      decompressor.write(r.content)
      print(f'Decompression done')
  except Exception:
    # Do not leave a partial CSV behind: the exists() guard above would treat it
    # as a valid cached copy and skip the download on the next run.
    if Path(DATASET_FILE_PATH).exists():
      os.remove(DATASET_FILE_PATH)
    raise

In [6]:
# Load the CSV and index the frame by its parsed 'timestamp' column.
df = pd.read_csv(DATASET_FILE_PATH)
# Journey-time column names, grouped here by their prefix.
# NOTE(review): not referenced by any other cell visible in this notebook.
feature_keys = [
  'H1-CH', 'H1-EH',
  'H11-CH', 'H11-EH',
  'H2-CH', 'H2-EH', 'H2-WH',
  'H3-CH', 'H3-WH',
  'H4-CH', 'H4-EH', 'H4-WH',
  'H5-CH', 'H5-EH', 'H5-WH',
  'K01-CH', 'K01-WH',
  'K02-CH', 'K02-EH',
  'K03-CH', 'K03-EH', 'K03-WH',
  'K04-CH', 'K04-WH',
  'K05-CH', 'K05-EH',
  'K06-CH', 'K06-WH',
]
# The parsed timestamps become the index; the raw text column is then dropped.
df = df.set_index(pd.to_datetime(df['timestamp'])).drop(columns='timestamp')

df.shape

(462959, 31)

In [7]:
def plot(data, title: str = '', *,
         xaxis_title='Time', yaxis_title='Journey time', yaxis_range: list = None, rows=1, cols=1,
         names: list = None, subplot_pos: list = None, subplot_titles: list = None, indexes: list = None):
  """Draw every series in ``data`` as a line trace on a grid of plotly subplots and show it.

  Args:
    data: Iterable of y-value sequences, one per trace.
    title: Figure title, centred horizontally.
    xaxis_title: X-axis title set on each subplot that receives a trace.
    yaxis_title: Y-axis title set on each subplot that receives a trace.
    yaxis_range: Optional value forwarded to the layout as ``yaxis_range``.
    rows: Number of subplot rows.
    cols: Number of subplot columns.
    names: Optional legend name per trace.
    subplot_pos: Optional 0-based, row-major subplot slot per trace (slot 0 is the
      top-left subplot, slot ``cols`` starts the second row). When omitted, each
      trace uses its own position in ``data`` as the slot.
    subplot_titles: Optional titles forwarded to ``make_subplots``.
    indexes: Optional x-values per trace.
  """
  fig = make_subplots(rows=rows, cols=cols, subplot_titles=subplot_titles)
  for idx, s in enumerate(data):
    pos = subplot_pos[idx] if subplot_pos is not None else idx
    index = indexes[idx] if indexes is not None else None
    name = names[idx] if names is not None else None
    # plotly addresses subplots with 1-based (row, col). Convert the row-major slot
    # once and reuse it, so the trace and its axis titles always hit the same subplot.
    row = pos // cols + 1
    col = pos % cols + 1
    fig.add_trace(
      go.Scatter(x=index, y=s, name=name),
      row=row,
      col=col,
    )
    fig.update_xaxes(title_text=xaxis_title, row=row, col=col)
    fig.update_yaxes(title_text=yaxis_title, row=row, col=col)

  fig.update_layout(
    title={
      'text': title,
      'x': 0.5,
    },
    yaxis_range=yaxis_range
  )

  fig.show()

In [8]:
# Keep only rows whose timestamp lies in the half-open period (skip_date, util_date].
skip_date = pd.to_datetime("2022-07-11")
util_date = pd.to_datetime("2023-11-10")
df = df.loc[(skip_date < df.index) & (df.index <= util_date)]

# Put the data on a regular 5-minute grid, filling gaps by time-weighted interpolation.
# The first resampled row is dropped (the reason is not evident from this cell).
# NOTE(review): the -1 sentinel values are only replaced in later cells, so they
# take part in this interpolation - confirm that is intended.
df = df.resample('5Min').interpolate(method='time').iloc[1:]
# Calendar features derived from the timestamp index.
df = df.assign(
  week_day=df.index.dayofweek.values,
  hour=df.index.hour.values,
  minute=df.index.minute.values,
)

df.shape

(72863, 34)

In [93]:
# Number of input columns per time step (journey time plus three calendar features)
n_feature = 4

# Take an explicit copy: the clean-up below must modify `cht` itself. A chained
# in-place replace on a slice of `df` raises SettingWithCopyWarning and has no
# effect once pandas copy-on-write is enabled.
cht = df[['K02-CH', 'week_day', 'hour', 'minute']].copy()
# Replace the invalid data (-1) with the average value instead of removing it.
# NOTE(review): the mean is computed with the -1 entries still present - confirm
# that this bias is acceptable.
cht_mean = cht.loc[:,'K02-CH'].mean()
cht['K02-CH'] = cht['K02-CH'].replace(-1, cht_mean)

cht.shape



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



(72863, 4)

In [94]:
# Smooth the traffic data with a 6-sample moving average. shift(-2) re-centres the
# trailing window so each value averages samples t-3 .. t+2; the NaN rows this
# leaves at both ends are filled with cht_mean.
# assign() builds a new frame, so no SettingWithCopyWarning is raised even when
# `cht` is still a slice of `df`.
cht = cht.assign(**{'K02-CH': cht['K02-CH'].rolling(6).mean().shift(periods=-2).fillna(cht_mean)})



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [95]:
# Display the K02-CH frame after clean-up and smoothing
cht

Unnamed: 0_level_0,K02-CH,week_day,hour,minute
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-07-11 00:05:00,9.918484,0,0,5
2022-07-11 00:10:00,9.918484,0,0,10
2022-07-11 00:15:00,9.918484,0,0,15
2022-07-11 00:20:00,4.500000,0,0,20
2022-07-11 00:25:00,4.333333,0,0,25
...,...,...,...,...
2023-03-20 23:35:00,5.000000,0,23,35
2023-03-20 23:40:00,5.000000,0,23,40
2023-03-20 23:45:00,5.000000,0,23,45
2023-03-20 23:50:00,9.918484,0,23,50


In [96]:
# Plot the K02-CH journey time against its timestamps on a single subplot.
plot(
  [cht['K02-CH']],
  "K02-CH",
  names=['K02-CH'],
  indexes=[cht.index],
  subplot_pos=[0],
)

In [99]:
# Number of input columns per time step (journey time plus three calendar features)
n_feature = 4

# Take an explicit copy: the clean-up below must modify `eht` itself. A chained
# in-place replace on a slice of `df` raises SettingWithCopyWarning and has no
# effect once pandas copy-on-write is enabled.
eht = df[['K02-EH', 'week_day', 'hour', 'minute']].copy()
# Replace the invalid data (-1) with the average value instead of removing it.
# NOTE(review): the mean is computed with the -1 entries still present - confirm
# that this bias is acceptable.
eht_mean = eht.loc[:,'K02-EH'].mean()
eht['K02-EH'] = eht['K02-EH'].replace(-1, eht_mean)

eht.shape



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



(72863, 4)

In [100]:
# Smooth the traffic data with a 6-sample moving average. shift(-2) re-centres the
# trailing window so each value averages samples t-3 .. t+2; the NaN rows this
# leaves at both ends are filled with eht_mean.
# assign() builds a new frame, so no SettingWithCopyWarning is raised even when
# `eht` is still a slice of `df`.
eht = eht.assign(**{'K02-EH': eht['K02-EH'].rolling(6).mean().shift(periods=-2).fillna(eht_mean)})



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [101]:
# Display the K02-EH frame after clean-up and smoothing
eht

Unnamed: 0_level_0,K02-EH,week_day,hour,minute
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-07-11 00:05:00,12.954483,0,0,5
2022-07-11 00:10:00,12.954483,0,0,10
2022-07-11 00:15:00,12.954483,0,0,15
2022-07-11 00:20:00,11.000000,0,0,20
2022-07-11 00:25:00,11.000000,0,0,25
...,...,...,...,...
2023-03-20 23:35:00,11.000000,0,23,35
2023-03-20 23:40:00,11.000000,0,23,40
2023-03-20 23:45:00,11.000000,0,23,45
2023-03-20 23:50:00,12.954483,0,23,50


In [102]:
# Plot the K02-EH journey time against its timestamps on a single subplot.
plot(
  [eht['K02-EH']],
  "K02-EH",
  names=['K02-EH'],
  indexes=[eht.index],
  subplot_pos=[0],
)

In [459]:
# def splint_and_normalize_dataset(data, *,
#                                  train_ratio=0.7, val_ratio=0.2) -> (pd.Series, pd.Series, pd.Series):
#   data_n = data.shape[0]
#   train_n = int(data_n * train_ratio)
#   val_n = int(data_n * val_ratio)

#   train_data: pd.Series = data.iloc[:train_n]
#   # Normalize data
#   mean = train_data.mean()
#   std = train_data.std()
#   train_data = (train_data - mean) / std
#   val_data = (data.iloc[train_n:train_n + val_n] - mean) / std
#   test_data = (data.iloc[train_n + val_n:] - mean) / std
#   return train_data, val_data, test_data

In [16]:
# Split the data chronologically into train/validation/test sets (70/20/10).
data = eht
data_n = data.shape[0]
train_n = int(data_n * 0.70)
val_n = int(data_n * 0.20)

# No scaling is applied: the arrays hold the raw column values of `eht`.
data = data.to_numpy()
# Cut at the two boundaries; whatever remains after train + validation is the test set.
train_data, val_data, test_data = np.split(data, [train_n, train_n + val_n])

print(f'Train data shape={train_data.shape}')
print(f'Validation data shape={val_data.shape}')
print(f'Test data shape={test_data.shape}')
print(f'index shape={eht.index.shape}')
# Overlay the first column of the three splits on one subplot, each against its own timestamps.
plot(
  [train_data[:,0], val_data[:,0], test_data[:,0]],
  'K02-EH',
  rows=1,
  cols=1,
  names=['Train', 'Validation', 'Test'],
  subplot_pos=[0, 0, 0],
  indexes=[
    eht.index[:train_n],
    eht.index[train_n:train_n + val_n],
    eht.index[train_n + val_n:],
  ],
)

Train data shape=(50845, 4)
Validation data shape=(14527, 4)
Test data shape=(7264, 4)
index shape=(72636,)


In [17]:
def make_windows(data, n_steps = 12 * 12, n_horizon = 12 * 3, batch_size = 256, shift = 1, shuffle_size = 500):
  """Build a batched tf.data pipeline of (features, labels) sliding windows.

  Every window covers ``n_steps + n_horizon`` consecutive rows of ``data``. The
  leading rows (all columns) become the features; the trailing ``n_horizon``
  rows, restricted to the first column, become the labels.

  Args:
    data: Row-wise data accepted by ``tf.data.Dataset.from_tensor_slices``.
    n_steps: Number of past rows used as model input.
    n_horizon: Number of future rows to predict.
    batch_size: Number of windows per batch.
    shift: Row offset between the starts of consecutive windows.
    shuffle_size: Shuffle buffer size; shuffling is skipped when not positive.

  Returns:
    A dataset yielding (features, labels) batches, prefetched one batch ahead.
  """
  total_len = n_steps + n_horizon

  def to_tensor(window_ds):
    # window() yields nested datasets; one full-length batch turns each into a tensor.
    return window_ds.batch(total_len)

  def split_features_labels(block):
    return block[:-n_horizon], block[-n_horizon:, :1]

  windows = (
    tf.data.Dataset.from_tensor_slices(data)
    .window(total_len, shift=shift, drop_remainder=True)
    .flat_map(to_tensor)
  )
  if shuffle_size > 0:
    windows = windows.shuffle(shuffle_size)

  return windows.map(split_features_labels).batch(batch_size).prefetch(1)

In [18]:
# Prediction horizon
# Use the past n_steps data points to predict the next n_horizon data points
# (72 input steps and 36 output steps; on the 5-minute grid that is 6 h in, 3 h out).
n_steps = 12 * 6
n_horizon = 12 * 3

In [19]:
# Window config
batch_size = 128
shift = 1

# Build the train/validation/test pipelines with identical window settings.
train, val, test = (
  make_windows(split, n_steps, n_horizon, batch_size, shift)
  for split in (train_data, val_data, test_data)
)

# Peek at a single training batch to confirm the tensor shapes.
for x, y in train.take(1):
    print("feature shape=", x.numpy().shape)
    print("label shape=", y.numpy().shape)
print(f"train spec={train.element_spec}")
print(f"val spec={val.element_spec}")
print(f"test spec={test.element_spec}")

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089
feature shape= (128, 72, 4)
label shape= (128, 36, 1)
train spec=(TensorSpec(shape=(None, None, 4), dtype=tf.float64, name=None), TensorSpec(shape=(None, None, 1), dtype=tf.float64, name=None))
val spec=(TensorSpec(shape=(None, None, 4), dtype=tf.float64, name=None), TensorSpec(shape=(None, None, 1), dtype=tf.float64, name=None))
test spec=(TensorSpec(shape=(None, None, 4), dtype=tf.float64, name=None), TensorSpec(shape=(None, None, 1), dtype=tf.float64, name=None))


In [20]:
from keras.models import Sequential
from keras.layers import InputLayer, LSTM, Dense, Conv1D, Conv2D, MaxPooling1D, MaxPooling2D, Reshape, Flatten, Dropout, Bidirectional, BatchNormalization
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from keras.losses import MeanSquaredError, MeanAbsolutePercentageError, MeanAbsoluteError as MeanAbsoluteErrorLoss, Huber
from keras.metrics import RootMeanSquaredError, MeanAbsoluteError
from keras.optimizers import Adam
from keras.models import load_model

In [54]:
# model_title: human-readable description used as a plot title.
# model_name: version tag; the checkpoint is written to model/<model_name>.
model_title = "CNN-LSTM (6h-to-3h, MSE, Batch 128, 3CNN)"
model_name = "v23"

In [42]:
# CNN-LSTM model: Conv1D stack -> pooling/dropout -> bidirectional LSTM -> dense head
cnn_lstm_model = Sequential([
  # Three stacked 1-D convolutions, each with 128 filters of width 3 and ReLU activation
  Conv1D(filters=128, kernel_size=3, activation='relu', input_shape=(n_steps, n_feature)),
  Conv1D(filters=128, kernel_size=3, activation='relu'),
  Conv1D(filters=128, kernel_size=3, activation='relu'),
  # 1-D max pooling with a pool size of 3
  MaxPooling1D(pool_size=3),
  # Dropout with a rate of 0.25
  Dropout(0.25),
  # Bidirectional LSTM with 400 units per direction (return_sequences is left at its default)
  Bidirectional(LSTM(400)),
  # Dense layer with 32 neurons
  Dense(32, 'relu'),
  # Output layer: one linear unit per predicted time step (n_horizon in total)
  Dense(n_horizon, 'linear'),
])
cnn_lstm_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_3 (Conv1D)           (None, 70, 128)           1664      
                                                                 
 conv1d_4 (Conv1D)           (None, 68, 128)           49280     
                                                                 
 conv1d_5 (Conv1D)           (None, 66, 128)           49280     
                                                                 
 max_pooling1d_1 (MaxPooling  (None, 22, 128)          0         
 1D)                                                             
                                                                 
 dropout_1 (Dropout)         (None, 22, 128)           0         
                                                                 
 bidirectional_1 (Bidirectio  (None, 800)              1692800   
 nal)                                                 

In [43]:
# Training hyper-parameters
epoch_n = 100
learning_rate = 0.001
print(f"model={model_name}, title={model_title}")

# Stop training once val_loss has gone 15 epochs without improving.
early_stop = EarlyStopping(monitor='val_loss', patience=15)
# save_best_only=True: the copy under model/<model_name> is only rewritten when
# the monitored quantity improves.
checkpoint = ModelCheckpoint(f'model/{model_name}', save_best_only=True)
# Optimise mean squared error with Adam and track mean absolute error as a metric.
cnn_lstm_model.compile(
  optimizer=Adam(learning_rate=learning_rate),
  loss=MeanSquaredError(),
  metrics=[MeanAbsoluteError()],
)

model=v23, title=CNN-LSTM (6h-to-3h, MSE, Batch 128, 3CNN)


In [44]:
# Train for up to epoch_n epochs with validation on `val`; the callbacks
# checkpoint the model and stop training early.
history = cnn_lstm_model.fit(train, validation_data=val, epochs=epoch_n, callbacks=[checkpoint, early_stop])

Epoch 1/100
    396/Unknown - 11s 21ms/step - loss: 16.2250 - mean_absolute_error: 2.4971



INFO:tensorflow:Assets written to: model/v23/assets


INFO:tensorflow:Assets written to: model/v23/assets


Epoch 2/100
Epoch 3/100



INFO:tensorflow:Assets written to: model/v23/assets


INFO:tensorflow:Assets written to: model/v23/assets


Epoch 4/100



INFO:tensorflow:Assets written to: model/v23/assets


INFO:tensorflow:Assets written to: model/v23/assets


Epoch 5/100



INFO:tensorflow:Assets written to: model/v23/assets


INFO:tensorflow:Assets written to: model/v23/assets


Epoch 6/100
Epoch 7/100



INFO:tensorflow:Assets written to: model/v23/assets


INFO:tensorflow:Assets written to: model/v23/assets


Epoch 8/100



INFO:tensorflow:Assets written to: model/v23/assets


INFO:tensorflow:Assets written to: model/v23/assets


Epoch 9/100



INFO:tensorflow:Assets written to: model/v23/assets


INFO:tensorflow:Assets written to: model/v23/assets


Epoch 10/100



INFO:tensorflow:Assets written to: model/v23/assets


INFO:tensorflow:Assets written to: model/v23/assets


Epoch 11/100
Epoch 12/100



INFO:tensorflow:Assets written to: model/v23/assets


INFO:tensorflow:Assets written to: model/v23/assets


Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100



INFO:tensorflow:Assets written to: model/v23/assets


INFO:tensorflow:Assets written to: model/v23/assets


Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100



INFO:tensorflow:Assets written to: model/v23/assets


INFO:tensorflow:Assets written to: model/v23/assets


Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100



INFO:tensorflow:Assets written to: model/v23/assets


INFO:tensorflow:Assets written to: model/v23/assets


Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100



INFO:tensorflow:Assets written to: model/v23/assets


INFO:tensorflow:Assets written to: model/v23/assets


Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100



INFO:tensorflow:Assets written to: model/v23/assets


INFO:tensorflow:Assets written to: model/v23/assets


Epoch 48/100



INFO:tensorflow:Assets written to: model/v23/assets


INFO:tensorflow:Assets written to: model/v23/assets


Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100



INFO:tensorflow:Assets written to: model/v23/assets


INFO:tensorflow:Assets written to: model/v23/assets


Epoch 54/100
Epoch 55/100
Epoch 56/100



INFO:tensorflow:Assets written to: model/v23/assets


INFO:tensorflow:Assets written to: model/v23/assets


Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100


In [46]:
# Training curves: loss and val_loss share the first subplot, the two MAE curves the second.
plot(
  [
    history.history[key]
    for key in ('loss', 'val_loss', 'mean_absolute_error', 'val_mean_absolute_error')
  ],
  model_title,
  rows=1,
  cols=2,
  xaxis_title="epoch",
  yaxis_title="",
  subplot_pos=[0, 0, 1, 1],
  names=["loss", "val_loss", "mean_absolute_error", "val_mean_absolute_error"],
  subplot_titles=["loss", "mean_absolute_error"],
)

In [103]:
# Version tag of the saved model to evaluate and the title used on its plots.
load_model_name = "v23"
load_model_title = "CNN-LSTM (6h-to-3h, MSE, Batch 128, 3CNN, Dropout 0.25)"
# Load the saved model from model/<load_model_name>
loaded_model = load_model(f'model/{load_model_name}')

In [None]:
def inverse_transform_single(data, scaler, axis=0):
  """Invert min-max scaling for a single feature of a fitted multi-feature scaler.

  Args:
    data: Scaled values to convert back, passed on to ``inverse_transform``.
    scaler: Fitted scaler exposing per-feature ``min_`` and ``scale_``.
    axis: Index of the feature whose ``min_``/``scale_`` entries are used.

  Returns:
    Whatever ``inverse_transform`` of the single-feature scaler returns for ``data``.
  """
  # Build a fresh scaler that carries only the chosen feature's parameters.
  column_scaler = MinMaxScaler()
  column_scaler.min_ = scaler.min_[axis]
  column_scaler.scale_ = scaler.scale_[axis]
  return column_scaler.inverse_transform(data)

In [104]:
# Retrieve the prediction result (single)
test_sample = test_data
test_shift = 12 * 22
print(f"test_sample shape={test_sample[:n_steps].shape}")

# Feed one n_steps-long window, starting test_shift rows into the test set, as a
# batch of one, and flatten the forecast to 1-D.
result = loaded_model.predict(test_sample[np.newaxis, test_shift:test_shift + n_steps]).flatten()
# Ground truth: first column of the n_horizon rows that follow the input window.
actual = test_sample[test_shift + n_steps:test_shift + n_steps + n_horizon, 0].flatten()

print(f"result shape={result.shape}")
print(f"actual shape={actual.shape}")
plot(
  [result, actual],
  load_model_title,
  names=['Predictions', 'Actuals'],
  subplot_pos=[0, 0],
  yaxis_range=[0, 30],
)

test_sample shape=(72, 4)
result shape=(36,)
actual shape=(36,)


In [28]:
def make_input_windows(data, n_steps, n_horizon):
  """Build the feature-only input windows used to forecast across a whole series.

  Windows are ``n_steps`` rows long and start every ``n_horizon`` rows; windows
  that would run past the end of ``data`` are dropped. Each window is emitted
  as its own batch of size 1.

  Args:
    data: Row-wise data accepted by ``tf.data.Dataset.from_tensor_slices``.
    n_steps: Number of rows in each input window.
    n_horizon: Row offset between the starts of consecutive windows.

  Returns:
    A dataset yielding one input window per element, with a leading batch axis of 1.
  """
  def to_tensor(window_ds):
    # window() yields nested datasets; one full-length batch turns each into a tensor.
    return window_ds.batch(n_steps)

  return (
    tf.data.Dataset.from_tensor_slices(data)
    .window(n_steps, shift=n_horizon, drop_remainder=True)
    .flat_map(to_tensor)
    .batch(1)
  )

In [105]:
# Full forecasting
test_sample_full = test_data
input_full = make_input_windows(test_sample_full, n_steps, n_horizon)
# Predict every window and join the forecasts into one 1-D series.
result_full = loaded_model.predict(input_full).flatten()
# Actual values start right after the first input window (first column only).
actual_full = test_sample_full[n_steps:, 0].flatten()
# Right-pad the actuals with NaN so both series have the same length.
actual_full = np.pad(
  actual_full,
  (0, result_full.size - actual_full.size),
  mode='constant',
  constant_values=np.nan,
)
print(f"result shape={result_full.shape}")
print(f"actual shape={actual_full.shape}")
plot(
  [result_full, actual_full],
  f"{load_model_title} (Full)",
  names=['Predictions', 'Actuals'],
  subplot_pos=[0, 0],
  yaxis_range=[0, 35],
)

result shape=(7200,)
actual shape=(7200,)


In [106]:
# Evaluate the loaded model on each split. results[0] is the loss; results[1] is
# the first compiled metric.
# NOTE(review): labelled "mae" because the model trained in this notebook is
# compiled with MeanAbsoluteError - confirm if a different saved model is loaded.
results = loaded_model.evaluate(train)
print(f"train loss={results[0]}, train mae={results[1]}")
results = loaded_model.evaluate(val)
print(f"val loss={results[0]}, val mae={results[1]}")
results = loaded_model.evaluate(test)
print(f"test loss={results[0]}, test mae={results[1]}")

test loss=3.2765185832977295, test acc=1.0346516370773315
val loss=3.4206762313842773, test acc=1.042662262916565
test loss=5.112307548522949, test acc=1.2061337232589722
