In [1]:
# Both mxnet and gluonts should be installed, in that order, using pip, not conda. The commandas are
# %pip install mxnet
# %pip install gluonts

# To install specific versions, run 
# %pip install mxnet==1.9.1
# %pip install gluonts==0.9.4

# For other libraries, the below can be followed:
# If the libraries are not yet installed, they can be installed in this notebook using commands similar to the below
# %conda install numpy
# %conda install pandas
# %conda install matplotlib
# %conda install scikit-learn
# %conda install joblib
# %conda install tdqm

# Something like the following may also work if the above does not
# import sys
# !conda install --yes --prefix {sys.prefix} numpy
# !conda install --yes --prefix {sys.prefix} pandas
# !conda install --yes --prefix {sys.prefix} scikit-learn
# !conda install --yes --prefix {sys.prefix} joblib
# !conda install --yes --prefix {sys.prefix} tdqm

# To install a specific version, add the version to the install command
# E.g., %conda install numpy=1.20.3

# If all else fails, use pip or follow additional advice such as found at
# https://jakevdp.github.io/blog/2017/12/05/installing-python-packages-from-jupyter/

# If your plan to use pip (especially if you are not working within a specified conda environment), 
# the pip commands might look like:
# pip install numpy
# pip install pandas
# pip install scikit-learn
# pip install joblib
# pip install tdqm

# To install a specific version, add the version to the pip install command
# E.g., pip install numpy==1.20.3

In [2]:
# Load all necessary libraries 
import time
import numpy as np
import pandas as pd
import json
import matplotlib.pyplot as plt
import itertools
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
import glob
import random

In [3]:
import mxnet as mx
from gluonts.model import deepar
from gluonts.mx.trainer import Trainer
from gluonts.dataset.common import ListDataset
from gluonts.evaluation import make_evaluation_predictions

In [4]:
from joblib import Parallel, delayed
import contextlib
import joblib
from tqdm import tqdm
import pickle
import os

In [5]:
# Set seeds for reproducibility
random.seed(54321)
np.random.seed(54321)
mx.random.seed(54321)

In [6]:
# Create directories to store results
os.makedirs("Results/Global/DeepAR/Full/", exist_ok=True)
os.makedirs("Results/Global/DeepAR/Highway System/", exist_ok=True)
os.makedirs("Results/Global/DeepAR/Random Cluster/", exist_ok=True)
os.makedirs("Results/Global/DeepAR/Catch22 KMeans/", exist_ok=True)
os.makedirs("Results/Global/DeepAR/TSFeat KMeans/", exist_ok=True)
os.makedirs("Results/Global/DeepAR/DTW/", exist_ok=True)

# Read in Data and Prepare for Modeling

In [7]:
# Read in all csv files for England from the Processed directory and save in a list

# Create an empty list
england_df_list = list()

# Loop through all file names in alphebtical order to match R list.files function, 
# read into a pandas df, ensure each df is in chronological order, and append to list
for fname in sorted(glob.glob("Data/Processed/Highways_England/*.csv")):
    print("Reading {}".format(fname))
    df = pd.read_csv(fname)
    df = df.sort_values(by="timestamp")
    england_df_list.append(df)

Reading Data/Processed/Highways_England/A11-6310-1_Southbound_2019_Processed.csv
Reading Data/Processed/Highways_England/A11-6312-2_Northbound_2019_Processed.csv
Reading Data/Processed/Highways_England/A14-1107A_Eastbound_2019_Processed.csv
Reading Data/Processed/Highways_England/A14-1144B_Westbound_2019_Processed.csv
Reading Data/Processed/Highways_England/A1M-9842B_Southbound_2019_Processed.csv
Reading Data/Processed/Highways_England/A1M-9847a_Northbound_2019_Processed.csv
Reading Data/Processed/Highways_England/A46-7636-1_Northbound_2019_Processed.csv
Reading Data/Processed/Highways_England/A46-7636-2_Southbound_2019_Processed.csv
Reading Data/Processed/Highways_England/A47-6337-1_Westbound_2019_Processed.csv
Reading Data/Processed/Highways_England/A47-6337-2_Eastbound_2019_Processed.csv
Reading Data/Processed/Highways_England/A5-6847-2_Southbound_2019_Processed.csv
Reading Data/Processed/Highways_England/A5-7572-1-Northbound_2019_Processed.csv
Reading Data/Processed/Highways_Englan

In [8]:
# Read in all csv files for Portland from the Processed directory and save in a list
portland_df_list = list()

for fname in sorted(glob.glob("Data/Processed/Portland/*.csv")):
    print("Reading {}".format(fname))
    df = pd.read_csv(fname)
    df = df.sort_values(by="timestamp")
    portland_df_list.append(df)

Reading Data/Processed/Portland/I205-101068_Northbound_2019_Processed.csv
Reading Data/Processed/Portland/I205-101073_Southbound_2019_Processed.csv
Reading Data/Processed/Portland/I405-100395_Southbound_2019_Processed.csv
Reading Data/Processed/Portland/I405-100527_Northbound_2019_Processed.csv
Reading Data/Processed/Portland/I5-100688_Southbound_2019_Processed.csv
Reading Data/Processed/Portland/I5-100703_Northbound_2019_Processed.csv
Reading Data/Processed/Portland/I84-101108_Eastbound_2019_Processed.csv
Reading Data/Processed/Portland/I84-101161_Westbound_2019_Processed.csv
Reading Data/Processed/Portland/OR217-100300_Southbound_2019_Processed.csv
Reading Data/Processed/Portland/OR217-100314_Northbound_2019_Processed.csv
Reading Data/Processed/Portland/R2 Delta Hwy-101745_Northbound_2019_Processed.csv
Reading Data/Processed/Portland/R2 OR18-102111_Westbound_2019_Processed.csv
Reading Data/Processed/Portland/R2 OR18-102113_Eastbound_2019_Processed.csv
Reading Data/Processed/Portland/

In [9]:
# Read in all csv files for Utah from the Processed directory and save in a list
utah_df_list = list()

for fname in sorted(glob.glob("Data/Processed/Utah/*.csv")):
    print("Reading {}".format(fname))
    df = pd.read_csv(fname)
    df = df.sort_values(by="timestamp")
    utah_df_list.append(df)

Reading Data/Processed/Utah/I15-3103178_Southbound_2019_Processed.csv
Reading Data/Processed/Utah/I15-749_Northbound_2019_Processed.csv
Reading Data/Processed/Utah/I215-134_Counterclockwise_2019_Processed.csv
Reading Data/Processed/Utah/I215-31_Clockwise_2019_Processed.csv
Reading Data/Processed/Utah/I70-3103400_Westbound_2019_Processed.csv
Reading Data/Processed/Utah/I70-3103401_Eastbound_2019_Processed.csv
Reading Data/Processed/Utah/I80-600_Eastbound_2019_Processed.csv
Reading Data/Processed/Utah/I80-667_Westbound_2019_Processed.csv
Reading Data/Processed/Utah/I84-451_Eastbound_2019_Processed.csv
Reading Data/Processed/Utah/I84-482_Westbound_2019_Processed.csv
Reading Data/Processed/Utah/LegacyParkway-810_Northbound_2019_Processed.csv
Reading Data/Processed/Utah/LegacyParkway-890_Southbound_2019_Processed.csv
Reading Data/Processed/Utah/US189-260_Westbound_2019_Processed.csv
Reading Data/Processed/Utah/US189-470_Eastbound_2019_Processed.csv
Reading Data/Processed/Utah/US40-634_Westb

In [10]:
# Append all lists together to create one total list of data frames
total_df_list = england_df_list + portland_df_list + utah_df_list

In [11]:
# Read in the start and end points csv, and subtract 1 to deal with index differences between R and python
start_end = pd.read_csv("start_end_points.csv")
start_end["start"] = start_end["start"] - 1
start_end["end"] = start_end["end"]

In [12]:
# Initialize an empty list
subset_df_list = list()

# For each df in the total_df_list
for idx, df in enumerate(total_df_list):
    
    # subset the df using the start and end points
    subset_df = df.iloc[start_end.iloc[idx,0]:start_end.iloc[idx,1], ]\
    .reset_index(drop=True).reset_index(drop=False)\
    .rename(columns={"index":"rn"}) # Create a row_num col based on the index of the data frame
    
    # Create a train_val_test field to denote which set each observation is a part of
    subset_df["train_val_test"] = np.where(subset_df["rn"]<(96*7*8),
                                           "train",
                                           np.where(subset_df["rn"]<(96*7*10),
                                                    "val",
                                                    "test"
                                                   )
                                       )

    # Append the subset df to the list
    subset_df_list.append(subset_df)

In [13]:
# Create two lists, one for the train_val data and one for the test data
train_df_list = list()
test_df_list = list()

# Loop through the list of subset dfs
for df in subset_df_list:
    
    # Filter to train and val only for the training data
    train_df = df.query("train_val_test != 'test'").copy()
    train_df = train_df[['timestamp', 'total_volume']]\
    .rename(columns={'timestamp':'ds', 'total_volume':'y'}) # For prophet, we must rename our timestamp column to ds and out target to y
    
    # Append to list
    train_df_list.append(train_df)
    
    # Same steps for test df list
    test_df = df.query("train_val_test == 'test'").copy()
    test_df = test_df[['timestamp', 'total_volume']]\
    .rename(columns={'timestamp':'ds', 'total_volume':'y'})
    test_df_list.append(test_df)

# Global Model for All Data

## Set up Model and Training Data

In [14]:
# Set the model's prediction length to 1 for one-step ahead forecasts
# Set the context_length to 672 for the weekly seasonality
prediction_length = 1
context_length = 672

In [15]:
# Create a DeepAR estimator object, specifying the freqeuncy of the data to be 15 minutes
estimator = deepar.DeepAREstimator(freq="15min",
                                   context_length=context_length,
                                   prediction_length=prediction_length
                                  )

In [16]:
# Create a list of dictionaries from the training dataframe list. GluonTS requires a list of dictionaries with
# keys "start" and "target", where start is the first timestamp in the data set and target is a list of all
# time series values
train_dataset_dict_list = [{"start": df.iloc[0,0], "target": df.y.values} for df in train_df_list]

In [17]:
# Turn the list of dictionaries into a GlutonTS training data set, specifying data frequency again
train_data = ListDataset(train_dataset_dict_list, freq="15min")

# Fit Model

In [18]:
# Fit the model using default training set up and hyperparameters
np.random.seed(54321)
mx.random.seed(54321)
mod = estimator.train(training_data=train_data)

  timestamp = pd.Timestamp(timestamp_input, freq=freq)
  if isinstance(timestamp.freq, Tick):
  timestamp.floor(timestamp.freq), timestamp.freq
  return pd.Timestamp(
  self._freq_base = start.freq.base
  return _shift_timestamp_helper(ts, ts.freq, offset)
  ..., i0 : i0 + length * start.freq.n : start.freq.n
  self._freq_base is None or self._freq_base == start.freq.base
  return _shift_timestamp_helper(ts, ts.freq, offset)
  self._freq_base is None or self._freq_base == start.freq.base
  ..., i0 : i0 + length * start.freq.n : start.freq.n
100%|█████████| 50/50 [00:48<00:00,  1.04it/s, epoch=1/100, avg_epoch_loss=5.48]
100%|█████████| 50/50 [00:42<00:00,  1.17it/s, epoch=2/100, avg_epoch_loss=4.73]
100%|█████████| 50/50 [00:42<00:00,  1.17it/s, epoch=3/100, avg_epoch_loss=4.57]
100%|█████████| 50/50 [00:42<00:00,  1.18it/s, epoch=4/100, avg_epoch_loss=4.56]
100%|█████████| 50/50 [00:43<00:00,  1.16it/s, epoch=5/100, avg_epoch_loss=4.52]
100%|█████████| 50/50 [00:42<00:00,  1.17it/s, e

In [19]:
# Pickle the model to save for later
with open(f'Results/Global/DeepAR/Full/mod', 'wb+') as f:
    pickle.dump(mod, f)

## Make One-Step Ahead Forecasts

In [20]:
# Set up empty dictionaries in which we will store forecasts
forecast_means = dict()
forecast_medians = dict()
forecast_q10s = dict()
forecast_q90s = dict()
forecast_q025s = dict()
forecast_q975s = dict()

# Initiliaze a key and emptry list as the value within each of the above dictionaries for each time series
for idx in range(1, 77):
    forecast_means[idx] = list()
    forecast_medians[idx] = list()
    forecast_q10s[idx] = list()
    forecast_q90s[idx] = list()
    forecast_q025s[idx] = list()
    forecast_q975s[idx] = list()
    
# For each time step in the test set
for n in range(test_df_list[0].shape[0]):
    
    print(n)
    
    # Create a data set from which we will forecast by appending, 1 time step at a time, data from the test set to
    # the training set. This is done as a list of dictionaries, and then a GluonTS data set is created
    eval_data_dict_list = [{"start": df.iloc[0,5],
                            "target": df.iloc[:,[5,8]].iloc[0:(96*10*7+n),:].total_volume.values} for df in subset_df_list]
    eval_data = ListDataset(eval_data_dict_list, freq="15min")
    
    np.random.seed(n)
    mx.random.seed(n)
    # Make the model forecasts on the eval data, electing to use 100 sample paths for evaluating intervals
    forecast_it, ts_it = make_evaluation_predictions(
        dataset=eval_data,  # test dataset
        predictor=mod,  # predictor
        num_samples=100,  # number of sample paths we want for evaluation
    )
    
    # Cast the forecast_it object return from the above function to a list
    forecasts = list(forecast_it)
    
    # For each entry in that list
    for i in range(len(forecasts)):
        # Compute the mean and append to the apprporiate list - this is our point forecast
        forecast_mean = forecasts[i].samples.mean()
        forecast_means[i+1].append(forecast_mean)
        
        # Compute percentiles, including the median as an alternate point forecast and percentiles needed for 
        # the 80% and 95% PIs
        forecast_percentiles = np.percentile(forecasts[i].samples, [2.5, 10, 50, 90, 97.5])

        # Append those to the appropriate lists
        forecast_medians[i+1].append(forecast_percentiles[2])
        forecast_q10s[i+1].append(forecast_percentiles[1])
        forecast_q90s[i+1].append(forecast_percentiles[3])
        forecast_q025s[i+1].append(forecast_percentiles[0])
        forecast_q975s[i+1].append(forecast_percentiles[4])

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

## Compute Point Forecast Performance

### Mean

In [21]:
# Set up a list to store RMSE
rmse_list = list()
# For each time series, compute the RMSE and append to the list
for idx in range(76):
    rmse_idx = mean_squared_error(forecast_means[idx+1], 
                                  test_df_list[idx].y,
                                  squared=False)
    rmse_list.append(rmse_idx)

In [22]:
# Print the mean RMSE for reporting in the paper
np.mean(rmse_list)

39.685352862255264

In [23]:
# Do the same with MAE
mae_list = list()
for idx in range(76):
    mae_idx = mean_absolute_error(forecast_means[idx+1], 
                                  test_df_list[idx].y)
    mae_list.append(mae_idx)

In [24]:
np.mean(mae_list)

26.76444519043996

In [25]:
# Compute the mean of each time series to use to scale the RMSE and MAE
true_means_list = list()
for idx in range(76):
    mean_y_i = np.mean(test_df_list[idx].y)
    true_means_list.append(mean_y_i)

In [26]:
np.mean(np.array(rmse_list)/np.array(true_means_list))

0.17300465964195857

In [27]:
np.mean(np.array(mae_list)/np.array(true_means_list))

0.11662157025849663

### Median

In [28]:
# Compute metrics using the median as the point forecast instead of the mean - note they are almost identical
median_rmse_list = list()
for idx in range(76):
    median_rmse_idx = mean_squared_error(forecast_medians[idx+1], 
                                         test_df_list[idx].y,
                                         squared=False)
    median_rmse_list.append(median_rmse_idx)

In [29]:
np.mean(median_rmse_list)

39.67527851930589

In [30]:
median_mae_list = list()
for idx in range(76):
    median_mae_idx = mean_absolute_error(forecast_medians[idx+1], 
                                         test_df_list[idx].y)
    median_mae_list.append(median_mae_idx)

In [31]:
np.mean(median_mae_list)

26.761807186675814

In [32]:
np.mean(np.array(median_rmse_list)/np.array(true_means_list))

0.17299661836565938

In [33]:
np.mean(np.array(median_mae_list)/np.array(true_means_list))

0.11662316119807117

## Compute PI Performance

In [14]:
# Create a function to compute the interval score
def interval_score(true_values, lower, upper, interval_range):
    """ Function which takes in the true values, the upper and lower bounds of PIs, and the PI level (e.g., 90%)
        and from these inputs, computes the interval score for each prediction
    """
    
    # Compute alpha from the interval range
    alpha = 100*(1-interval_range)
    
    # Save the upper, lower, and true_values as numpy arrays for computation purposes
    upper = np.array(upper)
    lower = np.array(lower)
    true_values = np.array(true_values)
    
    # Compute the lower component of the interval score - just a boolean for true below interval
    def lower_ind(true,low):
        if true<low:
            return 1
        else:
            return 0
        
    # Computer the upper component of the interval score - similar boolean for true above interval
    def upper_ind(true,up):
        if true>up:
            return 1
        else:
            return 0
        
    # Computer the actual score for each obsveration - formula here: https://epiforecasts.io/scoringutils/reference/interval_score.html
    scores = (upper-lower) + (2/alpha)*(lower-true_values)*(lower > true_values) + (2/alpha)*(true_values-upper)*(true_values > upper)
    
    # Return the scores array
    return scores

In [35]:
# Set up empty list to compute the PI scores for each forecast at 80% level
int_80_scores = list()
for idx in range(76):
    int_80_score_idx = interval_score(test_df_list[idx].y,
                                      forecast_q10s[idx+1],
                                      forecast_q90s[idx+1],
                                      0.8)
                                      
    int_80_scores.append(int_80_score_idx)

In [36]:
# Do the same at the 95% level
int_95_scores = list()
for idx in range(76):
    int_95_score_idx = interval_score(test_df_list[idx].y,
                                      forecast_q025s[idx+1],
                                      forecast_q975s[idx+1],
                                      0.95)
                                      
    int_95_scores.append(int_95_score_idx)

In [37]:
# Print the mean raw and scaled PI scores to include in the paper
np.mean(int_80_scores)

58.811731150732015

In [38]:
np.mean(int_95_scores)

97.88965223188873

In [39]:
np.mean(np.array(int_80_scores).mean(1)/np.array(true_means_list))

0.27425287572640916

In [40]:
np.mean(np.array(int_95_scores).mean(1)/np.array(true_means_list))

0.4567658744417653

In [41]:
# Set up a data frame of the forecast results by looping through the result dictionaries, casting to numpy arrays
# and flattening the arrays
full_model_test_pred_df = pd.DataFrame({"ts_index": np.array([[n]*1344 for n in range(1, 77)]).flatten(),
                                        "forec_mean": np.array([forecast_means[i] for i in forecast_means.keys()]).flatten(),
                                        "forec_median": np.array([forecast_medians[i] for i in forecast_medians.keys()]).flatten(),
                                        "lo_80": np.array([forecast_q10s[idx+1] for i in forecast_q10s.keys()]).flatten(),
                                        "hi_80": np.array([forecast_q90s[idx+1] for i in forecast_q90s.keys()]).flatten(),
                                        "lo_95": np.array([forecast_q025s[idx+1] for i in forecast_q025s.keys()]).flatten(),
                                        "hi_95": np.array([forecast_q975s[idx+1] for i in forecast_q975s.keys()]).flatten(),
                                        "actual": np.array([df.y for df in test_df_list]).flatten(),
                                        "int_95_score": np.array(int_95_scores).flatten(),
                                        "int_80_score": np.array(int_80_scores).flatten()
                                       })

full_model_test_pred_df.head()

Unnamed: 0,ts_index,forec_mean,forec_median,lo_80,hi_80,lo_95,hi_95,actual,int_95_score,int_80_score
0,1,318.761536,316.214706,4.335004,13.476797,-2.136532,15.045295,320.0,129.314209,67.122037
1,1,332.354095,332.695923,6.416378,13.747629,2.06799,15.560768,339.0,106.755819,72.491187
2,1,330.256012,329.404175,8.158834,19.413512,5.12361,22.984588,349.0,116.935316,72.201669
3,1,346.779816,348.172455,15.526743,28.66325,7.476986,33.031238,343.0,136.529342,94.788641
4,1,358.592896,357.634781,23.854543,44.000627,19.505202,45.411623,343.0,137.9247,69.672821


In [42]:
# Save the data frame to a csv file in case it is needed later
full_model_test_pred_df.to_csv("Results/Global/DeepAR/Full/test_set_pred.csv", index=False)

# Models for Random Clusters

In [15]:
# Code for progress bar:
# https://stackoverflow.com/questions/24983493/tracking-progress-of-joblib-parallel-execution
# This allows us to print a progress bar while running parallel loops using joblib 

@contextlib.contextmanager
def tqdm_joblib(tqdm_object):
    """Context manager to patch joblib to report into tqdm progress bar given as argument"""
    class TqdmBatchCompletionCallback(joblib.parallel.BatchCompletionCallBack):
        def __call__(self, *args, **kwargs):
            tqdm_object.update(n=self.batch_size)
            return super().__call__(*args, **kwargs)

    old_batch_callback = joblib.parallel.BatchCompletionCallBack
    joblib.parallel.BatchCompletionCallBack = TqdmBatchCompletionCallback
    try:
        yield tqdm_object
    finally:
        joblib.parallel.BatchCompletionCallBack = old_batch_callback
        tqdm_object.close()

In [44]:
# Read in cluster assignments for random clusters
rand_clust = pd.read_csv("Results/Clustering/Random/random_clustering_assign.csv")

In [45]:
# Create a field called cluster with the cluster assignments (for simplicity later on)
rand_clust['cluster'] = rand_clust['random_clust_assign']

In [16]:
# Function to train a DeepAR model on a list of data frames
def train_deepar_clust(clust_df_list):
    """Function takes in: list of data frames which are in the cluster, and returns the trained model."""
    
    # Set the prediction length for one-step ahead and the context length equal to the seasonality
    prediction_length = 1
    context_length = 672
    
    # Create the DeepAR Model
    estimator = deepar.DeepAREstimator(freq="15min",
                                       context_length=context_length,
                                       prediction_length=prediction_length
                                      )
    
    # Create a list of training dictionaries and then the GluonTS training data set
    train_dataset_dict_list = [{"start": df.iloc[0,0], "target": df.y.values} for df in clust_df_list]
    train_data = ListDataset(train_dataset_dict_list, freq="15min")
    
    np.random.seed(54321)
    mx.random.seed(54321)
    # Fit the model to the training data
    mod = estimator.train(training_data=train_data)
    
    # Return the fitted model
    return mod

In [47]:
# Check the number of clusters we need to use
max(rand_clust.cluster.unique())

4

In [48]:
# For each cluster, set up a list for training and a dictionary for testing
rand_clust_1_train_ls = list()
rand_clust_2_train_ls = list()
rand_clust_3_train_ls = list()
rand_clust_4_train_ls = list()

rand_clust_1_test_dict = dict()
rand_clust_2_test_dict = dict()
rand_clust_3_test_dict = dict()
rand_clust_4_test_dict = dict()

# Loop through the list of time series and append the training/testing data for each time series to the 
# appropriate list/dict for each cluster
for row_no in range(rand_clust.shape[0]):
    if rand_clust.iloc[row_no, 2] == 1:
        rand_clust_1_train_ls.append(train_df_list[row_no])
        rand_clust_1_test_dict[rand_clust.iloc[row_no, 0]] = test_df_list[row_no]
        
    if rand_clust.iloc[row_no, 2] == 2:
        rand_clust_2_train_ls.append(train_df_list[row_no])
        rand_clust_2_test_dict[rand_clust.iloc[row_no, 0]] = test_df_list[row_no]
    
    if rand_clust.iloc[row_no, 2] == 3:
        rand_clust_3_train_ls.append(train_df_list[row_no])
        rand_clust_3_test_dict[rand_clust.iloc[row_no, 0]] = test_df_list[row_no]
    
    if rand_clust.iloc[row_no, 2] == 4:
        rand_clust_4_train_ls.append(train_df_list[row_no])
        rand_clust_4_test_dict[rand_clust.iloc[row_no, 0]] = test_df_list[row_no]

In [49]:
# Create a dictionary for training, where each key is the cluster number and each value is the training data list
# created above
rand_clust_train_dict = {1: rand_clust_1_train_ls,
                         2: rand_clust_2_train_ls,
                         3: rand_clust_3_train_ls,
                         4: rand_clust_4_train_ls
                        }

In [50]:
# Loop through all the clusters and call the function to fit the DeepAR models
# We use joblib to do this in a parallel fashion and we use the tdqm_joblib function to print a progress bar
with tqdm_joblib(tqdm(desc="Random Cluster DeepAR Train", 
                      total=len(rand_clust.cluster.unique()))) as progress_bar:
    rand_clust_deep_ar_mods = Parallel(n_jobs=4)(delayed(train_deepar_clust)(rand_clust_train_dict[i]
                                                                            ) for i in range(1, len(rand_clust.cluster.unique())+1))

  timestamp = pd.Timestamp(timestamp_input, freq=freq)
  if isinstance(timestamp.freq, Tick):
  timestamp.floor(timestamp.freq), timestamp.freq
  return pd.Timestamp(
  self._freq_base = start.freq.base
  return _shift_timestamp_helper(ts, ts.freq, offset)
  ..., i0 : i0 + length * start.freq.n : start.freq.n
  self._freq_base is None or self._freq_base == start.freq.base
  timestamp = pd.Timestamp(timestamp_input, freq=freq)
  if isinstance(timestamp.freq, Tick):
  timestamp.floor(timestamp.freq), timestamp.freq
  return pd.Timestamp(
  self._freq_base = start.freq.base
  return _shift_timestamp_helper(ts, ts.freq, offset)
  ..., i0 : i0 + length * start.freq.n : start.freq.n
  self._freq_base is None or self._freq_base == start.freq.base
  timestamp = pd.Timestamp(timestamp_input, freq=freq)
  if isinstance(timestamp.freq, Tick):
  timestamp.floor(timestamp.freq), timestamp.freq
  return pd.Timestamp(
  self._freq_base = start.freq.base
  return _shift_timestamp_helper(ts, ts.freq, o

  return _shift_timestamp_helper(ts, ts.freq, offset)
  self._freq_base is None or self._freq_base == start.freq.base
  ..., i0 : i0 + length * start.freq.n : start.freq.n
  return _shift_timestamp_helper(ts, ts.freq, offset)
  self._freq_base is None or self._freq_base == start.freq.base
  ..., i0 : i0 + length * start.freq.n : start.freq.n
  return _shift_timestamp_helper(ts, ts.freq, offset)
  self._freq_base is None or self._freq_base == start.freq.base
  ..., i0 : i0 + length * start.freq.n : start.freq.n
100%|██████████| 50/50 [01:05<00:00,  1.32s/it, epoch=1/100, avg_epoch_loss=5.5] 
100%|██████████| 50/50 [01:06<00:00,  1.33s/it, epoch=1/100, avg_epoch_loss=5.14]
100%|██████████| 50/50 [01:06<00:00,  1.32s/it, epoch=1/100, avg_epoch_loss=5.85]
100%|██████████| 50/50 [01:20<00:00,  1.62s/it, epoch=1/100, avg_epoch_loss=5.52]
100%|██████████| 50/50 [01:00<00:00,  1.20s/it, epoch=2/100, avg_epoch_loss=4.66]
100%|██████████| 50/50 [01:01<00:00,  1.23s/it, epoch=2/100, avg_epoch_los

100%|██████████| 50/50 [01:03<00:00,  1.27s/it, epoch=44/100, avg_epoch_loss=4.62]
100%|██████████| 50/50 [01:04<00:00,  1.29s/it, epoch=44/100, avg_epoch_loss=4.32]
100%|██████████| 50/50 [00:57<00:00,  1.14s/it, epoch=45/100, avg_epoch_loss=4.05]
100%|██████████| 50/50 [01:08<00:00,  1.37s/it, epoch=38/100, avg_epoch_loss=4.41]
100%|██████████| 50/50 [01:03<00:00,  1.28s/it, epoch=45/100, avg_epoch_loss=4.64]
100%|██████████| 50/50 [01:02<00:00,  1.24s/it, epoch=45/100, avg_epoch_loss=4.28]
100%|██████████| 50/50 [01:01<00:00,  1.23s/it, epoch=46/100, avg_epoch_loss=4.07]
100%|██████████| 50/50 [01:11<00:00,  1.43s/it, epoch=39/100, avg_epoch_loss=4.37]
100%|██████████| 50/50 [00:59<00:00,  1.20s/it, epoch=46/100, avg_epoch_loss=4.65]
100%|██████████| 50/50 [01:02<00:00,  1.24s/it, epoch=46/100, avg_epoch_loss=4.31]
100%|██████████| 50/50 [00:57<00:00,  1.15s/it, epoch=47/100, avg_epoch_loss=4.07]
100%|██████████| 50/50 [01:03<00:00,  1.28s/it, epoch=47/100, avg_epoch_loss=4.64]
100%

100%|██████████| 50/50 [00:59<00:00,  1.18s/it, epoch=69/100, avg_epoch_loss=4.31]
100%|██████████| 50/50 [01:12<00:00,  1.44s/it, epoch=59/100, avg_epoch_loss=4.39]
100%|██████████| 50/50 [00:59<00:00,  1.19s/it, epoch=71/100, avg_epoch_loss=4.06]
100%|██████████| 50/50 [00:58<00:00,  1.17s/it, epoch=71/100, avg_epoch_loss=4.62]
100%|██████████| 50/50 [01:01<00:00,  1.23s/it, epoch=70/100, avg_epoch_loss=4.32]
100%|██████████| 50/50 [01:09<00:00,  1.39s/it, epoch=60/100, avg_epoch_loss=4.37]
100%|██████████| 50/50 [00:57<00:00,  1.15s/it, epoch=72/100, avg_epoch_loss=4.65]
100%|██████████| 50/50 [01:03<00:00,  1.26s/it, epoch=72/100, avg_epoch_loss=4.05]
100%|██████████| 50/50 [01:07<00:00,  1.34s/it, epoch=71/100, avg_epoch_loss=4.29]
100%|██████████| 50/50 [01:12<00:00,  1.45s/it, epoch=61/100, avg_epoch_loss=4.38]
100%|██████████| 50/50 [00:56<00:00,  1.14s/it, epoch=73/100, avg_epoch_loss=4.06]
100%|██████████| 50/50 [01:00<00:00,  1.22s/it, epoch=73/100, avg_epoch_loss=4.64]
100%

Early stopping based on learning rate scheduler callback (min_lr was reached).


100%|██████████| 50/50 [00:43<00:00,  1.16it/s, epoch=86/100, avg_epoch_loss=4.36]
100%|██████████| 50/50 [00:42<00:00,  1.17it/s, epoch=87/100, avg_epoch_loss=4.4] 
100%|██████████| 50/50 [00:42<00:00,  1.16it/s, epoch=88/100, avg_epoch_loss=4.38]
100%|██████████| 50/50 [00:42<00:00,  1.16it/s, epoch=89/100, avg_epoch_loss=4.38]
100%|██████████| 50/50 [00:44<00:00,  1.12it/s, epoch=90/100, avg_epoch_loss=4.38]
 24%|██▍       | 12/50 [00:10<00:32,  1.17it/s, epoch=91/100, avg_epoch_loss=4.38]
 72%|███████▏  | 36/50 [00:30<00:11,  1.17it/s, epoch=91/100, avg_epoch_loss=4.38]
100%|██████████| 50/50 [00:42<00:00,  1.17it/s, epoch=91/100, avg_epoch_loss=4.37]
100%|██████████| 50/50 [00:43<00:00,  1.16it/s, epoch=92/100, avg_epoch_loss=4.37]
100%|██████████| 50/50 [00:43<00:00,  1.16it/s, epoch=93/100, avg_epoch_loss=4.4] 
100%|██████████| 50/50 [00:42<00:00,  1.17it/s, epoch=94/100, avg_epoch_loss=4.37]
100%|██████████| 50/50 [00:42<00:00,  1.17it/s, epoch=95/100, avg_epoch_loss=4.37]
100%

In [51]:
# Save each model for future use by pickling it
for mod_no in range(len(rand_clust_deep_ar_mods)):
    with open(f'Results/Global/DeepAR/Random Cluster/mod_{mod_no}', 'wb+') as f:
        pickle.dump(rand_clust_deep_ar_mods[mod_no], f)

In [52]:
# Load in the pickled models into a list
rand_clust_deep_ar_mods = list()

for mod_no in range(max(rand_clust.cluster.unique())):
    with open(f'Results/Global/DeepAR/Random Cluster/mod_{mod_no}', 'rb') as f:
        rand_clust_deep_ar_mods.append(pickle.load(f))

In [53]:
# Print the list of models as a sanity check
rand_clust_deep_ar_mods

[<gluonts.mx.model.predictor.RepresentableBlockPredictor at 0x7f1137d35070>,
 <gluonts.mx.model.predictor.RepresentableBlockPredictor at 0x7f113774dac0>,
 <gluonts.mx.model.predictor.RepresentableBlockPredictor at 0x7f1137759f10>,
 <gluonts.mx.model.predictor.RepresentableBlockPredictor at 0x7f1137773d30>]

In [17]:
# Function to compute clustered test preds
def compute_deepar_test_preds_clust(model_list, cluster_df, full_df_list, clust_no):
    """Function which takes inputs: a trained model, and a list of data frames which contain train+test data, 
    and which returns a df of model predictions on the test data"""
        
    # Set up empty dictionaries in which we will save the forecast results
    forecast_means = dict()
    forecast_medians = dict()
    forecast_q10s = dict()
    forecast_q90s = dict()
    forecast_q025s = dict()
    forecast_q975s = dict()

    # Pick the model to use based on the cluster number passed to the function
    mod = model_list[clust_no-1]
    # Pick which time series indexes we will need based on the cluster number
    ts_idx_ls = cluster_df.query("cluster==@clust_no").ts_index.values
    ts_idx_df_ls = [full_df_list[i-1] for i in ts_idx_ls]
    
    # Add a key to each result dictionary, along with an empty list for that key, for each ts_index we are using
    for idx in ts_idx_ls:
        forecast_means[idx] = list()
        forecast_medians[idx] = list()
        forecast_q10s[idx] = list()
        forecast_q90s[idx] = list()
        forecast_q025s[idx] = list()
        forecast_q975s[idx] = list()

    # Since we are forecasting for 1344 timesteps, we loop through 1344 times
    for n in range(1344):
        print(n)
        # We create a GluonTS data set on which we will perform evaluation. This is done by including all of the 
        # training data in the data set as well as up to the nth value from the test data
        eval_dict_ls = [{"start": df.iloc[0,5],
                         "target": df.iloc[:,[5,8]].iloc[0:(96*10*7+n),:].total_volume.values
                        } for df in ts_idx_df_ls]
        eval_data = ListDataset(eval_dict_ls, freq="15min")

        np.random.seed(n)
        mx.random.seed(n)
    
        # Make our predictions, using 100 samples/traces from the predictive distribution
        forecast_it, ts_it = make_evaluation_predictions(dataset=eval_data,  # test dataset
                                                         predictor=mod,  # predictor
                                                         num_samples=100,  # number of sample paths we want for evaluation
                                                        )

        # Turn our forecasts into a list
        forecasts = list(forecast_it)

        # Loop through the forecast list
        for i in range(len(forecasts)):
            # For each time series, compute the mean and quantiles and append them into the relevant dictionaries
            forecast_mean = forecasts[i].samples.mean()
            forecast_means[ts_idx_ls[i]].append(forecast_mean)

            forecast_percentiles = np.percentile(forecasts[i].samples, [2.5, 10, 50, 90, 97.5])
            forecast_medians[ts_idx_ls[i]].append(forecast_percentiles[2])

            forecast_q10s[ts_idx_ls[i]].append(forecast_percentiles[1])
            forecast_q90s[ts_idx_ls[i]].append(forecast_percentiles[3])
            forecast_q025s[ts_idx_ls[i]].append(forecast_percentiles[0])
            forecast_q975s[ts_idx_ls[i]].append(forecast_percentiles[4])       
           
    # Return the dictionaries of forecast results
    return (forecast_means, forecast_medians, forecast_q10s, forecast_q90s, forecast_q025s, forecast_q975s)

In [55]:
# Loop through all the clusters and call the function to compute test preds. Again, we do this in a parallel 
# fashion through the use of joblib and tdqm
with tqdm_joblib(tqdm(desc="Random Cluster DeepAR Test Preds", 
                      total=len(rand_clust.cluster.unique()))) as progress_bar:
    rand_clust_deep_ar_test_preds = Parallel(n_jobs=4)(delayed(compute_deepar_test_preds_clust)(rand_clust_deep_ar_mods,
                                                                                                rand_clust,
                                                                                                subset_df_list,
                                                                                                i
                                                                                               ) for i in range(1, len(rand_clust.cluster.unique())+1))

Random Cluster DeepAR Test Preds: 100%|██████████| 4/4 [55:24<00:00, 831.05s/it]


In [56]:
# Initialize an empty data frame in which we will store all model forecasts
rand_clust_test_pred_df = pd.DataFrame()

# Loop through the list of model forecasts. It is a list of tuples which contain dictionaries
for i in range(len(rand_clust_deep_ar_test_preds)):
    # Grab a dictionary based on where we are in the loop, in order to get the list of time series indexes
    # present in each entry in the list
    sub_dict = rand_clust_deep_ar_test_preds[i][0]
    # Grab the keys/time series indexes
    key_list = list(sub_dict.keys())
    # For each time series index
    for k in key_list:
        # Create a sub data frame of the prediction results for that index
        sub_df = pd.DataFrame({"ts_index": k,
                               "forec_mean": rand_clust_deep_ar_test_preds[i][0][k],
                               "forec_median": rand_clust_deep_ar_test_preds[i][1][k],
                               "lo_80": rand_clust_deep_ar_test_preds[i][2][k],
                               "hi_80": rand_clust_deep_ar_test_preds[i][3][k],
                               "lo_95": rand_clust_deep_ar_test_preds[i][4][k],
                               "hi_95": rand_clust_deep_ar_test_preds[i][5][k]
                              })
        # Append that sub data frame to the full data frame
        rand_clust_test_pred_df = rand_clust_test_pred_df.append(sub_df)
# Once all results are appended, reset the data frame index to make it easier to work with
rand_clust_test_pred_df = rand_clust_test_pred_df.reset_index(drop=True)

In [57]:
# Sanity check the shape of the data frame
rand_clust_test_pred_df.shape

(102144, 7)

In [58]:
# Set up an empty data frame in which we will save the true values which we are trying to forecast
test_set_df = pd.DataFrame()

# Create a list of the test set dictionaries created earlier
rand_clust_test_list = [rand_clust_1_test_dict, 
                        rand_clust_2_test_dict, 
                        rand_clust_3_test_dict, 
                        rand_clust_4_test_dict]

# For each key in the list of dictionaries, append the test data frame for that index to the full test_set_df
for i in range(len(rand_clust_test_list)):
    key_list = list(rand_clust_test_list[i].keys())
    for k in key_list:
        test_set_df = test_set_df.append(rand_clust_test_list[i][k])

# Again, reset the index
test_set_df = test_set_df.reset_index(drop=True)

In [59]:
# Again, sanity check the shape
test_set_df.shape

(102144, 2)

In [60]:
# Add a column to the forecast data frame which includes the true values of the time series
rand_clust_test_pred_df['actual'] = test_set_df.y

In [61]:
# Compute the PI Interval Scores as new data frame columns
rand_clust_test_pred_df['int_80_score'] = interval_score(rand_clust_test_pred_df['actual'],
                                                         rand_clust_test_pred_df['lo_80'],
                                                         rand_clust_test_pred_df['hi_80'],
                                                         0.8)
                                                         
rand_clust_test_pred_df['int_95_score'] = interval_score(rand_clust_test_pred_df['actual'],
                                                         rand_clust_test_pred_df['lo_95'],
                                                         rand_clust_test_pred_df['hi_95'],
                                                         0.8)

In [62]:
# Set up lists in which we will store RMSE and MAE data, as well as the true time series means
rand_clust_mean_rmse_ls = list()
rand_clust_mean_mae_ls = list()
rand_clust_median_rmse_ls = list()
rand_clust_median_mae_ls = list()
rand_clust_true_means_ls = list()

# Loop through the foreacst data frame, one time series index at a time
for ts_idx in rand_clust_test_pred_df.ts_index.unique():
    # Subset the df to that index
    rand_clust_test_pred_df_sub = rand_clust_test_pred_df.query("ts_index==@ts_idx").copy()
    
    # Compute the rmse using the forecast mean and append to list
    mean_rmse_sub = mean_squared_error(rand_clust_test_pred_df_sub['forec_mean'],
                                  rand_clust_test_pred_df_sub['actual'],
                                  squared=False
                                 )
    rand_clust_mean_rmse_ls.append(mean_rmse_sub)
    
    # Compute the mae using the forecast mean and append to list
    mean_mae_sub = mean_absolute_error(rand_clust_test_pred_df_sub['forec_mean'],
                                  rand_clust_test_pred_df_sub['actual'])
    rand_clust_mean_mae_ls.append(mean_mae_sub)
    
    # Compute the rmse using the forecast median and append to list
    median_rmse_sub = mean_squared_error(rand_clust_test_pred_df_sub['forec_median'],
                                  rand_clust_test_pred_df_sub['actual'],
                                  squared=False
                                 )
    rand_clust_median_rmse_ls.append(median_rmse_sub)
    
    # Compute the mae using the forecast median and append to list
    median_mae_sub = mean_absolute_error(rand_clust_test_pred_df_sub['forec_median'],
                                  rand_clust_test_pred_df_sub['actual'])
    rand_clust_median_mae_ls.append(median_mae_sub)
    
    # Compute the actual value mean and append to list
    actual_mean_sub = np.mean(rand_clust_test_pred_df_sub['actual'])
    rand_clust_true_means_ls.append(actual_mean_sub)          

In [63]:
# Print the averages of the raw and scaled performance metrics
np.mean(rand_clust_mean_rmse_ls)

39.569440982479044

In [64]:
np.mean(rand_clust_mean_mae_ls)

26.73244139321667

In [65]:
np.mean(np.array(rand_clust_mean_rmse_ls)/np.array(rand_clust_true_means_ls))

0.17174908124894114

In [66]:
np.mean(np.array(rand_clust_mean_mae_ls)/np.array(rand_clust_true_means_ls))

0.11597308842893994

In [67]:
np.mean(rand_clust_median_rmse_ls)

39.58611917415589

In [68]:
np.mean(rand_clust_median_mae_ls)

26.741891698390923

In [69]:
np.mean(np.array(rand_clust_median_rmse_ls)/np.array(rand_clust_true_means_ls))

0.17187385442620684

In [70]:
np.mean(np.array(rand_clust_median_mae_ls)/np.array(rand_clust_true_means_ls))

0.11603834738552952

In [71]:
# Print the average of the PI scores
rand_clust_test_pred_df['int_80_score'].mean()

59.90662762378778

In [72]:
rand_clust_test_pred_df['int_95_score'].mean()

98.65108377710315

In [73]:
# Group the test pred df by index, and compute scaled PI scores
rand_clust_test_pred_df_grouped = rand_clust_test_pred_df.groupby("ts_index")\
.agg({"int_80_score":"mean", "int_95_score":"mean", "actual":"mean"}).reset_index()

rand_clust_test_pred_df_grouped['int_95_score_scaled'] = rand_clust_test_pred_df_grouped['int_95_score']/rand_clust_test_pred_df_grouped['actual']
rand_clust_test_pred_df_grouped['int_80_score_scaled'] = rand_clust_test_pred_df_grouped['int_80_score']/rand_clust_test_pred_df_grouped['actual']

In [74]:
# Print the scaled PI scores
rand_clust_test_pred_df_grouped[['int_80_score_scaled', 'int_95_score_scaled']].mean()

int_80_score_scaled    0.278543
int_95_score_scaled    0.458494
dtype: float64

In [75]:
# Save results to a CSV file
rand_clust_test_pred_df.to_csv("Results/Global/DeepAR/Random Cluster/test_set_pred.csv", index=False)

# Highway System Models

In [76]:
# Create a data frame of highway system clusters based on the number of files we have for each highway system
highway_system_clust = pd.DataFrame({"ts_index": np.arange(1, 77),
                                    "cluster": [1]*38 + [2]*19 + [3]*19}
                                   )

In [77]:
max(highway_system_clust.cluster.unique())

3

In [78]:
# Create lists and dictionaries in which we will store the training and testing data for each cluster
highway_system_clust_1_train_ls = list()
highway_system_clust_2_train_ls = list()
highway_system_clust_3_train_ls = list()

highway_system_clust_1_test_dict = dict()
highway_system_clust_2_test_dict = dict()
highway_system_clust_3_test_dict = dict()

# Loop through each ts_index and assign the data to the proper list/dict for its cluster
for row_no in range(highway_system_clust.shape[0]):
    if highway_system_clust.iloc[row_no, 1] == 1:
        highway_system_clust_1_train_ls.append(train_df_list[row_no])
        highway_system_clust_1_test_dict[highway_system_clust.iloc[row_no, 0]] = test_df_list[row_no]
        
    if highway_system_clust.iloc[row_no, 1] == 2:
        highway_system_clust_2_train_ls.append(train_df_list[row_no])
        highway_system_clust_2_test_dict[highway_system_clust.iloc[row_no, 0]] = test_df_list[row_no]
    
    if highway_system_clust.iloc[row_no, 1] == 3:
        highway_system_clust_3_train_ls.append(train_df_list[row_no])
        highway_system_clust_3_test_dict[highway_system_clust.iloc[row_no, 0]] = test_df_list[row_no]

In [79]:
# Save the lists of training data into a dictionary, with a key for each cluster
highway_clust_train_dict = {1: highway_system_clust_1_train_ls,
                            2: highway_system_clust_2_train_ls,
                            3: highway_system_clust_3_train_ls
                           }

In [80]:
train_start = time.time()
# Loop through all the clusters and call the function to fit models
# Again, we use joblib to do this in a parallel fashion and we use the tdqm_joblib function to print a progress bar
with tqdm_joblib(tqdm(desc="Highway System DeepAR Train", 
                      total=len(highway_system_clust.cluster.unique()))) as progress_bar:
    highway_clust_deep_ar_mods = Parallel(n_jobs=4)(delayed(train_deepar_clust)(highway_clust_train_dict[i]
                                                                               ) for i in range(1, len(highway_system_clust.cluster.unique())+1))
train_end = time.time()

  timestamp = pd.Timestamp(timestamp_input, freq=freq)
  if isinstance(timestamp.freq, Tick):
  timestamp.floor(timestamp.freq), timestamp.freq
  return pd.Timestamp(
  self._freq_base is None or self._freq_base == start.freq.base
  return _shift_timestamp_helper(ts, ts.freq, offset)
  ..., i0 : i0 + length * start.freq.n : start.freq.n
  self._freq_base = start.freq.base
  0%|          | 0/50 [00:00<?, ?it/s]

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

  timestamp = pd.Timestamp(timestamp_input, freq=freq)
  if isinstance(timestamp.freq, Tick):
  timestamp.floor(timestamp.freq), timestamp.freq
  return pd.Timestamp(
  self._freq_base is None or self._freq_base == start.freq.base
  return _shift_timestamp_helper(ts, ts.freq, offset)
  ..., i0 : i0 + length * start.freq.n : start.freq.n
  self._freq_base = start.freq.base
  0%|          | 0/50 [00:00<?, ?it/s]

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

  timestamp = pd.Timestamp(timestamp_input, freq=freq)
  if isinstance(timestamp.freq, Tick):
  timestamp.floor(timestamp.freq), timestamp.freq
  return pd.Timestamp(
  self._freq_base is None or self._freq_base == start.freq.base
  return _shift_timestamp_helper(ts, ts.freq, offset)
  ..., i0 : i0 + length * start.freq.n : start.freq.n
  self._freq_base = start.freq.base
  0%|          | 0/50 [00:00<?, ?it/s]

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

  return _shift_timestamp_helper(ts, ts.freq, offset)
  self._freq_base is None or self._freq_base == start.freq.base
  ..., i0 : i0 + length * start.freq.n : start.freq.n
  return _shift_timestamp_helper(ts, ts.freq, offset)
  self._freq_base is None or self._freq_base == start.freq.base
  ..., i0 : i0 + length * start.freq.n : start.freq.n
  return _shift_timestamp_helper(ts, ts.freq, offset)
  self._freq_base is None or self._freq_base == start.freq.base
  ..., i0 : i0 + length * start.freq.n : start.freq.n
100%|██████████| 50/50 [00:54<00:00,  1.10s/it, epoch=1/100, avg_epoch_loss=5.07]
100%|██████████| 50/50 [00:55<00:00,  1.12s/it, epoch=1/100, avg_epoch_loss=5.19]
100%|██████████| 50/50 [00:55<00:00,  1.12s/it, epoch=1/100, avg_epoch_loss=5.85]
100%|██████████| 50/50 [00:49<00:00,  1.01it/s, epoch=2/100, avg_epoch_loss=4.37]
100%|██████████| 50/50 [00:48<00:00,  1.02it/s, epoch=2/100, avg_epoch_loss=4.47]
100%|██████████| 50/50 [00:49<00:00,  1.01it/s, epoch=2/100, avg_epoch_los

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27


  timestamp = pd.Timestamp(timestamp_input, freq=freq)
  if isinstance(timestamp.freq, Tick):
  timestamp.floor(timestamp.freq), timestamp.freq
  return pd.Timestamp(
100%|██████████| 50/50 [00:49<00:00,  1.01it/s, epoch=6/100, avg_epoch_loss=4.69]
100%|██████████| 50/50 [00:49<00:00,  1.00it/s, epoch=6/100, avg_epoch_loss=4.14]
100%|██████████| 50/50 [00:49<00:00,  1.01it/s, epoch=6/100, avg_epoch_loss=4.23]
100%|██████████| 50/50 [00:48<00:00,  1.03it/s, epoch=7/100, avg_epoch_loss=4.68]
100%|██████████| 50/50 [00:48<00:00,  1.03it/s, epoch=7/100, avg_epoch_loss=4.14]
100%|██████████| 50/50 [00:48<00:00,  1.03it/s, epoch=7/100, avg_epoch_loss=4.23]
100%|██████████| 50/50 [00:48<00:00,  1.02it/s, epoch=8/100, avg_epoch_loss=4.7]]
100%|██████████| 50/50 [00:48<00:00,  1.03it/s, epoch=8/100, avg_epoch_loss=4.15]
100%|██████████| 50/50 [00:49<00:00,  1.01it/s, epoch=8/100, avg_epoch_loss=4.19]
100%|██████████| 50/50 [00:48<00:00,  1.04it/s, epoch=9/100, avg_epoch_loss=4.13]
100%|███████

100%|██████████| 50/50 [00:49<00:00,  1.02it/s, epoch=68/100, avg_epoch_loss=4.1] 
100%|██████████| 50/50 [00:50<00:00,  1.02s/it, epoch=68/100, avg_epoch_loss=4.1] 
100%|██████████| 50/50 [00:49<00:00,  1.00it/s, epoch=68/100, avg_epoch_loss=4.6]
100%|██████████| 50/50 [00:49<00:00,  1.01it/s, epoch=69/100, avg_epoch_loss=4.1] 
100%|██████████| 50/50 [00:49<00:00,  1.01it/s, epoch=69/100, avg_epoch_loss=4.08]
100%|██████████| 50/50 [00:51<00:00,  1.03s/it, epoch=69/100, avg_epoch_loss=4.6] 
100%|██████████| 50/50 [00:48<00:00,  1.02it/s, epoch=70/100, avg_epoch_loss=4.1] 
100%|██████████| 50/50 [00:48<00:00,  1.02it/s, epoch=70/100, avg_epoch_loss=4.09]
100%|██████████| 50/50 [00:48<00:00,  1.03it/s, epoch=70/100, avg_epoch_loss=4.59]
100%|██████████| 50/50 [00:50<00:00,  1.01s/it, epoch=71/100, avg_epoch_loss=4.11]
100%|██████████| 50/50 [00:49<00:00,  1.00it/s, epoch=71/100, avg_epoch_loss=4.1]
100%|██████████| 50/50 [00:51<00:00,  1.02s/it, epoch=71/100, avg_epoch_loss=4.58]
100%|█

Early stopping based on learning rate scheduler callback (min_lr was reached).


100%|██████████| 50/50 [00:48<00:00,  1.04it/s, epoch=96/100, avg_epoch_loss=4.6] 
100%|██████████| 50/50 [00:39<00:00,  1.27it/s, epoch=97/100, avg_epoch_loss=4.06]
100%|██████████| 50/50 [00:39<00:00,  1.26it/s, epoch=97/100, avg_epoch_loss=4.6]]
100%|██████████| 50/50 [00:39<00:00,  1.26it/s, epoch=98/100, avg_epoch_loss=4.11]
100%|██████████| 50/50 [00:39<00:00,  1.25it/s, epoch=98/100, avg_epoch_loss=4.6]]
100%|██████████| 50/50 [00:40<00:00,  1.25it/s, epoch=99/100, avg_epoch_loss=4.11]
100%|██████████| 50/50 [00:40<00:00,  1.24it/s, epoch=99/100, avg_epoch_loss=4.59]
Highway System DeepAR Train:  67%|████████    | 2/3 [1:21:39<34:05, 2045.06s/it]11]
Highway System DeepAR Train: 100%|████████████| 3/3 [1:21:55<00:00, 1638.36s/it]58]


In [81]:
(train_end - train_start)/60

81.91793085336685

In [82]:
# Pickle each model to save for future use
for mod_no in range(len(highway_clust_deep_ar_mods)):
    with open(f'Results/Global/DeepAR/Highway System/mod_{mod_no}', 'wb+') as f:
        pickle.dump(highway_clust_deep_ar_mods[mod_no], f)




In [83]:
test_pred_start = time.time()
# Loop through all the clusters and call the function above to compute test preds
# Again, we use joblib to do this in a parallel fashion and we use the tdqm_joblib function to print a progress bar
with tqdm_joblib(tqdm(desc="Highway System DeepAR Test Preds", 
                      total=len(highway_system_clust.cluster.unique()))) as progress_bar:
    highway_clust_deep_ar_test_preds = Parallel(n_jobs=4)(delayed(compute_deepar_test_preds_clust)(highway_clust_deep_ar_mods,
                                                                                                highway_system_clust,
                                                                                                subset_df_list,
                                                                                                i
                                                                                               ) for i in range(1, len(highway_system_clust.cluster.unique())+1))
test_pred_end = time.time()

Highway System DeepAR Test Preds:  67%|██████▋   | 2/3 [37:55<15:44, 944.52s/it]

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

  timestamp = pd.Timestamp(timestamp_input, freq=freq)
  if isinstance(timestamp.freq, Tick):
  timestamp.floor(timestamp.freq), timestamp.freq
  return pd.Timestamp(
  self._freq_base is None or self._freq_base == start.freq.base
  return _shift_timestamp_helper(ts, ts.freq, offset)
  ..., i0 : i0 + length * start.freq.n : start.freq.n


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

  timestamp = pd.Timestamp(timestamp_input, freq=freq)
  if isinstance(timestamp.freq, Tick):
  timestamp.floor(timestamp.freq), timestamp.freq
  return pd.Timestamp(
  self._freq_base is None or self._freq_base == start.freq.base
  return _shift_timestamp_helper(ts, ts.freq, offset)
  ..., i0 : i0 + length * start.freq.n : start.freq.n
Highway System DeepAR Test Preds: 100%|███████| 3/3 [1:01:20<00:00, 1226.83s/it]


In [84]:
(test_pred_end - test_pred_start)/60

61.341307266553244

In [85]:
# Initialize an empty data frame to save test preds in
highway_clust_test_pred_df = pd.DataFrame()

# For each set of clustered results, loop through and store them (by ts_index) in the test pred data frame
for i in range(len(highway_clust_deep_ar_test_preds)):
    sub_dict = highway_clust_deep_ar_test_preds[i][0]
    key_list = list(sub_dict.keys())
    for k in key_list:
        sub_df = pd.DataFrame({"ts_index": k,
                               "forec_mean": highway_clust_deep_ar_test_preds[i][0][k],
                               "forec_median": highway_clust_deep_ar_test_preds[i][1][k],
                               "lo_80": highway_clust_deep_ar_test_preds[i][2][k],
                               "hi_80": highway_clust_deep_ar_test_preds[i][3][k],
                               "lo_95": highway_clust_deep_ar_test_preds[i][4][k],
                               "hi_95": highway_clust_deep_ar_test_preds[i][5][k]
                              })
        highway_clust_test_pred_df = highway_clust_test_pred_df.append(sub_df)
# Reset the index
highway_clust_test_pred_df = highway_clust_test_pred_df.reset_index(drop=True)

In [86]:
# Initialize an empty data frame to save the test set data in
test_set_df = pd.DataFrame()

# Create a list of the test data frames created earlier
highway_clust_test_list = [highway_system_clust_1_test_dict, 
                           highway_system_clust_2_test_dict, 
                           highway_system_clust_3_test_dict]

# append the test data, in the same order as the clustered results, to the test_set_df
for i in range(len(highway_clust_test_list)):
    key_list = list(highway_clust_test_list[i].keys())
    for k in key_list:
        test_set_df = test_set_df.append(highway_clust_test_list[i][k])

test_set_df = test_set_df.reset_index(drop=True)

In [87]:
# Add a column to the test_pred_df with the actual values
highway_clust_test_pred_df['actual'] = test_set_df.y

In [88]:
# Compute the interval scores
highway_clust_test_pred_df['int_80_score'] = interval_score(highway_clust_test_pred_df['actual'],
                                                            highway_clust_test_pred_df['lo_80'],
                                                            highway_clust_test_pred_df['hi_80'],
                                                            0.8)
                                                         
highway_clust_test_pred_df['int_95_score'] = interval_score(highway_clust_test_pred_df['actual'],
                                                            highway_clust_test_pred_df['lo_95'],
                                                            highway_clust_test_pred_df['hi_95'],
                                                            0.8)

In [89]:
# Initialize empty lists to score rmse and mae values in 
highway_clust_mean_rmse_ls = list()
highway_clust_mean_mae_ls = list()
highway_clust_median_rmse_ls = list()
highway_clust_median_mae_ls = list()
highway_clust_true_means_ls = list()

# For each ts_index in the data set
for ts_idx in highway_clust_test_pred_df.ts_index.unique():
    # Subset to that index
    highway_clust_test_pred_df_sub = highway_clust_test_pred_df.query("ts_index==@ts_idx").copy()
    
    # Compute the RMSE with the mean of the forecast distribtuion and append to list
    mean_rmse_sub = mean_squared_error(highway_clust_test_pred_df_sub['forec_mean'],
                                  highway_clust_test_pred_df_sub['actual'],
                                  squared=False
                                 )
    highway_clust_mean_rmse_ls.append(mean_rmse_sub)
    
    # Compute the MAE with the mean of the forecast distribtuion and append to list
    mean_mae_sub = mean_absolute_error(highway_clust_test_pred_df_sub['forec_mean'],
                                  highway_clust_test_pred_df_sub['actual'])
    highway_clust_mean_mae_ls.append(mean_mae_sub)
    
    # Compute the RMSE with the median of the forecast distribtuion and append to list
    median_rmse_sub = mean_squared_error(highway_clust_test_pred_df_sub['forec_median'],
                                  highway_clust_test_pred_df_sub['actual'],
                                  squared=False
                                 )
    highway_clust_median_rmse_ls.append(median_rmse_sub)
    
    # Compute the MAE with the median of the forecast distribtuion and append to list
    median_mae_sub = mean_absolute_error(highway_clust_test_pred_df_sub['forec_median'],
                                  highway_clust_test_pred_df_sub['actual'])
    highway_clust_median_mae_ls.append(median_mae_sub)
    
    # Compute the actual mean and append to list
    actual_mean_sub = np.mean(highway_clust_test_pred_df_sub['actual'])
    highway_clust_true_means_ls.append(actual_mean_sub)          

In [90]:
# Print the point forecast scores
np.mean(highway_clust_mean_rmse_ls)

39.393279475482586

In [91]:
np.mean(highway_clust_mean_mae_ls)

26.557034395047403

In [92]:
np.mean(np.array(highway_clust_mean_rmse_ls)/np.array(highway_clust_true_means_ls))

0.17136266789372637

In [93]:
np.mean(np.array(highway_clust_mean_mae_ls)/np.array(highway_clust_true_means_ls))

0.11561280054627007

In [94]:
np.mean(highway_clust_median_rmse_ls)

39.415941624078755

In [95]:
np.mean(highway_clust_median_mae_ls)

26.56780014027143

In [96]:
np.mean(np.array(highway_clust_median_rmse_ls)/np.array(highway_clust_true_means_ls))

0.17147476626581862

In [97]:
np.mean(np.array(highway_clust_median_mae_ls)/np.array(highway_clust_true_means_ls))

0.11568445416155902

In [98]:
# Print the interval scores
highway_clust_test_pred_df['int_80_score'].mean()

59.9448678382014

In [99]:
highway_clust_test_pred_df['int_95_score'].mean()

98.69178320102864

In [100]:
# Group the test preds by ts_index and compute the scaled interval scores
highway_clust_test_pred_df_grouped = highway_clust_test_pred_df.groupby("ts_index")\
.agg({"int_80_score":"mean","int_95_score":"mean","actual":"mean"}).reset_index()

highway_clust_test_pred_df_grouped['int_95_score_scaled'] = highway_clust_test_pred_df_grouped['int_95_score']/highway_clust_test_pred_df_grouped['actual']
highway_clust_test_pred_df_grouped['int_80_score_scaled'] = highway_clust_test_pred_df_grouped['int_80_score']/highway_clust_test_pred_df_grouped['actual']

In [101]:
highway_clust_test_pred_df_grouped[['int_80_score_scaled', 'int_95_score_scaled']].mean()

int_80_score_scaled    0.278462
int_95_score_scaled    0.457297
dtype: float64

In [102]:
# Save results to a csv
highway_clust_test_pred_df.to_csv("Results/Global/DeepAR/Highway System/test_set_pred.csv", index=False)

# Catch22 KMeans Clusters

In [18]:
# Read in cluster assignments from Catch22-based clusters
catch22_clust = pd.read_csv("Results/Clustering/KMeans/kmeans_catch22_clustering_assign.csv")
# Rename the field to "cluster" to match expectations from above functions
catch22_clust['cluster'] = catch22_clust['kmeans_catch22_clust_assign']

In [19]:
# Check the number of clusters we will need to use
max(catch22_clust.cluster.unique())

5

In [20]:
# Create empty lists for training data and dictionaries for test data, one for each cluster
catch22_clust_1_train_ls = list()
catch22_clust_2_train_ls = list()
catch22_clust_3_train_ls = list()
catch22_clust_4_train_ls = list()
catch22_clust_5_train_ls = list()

catch22_clust_1_test_dict = dict()
catch22_clust_2_test_dict = dict()
catch22_clust_3_test_dict = dict()
catch22_clust_4_test_dict = dict()
catch22_clust_5_test_dict = dict()

# Loop through the cluster assignments
for row_no in range(catch22_clust.shape[0]):
    # For each time series, if the time series belongs to a givne cluster
    if catch22_clust.iloc[row_no, 1] == 1:
        # Append the training data to the list for that cluster
        catch22_clust_1_train_ls.append(train_df_list[row_no])
        # Add an entry to the test dictionary of the test data, with the ts_index as the key
        catch22_clust_1_test_dict[catch22_clust.iloc[row_no, 0]] = test_df_list[row_no]
        
    if catch22_clust.iloc[row_no, 1] == 2:
        catch22_clust_2_train_ls.append(train_df_list[row_no])
        catch22_clust_2_test_dict[catch22_clust.iloc[row_no, 0]] = test_df_list[row_no]
    
    if catch22_clust.iloc[row_no, 1] == 3:
        catch22_clust_3_train_ls.append(train_df_list[row_no])
        catch22_clust_3_test_dict[catch22_clust.iloc[row_no, 0]] = test_df_list[row_no]
        
    if catch22_clust.iloc[row_no, 1] == 4:
        catch22_clust_4_train_ls.append(train_df_list[row_no])
        catch22_clust_4_test_dict[catch22_clust.iloc[row_no, 0]] = test_df_list[row_no]
        
    if catch22_clust.iloc[row_no, 1] == 5:
        catch22_clust_5_train_ls.append(train_df_list[row_no])
        catch22_clust_5_test_dict[catch22_clust.iloc[row_no, 0]] = test_df_list[row_no]

In [21]:
# Create a dictionary of the training lists, to be used in the next cell to loop through cluster assignments
# and train a model for each cluster
catch22_clust_train_dict = {1: catch22_clust_1_train_ls,
                            2: catch22_clust_2_train_ls,
                            3: catch22_clust_3_train_ls,
                            4: catch22_clust_4_train_ls,
                            5: catch22_clust_5_train_ls
                           }

In [22]:
train_start = time.time()
# Loop through all the clusters and train the models, saving each model as a list entry
# Again, we use joblib to do this in a parallel fashion and we use the tdqm_joblib function to print a progress bar
with tqdm_joblib(tqdm(desc="Catch22 KMeans DeepAR Train", 
                      total=len(catch22_clust.cluster.unique()))) as progress_bar:
    catch22_clust_deep_ar_mods = Parallel(n_jobs=3)(delayed(train_deepar_clust)(catch22_clust_train_dict[i]
                                                                               ) for i in range(1, len(catch22_clust.cluster.unique())+1))
train_end = time.time()

  timestamp = pd.Timestamp(timestamp_input, freq=freq)
  if isinstance(timestamp.freq, Tick):
  timestamp.floor(timestamp.freq), timestamp.freq
  return pd.Timestamp(
  self._freq_base = start.freq.base
  return _shift_timestamp_helper(ts, ts.freq, offset)
  ..., i0 : i0 + length * start.freq.n : start.freq.n
  self._freq_base is None or self._freq_base == start.freq.base
  timestamp = pd.Timestamp(timestamp_input, freq=freq)
  if isinstance(timestamp.freq, Tick):
  timestamp.floor(timestamp.freq), timestamp.freq
  return pd.Timestamp(
  self._freq_base = start.freq.base
  return _shift_timestamp_helper(ts, ts.freq, offset)
  ..., i0 : i0 + length * start.freq.n : start.freq.n
  self._freq_base is None or self._freq_base == start.freq.base
  timestamp = pd.Timestamp(timestamp_input, freq=freq)
  if isinstance(timestamp.freq, Tick):
  timestamp.floor(timestamp.freq), timestamp.freq
  return pd.Timestamp(
  self._freq_base = start.freq.base
  return _shift_timestamp_helper(ts, ts.freq, o

100%|██████████| 50/50 [01:05<00:00,  1.30s/it, epoch=1/100, avg_epoch_loss=3.61]
100%|██████████| 50/50 [01:05<00:00,  1.32s/it, epoch=1/100, avg_epoch_loss=5.28]
100%|██████████| 50/50 [01:06<00:00,  1.34s/it, epoch=1/100, avg_epoch_loss=5.13]
100%|██████████| 50/50 [00:58<00:00,  1.16s/it, epoch=2/100, avg_epoch_loss=3.17]
100%|██████████| 50/50 [00:58<00:00,  1.17s/it, epoch=2/100, avg_epoch_loss=4.53]
100%|██████████| 50/50 [00:59<00:00,  1.20s/it, epoch=2/100, avg_epoch_loss=4.37]
100%|██████████| 50/50 [01:00<00:00,  1.20s/it, epoch=3/100, avg_epoch_loss=3.11]
100%|██████████| 50/50 [00:58<00:00,  1.18s/it, epoch=3/100, avg_epoch_loss=4.38]
100%|██████████| 50/50 [00:58<00:00,  1.17s/it, epoch=3/100, avg_epoch_loss=4.21]
100%|██████████| 50/50 [00:58<00:00,  1.16s/it, epoch=4/100, avg_epoch_loss=3.09]
100%|██████████| 50/50 [00:57<00:00,  1.16s/it, epoch=4/100, avg_epoch_loss=4.33]
100%|██████████| 50/50 [01:00<00:00,  1.20s/it, epoch=4/100, avg_epoch_loss=4.15]
100%|██████████|

100%|██████████| 50/50 [00:57<00:00,  1.15s/it, epoch=67/100, avg_epoch_loss=2.92]
100%|██████████| 50/50 [00:56<00:00,  1.14s/it, epoch=67/100, avg_epoch_loss=3.88]
100%|██████████| 50/50 [00:58<00:00,  1.17s/it, epoch=67/100, avg_epoch_loss=4.19]
100%|██████████| 50/50 [00:58<00:00,  1.17s/it, epoch=68/100, avg_epoch_loss=2.92]
100%|██████████| 50/50 [00:58<00:00,  1.16s/it, epoch=68/100, avg_epoch_loss=3.88]
100%|██████████| 50/50 [01:00<00:00,  1.21s/it, epoch=68/100, avg_epoch_loss=4.21]
100%|██████████| 50/50 [00:58<00:00,  1.16s/it, epoch=69/100, avg_epoch_loss=2.9]]
100%|██████████| 50/50 [01:00<00:00,  1.20s/it, epoch=69/100, avg_epoch_loss=3.88]
100%|██████████| 50/50 [00:57<00:00,  1.15s/it, epoch=69/100, avg_epoch_loss=4.2]
100%|██████████| 50/50 [00:58<00:00,  1.17s/it, epoch=70/100, avg_epoch_loss=2.9]]
100%|██████████| 50/50 [00:59<00:00,  1.19s/it, epoch=70/100, avg_epoch_loss=3.88]
100%|██████████| 50/50 [00:59<00:00,  1.20s/it, epoch=70/100, avg_epoch_loss=4.21]
100%|

100%|██████████| 50/50 [00:49<00:00,  1.02it/s, epoch=24/100, avg_epoch_loss=4.73]
100%|██████████| 50/50 [00:50<00:00,  1.02s/it, epoch=25/100, avg_epoch_loss=4.69]
100%|██████████| 50/50 [00:49<00:00,  1.00it/s, epoch=25/100, avg_epoch_loss=4.76]
100%|██████████| 50/50 [00:50<00:00,  1.01s/it, epoch=26/100, avg_epoch_loss=4.69]
100%|██████████| 50/50 [00:50<00:00,  1.02s/it, epoch=26/100, avg_epoch_loss=4.76]
100%|██████████| 50/50 [00:49<00:00,  1.01it/s, epoch=27/100, avg_epoch_loss=4.7] 
100%|██████████| 50/50 [00:49<00:00,  1.00it/s, epoch=27/100, avg_epoch_loss=4.75]
100%|██████████| 50/50 [00:49<00:00,  1.01it/s, epoch=28/100, avg_epoch_loss=4.69]
100%|██████████| 50/50 [00:48<00:00,  1.03it/s, epoch=28/100, avg_epoch_loss=4.75]
100%|██████████| 50/50 [00:50<00:00,  1.01s/it, epoch=29/100, avg_epoch_loss=4.68]
100%|██████████| 50/50 [00:51<00:00,  1.03s/it, epoch=29/100, avg_epoch_loss=4.76]
100%|██████████| 50/50 [00:48<00:00,  1.02it/s, epoch=30/100, avg_epoch_loss=4.7] 
100%

In [23]:
(train_end - train_start)/60

180.04678500493367

In [24]:
# Pickle each model and save to a file for future loading if needed
for mod_no in range(len(catch22_clust_deep_ar_mods)):
    with open(f'Results/Global/DeepAR/Catch22 KMeans/mod_{mod_no}', 'wb+') as f:
        pickle.dump(catch22_clust_deep_ar_mods[mod_no], f)

In [26]:
test_pred_start = time.time()
# Loop through all the clusters and compute test preds, saving to a list
# Again, we use joblib to do this in a parallel fashion and we use the tdqm_joblib function to print a progress bar
with tqdm_joblib(tqdm(desc="Catch22 KMeans System DeepAR Test Preds", 
                      total=len(catch22_clust.cluster.unique()))) as progress_bar:
    catch22_clust_deep_ar_test_preds = Parallel(n_jobs=3)(delayed(compute_deepar_test_preds_clust)(catch22_clust_deep_ar_mods,
                                                                                                   catch22_clust,
                                                                                                   subset_df_list,
                                                                                                   i
                                                                                                  ) for i in range(1, len(catch22_clust.cluster.unique())+1))
test_pred_end = time.time()

Catch22 KMeans System DeepAR Test Preds:  40%|▊ | 2/5 [45:01<55:50, 1116.75s/it]

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

Catch22 KMeans System DeepAR Test Preds: 100%|█| 5/5 [1:31:33<00:00, 1098.61s/it


In [27]:
(test_pred_end - test_pred_start)/60

91.55127848784129

In [28]:
# Create an empty data frame in which we will store test set preds 
catch22_clust_test_pred_df = pd.DataFrame()

# Loop through the list of test set preds
for i in range(len(catch22_clust_deep_ar_test_preds)):
    # Grab one dictionary from the i'th entry of the list so we can extract the keys from the dictionary
    # The keys tells us which ts_indexes are present in this entry of the list
    sub_dict = catch22_clust_deep_ar_test_preds[i][0]
    key_list = list(sub_dict.keys())
    # Loop through the keys
    for k in key_list:
        # Create a data frame for the test set preds for each individual key (time series)
        sub_df = pd.DataFrame({"ts_index": k,
                               "forec_mean": catch22_clust_deep_ar_test_preds[i][0][k],
                               "forec_median": catch22_clust_deep_ar_test_preds[i][1][k],
                               "lo_80": catch22_clust_deep_ar_test_preds[i][2][k],
                               "hi_80": catch22_clust_deep_ar_test_preds[i][3][k],
                               "lo_95": catch22_clust_deep_ar_test_preds[i][4][k],
                               "hi_95": catch22_clust_deep_ar_test_preds[i][5][k]
                              })
        # Append that data frame to the df for all test preds
        catch22_clust_test_pred_df = catch22_clust_test_pred_df.append(sub_df)
        
# Reset the df index to ensure data can be joined to this df as expected later
catch22_clust_test_pred_df = catch22_clust_test_pred_df.reset_index(drop=True)

In [29]:
# Create an empty data frame to save the test set values in, in the order of the clusters for easy joining to the
# model forecasts
test_set_df = pd.DataFrame()

# First, create a list of the dictionaries of the test data
catch22_clust_test_list = [catch22_clust_1_test_dict, 
                           catch22_clust_2_test_dict, 
                           catch22_clust_3_test_dict,
                           catch22_clust_4_test_dict,
                           catch22_clust_5_test_dict]

# Then, loop through that list
for i in range(len(catch22_clust_test_list)):
    # Grab the keys for each dict in the list
    key_list = list(catch22_clust_test_list[i].keys())
    # For each key in the dict, grab the values (the test set data for each ts_index) 
    for k in key_list:
        # And append to the test set data frame
        test_set_df = test_set_df.append(catch22_clust_test_list[i][k])

# Reset index to ensure adding this data to the test set forecasts works properly
test_set_df = test_set_df.reset_index(drop=True)

In [30]:
# Add the true values to the test pred data frame as a new column
catch22_clust_test_pred_df['actual'] = test_set_df.y

In [31]:
# Compute the interval score for each forecast
catch22_clust_test_pred_df['int_80_score'] = interval_score(catch22_clust_test_pred_df['actual'],
                                                            catch22_clust_test_pred_df['lo_80'],
                                                            catch22_clust_test_pred_df['hi_80'],
                                                            0.8)
                                                         
catch22_clust_test_pred_df['int_95_score'] = interval_score(catch22_clust_test_pred_df['actual'],
                                                            catch22_clust_test_pred_df['lo_95'],
                                                            catch22_clust_test_pred_df['hi_95'],
                                                            0.8)

In [32]:
# Create empty lists to store RMSE, MAE, and true mean values for test set
catch22_clust_mean_rmse_ls = list()
catch22_clust_mean_mae_ls = list()
catch22_clust_median_rmse_ls = list()
catch22_clust_median_mae_ls = list()
catch22_clust_true_means_ls = list()

for ts_idx in catch22_clust_test_pred_df.ts_index.unique():
    # For each ts_index in our data set (i.e., for each individual time series)
    catch22_clust_test_pred_df_sub = catch22_clust_test_pred_df.query("ts_index==@ts_idx").copy()
    
    # Compute the RMSE of the test set using the mean of the predicted distribtuion as the forecast
    # Append to the appropriate list
    mean_rmse_sub = mean_squared_error(catch22_clust_test_pred_df_sub['forec_mean'],
                                       catch22_clust_test_pred_df_sub['actual'],
                                       squared=False
                                      )
    catch22_clust_mean_rmse_ls.append(mean_rmse_sub)
    
    # Compute the MAE of the test set using the mean of the predicted distribtuion as the forecast
    # Append to the appropriate list
    mean_mae_sub = mean_absolute_error(catch22_clust_test_pred_df_sub['forec_mean'],
                                       catch22_clust_test_pred_df_sub['actual'])
    catch22_clust_mean_mae_ls.append(mean_mae_sub)

    # Compute the RMSE of the test set using the median of the predicted distribtuion as the forecast
    # Append to the appropriate list
    median_rmse_sub = mean_squared_error(catch22_clust_test_pred_df_sub['forec_median'],
                                         catch22_clust_test_pred_df_sub['actual'],
                                         squared=False
                                        )
    catch22_clust_median_rmse_ls.append(median_rmse_sub)
    
    # Compute the MAE of the test set using the median of the predicted distribtuion as the forecast
    # Append to the appropriate list
    median_mae_sub = mean_absolute_error(catch22_clust_test_pred_df_sub['forec_median'],
                                         catch22_clust_test_pred_df_sub['actual'])
    catch22_clust_median_mae_ls.append(median_mae_sub)

    # Compute the true mean of the test set for that ts_index
    # Append to the appropriate list
    actual_mean_sub = np.mean(catch22_clust_test_pred_df_sub['actual'])
    catch22_clust_true_means_ls.append(actual_mean_sub)          

In [33]:
# Print point forecast scores using the mean as the point forecast
np.mean(catch22_clust_mean_rmse_ls)

39.24901162033407

In [34]:
np.mean(catch22_clust_mean_mae_ls)

26.441713604411024

In [35]:
np.mean(np.array(catch22_clust_mean_rmse_ls)/np.array(catch22_clust_true_means_ls))

0.17102363018999897

In [36]:
np.mean(np.array(catch22_clust_mean_mae_ls)/np.array(catch22_clust_true_means_ls))

0.11528499718601593

In [37]:
# Print point forecast scores using the median as the point forecast
np.mean(catch22_clust_median_rmse_ls)

39.25402590836379

In [38]:
np.mean(catch22_clust_median_mae_ls)

26.436013341717864

In [39]:
np.mean(np.array(catch22_clust_median_rmse_ls)/np.array(catch22_clust_true_means_ls))

0.1710698669210077

In [40]:
np.mean(np.array(catch22_clust_median_mae_ls)/np.array(catch22_clust_true_means_ls))

0.11525885869171595

In [41]:
# Print interval scores
catch22_clust_test_pred_df['int_80_score'].mean()

59.36933194467871

In [42]:
catch22_clust_test_pred_df['int_95_score'].mean()

97.74477599568537

In [43]:
# Group by ts_index and compute scaled interval scores
catch22_clust_test_pred_df_grouped = catch22_clust_test_pred_df.groupby("ts_index")\
.agg({"int_80_score":"mean","int_95_score":"mean","actual":"mean"}).reset_index()

catch22_clust_test_pred_df_grouped['int_95_score_scaled'] = catch22_clust_test_pred_df_grouped['int_95_score']/catch22_clust_test_pred_df_grouped['actual']
catch22_clust_test_pred_df_grouped['int_80_score_scaled'] = catch22_clust_test_pred_df_grouped['int_80_score']/catch22_clust_test_pred_df_grouped['actual']

In [44]:
# Print scaled interval scores
catch22_clust_test_pred_df_grouped[['int_80_score_scaled', 'int_95_score_scaled']].mean()

int_80_score_scaled    0.273589
int_95_score_scaled    0.446454
dtype: float64

In [45]:
# Save the test pred results to a csv
catch22_clust_test_pred_df.to_csv("Results/Global/DeepAR/Catch22 KMeans/test_set_pred.csv", index=False)

# TSFeat KMeans

In [46]:
# Read in the cluster assignments for the KMeans clusted based on tsfeat feature set
tsfeat_clust = pd.read_csv("Results/Clustering/KMeans/kmeans_tsfeat_clustering_assign.csv")
tsfeat_clust['cluster'] =  tsfeat_clust['kmeans_tsfeat_clust_assign']

In [47]:
# Check the number of clusters for this technique
max(tsfeat_clust.cluster.unique())

2

In [48]:
# Create empty lists and dicts in which to save training and test data for each cluster
tsfeat_clust_1_train_ls = list()
tsfeat_clust_2_train_ls = list()

tsfeat_clust_1_test_dict = dict()
tsfeat_clust_2_test_dict = dict()

# Loop through the cluster assignments
for row_no in range(tsfeat_clust.shape[0]):
    # Depending on the cluster assignment, assign training and testing data to the appropriate list or dict
    if tsfeat_clust.iloc[row_no, 1] == 1:
        tsfeat_clust_1_train_ls.append(train_df_list[row_no])
        tsfeat_clust_1_test_dict[tsfeat_clust.iloc[row_no, 0]] = test_df_list[row_no]
        
    if tsfeat_clust.iloc[row_no, 1] == 2:
        tsfeat_clust_2_train_ls.append(train_df_list[row_no])
        tsfeat_clust_2_test_dict[tsfeat_clust.iloc[row_no, 0]] = test_df_list[row_no]


In [49]:
# Create a dictionary of the lists of training data
tsfeat_clust_train_dict = {1: tsfeat_clust_1_train_ls,
                           2: tsfeat_clust_2_train_ls
                          }

In [50]:
train_start = time.time()
# Loop through all the clusters and train the models, saving each model as an entry in a list
# Again, we use joblib to do this in a parallel fashion and we use the tdqm_joblib function to print a progress bar
with tqdm_joblib(tqdm(desc="TSFeat KMeans DeepAR Train", 
                      total=len(tsfeat_clust.cluster.unique()))) as progress_bar:
    tsfeat_clust_deep_ar_mods = Parallel(n_jobs=2)(delayed(train_deepar_clust)(tsfeat_clust_train_dict[i]
                                                                              ) for i in range(1, len(tsfeat_clust.cluster.unique())+1))
train_end = time.time()

  timestamp = pd.Timestamp(timestamp_input, freq=freq)
  if isinstance(timestamp.freq, Tick):
  timestamp.floor(timestamp.freq), timestamp.freq
  return pd.Timestamp(
  self._freq_base is None or self._freq_base == start.freq.base
  return _shift_timestamp_helper(ts, ts.freq, offset)
  ..., i0 : i0 + length * start.freq.n : start.freq.n

  timestamp = pd.Timestamp(timestamp_input, freq=freq)
  if isinstance(timestamp.freq, Tick):
  timestamp.floor(timestamp.freq), timestamp.freq
  return pd.Timestamp(

  timestamp = pd.Timestamp(timestamp_input, freq=freq)
  if isinstance(timestamp.freq, Tick):
  timestamp.floor(timestamp.freq), timestamp.freq
  return pd.Timestamp(


673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922


  timestamp = pd.Timestamp(timestamp_input, freq=freq)
  if isinstance(timestamp.freq, Tick):
  timestamp.floor(timestamp.freq), timestamp.freq
  return pd.Timestamp(
  self._freq_base = start.freq.base
  return _shift_timestamp_helper(ts, ts.freq, offset)
  ..., i0 : i0 + length * start.freq.n : start.freq.n
  self._freq_base is None or self._freq_base == start.freq.base
  timestamp = pd.Timestamp(timestamp_input, freq=freq)
  if isinstance(timestamp.freq, Tick):
  timestamp.floor(timestamp.freq), timestamp.freq
  return pd.Timestamp(
  self._freq_base = start.freq.base
  return _shift_timestamp_helper(ts, ts.freq, offset)
  ..., i0 : i0 + length * start.freq.n : start.freq.n
  self._freq_base is None or self._freq_base == start.freq.base
  return _shift_timestamp_helper(ts, ts.freq, offset)
  self._freq_base is None or self._freq_base == start.freq.base
  ..., i0 : i0 + length * start.freq.n : start.freq.n
  return _shift_timestamp_helper(ts, ts.freq, offset)
  self._freq_base is Non

100%|██████████| 50/50 [00:58<00:00,  1.16s/it, epoch=18/100, avg_epoch_loss=4.01]
100%|██████████| 50/50 [00:58<00:00,  1.16s/it, epoch=18/100, avg_epoch_loss=4.58]
100%|██████████| 50/50 [00:58<00:00,  1.16s/it, epoch=19/100, avg_epoch_loss=3.99]
100%|██████████| 50/50 [00:58<00:00,  1.16s/it, epoch=19/100, avg_epoch_loss=4.58]
100%|██████████| 50/50 [00:57<00:00,  1.15s/it, epoch=20/100, avg_epoch_loss=3.99]
100%|██████████| 50/50 [00:57<00:00,  1.15s/it, epoch=20/100, avg_epoch_loss=4.56]
100%|██████████| 50/50 [00:57<00:00,  1.15s/it, epoch=21/100, avg_epoch_loss=3.98]
100%|██████████| 50/50 [00:57<00:00,  1.15s/it, epoch=21/100, avg_epoch_loss=4.56]
100%|██████████| 50/50 [00:56<00:00,  1.12s/it, epoch=22/100, avg_epoch_loss=3.96]
100%|██████████| 50/50 [00:56<00:00,  1.13s/it, epoch=22/100, avg_epoch_loss=4.55]
100%|██████████| 50/50 [00:53<00:00,  1.07s/it, epoch=23/100, avg_epoch_loss=3.96]
100%|██████████| 50/50 [00:53<00:00,  1.07s/it, epoch=23/100, avg_epoch_loss=4.53]
100%

Early stopping based on learning rate scheduler callback (min_lr was reached).


100%|██████████| 50/50 [00:37<00:00,  1.33it/s, epoch=97/100, avg_epoch_loss=4.55]
100%|██████████| 50/50 [00:37<00:00,  1.35it/s, epoch=98/100, avg_epoch_loss=4.52]
100%|██████████| 50/50 [00:37<00:00,  1.33it/s, epoch=99/100, avg_epoch_loss=4.52]
TSFeat KMeans DeepAR Train: 100%|█████████████| 2/2 [1:31:39<00:00, 2749.83s/it]53]


In [51]:
(train_end - train_start)/60

91.66115262905757

In [52]:
# Save the models via pickle for future use
for mod_no in range(len(tsfeat_clust_deep_ar_mods)):
    with open(f'Results/Global/DeepAR/TSFeat KMeans/mod_{mod_no}', 'wb+') as f:
        pickle.dump(tsfeat_clust_deep_ar_mods[mod_no], f)

In [53]:
test_pred_start = time.time()
# Loop through all the clusters and compute test preds - save the results for each cluster as an entry in a list
# Again, we use joblib to do this in a parallel fashion and we use the tdqm_joblib function to print a progress bar
with tqdm_joblib(tqdm(desc="TSFeat KMeans System DeepAR Test Preds", 
                      total=len(tsfeat_clust.cluster.unique()))) as progress_bar:
    tsfeat_clust_deep_ar_test_preds = Parallel(n_jobs=2)(delayed(compute_deepar_test_preds_clust)(tsfeat_clust_deep_ar_mods,
                                                                                                  tsfeat_clust,
                                                                                                  subset_df_list,
                                                                                                  i
                                                                                                 ) for i in range(1, len(tsfeat_clust.cluster.unique())+1))
test_pred_end = time.time()

TSFeat KMeans System DeepAR Test Preds:  50%|█▌ | 1/2 [55:38<55:38, 3338.09s/it]

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

  timestamp = pd.Timestamp(timestamp_input, freq=freq)
  if isinstance(timestamp.freq, Tick):
  timestamp.floor(timestamp.freq), timestamp.freq
  return pd.Timestamp(
  self._freq_base is None or self._freq_base == start.freq.base
  return _shift_timestamp_helper(ts, ts.freq, offset)
  ..., i0 : i0 + length * start.freq.n : start.freq.n
TSFeat KMeans System DeepAR Test Preds: 100%|█| 2/2 [1:21:52<00:00, 2456.26s/it]


In [54]:
(test_pred_end - test_pred_start)/60

81.87540959119796

In [55]:
# Create an empty data frame to store test set preds in
tsfeat_clust_test_pred_df = pd.DataFrame()
# Loop through the list of test set preds
for i in range(len(tsfeat_clust_deep_ar_test_preds)):
    # Grab a dict from each entry in the list
    sub_dict = tsfeat_clust_deep_ar_test_preds[i][0]
    # Grab the set of keys for that dict - the keys are the ts_indexes
    key_list = list(sub_dict.keys())
    # Loop through the keys
    for k in key_list:
        # Create a dictionary of test set preds (mean, median, percentiles) for that ts_index
        sub_df = pd.DataFrame({"ts_index": k,
                               "forec_mean": tsfeat_clust_deep_ar_test_preds[i][0][k],
                               "forec_median": tsfeat_clust_deep_ar_test_preds[i][1][k],
                               "lo_80": tsfeat_clust_deep_ar_test_preds[i][2][k],
                               "hi_80": tsfeat_clust_deep_ar_test_preds[i][3][k],
                               "lo_95": tsfeat_clust_deep_ar_test_preds[i][4][k],
                               "hi_95": tsfeat_clust_deep_ar_test_preds[i][5][k]
                              })
        # Append that data frame to the data frame for all test set preds
        tsfeat_clust_test_pred_df = tsfeat_clust_test_pred_df.append(sub_df)
# Reset the index so we can add the true value column later
tsfeat_clust_test_pred_df = tsfeat_clust_test_pred_df.reset_index(drop=True)

In [56]:
# Create an empty data frame to save the true test data in, in the order of the clusters
test_set_df = pd.DataFrame()

# Create a list of the dictionaries of test data
tsfeat_clust_test_list = [tsfeat_clust_1_test_dict, 
                          tsfeat_clust_2_test_dict]

# Loop through the list
for i in range(len(tsfeat_clust_test_list)):
    key_list = list(tsfeat_clust_test_list[i].keys())
    # One by one, for each key in each dictionary, append the data for that key (ts_index) tp the test_set_df
    for k in key_list:
        test_set_df = test_set_df.append(tsfeat_clust_test_list[i][k])

# Reset the index so that adding actual values in a new column works as expected
test_set_df = test_set_df.reset_index(drop=True)

In [57]:
# Add the true values to the test pred data frame in a new column called 'actual'
tsfeat_clust_test_pred_df['actual'] = test_set_df.y

In [58]:
# Compute the interval score for each forecast
tsfeat_clust_test_pred_df['int_80_score'] = interval_score(tsfeat_clust_test_pred_df['actual'],
                                                            tsfeat_clust_test_pred_df['lo_80'],
                                                            tsfeat_clust_test_pred_df['hi_80'],
                                                            0.8)
                                                         
tsfeat_clust_test_pred_df['int_95_score'] = interval_score(tsfeat_clust_test_pred_df['actual'],
                                                            tsfeat_clust_test_pred_df['lo_95'],
                                                            tsfeat_clust_test_pred_df['hi_95'],
                                                            0.8)

In [59]:
# Initialize empty lists in which to save rmse, mae, and true mean
tsfeat_clust_mean_rmse_ls = list()
tsfeat_clust_mean_mae_ls = list()
tsfeat_clust_median_rmse_ls = list()
tsfeat_clust_median_mae_ls = list()
tsfeat_clust_true_means_ls = list()

for ts_idx in tsfeat_clust_test_pred_df.ts_index.unique():
    # Loop through each time series and grab the test preds for that ts_index
    tsfeat_clust_test_pred_df_sub = tsfeat_clust_test_pred_df.query("ts_index==@ts_idx").copy()
    
    # Compute the test RMSE with the mean for the forecast and append to list
    mean_rmse_sub = mean_squared_error(tsfeat_clust_test_pred_df_sub['forec_mean'],
                                       tsfeat_clust_test_pred_df_sub['actual'],
                                       squared=False
                                      )
    tsfeat_clust_mean_rmse_ls.append(mean_rmse_sub)
    
    # Compute the test MAE with the mean for the forecast and append to list
    mean_mae_sub = mean_absolute_error(tsfeat_clust_test_pred_df_sub['forec_mean'],
                                       tsfeat_clust_test_pred_df_sub['actual'])
    tsfeat_clust_mean_mae_ls.append(mean_mae_sub)
    
    # Compute the test RMSE with the median for the forecast and append to list
    median_rmse_sub = mean_squared_error(tsfeat_clust_test_pred_df_sub['forec_median'],
                                         tsfeat_clust_test_pred_df_sub['actual'],
                                         squared=False
                                        )
    tsfeat_clust_median_rmse_ls.append(median_rmse_sub)
    
    # Compute the test MAE with the median for the forecast and append to list
    median_mae_sub = mean_absolute_error(tsfeat_clust_test_pred_df_sub['forec_median'],
                                         tsfeat_clust_test_pred_df_sub['actual'])
    tsfeat_clust_median_mae_ls.append(median_mae_sub)
    
    # Compute true mean and append to list
    actual_mean_sub = np.mean(tsfeat_clust_test_pred_df_sub['actual'])
    tsfeat_clust_true_means_ls.append(actual_mean_sub)          

In [60]:
# Print point forecasts scores based on mean point forecasts
np.mean(tsfeat_clust_mean_rmse_ls)

39.789082300617395

In [61]:
np.mean(tsfeat_clust_mean_mae_ls)

26.835704702720495

In [62]:
np.mean(np.array(tsfeat_clust_mean_rmse_ls)/np.array(tsfeat_clust_true_means_ls))

0.17356367102003153

In [63]:
np.mean(np.array(tsfeat_clust_mean_mae_ls)/np.array(tsfeat_clust_true_means_ls))

0.11687383556375402

In [64]:
# Print point forecasts scores based on median point forecasts
np.mean(tsfeat_clust_median_rmse_ls)

39.80582736771845

In [65]:
np.mean(tsfeat_clust_median_mae_ls)

26.853867375379707

In [66]:
np.mean(np.array(tsfeat_clust_median_rmse_ls)/np.array(tsfeat_clust_true_means_ls))

0.17363961912174794

In [67]:
np.mean(np.array(tsfeat_clust_median_mae_ls)/np.array(tsfeat_clust_true_means_ls))

0.11694896991933777

In [68]:
# Print the interval scores
tsfeat_clust_test_pred_df['int_80_score'].mean()

60.55593458632375

In [69]:
tsfeat_clust_test_pred_df['int_95_score'].mean()

99.73134897960577

In [70]:
# Group by ts_index and compute the scaled interval scores
tsfeat_clust_test_pred_df_grouped = tsfeat_clust_test_pred_df.groupby("ts_index")\
.agg({"int_80_score":"mean", "int_95_score":"mean", "actual":"mean"}).reset_index()

tsfeat_clust_test_pred_df_grouped['int_95_score_scaled'] = tsfeat_clust_test_pred_df_grouped['int_95_score']/tsfeat_clust_test_pred_df_grouped['actual']
tsfeat_clust_test_pred_df_grouped['int_80_score_scaled'] = tsfeat_clust_test_pred_df_grouped['int_80_score']/tsfeat_clust_test_pred_df_grouped['actual']

In [71]:
# Print the scaled interval scores
tsfeat_clust_test_pred_df_grouped[['int_80_score_scaled', 'int_95_score_scaled']].mean()

int_80_score_scaled    0.280645
int_95_score_scaled    0.462363
dtype: float64

In [72]:
# Save the test pred results to a csv
tsfeat_clust_test_pred_df.to_csv("Results/Global/DeepAR/TSFeat KMeans/test_set_pred.csv", index=False)

# DTW Clusters

In [73]:
# Read in the cluster assignments for the DTW based clusters
dtw_clust = pd.read_csv("Results/Clustering/DTW/dtw_clustering_assign.csv")
dtw_clust['cluster'] =  dtw_clust['dtw_clust_assign']

In [74]:
# Check the number of clusters we will be using
max(dtw_clust.cluster.unique())

2

In [75]:
# Create empty lists and dictionaries for training and testing data
dtw_clust_1_train_ls = list()
dtw_clust_2_train_ls = list()

dtw_clust_1_test_dict = dict()
dtw_clust_2_test_dict = dict()

# Loop through our cluster assingments and assign the proper training and testing data to each list or dict
for row_no in range(dtw_clust.shape[0]):
    if dtw_clust.iloc[row_no, 1] == 1:
        dtw_clust_1_train_ls.append(train_df_list[row_no])
        dtw_clust_1_test_dict[dtw_clust.iloc[row_no, 0]] = test_df_list[row_no]
        
    if dtw_clust.iloc[row_no, 1] == 2:
        dtw_clust_2_train_ls.append(train_df_list[row_no])
        dtw_clust_2_test_dict[dtw_clust.iloc[row_no, 0]] = test_df_list[row_no]

In [76]:
# Create a training dictionary with keys for each cluster and values of each list of cluster training data
dtw_clust_train_dict = {1: dtw_clust_1_train_ls,
                        2: dtw_clust_2_train_ls
                       }

In [77]:
train_start = time.time()
# Loop through all the clusters and train a model for each cluster
# Again, we use joblib to do this in a parallel fashion and we use the tdqm_joblib function to print a progress bar
with tqdm_joblib(tqdm(desc="DTW Clusters DeepAR Train", 
                      total=len(dtw_clust.cluster.unique()))) as progress_bar:
    dtw_clust_deep_ar_mods = Parallel(n_jobs=2)(delayed(train_deepar_clust)(dtw_clust_train_dict[i]
                                                                           ) for i in range(1, len(dtw_clust.cluster.unique())+1))
train_end = time.time()

DTW Clusters DeepAR Train:   0%|                          | 0/2 [00:00<?, ?it/s]
  self._freq_base = start.freq.base
  timestamp = pd.Timestamp(timestamp_input, freq=freq)
  if isinstance(timestamp.freq, Tick):
  timestamp.floor(timestamp.freq), timestamp.freq
  return pd.Timestamp(
  0%|          | 0/50 [00:00<?, ?it/s]

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

  timestamp = pd.Timestamp(timestamp_input, freq=freq)
  if isinstance(timestamp.freq, Tick):
  timestamp.floor(timestamp.freq), timestamp.freq
  return pd.Timestamp(
  self._freq_base = start.freq.base
  return _shift_timestamp_helper(ts, ts.freq, offset)
  ..., i0 : i0 + length * start.freq.n : start.freq.n
  self._freq_base is None or self._freq_base == start.freq.base
  return _shift_timestamp_helper(ts, ts.freq, offset)
  self._freq_base is None or self._freq_base == start.freq.base
  ..., i0 : i0 + length * start.freq.n : start.freq.n
  return _shift_timestamp_helper(ts, ts.freq, offset)
  self._freq_base is None or self._freq_base == start.freq.base
  ..., i0 : i0 + length * start.freq.n : start.freq.n
100%|██████████| 50/50 [01:14<00:00,  1.49s/it, epoch=1/100, avg_epoch_loss=6.3] 
100%|██████████| 50/50 [01:16<00:00,  1.52s/it, epoch=1/100, avg_epoch_loss=5.08]
100%|██████████| 50/50 [01:09<00:00,  1.40s/it, epoch=2/100, avg_epoch_loss=5.43]
100%|██████████| 50/50 [01:09<00:00

100%|██████████| 50/50 [01:08<00:00,  1.37s/it, epoch=78/100, avg_epoch_loss=4.94]
100%|██████████| 50/50 [00:56<00:00,  1.13s/it, epoch=78/100, avg_epoch_loss=4.05]
100%|██████████| 50/50 [01:09<00:00,  1.38s/it, epoch=79/100, avg_epoch_loss=4.94]
100%|██████████| 50/50 [00:56<00:00,  1.14s/it, epoch=79/100, avg_epoch_loss=4.04]
100%|██████████| 50/50 [00:57<00:00,  1.15s/it, epoch=80/100, avg_epoch_loss=4.05]
100%|██████████| 50/50 [01:09<00:00,  1.39s/it, epoch=80/100, avg_epoch_loss=4.94]
100%|██████████| 50/50 [00:57<00:00,  1.15s/it, epoch=81/100, avg_epoch_loss=4.03]
100%|██████████| 50/50 [01:08<00:00,  1.37s/it, epoch=81/100, avg_epoch_loss=4.94]
100%|██████████| 50/50 [00:56<00:00,  1.14s/it, epoch=82/100, avg_epoch_loss=4.03]
100%|██████████| 50/50 [01:08<00:00,  1.37s/it, epoch=82/100, avg_epoch_loss=4.94]
100%|██████████| 50/50 [00:56<00:00,  1.13s/it, epoch=83/100, avg_epoch_loss=4.04]
100%|██████████| 50/50 [01:07<00:00,  1.34s/it, epoch=83/100, avg_epoch_loss=4.93]
100%

In [78]:
(train_end - train_start)/60

100.15107010602951

In [79]:
# Pickle and save each model for future use
for mod_no in range(len(dtw_clust_deep_ar_mods)):
    with open(f'Results/Global/DeepAR/DTW/mod_{mod_no}', 'wb+') as f:
        pickle.dump(dtw_clust_deep_ar_mods[mod_no], f)

In [80]:
test_pred_start = time.time()
# Loop through all the clusters and compute test preds
# Again, we use joblib to do this in a parallel fashion and we use the tdqm_joblib function to print a progress bar
with tqdm_joblib(tqdm(desc="DTW Cluster DeepAR Test Preds", 
                      total=len(dtw_clust.cluster.unique()))) as progress_bar:
    dtw_clust_deep_ar_test_preds = Parallel(n_jobs=2)(delayed(compute_deepar_test_preds_clust)(dtw_clust_deep_ar_mods,
                                                                                               dtw_clust,
                                                                                               subset_df_list,
                                                                                               i
                                                                                              ) for i in range(1, len(dtw_clust.cluster.unique())+1))
test_pred_end = time.time()

DTW Cluster DeepAR Test Preds:  50%|██████      | 1/2 [58:35<58:35, 3515.37s/it]

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27


  timestamp = pd.Timestamp(timestamp_input, freq=freq)
  if isinstance(timestamp.freq, Tick):
  timestamp.floor(timestamp.freq), timestamp.freq
  return pd.Timestamp(
DTW Cluster DeepAR Test Preds: 100%|██████████| 2/2 [1:26:15<00:00, 2587.96s/it]


In [81]:
(test_pred_end - test_pred_start)/60

86.26532184282938

In [82]:
# Initialize an empty data frame in which to save test preds
dtw_clust_test_pred_df = pd.DataFrame()

# Loop through the test preds
for i in range(len(dtw_clust_deep_ar_test_preds)):
    # Grab a dictionary from the i'th entry in the test pred list
    sub_dict = dtw_clust_deep_ar_test_preds[i][0]
    # Grab the keys from that dict
    key_list = list(sub_dict.keys())
    # Loop through the keys and create a data frame of the test preds for that key (ts_index)
    for k in key_list:
        sub_df = pd.DataFrame({"ts_index": k,
                               "forec_mean": dtw_clust_deep_ar_test_preds[i][0][k],
                               "forec_median": dtw_clust_deep_ar_test_preds[i][1][k],
                               "lo_80": dtw_clust_deep_ar_test_preds[i][2][k],
                               "hi_80": dtw_clust_deep_ar_test_preds[i][3][k],
                               "lo_95": dtw_clust_deep_ar_test_preds[i][4][k],
                               "hi_95": dtw_clust_deep_ar_test_preds[i][5][k]
                              })
        # Append to the main test pred df
        dtw_clust_test_pred_df = dtw_clust_test_pred_df.append(sub_df)
# Reset the index of the df for easier use later
dtw_clust_test_pred_df = dtw_clust_test_pred_df.reset_index(drop=True)

In [83]:
# Initialize an empty data frame
test_set_df = pd.DataFrame()

# Save the test dictionaries into a list
dtw_clust_test_list = [dtw_clust_1_test_dict, 
                       dtw_clust_2_test_dict]

# For each dict in the list, loop through the kets in that dict and append the data to the empty test set df
for i in range(len(dtw_clust_test_list)):
    key_list = list(dtw_clust_test_list[i].keys())
    for k in key_list:
        test_set_df = test_set_df.append(dtw_clust_test_list[i][k])

# Reset the index to simplify working with the new df
test_set_df = test_set_df.reset_index(drop=True)

In [84]:
# Add the actual values as a column to the data frame
dtw_clust_test_pred_df['actual'] = test_set_df.y

# Compute the interval scores for each forecast
dtw_clust_test_pred_df['int_80_score'] = interval_score(dtw_clust_test_pred_df['actual'],
                                                        dtw_clust_test_pred_df['lo_80'],
                                                        dtw_clust_test_pred_df['hi_80'],
                                                        0.8)
                                                         
dtw_clust_test_pred_df['int_95_score'] = interval_score(dtw_clust_test_pred_df['actual'],
                                                        dtw_clust_test_pred_df['lo_95'],
                                                        dtw_clust_test_pred_df['hi_95'],
                                                        0.8)

In [85]:
# Initialize empty lists in which to save rmse, mae, and true mean
dtw_clust_mean_rmse_ls = list()
dtw_clust_mean_mae_ls = list()
dtw_clust_median_rmse_ls = list()
dtw_clust_median_mae_ls = list()
dtw_clust_true_means_ls = list()

for ts_idx in dtw_clust_test_pred_df.ts_index.unique():
    # Loop through each ts_index and subset the test preds
    dtw_clust_test_pred_df_sub = dtw_clust_test_pred_df.query("ts_index==@ts_idx").copy()
    
    # Compute the RMSE with the mean for the forecast and append to list
    mean_rmse_sub = mean_squared_error(dtw_clust_test_pred_df_sub['forec_mean'],
                                       dtw_clust_test_pred_df_sub['actual'],
                                       squared=False
                                      )
    dtw_clust_mean_rmse_ls.append(mean_rmse_sub)
    
    # Compute the MAE with the mean for the forecast and append to list
    mean_mae_sub = mean_absolute_error(dtw_clust_test_pred_df_sub['forec_mean'],
                                       dtw_clust_test_pred_df_sub['actual'])
    dtw_clust_mean_mae_ls.append(mean_mae_sub)
    
    # Compute the RMSE with the median for the forecast and append to list
    median_rmse_sub = mean_squared_error(dtw_clust_test_pred_df_sub['forec_median'],
                                         dtw_clust_test_pred_df_sub['actual'],
                                         squared=False
                                        )
    dtw_clust_median_rmse_ls.append(median_rmse_sub)
    
    # Compute the MAE with the median for the forecast and append to list
    median_mae_sub = mean_absolute_error(dtw_clust_test_pred_df_sub['forec_median'],
                                         dtw_clust_test_pred_df_sub['actual'])
    dtw_clust_median_mae_ls.append(median_mae_sub)
    
    # Compute the true mean and append to list
    actual_mean_sub = np.mean(dtw_clust_test_pred_df_sub['actual'])
    dtw_clust_true_means_ls.append(actual_mean_sub)          

In [86]:
# Print the point forecast scores using the mean as the forecast
np.mean(dtw_clust_mean_rmse_ls)

39.52216687772218

In [87]:
np.mean(dtw_clust_mean_mae_ls)

26.649797050960682

In [88]:
np.mean(np.array(dtw_clust_mean_rmse_ls)/np.array(dtw_clust_true_means_ls))

0.1711965734814829

In [89]:
np.mean(np.array(dtw_clust_mean_mae_ls)/np.array(dtw_clust_true_means_ls))

0.1153907377312456

In [90]:
# Print the point forecast scores using the median as the forecast
np.mean(dtw_clust_median_rmse_ls)

39.53631659871298

In [91]:
np.mean(dtw_clust_median_mae_ls)

26.652194984998875

In [92]:
np.mean(np.array(dtw_clust_median_rmse_ls)/np.array(dtw_clust_true_means_ls))

0.17125314008575154

In [93]:
np.mean(np.array(dtw_clust_median_mae_ls)/np.array(dtw_clust_true_means_ls))

0.11542798656017528

In [94]:
# Print the interval scores
dtw_clust_test_pred_df['int_80_score'].mean()

61.193451221833705

In [95]:
dtw_clust_test_pred_df['int_95_score'].mean()

101.7514858341727

In [96]:
# Group by ts_index and compute the scaled interval scores
dtw_clust_test_pred_df_grouped = dtw_clust_test_pred_df.groupby("ts_index")\
.agg({"int_95_score":"mean", "int_80_score":"mean", "actual":"mean"}).reset_index()

dtw_clust_test_pred_df_grouped['int_95_score_scaled'] = dtw_clust_test_pred_df_grouped['int_95_score']/dtw_clust_test_pred_df_grouped['actual']
dtw_clust_test_pred_df_grouped['int_80_score_scaled'] = dtw_clust_test_pred_df_grouped['int_80_score']/dtw_clust_test_pred_df_grouped['actual']

In [97]:
# Print the scaled interval scores
dtw_clust_test_pred_df_grouped[['int_80_score_scaled', 'int_95_score_scaled']].mean()

int_80_score_scaled    0.286974
int_95_score_scaled    0.471749
dtype: float64

In [98]:
# Save results to CSV
dtw_clust_test_pred_df.to_csv("Results/Global/DeepAR/DTW/test_set_pred.csv", index=False)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27


  timestamp = pd.Timestamp(timestamp_input, freq=freq)
  if isinstance(timestamp.freq, Tick):
  timestamp.floor(timestamp.freq), timestamp.freq
  return pd.Timestamp(
