In [None]:

%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json

In [None]:

from gluonts.dataset.repository import get_dataset, dataset_names
from gluonts.dataset.util import to_pandas
# List the dataset names exposed by gluonts.dataset.repository.
print(f"Available datasets: {dataset_names}")

In [None]:
# Fetch the M4 hourly benchmark through the GluonTS dataset repository.
# (A bare `dataset_names` expression used to sit at the top of this cell; only
# the last expression of a cell is displayed, so it had no effect.)
dataset = get_dataset("m4_hourly")

# Last expression of the cell: show the repr of the training split.
dataset.train

In [None]:
# --- Compare the first two training series --------------------------------
# Both entries are drawn from ONE iterator so that the second `next` call
# advances to the next series; `next(iter(dataset.train))` would start over
# and hand back the first entry again.
entry_iterable = iter(dataset.train)
entry = next(entry_iterable)
entry_2 = next(entry_iterable)
print(f"type(entry): {type(entry)}")

train_series = to_pandas(entry)
train_series_2 = to_pandas(entry_2)

# Label each line on the artist itself. Passing a one-element list to
# plt.legend() attaches that single label to the FIRST line drawn, which
# mislabelled the blue series as "train series_2".
train_series.plot(c='b', marker='x', label="train series")
train_series_2.plot(c='r', marker='o', label="train series_2")
plt.grid(which="both")
plt.legend(loc="upper left")
plt.show()

# Series.equals returns a single bool and tolerates different lengths or
# indexes, whereas element-wise `==` raises for differently-labelled Series
# (and a bare expression in the middle of a cell is never displayed anyway).
print(f"First two train series identical: {train_series.equals(train_series_2)}")

# --- Overlay the first 41 training series ----------------------------------
LAST_PLOTTED_IDX = 40  # stop after this 0-based index to keep the figure readable
count = 0
entry_data_prev = None  # reset explicitly so a re-run never sees stale state
plt.figure()
for entry_idx, entry_data in enumerate(dataset.train):
    count += 1
    cur_entry_data = to_pandas(entry_data)
    # Compare with the previous series as soon as one exists (i.e. from index 1).
    if entry_data_prev is not None:
        print(f"{cur_entry_data.equals(to_pandas(entry_data_prev))}")

    cur_entry_data.plot(marker='o', label=entry_idx)
    entry_data_prev = entry_data
    if entry_idx == LAST_PLOTTED_IDX:
        break
plt.grid(which="both")
plt.legend(loc="upper left")

# Report how many series were processed (the counter, not the last index).
print(f"Counter: {count}")

In [None]:
# Display the training split again (last expression of the cell).
dataset.train

In [None]:
# Plot the first series of the test split and mark where the matching
# training series ends. Note that `entry` is rebound to the test entry here.
entry = next(iter(dataset.test))
test_series = to_pandas(entry)

# Labels live on the artists themselves, so the legend needs no explicit list.
test_series.plot(label="test series")
plt.axvline(train_series.index[-1], color="r", label="end of train series")
plt.grid(which="both")
plt.legend(loc="upper left")
plt.show()

# NB: Why does it seem that only the first item of dataset.[train|test] is used in this analysis, even though the other items have totally different means and variances? That is, does the estimator in the call predictor = estimator.train(dataset.train) really receive only the series plotted above? No: all series are passed, but only the 0th one is analyzed in the tutorial. You can access the second series with index 1, the third with index 2, etc. That is, all series are handled by the above call, not just the first one.

In [None]:
# Summarize the sizes involved. The "test dataset" / "train dataset" lengths
# below are the lengths of the single series loaded into test_series and
# train_series, not the number of series in each split.
forecast_window_length = len(test_series) - len(train_series)
print(f"Length of forecasting window in test dataset: {forecast_window_length}")
print(f"Recommended prediction horizon: {dataset.metadata.prediction_length}")
print(f"Frequency of the time series: {dataset.metadata.freq}")
print(f"Length of test dataset: {len(test_series)}")
print(f"Length of train dataset: {len(train_series)}")

# Custom data sets

In [None]:
# Parameters of the synthetic dataset.
N = 10  # number of time series
T = 100  # number of timesteps
prediction_length = 24  # length of the window cut from the end of each series for training
freq = "1H"  # hourly sampling
SEED = 42  # fixed seed so that Restart & Run All regenerates identical data

# Draw Gaussian noise of shape (N, T) from a dedicated, seeded generator so the
# data is reproducible and NumPy's global random state is left untouched.
custom_dataset = np.random.default_rng(SEED).normal(size=(N, T))
start = pd.Period("01-01-2019", freq=freq)  # can be different for each time series

In [None]:
# Display the shared start period used for every synthetic series.
start

In [None]:
from gluonts.dataset.common import ListDataset

In [None]:
# Training split: every series with its final `prediction_length` steps cut
# off; each entry carries the "target" values and the common "start" period.
train_entries = [
    {"target": series[:-prediction_length], "start": start}
    for series in custom_dataset
]
train_ds = ListDataset(train_entries, freq=freq)

# Test split: the complete series, with the same "target" / "start" fields.
test_entries = [{"target": series, "start": start} for series in custom_dataset]
test_ds = ListDataset(test_entries, freq=freq)

In [None]:
# Shape of the training slice: all series, last `prediction_length` steps removed.
custom_dataset[:, :-prediction_length].shape

In [None]:
# Shape of the full array (the data the test dataset is built from), for comparison.
custom_dataset[:, :].shape

# Training an existing model (Estimator)
GluonTS comes with a number of pre-built models. All the user needs to do is configure some hyperparameters. The existing models focus on (but are not limited to) probabilistic forecasting. Probabilistic forecasts are predictions in the form of a probability distribution, rather than simply a single point estimate.

We will begin with GluonTS’s pre-built feedforward neural network estimator, a simple but powerful forecasting model. We will use this model to demonstrate the process of training a model, producing forecasts, and evaluating the results.

GluonTS’s built-in feedforward neural network (SimpleFeedForwardEstimator) accepts an input window of length context_length and predicts the distribution of the values of the subsequent prediction_length values. In GluonTS parlance, the feedforward neural network model is an example of an Estimator. In GluonTS, Estimator objects represent a forecasting model as well as details such as its coefficients, weights, etc.

In general, each estimator (pre-built or custom) is configured by a number of hyperparameters that can be either common (but not binding) among all estimators (e.g., the prediction_length) or specific for the particular estimator (e.g., number of layers for a neural network or the stride in a CNN).

Finally, each estimator is configured by a Trainer, which defines how the model will be trained, i.e., the number of epochs, the learning rate, etc.

In [None]:
from gluonts.mx import SimpleFeedForwardEstimator, Trainer

In [None]:
# Training-loop settings are built separately so they are easy to spot and tweak.
trainer = Trainer(
    ctx="cpu",
    epochs=5,
    learning_rate=1e-3,
    num_batches_per_epoch=100,
)

# Feed-forward estimator with a single hidden layer of 10 units. It looks at
# 100 past steps and forecasts the horizon recommended by the dataset metadata.
estimator = SimpleFeedForwardEstimator(
    num_hidden_dimensions=[10],
    prediction_length=dataset.metadata.prediction_length,
    context_length=100,
    trainer=trainer,
)

After specifying our estimator with all the necessary hyperparameters we can train it using our training dataset dataset.train by invoking the train method of the estimator. The training algorithm returns a fitted model (or a Predictor in GluonTS parlance) that can be used to construct forecasts.


In [None]:
# Fit the estimator on the training split; the returned fitted model (a
# Predictor in GluonTS terms) is what later cells use to produce forecasts.
predictor = estimator.train(dataset.train)

In [None]:
# Display the prediction horizon recommended by the dataset metadata.
dataset.metadata.prediction_length

# Visualize and evaluate forecasts
With a predictor in hand, we can now predict the last window of the dataset.test and evaluate our model’s performance.

GluonTS comes with the make_evaluation_predictions function that automates the process of prediction and model evaluation. Roughly, this function performs the following steps:

- Removes the final window of length prediction_length of the dataset.test that we want to predict
- The estimator uses the remaining data to predict (in the form of sample paths) the “future” window that was just removed
- The module outputs the forecast sample paths and the dataset.test (as python generator objects)

In [None]:
# Display the length of the window that will be predicted (the horizon from the dataset metadata).
dataset.metadata.prediction_length

In [None]:
from gluonts.evaluation import make_evaluation_predictions

In [None]:
# Produce forecasts for the test split. Both return values are generators
# (forecasts and the corresponding time series); nothing is materialized
# until they are consumed.
forecast_it, ts_it = make_evaluation_predictions(
    dataset=dataset.test,  # test dataset
    predictor=predictor,  # predictor
    num_samples=100,  # number of sample paths we want for evaluation
)

First, we can convert these generators to lists to ease the subsequent computations.

In [None]:
# Materialize both generators, forecasts first, so the results can be indexed
# and reused in the cells below.
forecasts, tss = list(forecast_it), list(ts_it)

We can examine the first element of these lists (that corresponds to the first time series of the dataset). Let’s start with the list containing the time series, i.e., tss. We expect the first entry of tss to contain the (target of the) first time series of dataset.test.

In [None]:
# Select the time series inspected in the following cells: the SECOND entry
# (index 1) of the time series list. Use tss[0] to look at the first series.
ts_entry = tss[1]

In [None]:
# First five values of the selected time series, converted from pandas to a
# flat one-dimensional NumPy array.
np.array(ts_entry[:5]).reshape(-1)

In [None]:
# Number of forecast objects produced (presumably one per test series — TODO confirm).
len(forecasts)

In [None]:
# Walk a single iterator over dataset.test: discard the first entry, then keep
# the second one, which is the entry inspected below.
dataset_test_entry_iterable = iter(dataset.test)
next(dataset_test_entry_iterable)  # skip the first entry
dataset_test_entry = next(dataset_test_entry_iterable)




In [None]:
# First 5 values of the "target" field of the selected test entry, for
# comparison with the values taken from the time series list.
dataset_test_entry["target"][:5]

The entries in the forecast list are a bit more complex. They are objects that contain all the sample paths in the form of numpy.ndarray with dimension (num_samples, prediction_length), the start date of the forecast, the frequency of the time series, etc. We can access all this information by simply invoking the corresponding attribute of the forecast object.

In [None]:
# Select the forecast inspected in the following cells: the SECOND entry
# (index 1) of the forecast list. Use forecasts[0] for the first series.
forecast_entry = forecasts[1]

In [None]:
# Print the basic attributes of the selected forecast object: sample count,
# shape of the sample array, start of the forecast window and frequency.
print(f"Number of sample paths: {forecast_entry.num_samples}")
print(f"Dimension of samples: {forecast_entry.samples.shape}")
print(f"Start date of the forecast window: {forecast_entry.start_date}")
print(f"Frequency of the time series: {forecast_entry.freq}")

We can also do calculations to summarize the sample paths, such as computing the mean or a quantile for each of the 48 time steps in the forecast window.

In [None]:
# Summaries of the forecast window: the mean and the 0.5-quantile (median).
print(f"Mean of the future window:\n {forecast_entry.mean}")
print(f"0.5-quantile (median) of the future window:\n {forecast_entry.quantile(0.5)}")

In [None]:
# Shape of the mean forecast (presumably one value per forecast time step — TODO confirm).
forecast_entry.mean.shape

Forecast objects have a plot method that can summarize the forecast paths as the mean, prediction intervals, etc. The prediction intervals are shaded in different colors as a “fan chart”.

In [None]:
# Plot the last 150 observations of the selected series (index converted to
# timestamps) and overlay the forecast with its labelled elements.
recent_observations = ts_entry[-150:].to_timestamp()
plt.plot(recent_observations)
forecast_entry.plot(show_label=True)
plt.legend()

In [None]:
# Shape of the slice holding the last 150 observations that were plotted.
ts_entry[-150:].shape

In [None]:
# The same slice after converting its index with to_timestamp(), as passed to plt.plot.
ts_entry[-150:].to_timestamp()

In [None]:
# Shape of the mean forecast, to compare against the slice of observations inspected above.
forecast_entry.mean.shape

In [None]:
# Display the mean forecast values themselves.
forecast_entry.mean

In [None]:
# Shape of the raw sample paths; per the tutorial text this is (num_samples, prediction_length).
forecast_entry.samples.shape

We can also evaluate the quality of our forecasts numerically. In GluonTS, the Evaluator class can compute aggregate performance metrics, as well as metrics per time series (which can be useful for analyzing performance across heterogeneous time series).

In [None]:
from gluonts.evaluation import Evaluator

In [None]:
# Configure the evaluator with the 0.1, 0.5 and 0.9 quantiles, then score the
# forecasts against the time series. The call returns the aggregate metrics
# and the per-time-series metrics.
evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
agg_metrics, item_metrics = evaluator(tss, forecasts)

In [None]:
len(tss) # number of time series in the evaluation (tss holds one entry per series of dataset.test)

The aggregate metrics, agg_metrics, aggregate both across time-steps and across time series.

In [None]:
# Pretty-print the aggregate metrics as indented JSON.
print(json.dumps(agg_metrics, indent=4))

Individual metrics are aggregated only across time-steps.

In [None]:
# Each row holds the metrics of one predicted time series, aggregated over the
# time steps (the time axis) of its forecast window.
# Observed by the author: len(item_metrics) == 414, i.e. 414 rows in total.
item_metrics.head()

In [None]:
# Scatter the per-series MASE against the per-series MSIS.
item_metrics.plot.scatter(x="MSIS", y="MASE")
plt.grid(which="both")
plt.show()