In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from gluonts.dataset.repository.datasets import get_dataset, dataset_recipes
from gluonts.dataset.util import to_pandas

import torch
print(torch.__version__)

# Load Datasets from GluonTS

In [None]:
# Show the name of every dataset recipe registered in `dataset_recipes`.
recipe_names = list(dataset_recipes.keys())
print(f"Available datasets: {recipe_names}")

In [None]:
# Name of the GluonTS dataset recipe to load; the same name is used below
# to build the output CSV file name.
d_name = "exchange_rate"
# NOTE(review): regenerate=False presumably reuses an already-prepared local
# copy instead of rebuilding it — confirm against the GluonTS documentation.
dataset = get_dataset(d_name, regenerate=False)
# Last expression of the cell: the notebook displays the dataset metadata.
dataset.metadata

In [None]:
# Flatten the GluonTS train/test splits into one long-format DataFrame with
# columns (datetime, sensor, value, time_idx) and export it as CSV.
# NOTE(review): the original comment read "to TimeSeriesDataSet" — presumably
# this layout is what a downstream TimeSeriesDataSet consumes; confirm.
train_iter = iter(dataset.train)
test_iter = iter(dataset.test)

# One (train, test) pair is consumed per category of the first static
# categorical feature; the metadata value is cast with int() before use.
n_series = int(dataset.metadata.feat_static_cat[0].cardinality)

# Collect the per-series frames and concatenate ONCE after the loop.
# Calling pd.concat inside the loop re-copies every accumulated row on each
# iteration, and concatenating onto an empty frame is deprecated in recent
# pandas releases.
sensor_frames = []
for i in range(n_series):
    train_series = to_pandas(next(train_iter))
    test_series = to_pandas(next(test_iter))

    # Keep only the part of the test series that lies strictly after the last
    # training timestamp. A comparison mask selects the same rows as slicing
    # from `index[-1] + 1` on a sorted index, without relying on integer
    # arithmetic being defined for the index labels.
    test_tail = test_series[test_series.index > train_series.index[-1]]

    sensor_readings = pd.concat([train_series, test_tail]).to_frame(name='value')
    sensor_readings = sensor_readings.reset_index(names=['datetime'])
    sensor_readings['sensor'] = i
    sensor_frames.append(sensor_readings)

# Fixed column order so the exported CSV layout does not depend on how the
# per-series frames happened to be assembled.
long_columns = ['datetime', 'sensor', 'value']
if sensor_frames:
    data_df = pd.concat(sensor_frames, ignore_index=True)[long_columns]
else:
    # No series at all: still produce a frame with the expected columns.
    data_df = pd.DataFrame(columns=long_columns)

data_df = data_df.astype(dict(datetime='datetime64[ns]', sensor=str))

# Give every distinct timestamp a dense, 0-based index in chronological order
# and attach it to each row as `time_idx`.
time_idx_df = (
    pd.DataFrame(data_df['datetime'].unique(), columns=["datetime"])
    .sort_values(by="datetime")
    .reset_index(drop=True)
    .reset_index(names="time_idx")
)
data_df = pd.merge(data_df, time_idx_df, on="datetime", how="left")

data_df.to_csv("../datasets/%s.csv"%(d_name), index=False)
print(data_df.shape[0])
print(data_df.head())

# Load Datasets from HDF5 (.h5) Files

In [None]:
# Dataset to convert. Names listed by the original author:
# pems-bay, metr-la, pemsd7m, gz-metro, hz-metro, pems03_flow, pems04_flow,
# pems07_flow, pems08_flow, seattle
d_name = "pems04_flow"

# The file is read from ../datasets/<d_name>/<d_name>.h5
h5_path = "../datasets/%s/%s.h5" % (d_name, d_name)
data_df = pd.read_hdf(h5_path)
print(data_df.shape)

In [None]:
# Reshape the wide frame (one row per timestamp, one column per sensor) into
# long format with columns (datetime, time_idx, sensor, value) and export it.

# Name the index explicitly before moving it into a column. The original
# `rename(columns={'index': 'datetime'})` only worked when the index was
# unnamed; with a named index the timestamp column silently kept that name.
# Building a new frame also avoids mutating the loaded frame in place.
wide_df = data_df.rename_axis("datetime").reset_index()

# Every column after `datetime` is a sensor column at this point.
sensor_columns = list(wide_df.columns[1:])

# Row position in the wide frame serves as the integer time index.
wide_df["time_idx"] = np.arange(wide_df.shape[0])

data_df = pd.melt(
    wide_df,
    id_vars=["datetime", "time_idx"],
    value_vars=sensor_columns,
    var_name="sensor",
)

# Sensor labels are first cast to int and then to str, as in the original
# code, so the stored labels are the string form of the integer ids.
data_df = data_df.astype(dict(sensor=int, time_idx=int)).astype(dict(sensor=str))
print(data_df.shape)
print(data_df.head())

data_df.to_csv("../datasets/%s.csv"%(d_name), index=False)