In [3]:
from pathlib import Path
import pandas as pd
from pytorch_forecasting.data.timeseries import TimeSeriesDataSet
import os

In [4]:
# Run configuration: which zone's data to load, the batch sizes for the
# training and test loaders, and the worker count reported below.
zone = 'SUD'
batch_size, test_batch_size = 256, 1000
num_workers = 0

print(f'Building {zone} data loader with {num_workers} workers')

Building SUD data loader with 0 workers


In [7]:
# Read the CSV for the configured zone from the data directory.
PATH = '../data'
csv_name = f'wind_{zone}_idx.csv'
FILE = os.path.join(PATH, csv_name)
data = pd.read_csv(FILE)

In [8]:
# Bounds on the encoder (history) and prediction (horizon) window lengths.
min_encoder_length, max_encoder_length = 1, 400
min_prediction_length, max_prediction_length = 1, 7

# Last time index allowed in the training frame: the final
# `max_prediction_length` steps are held back.
last_time_idx = data["time_idx"].max()
training_cutoff = last_time_idx - max_prediction_length

In [9]:
# Training dataset: built only from rows at or before the cutoff, with
# 'energy' as both the target and the single time-varying unknown real.
train_rows = data[data["time_idx"] <= training_cutoff]
training = TimeSeriesDataSet(
    train_rows,
    time_idx='time_idx',
    target='energy',
    group_ids=['group'],
    min_encoder_length=min_encoder_length,
    max_encoder_length=max_encoder_length,
    min_prediction_length=min_prediction_length,
    max_prediction_length=max_prediction_length,
    time_varying_unknown_reals=['energy'],
)

# Test dataset: derived from `training` but fed the full frame, in
# prediction mode and with randomization stopped.
testing = TimeSeriesDataSet.from_dataset(
    training,
    data,
    predict=True,
    stop_randomization=True,
)

In [10]:
# Display the full parameter dict of the training dataset, including the
# values that were not passed explicitly when it was constructed.
training.get_parameters()

{'time_idx': 'time_idx',
 'target': 'energy',
 'group_ids': ['group'],
 'weight': None,
 'max_encoder_length': 400,
 'min_encoder_length': 1,
 'min_prediction_idx': 0,
 'min_prediction_length': 1,
 'max_prediction_length': 7,
 'static_categoricals': [],
 'static_reals': ['encoder_length'],
 'time_varying_known_categoricals': [],
 'time_varying_known_reals': [],
 'time_varying_unknown_categoricals': [],
 'time_varying_unknown_reals': ['energy'],
 'variable_groups': {},
 'constant_fill_strategy': {},
 'allow_missing_timesteps': False,
 'lags': {},
 'add_relative_time_idx': False,
 'add_target_scales': False,
 'add_encoder_length': True,
 'target_normalizer': GroupNormalizer(
 	method='standard',
 	groups=[],
 	center=True,
 	scale_by_group=False,
 	transformation='relu',
 	method_kwargs={}
 ),
 'categorical_encoders': {'__group_id__group': NaNLabelEncoder(add_nan=False, warn=True),
  'group': NaNLabelEncoder(add_nan=False, warn=True)},
 'scalers': {'encoder_length': StandardScaler()},
 '

In [11]:
# Create dataloaders for the model.
# The worker count is taken from `num_workers` (defined with the other run
# settings) instead of a hard-coded 0, so the value announced in the
# "Building ... data loader" message is the one actually used and changing
# the setting takes effect here.
train_loader = training.to_dataloader(
    train=True, batch_size=batch_size, num_workers=num_workers
)
test_loader = testing.to_dataloader(
    train=False, batch_size=test_batch_size, num_workers=num_workers
)

In [12]:
# Pull one batch from the training loader, dump both of its parts, then
# list the size of every entry in the input dict.
first_batch = next(iter(train_loader))
x, y = first_batch
print("x =", x)
print("\ny =", y)
print("\nsizes of x =")
for name, tensor in x.items():
    print(f"\t{name} = {tensor.size()}")

x = {'encoder_cat': tensor([], size=(256, 400, 0), dtype=torch.int64), 'encoder_cont': tensor([[[ 1.0000, -0.8688],
         [ 1.0000, -0.6241],
         [ 1.0000, -0.7220],
         ...,
         [ 1.0000, -0.7612],
         [ 1.0000, -0.0073],
         [ 1.0000, -0.7514]],

        [[ 1.0000, -0.1639],
         [ 1.0000,  0.7955],
         [ 1.0000, -0.2520],
         ...,
         [ 1.0000, -0.3597],
         [ 1.0000, -0.7318],
         [ 1.0000, -0.8982]],

        [[ 1.0000,  0.0319],
         [ 1.0000,  0.2081],
         [ 1.0000,  0.1591],
         ...,
         [ 1.0000,  0.2081],
         [ 1.0000, -0.7709],
         [ 1.0000, -0.2814]],

        ...,

        [[ 0.6850, -0.4283],
         [ 0.6850, -0.6143],
         [ 0.6850, -0.8688],
         ...,
         [ 0.0000,  0.0000],
         [ 0.0000,  0.0000],
         [ 0.0000,  0.0000]],

        [[-0.9550,  0.5214],
         [-0.9550,  1.1871],
         [-0.9550,  1.5004],
         ...,
         [ 0.0000,  0.0000],
         

In [None]:
# Length of the training loader, i.e. how many batches one pass yields.
len(train_loader)

16

In [None]:
# Inspect the input part (element 0) of a training batch.
# The batch is drawn once: every `next(iter(train_loader))` call builds a
# new iterator and loads another batch, so calling it separately for
# `type` and `len` would describe two different batches.
batch_inputs = next(iter(train_loader))[0]
type(batch_inputs), len(batch_inputs)

(dict, 11)

In [None]:
# Inspect the target part (element 1) of a training batch.
# The batch is drawn once so that `type` and `len` describe the same
# object rather than two independently loaded batches.
batch_target = next(iter(train_loader))[1]
type(batch_target), len(batch_target)

(tuple, 2)

In [None]:
# Show the target part (element 1) of a freshly drawn training batch.
# NOTE(review): the second tuple entry is presumably the sample weight,
# which would be None because no weight column is configured — confirm.
next(iter(train_loader))[1]

(tensor([[ 2.0000,  2.0000,  2.0000,  ...,  2.0000,  2.0000,  2.0000],
         [20.2000, 20.2000, 20.2000,  ..., 20.2000, 20.2000, 20.2000],
         [ 6.9000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         ...,
         [26.7000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [26.7000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 1.5000,  1.5000,  1.5000,  ...,  1.5000,  1.5000,  1.5000]]),
 None)

In [None]:
# List the keys of the input dict (element 0) of a freshly drawn
# training batch.
next(iter(train_loader))[0].keys()

dict_keys(['encoder_cat', 'encoder_cont', 'encoder_target', 'encoder_lengths', 'decoder_cat', 'decoder_cont', 'decoder_target', 'decoder_lengths', 'decoder_time_idx', 'groups', 'target_scale'])

In [None]:
# Print the shape of every tensor in the input dict of ONE training batch.
# Drawing the batch a single time matters: calling `next(iter(train_loader))`
# inside the loop would load a new batch per key, so the reported shapes
# could come from different batches (and padded lengths need not agree
# between keys), besides loading a dozen batches for no reason.
shape_batch_inputs = next(iter(train_loader))[0]
for k, tensor in shape_batch_inputs.items():
    print(k, '\t', tensor.shape)

encoder_cat 	 torch.Size([256, 400, 0])
encoder_cont 	 torch.Size([256, 400, 2])
encoder_target 	 torch.Size([256, 399])
encoder_lengths 	 torch.Size([256])
decoder_cat 	 torch.Size([256, 7, 0])
decoder_cont 	 torch.Size([256, 7, 2])
decoder_target 	 torch.Size([256, 7])
decoder_lengths 	 torch.Size([256])
decoder_time_idx 	 torch.Size([256, 7])
groups 	 torch.Size([256, 1])
target_scale 	 torch.Size([256, 2])
