In [1]:
import numpy as np
import pandas as pd

from pytorch_forecasting import TimeSeriesDataSet
from torch.utils.data import WeightedRandomSampler

In [2]:
test_data = pd.DataFrame(
    dict(
        value=np.random.rand(30) - 0.5,
        group=np.repeat(np.arange(3), 10),
        time_idx=np.tile(np.arange(10), 3),
    )
)
test_data.tail()

Unnamed: 0,value,group,time_idx
25,-0.453143,2,5
26,-0.259933,2,6
27,0.240474,2,7
28,-0.081867,2,8
29,0.005289,2,9


In [5]:
# create the dataset from the pandas dataframe
dataset = TimeSeriesDataSet(
    test_data,
    group_ids=["group"],
    target="value",
    time_idx="time_idx",
    min_encoder_length=5,
    max_encoder_length=5,
    min_prediction_length=2,
    max_prediction_length=2,
    time_varying_unknown_reals=["value"],
)

In [6]:
dataset.get_parameters()

{'time_idx': 'time_idx',
 'target': 'value',
 'group_ids': ['group'],
 'weight': None,
 'max_encoder_length': 5,
 'min_encoder_length': 5,
 'min_prediction_idx': np.int64(0),
 'min_prediction_length': 2,
 'max_prediction_length': 2,
 'static_categoricals': None,
 'static_reals': None,
 'time_varying_known_categoricals': None,
 'time_varying_known_reals': None,
 'time_varying_unknown_categoricals': None,
 'time_varying_unknown_reals': ['value'],
 'variable_groups': None,
 'constant_fill_strategy': None,
 'allow_missing_timesteps': False,
 'lags': None,
 'add_relative_time_idx': False,
 'add_target_scales': False,
 'add_encoder_length': False,
 'target_normalizer': GroupNormalizer(
 	method='standard',
 	groups=None,
 	center=True,
 	scale_by_group=False,
 	transformation=None,
 	method_kwargs={}
 ),
 'categorical_encoders': {'__group_id__group': NaNLabelEncoder(add_nan=False, warn=True),
  'group': NaNLabelEncoder(add_nan=False, warn=True)},
 'scalers': {},
 'randomize_length': None,
 '

In [7]:
# convert the dataset to a dataloader
dataloader = dataset.to_dataloader(batch_size=4)

# and load the first batch
x, y = next(iter(dataloader))
print("x =", x)
print("\ny =", y)
print("\nsizes of x =")
for key, value in x.items():
    print(f"\t{key} = {value.size()}")

x = {'encoder_cat': tensor([], size=(4, 5, 0), dtype=torch.int64), 'encoder_cont': tensor([[[-0.0175],
         [ 1.2501],
         [-0.4612],
         [-0.9384],
         [-0.2445]],

        [[ 1.0438],
         [ 0.9780],
         [-0.7666],
         [ 0.4912],
         [-0.5486]],

        [[ 0.9780],
         [-0.7666],
         [ 0.4912],
         [-0.5486],
         [ 0.9100]],

        [[-1.2037],
         [ 1.1265],
         [-0.0065],
         [-0.0175],
         [ 1.2501]]]), 'encoder_target': tensor([[ 0.0515,  0.3661, -0.0587, -0.1771, -0.0049],
        [ 0.3149,  0.2985, -0.1345,  0.1777, -0.0804],
        [ 0.2985, -0.1345,  0.1777, -0.0804,  0.2817],
        [-0.2430,  0.3354,  0.0542,  0.0515,  0.3661]]), 'encoder_lengths': tensor([5, 5, 5, 5]), 'decoder_cat': tensor([], size=(4, 2, 0), dtype=torch.int64), 'decoder_cont': tensor([[[ 0.7905],
         [-1.2876]],

        [[ 0.9100],
         [ 0.4630]],

        [[ 0.4630],
         [-1.3411]],

        [[-0.4612],
   

In [8]:
dir(dataset)

['__add__',
 '__annotations__',
 '__class__',
 '__class_getitem__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__firstlineno__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__orig_bases__',
 '__parameters__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__static_attributes__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_categorical_encoders',
 '_check_params',
 '_check_tensors',
 '_collate_fn',
 '_constant_fill_strategy',
 '_construct_index',
 '_data_properties',
 '_data_to_tensors',
 '_get_auto_normalizer',
 '_get_lagged_names',
 '_group_ids',
 '_group_ids_mapping',
 '_lags',
 '_overwrite_values',
 '_preprocess_data',
 '_scalers',
 '_set_lagged_variables',
 '_set_target_normalizer',
 '_static_categoricals',
 '_static_reals',
 '_time_varying_known_categori

In [10]:
test_data

Unnamed: 0,value,group,time_idx
0,0.314872,0,0
1,0.298547,0,1
2,-0.134465,0,2
3,0.177728,0,3
4,-0.080358,0,4
5,0.281673,0,5
6,0.170725,0,6
7,-0.277053,0,7
8,0.455962,0,8
9,0.018005,0,9


In [11]:
type(dataset.index)

pandas.core.frame.DataFrame

In [12]:

# length of probabilties for sampler have to be equal to the length of index
probabilities = np.sqrt(1 + data.loc[dataset.index, "target"])
sampler = WeightedRandomSampler(probabilities, len(probabilities))
dataset.to_dataloader(train=True, sampler=sampler, shuffle=False)


NameError: name 'data' is not defined