In [14]:
# Reload edited project modules automatically so code changes are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2
from dataclasses import dataclass
from pathlib import Path
import pandas as pd
import sys
# Put the directory two levels up on the import path so that `tsbench` resolves
# (presumably the repository root — TODO confirm against the notebook location).
sys.path.append('../../')
from omegaconf import OmegaConf
from dacite import from_dict

from tsbench.tslib.loading.csv_loader import CSVTimeSeriesDataset
from tsbench.tslib.traindataset_generator import TimeSeriesTrainDatasetGeneratorConfig, TimeSeriesTrainDatasetGenerator

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### With an unnormalized dataset

Then we need to create a configuration for tslib. For this we need to mirror the config defined in `tslib/traindataset_generator.py`:

In [22]:

# Minimal tslib config: the time series are NOT normalized in this example.
# The YAML below mirrors the fields of `TimeSeriesTrainDatasetGeneratorConfig`.
#
# NOTE(review): `data_folder` is an absolute, machine-specific path — point it
# at your local copy of the `nsdb_2019` data files before running.
# NOTE(review): no seed is passed to `random_split` here, so the train/val
# membership may differ between runs — confirm how the split is seeded.

cfg_yaml = """
pipeline:
  dataset:
    name: multicsvloader
    kwargs:
      data_folder: /home/daran/Program/tsbench/datafiles/nsdb_2019
      meta_columns: []
  windowing:
    window_size: 20 # each time series for the model will have length 20
    stride: 5 # consecutive windows are shifted by 5 timesteps
  # TODO add normalization of the features
  # store normalizer values in a file and load them in the normalizer
  feature_selector:
    select_features:
        - 'dni'
        - 'dew_point'
        - 'air_temperature'
        - 'wind_speed'
        - 'total_precipitable_water'
        - 'dhi'
    drop_features: []
  target_generator:
    name: many_to_many_regression
    kwargs:
      input_features:
        - 'dni'
        - 'dew_point'
        - 'air_temperature'
        - 'wind_speed'
        - 'total_precipitable_water'
      target_features:
        - 'dhi'
      target_shift: 0
      added_meta_features: []

split:
  name: random_split
  kwargs:
    lengths: [0.8, 0.2] # train, val
"""

# Parse in explicit stages instead of rebinding one name to three different
# types: YAML text -> OmegaConf DictConfig -> plain containers -> typed config.
cfg_dictconfig = OmegaConf.create(cfg_yaml)

# `cfg` (the typed dataclass) is the only name the following cells rely on.
cfg = from_dict(data_class=TimeSeriesTrainDatasetGeneratorConfig, data=OmegaConf.to_container(cfg_dictconfig))

In [23]:
# Display the typed config; fields that were not set in the YAML
# (e.g. `normalizer`, `partition_filter`) show up with their default values.
cfg

TimeSeriesTrainDatasetGeneratorConfig(pipeline=ComposedTimeSeriesDatasetConfig(dataset=NameAndKwargs(name='multicsvloader', kwargs={'data_folder': '/home/daran/Program/tsbench/datafiles/nsdb_2019', 'meta_columns': []}), windowing=TimeSeriesWindowDatasetConfig(window_size=20, stride=5, initial_offset=0, end_offset=0, future_steps=0, past_steps=0), partition_filter=NameAndKwargs(name='no_filter', kwargs={}), feature_selector=FeatureSelectorConfig(select_features=['dni', 'dew_point', 'air_temperature', 'wind_speed', 'total_precipitable_water', 'dhi'], drop_features=[]), normalizer=NormalizerConfig(normalizer_values={}, normalize_features=set(), drop_zero_variance_features=True, eps=1e-08, normalizer_file=None), transforms=[], target_generator=NameAndKwargs(name='many_to_many_regression', kwargs={'input_features': ['dni', 'dew_point', 'air_temperature', 'wind_speed', 'total_precipitable_water'], 'target_features': ['dhi'], 'target_shift': 0, 'added_meta_features': []}), cache_processed_dat

In [26]:
# Build the generator from the typed config and run it; the resulting splits
# are read from `ds_gen.train_split` / `ds_gen.validation_split` afterwards.
ds_gen = TimeSeriesTrainDatasetGenerator(cfg)
ds_gen.generate_dataset()

Sample: 100%|██████████| 50/50 [00:00<00:00, 373.84it/s]
Sample: 100%|██████████| 50/50 [00:00<00:00, 381.52it/s]
Generating window index: 100%|██████████| 50/50 [00:01<00:00, 31.80it/s]
Total number of dropped timesteps due to windowing: 0


In [27]:
# Training split produced by the generator (the first entry of `split.kwargs.lengths`).
train_ds = ds_gen.train_split

In [28]:
# Show the object repr to see which dataset class the split is.
train_ds

<tsbench.tslib.postprocessing.dataset_subset.TimeSeriesTrainDatasetSubset at 0x7fdf78189fc0>

In [29]:
# Metadata table for the training split: one row per window, including the
# source series `key` and the window's `num_timesteps`.
train_ds.get_meta_data_summary()

Unnamed: 0,ComposedTimeSeriesDataset_index,MultiCSVTimeSeriesDataset_index,key,index,num_timesteps
0,45155,12,King-Seattle_wi[|15715:15715|15715:15735|15735...,45155,20
1,78542,22,Wayne-Detroit_wi[|7600:7600|7600:7620|7620:7620|],78542,20
2,119964,34,Brazos-Bryan_wi[|4650:4650|4650:4670|4670:4670|],119964,20
3,100796,28,Dallas-Dallas_wi[|13840:13840|13840:13860|1386...,100796,20
4,141228,40,Travis-Austin_wi[|5940:5940|5940:5960|5960:5960|],141228,20
...,...,...,...,...,...
140035,173845,49,Utah-Provo_wi[|11480:11480|11480:11500|11500:1...,173845,20
140036,71315,20,Ventura-None_wi[|6475:6475|6475:6495|6495:6495|],71315,20
140037,34983,9,District_of_Columbia-Washington_wi[|17370:1737...,34983,20
140038,65807,18,Santa_Clara-San_Jose3_wi[|13945:13945|13945:13...,65807,20


In [30]:
# Inspect a single sample: a dict whose `x` entry holds the input-feature tensor of one window.
train_ds[0]

{'x': tensor([[0.0000e+00, 8.1000e+00, 8.2000e+00, 1.0000e+00, 1.5000e+00],
         [0.0000e+00, 7.3000e+00, 7.8000e+00, 1.0000e+00, 1.3000e+00],
         [0.0000e+00, 7.3000e+00, 7.6000e+00, 1.0000e+00, 1.1000e+00],
         [0.0000e+00, 6.8000e+00, 7.5000e+00, 1.0000e+00, 1.0000e+00],
         [0.0000e+00, 6.8000e+00, 7.3000e+00, 9.0000e-01, 1.0000e+00],
         [0.0000e+00, 6.3000e+00, 7.1000e+00, 9.0000e-01, 9.0000e-01],
         [0.0000e+00, 6.3000e+00, 6.7000e+00, 9.0000e-01, 9.0000e-01],
         [0.0000e+00, 5.7000e+00, 6.4000e+00, 9.0000e-01, 9.0000e-01],
         [0.0000e+00, 5.8000e+00, 6.1000e+00, 9.0000e-01, 9.0000e-01],
         [0.0000e+00, 5.3000e+00, 5.8000e+00, 1.0000e+00, 9.0000e-01],
         [0.0000e+00, 5.3000e+00, 5.5000e+00, 1.0000e+00, 9.0000e-01],
         [0.0000e+00, 4.8000e+00, 5.2000e+00, 1.0000e+00, 9.0000e-01],
         [5.5000e+01, 4.9000e+00, 5.4000e+00, 9.0000e-01, 9.0000e-01],
         [3.3900e+02, 5.3000e+00, 5.7000e+00, 7.0000e-01, 8.0000e-01],
 

In [31]:
# Expected shape: (window_size, number of input features) = (20, 5) for the config above.
train_ds[0]['x'].shape

torch.Size([20, 5])

In [32]:
# Sanity check of the split: number of windows in train and validation
# (should be roughly 4:1 for lengths [0.8, 0.2]) and the validation split's type.
len(train_ds), len(ds_gen.validation_split), type(ds_gen.validation_split)

(140040,
 35010,
 tsbench.tslib.postprocessing.dataset_subset.TimeSeriesTrainDatasetSubset)

In [33]:
# Input feature count and input sequence length; should match the five
# `input_features` and `window_size: 20` from the config.
train_ds.input_dim, train_ds.context_length

(5, 20)

In [34]:
# Target feature count and target sequence length; one target feature (`dhi`)
# with one value per timestep of the window (many-to-many regression).
train_ds.target_dim, train_ds.target_length

(1, 20)

In [35]:
# the original timeseries
# Each `.dataset` hop unwraps one layer around the data; after four hops we
# reach the underlying multi-CSV dataset, whose summary lists one row per
# original (un-windowed) time series with its full `num_timesteps`.
train_ds.dataset.dataset.dataset.dataset.get_meta_data_summary()

Sample: 100%|██████████| 50/50 [00:00<00:00, 359.42it/s]


Unnamed: 0,MultiCSVTimeSeriesDataset_index,key,index,num_timesteps
0,0,Richland-Columbia2,0,17520
1,1,Jackson-Kansas_City1,1,17520
2,2,Maricopa-Phoenix,2,17520
3,3,Miami-Dade-Miami,3,17520
4,4,San_Francisco-San_Francisco1,4,17520
5,5,San_Diego-National_City,5,17520
6,6,Fulton-Atlanta,6,17520
7,7,Clark-Las_Vegas2,7,17520
8,8,Onondaga-Syracuse,8,17520
9,9,District_of_Columbia-Washington,9,17520
