# Novel Implementation of the KIWI Task

In [None]:
# Notebook setup: display behaviour, figure format, and automatic module reloading.
%config InteractiveShell.ast_node_interactivity='last_expr_or_assign'  # always print last expr.
# Render inline matplotlib figures as SVG.
%config InlineBackend.figure_format = 'svg'
# Load the autoreload extension; mode 2 re-imports modules before each cell runs,
# so edits to library code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Show matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
import tsdm

In [None]:
# Instantiate the KIWI runs task provided by tsdm.
task = tsdm.tasks.KIWI_RUNS_TASK()

# Trailing expression: shown as the cell output.
# NOTE(review): presumably describes which samples belong to which split — confirm in tsdm.
task.split_idx

In [None]:
# Time-series table of the task's dataset. Later cells rely on its index
# carrying the levels "run_id" and "experiment_id".
ts = task.dataset.timeseries

# What does the sampler need to look like?

- 1. Sample from the index
    - Returns (ts, md) = ds[i]
- 2. Subsample from the timeseries
    - (ts[a:b], md)
- 3. Create sample from the slice

Size: ~1000-3000 timestamps.
∆t:  1s-11m, avg: 33.66s

obs-horizon: 2h ~ 200 timestamps -> 256
forecasting horizon: 1h ~ 100 timestamps -> 128

In [None]:
from tsdm.random.samplers import (
    HierarchicalSampler,
    SequenceSampler,
    SlidingWindowSampler,
)

In [None]:
# Turn the index levels of `ts` into ordinary columns, then take consecutive
# differences within every (run_id, experiment_id) group. The remaining
# column(s) are presumably the measurement timestamps, making `dt` the
# per-experiment sampling intervals — TODO confirm against the dataset schema.
dt = (
    ts.index
    .to_frame(index=False)
    .groupby(["run_id", "experiment_id"])
    .diff()
)

# Average step size over all groups (displayed as the cell output).
dt.mean()

In [None]:
from tsdm.utils.data import MappingDataset, TimeSeriesDataset

# Metadata table of the task; its index supplies the keys used below to pair
# each time series in `ts` with its metadata row.
md = task.metadata

In [None]:
# Build one TimeSeriesDataset per entry of the metadata index, pairing the
# matching selection of `ts` with its metadata row, and expose them all
# through a single key -> dataset mapping.
DS = MappingDataset(
    {
        key: TimeSeriesDataset(ts.loc[key], metadata=md.loc[key])
        for key in md.index
    }
)

In [None]:
# Second sampling level: one sliding-window sampler per dataset in DS, built
# over that dataset's time index. The horizons follow the notes above
# (2h observation window, then 1h forecasting window); consecutive windows
# start 15 minutes apart.
# NOTE(review): mode="slices" presumably yields slice objects rather than
# the selected data — confirm against SlidingWindowSampler.
subsamplers = {
    name: SlidingWindowSampler(
        dataset.timeseries.index,
        horizons=["2h", "1h"],
        stride="15m",
        shuffle=True,
        mode="slices",
    )
    for name, dataset in DS.items()
}

# Top level: combines DS with the per-dataset subsamplers built above.
sampler = HierarchicalSampler(DS, subsamplers, shuffle=True)

In [None]:
# Pull the first item out of the hierarchical sampler for inspection.
# NOTE(review): presumably an (outer key, window) pair — verify against HierarchicalSampler.
sample = next(iter(sampler))

In [None]:
from torch.utils.data import DataLoader

In [None]:
# Fetch the first batch of 10 samples through a DataLoader driven by the
# hierarchical sampler. The identity collate function returns the batch as the
# plain list of samples instead of attempting to stack them.
x = next(
    iter(
        DataLoader(
            DS,
            sampler=sampler,
            batch_size=10,
            collate_fn=lambda batch: batch,
        )
    )
)

In [None]:
# Grab the first dataset stored in DS, for interactive inspection.
ds = next(iter(DS.values()))

In [None]:
# Alternative second-level sampling: one SequenceSampler per dataset in DS,
# using seq_len=300 and stride=50 (units as defined by SequenceSampler).
# NOTE: this rebinds `subsamplers`, replacing the sliding-window samplers
# created in the earlier cell.
subsamplers = {
    name: SequenceSampler(dataset, seq_len=300, stride=50, shuffle=True)
    for name, dataset in DS.items()
}