In [None]:
# Notebook-wide IPython configuration.
%config InteractiveShell.ast_node_interactivity='last_expr_or_assign'  # always print last expr.
# Render inline figures as SVG.
%config InlineBackend.figure_format = 'svg'
# Autoreload mode 2, so edits to imported modules are picked up without a restart.
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
import tsdm
from tsdm.datasets import TimeSeriesCollection
from tsdm.random.samplers import HierarchicalSampler, SlidingWindowSampler

In [None]:
import logging

# Configure the root logger so that records of level INFO and above are emitted.
logging.basicConfig(level=logging.INFO)

In [None]:
# Instantiate the KIWI_RUNS dataset. Note that `ds` is rebound to other
# datasets (KIWI, InSilicoData) in later cells.
ds = tsdm.datasets.KIWI_RUNS()

In [None]:
# Display the dataset's `timeseries` attribute.
ds.timeseries

In [None]:
# Display the dataset's `timeseries` attribute.
# NOTE(review): identical to the preceding cell — candidate for removal.
ds.timeseries

In [None]:
# Inspect the `dtypes` of `ds.time_features`.
ds.time_features.dtypes

In [None]:
from tsdm.datasets import KIWI

In [None]:
# Rebind `ds` to the KIWI dataset (replacing the KIWI_RUNS instance created
# earlier) and display it.
ds = KIWI()
ds

In [None]:
# Index the dataset with the key (439, 15325); the same key is reused for
# `ds.timeseries.loc[...]` and `TSC[...]` further down.
ds[439, 15325]

In [None]:
from tsdm.tasks import KIWI_RUNS_TASK

# Instantiate KIWI_RUNS_TASK. `task` is reassigned several times in later cells.
task = KIWI_RUNS_TASK()

In [None]:
# Build the collection by forwarding the identically named attributes of `ds`
# as keyword arguments (same names, same order as a hand-written call).
TSC = TimeSeriesCollection(
    **{
        field: getattr(ds, field)
        for field in (
            "index",
            "timeseries",
            "metadata",
            "time_features",
            "value_features",
            "metadata_features",
        )
    }
)

## Sampler

In [None]:
# Select the entry stored under key (439, 15325) from the dataset's time series.
ts = ds.timeseries.loc[(439, 15325)]

In [None]:
# Sliding-window sampler over the index of `ts`, with two horizons ("2h", "1h")
# and a stride of "1h".
# NOTE(review): a later cell unpacks the two windows as
# (observation_horizon, forecasting_horizon) — presumably 2h observed followed
# by 1h to forecast; confirm.
sampler = SlidingWindowSampler(ts.index, horizons=["2h", "1h"], stride="1h")

In [None]:
# Draw the first item from the sampler to inspect its structure.
next(iter(sampler))

## Construct the Sampler

In [None]:
from tsdm.datasets import *
from tsdm.tasks import *

In [None]:
# Rebind `task` to a KiwiTask and show the split keyed (0, "train").
# NOTE(review): KiwiTask is only imported explicitly further down; here it
# presumably comes from the star imports in the preceding cell — confirm.
task = KiwiTask()
task.splits[0, "train"]

In [None]:
# Display the task's `train_partition` attribute.
task.train_partition

In [None]:
# Look up the dataloader keyed (0, "train") — presumably fold 0, training split; confirm.
task.dataloaders[0, "train"]

In [None]:
# Rebind `task` to an InSilicoTask and run its fold validation.
task = InSilicoTask()
task.validate_folds()

In [None]:
# Fetch the fold entry keyed (0, "train") and display it.
# It is used as a boolean selector in the next cell.
mask = task.folds[0, "train"]
mask

In [None]:
# `mask[mask]` keeps only the truthy entries of `mask`; their index then selects
# the matching rows of the time series.
# (assumes `mask` is a boolean pandas Series — TODO confirm)
task.dataset.timeseries.loc[mask[mask].index]

In [None]:
# NOTE(review): `df` is first assigned in the *following* cell; as far as this
# notebook shows, this cell raises NameError on a fresh kernel.
df

In [None]:
# Give every distinct entry of the dataset index a consecutive integer id,
# numbered in order of first appearance (`sort=False`).
#
# This snippet originally read `self.dataset.index`, i.e. it was lifted from a
# method body; `self` does not exist at notebook level, so the index is taken
# from the `task` object instead.
# (assumes `task.dataset` exposes `.index` — TODO confirm)
idx = task.dataset.index

# Frame whose index is `idx` itself; it carries no additional data columns.
df = idx.to_frame(index=False).set_index(idx.names)

groups = df.groupby(idx.names, sort=False).ngroup()

In [None]:
# Display the group ids computed in the previous cell.
groups

In [None]:
# Check the name attached to the `groups` result.
groups.name

In [None]:
# NOTE(review): `fold` is first assigned in the *following* cell; as far as this
# notebook shows, this cell raises NameError on a fresh kernel.
fold

In [None]:
# Fetch the fold entry keyed (0, "train"); the same lookup was bound to `mask`
# in an earlier cell.
fold = task.folds[0, "train"]
# Disabled experiment: indexing the dataset directly with the fold.
# task.dataset[fold]

In [None]:
# Show the split keyed (0, "train") of the current `task`.
task.splits[0, "train"]

In [None]:
# Display the task's `index` attribute.
task.index

In [None]:
# Show the split keyed (0, "train") of the current `task`.
# NOTE(review): the same expression already appears two cells above — candidate for removal.
task.splits[0, "train"]

In [None]:
# Instantiate a throwaway KiwiTask just to display its `folds`.
KiwiTask().folds

In [None]:
# Wrap the `dataset` attribute of a fresh InSilicoData in a TimeSeriesCollection
# and look up key 16130.
# NOTE(review): elsewhere in this notebook TimeSeriesCollection is built with
# keyword arguments (index=, timeseries=, ...) — confirm that this
# single-positional-argument form is supported.
TimeSeriesCollection(InSilicoData().dataset)[16130]

In [None]:
from torch.utils.data import SubsetRandomSampler

In [None]:
from tsdm.tasks import KiwiTask

In [None]:
# Rebind `task` to a fresh KiwiTask (it last held an InSilicoTask).
task = KiwiTask()

In [None]:
# Draw one randomly chosen element of the collection's index.
next(iter(SubsetRandomSampler(TSC.index)))

In [None]:
# Look up the collection entry stored under key (439, 15325).
TSC[(439, 15325)]

In [None]:
# One sliding-window sampler per member of the collection, keyed like the
# collection itself. The comprehension variables are local to the
# comprehension and do not touch the notebook-level names `ds` / `key`.
subsamplers = {
    outer: SlidingWindowSampler(
        member.timeseries.index, horizons=["2h", "1h"], stride="1h"
    )
    for outer, member in TSC.items()
}
# Combine the collection with its per-member samplers, without shuffling.
sampler = HierarchicalSampler(TSC, subsamplers, shuffle=False)

In [None]:
from tsdm.datasets import InSilicoData

# Rebind `ds` to the InSilicoData dataset and display its time series.
# NOTE(review): `TSC` and `sampler` were built from the previous `ds` and are
# not rebuilt here, so they still refer to the earlier dataset.
ds = InSilicoData()

ds.timeseries

In [None]:
# Unpack the first sampler item into the outer key and its pair of windows.
# NOTE(review): the first window is named `forecasting_horizon` here, whereas a
# later cell unpacks the same structure as
# (observation_horizon, forecasting_horizon) — confirm which naming is intended.
outer_key, (forecasting_horizon, prediction_horizon) = next(iter(sampler))

In [None]:
# Select the member under `outer_key`, then slice it with the first of the two
# windows unpacked in the previous cell.
TSC[outer_key][forecasting_horizon]

# TimeSeriesCollectionForecastingTask

In [None]:
# Display the collection.
TSC

In [None]:
from tsdm.tasks import TimeSeriesTaskDataset

# Column names passed as `targets=` to TimeSeriesTaskDataset further down.
targets = ["Base", "DOT", "Glucose", "OD600"]

# Column names passed as `observables=`: the four targets first, followed by
# three additional channels. A new list is built, so `targets` is not aliased.
observables = [*targets, "Acetate", "Fluo_GFP", "pH"]

# Column names passed as `covariates=`.
covariates = (
    ["Cumulated_feed_volume_glucose", "Cumulated_feed_volume_medium"]
    + ["InducerConcentration", "StirringSpeed", "Flow_Air"]
    + ["Temperature", "Probe_Volume"]
)

In [None]:
# Keep the full sampler item in `key` (reused below to index the task
# datasets), then unpack it into the outer key and the two windows.
key = next(iter(sampler))
outer_key, (observation_horizon, forecasting_horizon) = key
# Slice the selected member with the first window.
TSC[outer_key][observation_horizon]

In [None]:
# Look up the collection entry stored under key (439, 15325).
TSC[439, 15325]

# Collection Test

In [None]:
# Task dataset over the *whole collection*, with "sparse" for both entries of
# `sample_format`. Note that this rebinds `task`, which held a task object before.
task = TimeSeriesTaskDataset(
    TSC,
    targets=targets,
    observables=observables,
    covariates=covariates,
    sample_format=("sparse", "sparse"),
)
# Index with the full sampler item (outer key plus window pair).
sample = task[key]

# Dataset Test

In [None]:
# Task dataset over a *single member* of the collection, with "masked" for both
# entries of `sample_format`. Rebinds `task` once more.
task = TimeSeriesTaskDataset(
    TSC[439, 15325],
    targets=targets,
    observables=observables,
    covariates=covariates,
    sample_format=("masked", "masked"),
)
# Only the window pair (`key[1]`) is needed, since the member is already selected.
sample = task[key[1]]

In [None]:
# A bare `raise` with no active exception raises RuntimeError, which halts a
# "Run All" at this cell.
# NOTE(review): presumably a deliberate stop marker — confirm before removing.
raise

In [None]:
# Remove, in place, the entries of `t_target` whose index labels do not occur
# in the index of `sample.targets.y`. Skipped when `t_target` is None.
if sample.inputs.t_target is not None:
    diff = sample.inputs.t_target.index.difference(sample.targets.y.index)
    sample.inputs.t_target.drop(diff, inplace=True)
sample

In [None]:
# A bare `raise` with no active exception raises RuntimeError, which halts a
# "Run All" at this cell.
# NOTE(review): presumably a deliberate stop marker — confirm before removing.
raise

In [None]:
# Remove, in place, the entries of `t_target` whose index labels do not occur
# in the index of `sample.targets.y`.
# Guarded in the same way as the equivalent cell earlier in this notebook:
# `t_target` may be None, in which case there is nothing to drop and the
# unguarded attribute access would raise AttributeError.
if sample.inputs.t_target is not None:
    diff = sample.inputs.t_target.index.difference(sample.targets.y.index)
    sample.inputs.t_target.drop(diff, inplace=True)
sample

In [None]:
# A bare `raise` with no active exception raises RuntimeError, which halts a
# "Run All" at this cell.
# NOTE(review): presumably a deliberate stop marker — confirm before removing.
raise

# Mapping Dataset

In [None]:
# Toy data for the mapping-dataset experiment: position -> character.
d = {position: letter for position, letter in enumerate("asdfghjkl")}

In [None]:
from collections.abc import Mapping
from dataclasses import dataclass

from torch.utils.data import Dataset

In [None]:
@dataclass
class MyMapping(Dataset, Mapping):
    r"""Dataset that is simultaneously a read-only mapping over a plain dict.

    Only the three abstract ``Mapping`` methods are implemented, each by
    delegating to ``internal_dict``; the remaining mapping API (``keys``,
    ``items``, ``get``, ``in`` ...) is inherited from ``Mapping``.
    """

    # Backing storage that every lookup is delegated to.
    internal_dict: dict

    def __len__(self):
        r"""Return the number of entries in the backing dict."""
        return len(self.internal_dict)

    def __getitem__(self, key):
        r"""Return the value stored under ``key`` (``KeyError`` if absent)."""
        return self.internal_dict[key]

    def __iter__(self):
        r"""Return an iterator over the keys of the backing dict."""
        return iter(self.internal_dict)

In [None]:
from torch.utils.data import DataLoader

# Wrap the dict-backed dataset in a DataLoader with default settings.
dataloader = DataLoader(MyMapping(d))

In [None]:
from typing import Any, Callable, Sequence


def flatten_dict(
    d: dict[str, Any],
    /,
    *,
    recursive: bool = True,
    join_fn: Callable[[Sequence[str]], str] = ".".join,
) -> dict[str, Any]:
    r"""Collapse a nested dictionary into a single level.

    Parameters
    ----------
    d:
        The dictionary to flatten. It is not modified.
    recursive:
        If true, values that are dictionaries are flattened (to any depth) and
        their keys are combined with the outer key. If false, every entry is
        copied over unchanged.
    join_fn:
        Called with the pair ``(outer_key, inner_key)`` and returns the combined
        key. Defaults to joining with ``"."``.

    Returns
    -------
    A new dictionary; entries keep the order in which they are encountered.
    When two entries produce the same combined key, the later one wins.
    """
    flattened: dict[str, Any] = {}
    for outer, value in d.items():
        # Leaves (and everything, when not recursing) are copied as they are.
        if not (isinstance(value, dict) and recursive):
            flattened[outer] = value
            continue
        # Flatten the sub-dictionary first, then prefix each of its keys.
        nested = flatten_dict(value, recursive=True, join_fn=join_fn)
        flattened.update(
            (join_fn((outer, inner)), leaf) for inner, leaf in nested.items()
        )
    return flattened


def unflatten_dict(
    d: dict[str, Any],
    /,
    *,
    recursive: bool = True,
    split_fn: Callable[[str], Sequence[str]] = lambda s: s.split(".", maxsplit=1),
) -> dict[str, Any]:
    r"""Rebuild a nested dictionary from one whose keys encode a path.

    Parameters
    ----------
    d:
        Dictionary with joined keys such as ``"a.b.c"``.
    recursive:
        If true, keys are split repeatedly until no separator is left. If
        false, only the first level is split off, so ``{"a.b.c": 1}`` becomes
        ``{"a": {"b.c": 1}}``.
    split_fn:
        Splits a key into ``[head]`` or ``[head, rest]``. It must return at
        most two parts. Defaults to splitting at the first ``"."``.

    Returns
    -------
    A new outer dictionary. Entries that share a prefix are merged at every
    depth, so ``{"x.y.z": 1, "x.y.w": 2}`` yields
    ``{"x": {"y": {"z": 1, "w": 2}}}``.

    Raises
    ------
    TypeError
        If a key needs to nest below a prefix that already holds a non-dict
        value, e.g. ``{"a": 1, "a.b": 2}``.

    Notes
    -----
    A value of ``d`` that is itself a dictionary is treated as an existing
    subtree: later keys sharing its prefix are inserted into that very object.
    """
    result: dict[str, Any] = {}
    for key, item in d.items():
        # Walk down from the root, creating intermediate dicts on demand.
        # Descending into the *existing* child is what keeps siblings intact;
        # a shallow `|=` on the first level would replace whole sub-branches.
        node = result
        parts = split_fn(key)
        while len(parts) > 1:
            assert len(parts) == 2, "split_fn must return at most two parts"
            head, rest = parts
            child = node.setdefault(head, {})
            if not isinstance(child, dict):
                raise TypeError(
                    f"Cannot nest key {key!r}: {head!r} already holds a"
                    f" non-dict value of type {type(child).__name__}."
                )
            node = child
            # Without recursion the remainder of the key stays in one piece
            # rather than being discarded.
            parts = split_fn(rest) if recursive else (rest,)
        node[parts[0]] = item
    return result


# Round-trip check: flattening and then unflattening must reproduce the input.
a = {
    "a": True,
    "b": 42,
    "c": "foo",
    "foo": {"a": 1, "b": 2, "c": 3},
    "bar": {"a": {"a": 1}, "b": {"a": 1}, "c": {"a": 1}},
    "baz": {"a": True, "b": 2},  # mixed bool / int leaves
}

print(flat := flatten_dict(a))
# Unflatten the *flattened* dict. `a` itself has no joined keys, so
# unflattening `a` returns it unchanged and the assertion could never fail.
print(unflat := unflatten_dict(flat))
assert unflat == a

In [None]:
# Display the flattened dictionary from the previous cell.
flat

In [None]:
# Unflatten the flattened dictionary and display the result.
unflatten_dict(flat)

In [None]:
"a".split(".")

In [None]:
# Scratch lookup: list the attribute names available on a `str` instance.
dir(".")