In [None]:
# Display the value of the last expression *or assignment* of every cell.
%config InteractiveShell.ast_node_interactivity='last_expr_or_assign'  # always print last expr.
# Render inline matplotlib figures as SVG.
%config InlineBackend.figure_format = 'svg'
# Automatically reload edited modules before each cell is executed.
%load_ext autoreload
%autoreload 2
%matplotlib inline

import logging

# Configure the root logger so records of level INFO and above are emitted.
logging.basicConfig(level=logging.INFO)

In [None]:
from typing import Literal

import numpy as np
import pandas
import torch
from pandas import DataFrame
from torch import Tensor
from torch.utils.data import DataLoader, Dataset

from tsdm.encoders import time2float

# Notebook-wide NumPy random generator.
# NOTE(review): no seed is given, so draws differ between kernel restarts —
# confirm whether reproducibility matters here.
rng = np.random.default_rng()
# Print NumPy arrays with four digits of precision.
np.set_printoptions(precision=4)

In [None]:
from tsdm.datasets import ETTh1

ds = ETTh1.dataset
target = "OT"

# Experiment settings. These must be plain scalars: a trailing comma would turn
# each value into a 1-tuple, which contradicts the Literal[...] annotation and
# breaks any later arithmetic or comparison on the setting.
forecasting_horizon: Literal[24, 48, 168, 336, 960] = 24
observation_horizon: Literal[24, 48, 96, 168, 336, 720] = 96
test_metric: Literal["MSE", "MAE"] = "MSE"

# Chronological train / validation / trial split via label-based slicing.
train_dataset = ds[:"2017-06-30"]  # inclusive range!
valid_dataset = ds["2017-07-01":"2017-10-31"]  # inclusive range!
trial_dataset = ds["2017-11-01":"2018-02-28"]  # inclusive range!
# Untouched reference copy, used further down to verify the scaler round trip.
trial_dataset_copy = trial_dataset.copy()

## Preprocessing

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the standardiser on the training split only; `fit` returns the estimator itself.
encoder = StandardScaler().fit(train_dataset)

# One row per fitted statistic, one column per feature of the training frame.
stats_table = DataFrame.from_dict(
    {"mean": encoder.mean_, "stdv": encoder.scale_},
    orient="index",
    columns=train_dataset.columns,
)
display(stats_table)

In [None]:
# Standardise the trial split with the scaler fitted on the training split,
# requesting in-place operation (copy=False), then display the frame.
# NOTE(review): whether a DataFrame is really modified in place depends on
# sklearn/pandas internals — confirm; the round-trip check in the next cell
# relies on it.
encoder.transform(trial_dataset, copy=False)
trial_dataset

In [None]:
# Undo the standardisation (again requesting in-place operation) and check that
# the trial split matches the reference copy taken before any transformation.
# NOTE(review): assert_frame_equal is called with its default tolerance
# settings, so this presumably tolerates floating-point round-off — confirm.
encoder.inverse_transform(trial_dataset, copy=False)
pandas.testing.assert_frame_equal(trial_dataset, trial_dataset_copy)

In [None]:
# `splits` was never defined in this notebook, so the cell failed with a
# NameError on a fresh kernel. Collect the chronological splits created above
# under one name; the "test" key mirrors the key used with `task.splits` below.
splits = {
    "train": train_dataset,
    "valid": valid_dataset,
    "test": trial_dataset,
}
splits["train"]

### Data Loading

In [None]:
from tsdm.datasets import SequenceDataset
from tsdm.utils.samplers import SequenceSampler

In [None]:
time_encoder = time2float
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dtype = torch.float32

# Split the training frame into encoded timestamps, covariates and the
# forecasting target. The target column name comes from the `target` constant
# defined with the experiment settings instead of being repeated as a literal.
# (A stray `train_dataset.drop(...)` whose result was discarded was removed.)
_T = time_encoder(train_dataset.index)
_X = train_dataset.drop(columns=target).values
_Y = train_dataset[target].values

# Materialise everything as tensors of one dtype on the selected device.
T = torch.tensor(_T, device=device, dtype=dtype)
X = torch.tensor(_X, device=device, dtype=dtype)
Y = torch.tensor(_Y, device=device, dtype=dtype)

In [None]:
class SequenceDataset(Dataset):
    """Dataset that indexes several equally long sequences in lockstep.

    NOTE(review): this definition shadows the ``SequenceDataset`` imported from
    ``tsdm.datasets`` earlier in the notebook.
    """

    def __init__(self, tensors: list[Tensor]):
        """Store ``tensors`` after asserting that all of them share one length."""
        assert not any(len(seq) != len(tensors[0]) for seq in tensors)
        self.tensors = tensors

    def __len__(self):
        """Return the length of the first stored sequence."""
        leading = self.tensors[0]
        return len(leading)

    def __getitem__(self, idx):
        """Return a list holding ``sequence[idx]`` for every stored sequence."""
        items = []
        for seq in self.tensors:
            items.append(seq[idx])
        return items


class SequenceSampler(torch.utils.data.Sampler):
    """Yield every contiguous window of ``seq_len`` indices over ``data``.

    Each item is a ``range(start, start + seq_len)``, i.e. a whole batch of
    indices rather than a single index.

    Parameters
    ----------
    data
        Any sized object; only ``len(data)`` is used.
    seq_len
        Number of consecutive indices per window.

    NOTE(review): this definition shadows the ``SequenceSampler`` imported from
    ``tsdm.utils.samplers`` earlier in the notebook.
    """

    def __init__(self, data, seq_len):
        self.data = data
        self.seq_len = seq_len

    def __len__(self):
        """Return the number of windows, ``len(data) - seq_len + 1`` (never negative)."""
        return max(0, len(self.data) - self.seq_len + 1)

    def __iter__(self):
        """Iterate over the windows in increasing order of their start index."""
        # The last valid start index is len(data) - seq_len, so there are
        # len(data) - seq_len + 1 windows; the previous bound dropped the final one.
        for idx in range(len(self)):
            yield range(idx, idx + self.seq_len)

In [None]:
# Bundle the time, covariate and target tensors into one indexable dataset.
# NOTE(review): this rebinds `train_dataset` from the pandas frame to a tensor
# dataset, so earlier cells that expect the frame cannot be re-run afterwards.
train_dataset = SequenceDataset([T, X, Y])
# NOTE(review): `sampler` is created but never handed to the DataLoader below —
# confirm whether `batch_sampler=sampler` was intended instead of `shuffle=True`.
sampler = SequenceSampler(train_dataset, 2)
loader = DataLoader(train_dataset, shuffle=True)
# Materialise every batch; the trailing semicolon suppresses the cell output.
samples = list(loader);

In [None]:
# Pick the two leading batches and the final batch, then display all three.
first, second, last = [samples[position] for position in (0, 1, -1)]
(first, second, last)

# Implemented Task

In [None]:
from tsdm.tasks import ETDatasetInformer

In [None]:
# Build the packaged task object, passing "ETTh2" (presumably the dataset name).
# NOTE(review): the manual walkthrough above loads ETTh1 — confirm that using a
# different dataset here is intended.
task = ETDatasetInformer("ETTh2")

In [None]:
# Ask the task for the dataloader registered under the "test" key.
dloader = task.get_dataloader("test")

In [None]:
# Mean along axis 0 of the raw values of the task's "test" split
# (a per-column mean, assuming the split is a 2-D table — TODO confirm).
task.splits["test"].values.mean(axis=0)

In [None]:
# Exhaust the dataloader; after the loop `t`, `x` and `y` hold the components
# of the *last* batch only (each iteration overwrites them).
# NOTE(review): if the dataloader yields nothing, `x` is never bound and the
# line after the loop raises NameError.
for item in dloader:
    t, x, y = item
# Mean and standard deviation of that last batch of `x`, reduced over dims 0 and 1.
# NOTE(review): these are not statistics of the full test split — confirm that
# inspecting a single batch is the intent.
torch.mean(x, dim=(0, 1)), torch.std(x, dim=(0, 1))

In [None]:
# Display the object stored at `task.dataset.dataset`
# (presumably the underlying raw table — TODO confirm).
task.dataset.dataset