In [1]:
from neuromancer import dataset
import psl
import torch
import numpy as np

## Sequential dataset loading

In [2]:
# loading arbitrary data from a CSV file
# The file path is looked up under the "aero" key of psl.datasets and the
# file contents are read into `data`, which the next cell splits.
data_path = psl.datasets["aero"]
data = dataset.read_file(data_path)

In [3]:
# Partition the loaded data into train/val/test splits. nsteps=16 is the same
# value passed to SequenceDataset further down.
train, val, test = dataset.split_sequence_data(data, nsteps=16)

In [4]:
# Normalize the training split with the "zscore" method and keep the stats it
# returns; those stats are then passed in for val and test so all three
# splits are normalized with one shared set of statistics.
# NOTE: the splits are rebound to their normalized versions, so re-running
# this cell feeds already-normalized data back into normalize_data.
train, stats = dataset.normalize_data(train, "zscore")
(val, _), (test, _) = (
    dataset.normalize_data(split, "zscore", stats=stats)
    for split in (val, test)
)

In [5]:
# Wrap each normalized split in a SequenceDataset. Every dataset uses the same
# nsteps=16 as the split step and is labelled with its split name.
train_dset, val_dset, test_dset = (
    dataset.SequenceDataset(split, nsteps=16, name=label)
    for split, label in ((train, "train"), (val, "val"), (test, "test"))
)

In [6]:
# Pull two views of the training set. Their per-key tensor shapes are shown by
# the next two cells: the recorded outputs have a leading dimension of 16 for
# the full batch and 160 for the full sequence.
train_fullbatch = train_dset.get_full_batch()
train_fullseq = train_dset.get_full_sequence()

In [7]:
# Shape of every tensor entry in the full training batch; entries that are
# not tensors are filtered out.
{
    key: value.shape
    for key, value in train_fullbatch.items()
    if isinstance(value, torch.Tensor)
}

{'Up': torch.Size([16, 10, 10]),
 'Yp': torch.Size([16, 10, 5]),
 'Uf': torch.Size([16, 10, 10]),
 'Yf': torch.Size([16, 10, 5])}

In [8]:
# Shape of every tensor entry in the full training sequence; entries that are
# not tensors are filtered out.
{
    key: value.shape
    for key, value in train_fullseq.items()
    if isinstance(value, torch.Tensor)
}

{'Up': torch.Size([160, 1, 10]),
 'Yp': torch.Size([160, 1, 5]),
 'Uf': torch.Size([160, 1, 10]),
 'Yf': torch.Size([160, 1, 5])}

## Multi-sequence dataset loading

In [9]:
# Seed NumPy's global generator so the random initial states drawn below are
# the same on every Restart & Run All. Seeding happens before the emulator is
# built in case it also draws from the global generator (not visible here).
SEED = 0
np.random.seed(SEED)

# TwoTank emulator configured with nsim=1024. Each simulate() call is given a
# freshly drawn 2-element initial state, producing a list of 15 runs.
simulator = psl.emulators["TwoTank"](nsim=1024)
data = [
    simulator.simulate(x0=np.random.rand(2))
    for _ in range(15)
]

In [10]:
# Partition the list of simulated runs into train/val/test splits. nsteps=16
# is the same value passed to SequenceDataset further down.
train, val, test = dataset.split_sequence_data(data, nsteps=16)

In [11]:
# Normalize the training split with the "zscore" method and keep the stats it
# returns; those stats are then passed in for val and test so all three
# splits are normalized with one shared set of statistics.
# NOTE: the splits are rebound to their normalized versions, so re-running
# this cell feeds already-normalized data back into normalize_data.
train, stats = dataset.normalize_data(train, "zscore")
(val, _), (test, _) = (
    dataset.normalize_data(split, "zscore", stats=stats)
    for split in (val, test)
)

In [12]:
# Wrap each normalized split in a SequenceDataset with the same nsteps=16 used
# for splitting. Each dataset is labelled with its split name, matching the
# sequential and static sections of this notebook; without `name` all three
# would share whatever default the library applies.
train_dset = dataset.SequenceDataset(train, nsteps=16, name="train")
val_dset = dataset.SequenceDataset(val, nsteps=16, name="val")
test_dset = dataset.SequenceDataset(test, nsteps=16, name="test")

In [13]:
# Pull two views of the multi-sequence training set. The full batch is
# inspected directly below, while the full-sequence result is indexed with
# [0] when its shapes are displayed.
train_fullbatch = train_dset.get_full_batch()
train_fullseq = train_dset.get_full_sequence()

In [14]:
# Shape of every tensor entry in the full training batch; entries that are
# not tensors are filtered out.
{
    key: value.shape
    for key, value in train_fullbatch.items()
    if isinstance(value, torch.Tensor)
}

{'Up': torch.Size([16, 319, 2]),
 'Xp': torch.Size([16, 319, 2]),
 'Yp': torch.Size([16, 319, 2]),
 'Uf': torch.Size([16, 319, 2]),
 'Xf': torch.Size([16, 319, 2]),
 'Yf': torch.Size([16, 319, 2])}

In [15]:
# Shape of every tensor entry in the first element of train_fullseq; entries
# that are not tensors are filtered out.
{
    key: value.shape
    for key, value in train_fullseq[0].items()
    if isinstance(value, torch.Tensor)
}

{'Up': torch.Size([1008, 1, 2]),
 'Xp': torch.Size([1008, 1, 2]),
 'Yp': torch.Size([1008, 1, 2]),
 'Uf': torch.Size([1008, 1, 2]),
 'Xf': torch.Size([1008, 1, 2]),
 'Yf': torch.Size([1008, 1, 2])}

## Static dataset loading

In [16]:
# Seed NumPy's global generator so the synthetic splits are identical on
# every Restart & Run All.
SEED = 0
np.random.seed(SEED)

# Three independently drawn synthetic splits, each a dict holding a
# (400, 20) array under "X" and a (400, 2) array under "Y".
train, val, test = [
    {"X": np.random.rand(400, 20), "Y": np.random.rand(400, 2)}
    for _ in range(3)
]

In [17]:
# Normalize the training split with the "zscore" method and keep the stats it
# returns; those stats are then passed in for val and test so all three
# splits are normalized with one shared set of statistics.
# NOTE: the splits are rebound to their normalized versions, so re-running
# this cell feeds already-normalized data back into normalize_data.
train, stats = dataset.normalize_data(train, "zscore")
(val, _), (test, _) = (
    dataset.normalize_data(split, "zscore", stats=stats)
    for split in (val, test)
)

In [18]:
# Wrap each normalized split in a StaticDataset labelled with its split name.
train_dset, val_dset, test_dset = (
    dataset.StaticDataset(split, name=label)
    for split, label in ((train, "train"), (val, "val"), (test, "test"))
)

In [19]:
# Fetch the whole static training set as one batch; its per-key tensor shapes
# are displayed in the next cell.
train_fullbatch = train_dset.get_full_batch()

In [20]:
# Shape of every tensor entry in the full static training batch; entries that
# are not tensors are filtered out.
{
    key: value.shape
    for key, value in train_fullbatch.items()
    if isinstance(value, torch.Tensor)
}

{'X': torch.Size([400, 20]), 'Y': torch.Size([400, 2])}