# Exploring TS definitions...
> Evaluating the performance of iterating over a bunch of time series.

We will explore different possibilities to construct a data pipeline that is fast and simple using fastai building blocks.

> Disclaimer: Ignacio's tsai solves this issue by doing something very similar to the solution proposed at the end of this notebook.

In [None]:
#export
import pandas as pd
from fastcore.all import *
from scipy.io import arff

import fastai
from fastcore.test import *
from fastai.basics import *

In [None]:
print(f'fastai: {fastai.__version__} \ntorch:  {torch.__version__}')

fastai: 2.1.6 
torch:  1.7.0


## TESTS

In [None]:
# Synthetic benchmark data: 10000 rows of 1000 uniform random values each
X = np.random.rand(10000, 1000)
# One random integer label in [0, 10) per row
y = np.random.randint(0,10,10000)

## Loading from arrays:

The simplest possible idea would be to build a NumPy (or SciPy-like) type of Dataset. We only need to define `__getitem__` and `__len__`.

In [None]:
class NaiveNumpyDataset(torch.utils.data.Dataset):
    "Map-style dataset that keeps `X` (samples) and optional `y` (targets) as given"
    def __init__(self, X, y=None):
        self.X = X
        self.y = y

    def __len__(self):
        "Number of samples, taken from `X`"
        return len(self.X)

    def __getitem__(self, idx):
        "Return `(x, y)` at `idx`, or the 1-tuple `(x,)` when no targets were given"
        if self.y is not None:
            return (self.X[idx], self.y[idx])
        return (self.X[idx], )

In [None]:
ds = NaiveNumpyDataset(X,y)

We can convert this to a dataloader directly:

In [None]:
dls_torch = torch.utils.data.DataLoader(dataset=ds, batch_size=8)

fastai's `DataLoaders` (is it just a thin wrapper around PyTorch's one, or not?)

In [None]:
dls = DataLoaders.from_dsets(ds, bs=8)

In [None]:
def cycle_dl(dl):
    "Iterate over `dl` once until exhausted, discarding every item (used for timing)"
    batches = iter(dl)
    for _batch in batches:
        continue

In [None]:
%timeit cycle_dl(dls_torch)

87.1 ms ± 841 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [None]:
%timeit cycle_dl(dls.train)

827 ms ± 15.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


It is about 10 times slower!

In real life, we would like to iterate on torch Tensors and send the output to GPU... `TSeries.create` does this.

In [None]:
class NumpyDataset:
    "Array-backed dataset whose labelled items are returned as `(TSeries, TensorCategory)`"
    def __init__(self, X, y=None):
        self.X = X
        self.y = y

    def __len__(self):
        "Number of samples, taken from `X`"
        return len(self.X)

    def __getitem__(self, idx):
        # NOTE(review): without targets the raw `X[idx]` is returned unconverted - confirm this is intended
        if self.y is None:
            return (self.X[idx], )
        series = TSeries.create(self.X[idx])
        label = TensorCategory(self.y[idx])
        return (series, label)

In [None]:
ds = NumpyDataset(X,y)

In [None]:
ds[0]

(TSeries(ch=1, len=1000), TensorCategory(5))

In [None]:
dls = DataLoaders.from_dsets(ds, bs=8)

In [None]:
dls.train.one_batch()

(TSeries(ch=8, len=1000), TensorCategory([5, 1, 6, 5, 4, 4, 4, 7]))

In [None]:
%timeit cycle_dl(dls.train)

1.1 s ± 24.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


The casting is painfully slow...

In [None]:
class TSTransform(Transform):
    "Transform that maps an index `i` to `(TSeries.create(x[i]), TensorCategory(y[i]))`"
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def encodes(self, i):
        series = TSeries.create(self.x[i])
        label = TensorCategory(self.y[i])
        return (series, label)

In [None]:
tl = TfmdLists(range_of(X), TSTransform(X, y))

In [None]:
tl[0:5]

(#5) [(TSeries(ch=1, len=1000), TensorCategory(5)),(TSeries(ch=1, len=1000), TensorCategory(1)),(TSeries(ch=1, len=1000), TensorCategory(6)),(TSeries(ch=1, len=1000), TensorCategory(4)),(TSeries(ch=1, len=1000), TensorCategory(5))]

In [None]:
dls = DataLoaders.from_dsets(tl, bs=8)

In [None]:
bx, by = dls.one_batch()

In [None]:
%timeit cycle_dl(dls.train)

1.13 s ± 21.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
dl =TfmdDL(tl, bs=8)

In [None]:
%timeit cycle_dl(dl)

1.12 s ± 12.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


Using `TfmdLists` is not better...

## The contiguous memory solution
> Fixing the issue of slicing

In [None]:
class Slicer:
    "Indexing helper: `Slicer(to)[idxs]` returns `to.new(*to[idxs])`"
    def __init__(self, to):
        self.to = to

    def __getitem__(self, idxs):
        source = self.to
        build = source.new
        return build(*source[idxs])

class NumpyDataset2():
    "Array-backed dataset that can rebuild itself from a slice of its own data (see `slicer` / `new`)"
    def __init__(self, X, y=None):
        self.X, self.y = X, y

    def __getitem__(self, idx):
        "Return `(X[idx], y[idx])`, or the 1-tuple `(X[idx],)` when there are no targets"
        if self.y is None: return (self.X[idx], )
        else: return (self.X[idx], self.y[idx])

    def __len__(self):
        "Number of samples, taken from `X`"
        return len(self.X)

    @property
    def slicer(self):
        "A `Slicer` bound to this dataset, so that `ds.slicer[idxs]` yields a new dataset"
        return Slicer(self)

    def new(self, X, y=None):
        "Create a dataset of the same type from `X` and optional `y`"
        # `y` defaults to None so that `new(*self[idxs])` also works for unlabeled
        # datasets, where `__getitem__` returns a 1-tuple.
        return type(self)(X, y)

In [None]:
ds = NumpyDataset2(X,y)

In [None]:
ds.slicer[0:4]

<__main__.NumpyDataset2 at 0x7f81d9d2e160>

In [None]:
class ReadTSBatch(ItemTransform):
    "Turn a dataset-like object exposing `X` and `y` into a tuple of tensors"
    def __init__(self, to): self.to = to

    def encodes(self, to):
        "Return `(X,)` cast to float, followed by the `y` tensor when targets are present"
        res = (tensor(to.X).float(), )
        # Unlabeled datasets carry `y=None`, which cannot be converted to a tensor,
        # so the target is appended only when it exists.
        if to.y is not None: res = res + (tensor(to.y),)
        # NOTE: no device transfer is performed in this transform.
        return res

In [None]:
rtb = ReadTSBatch(ds) 
rtb.encodes(ds.slicer[0:4])

(tensor([[0.2080, 0.8743, 0.1296,  ..., 0.8668, 0.8287, 0.5834],
         [0.4801, 0.7196, 0.1208,  ..., 0.8895, 0.9759, 0.4256],
         [0.7388, 0.8696, 0.8988,  ..., 0.8905, 0.6833, 0.7819],
         [0.6905, 0.0943, 0.7811,  ..., 0.4363, 0.2213, 0.9070]]),
 tensor([5, 1, 6, 4]))

In [None]:
class TSDataloader(TfmdDL):
    "`TfmdDL` whose `create_batch` returns `dataset.slicer[b]` and whose default `after_batch` ends with `ReadTSBatch`"
    # Per-item processing is replaced by fastcore's `noops`
    do_item = noops

    def __init__(self, dataset, bs=16, shuffle=False, after_batch=None, num_workers=0, **kwargs):
        if after_batch is None:
            # Default pipeline: the `TransformBlock` batch transforms followed by `ReadTSBatch`
            default_tfms = L(TransformBlock().batch_tfms)
            after_batch = default_tfms + ReadTSBatch(dataset)
        super().__init__(
            dataset,
            bs=bs,
            shuffle=shuffle,
            after_batch=after_batch,
            num_workers=num_workers,
            **kwargs
        )

    def create_batch(self, b):
        # NOTE(review): `b` is presumably the batch's sample indices - confirm against `TfmdDL`
        return self.dataset.slicer[b]

In [None]:
dl = TSDataloader(ds, bs=128)

In [None]:
%timeit cycle_dl(dl)

20.6 ms ± 587 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
