# How to work with numpy arrays in fastai2: time series classification

I'd like to share how you can work with (very large) numpy arrays in fastai2 through a time series classification example. In this case we'll use a multivariate time series dataset.

**High level requirements:**

- Be able to work with *numpy arrays with any number of dimensions*. 
- Data may be *larger than RAM*, so it may be in memory or on disk.
- Use data on disk with similar *performance* to data in memory.
- Data is often *split*: 
    - X, y
    - X_train, X_valid, y_train, y_valid
- Add an *unlabeled dataset* (for example, for semi-supervised/self-supervised learning).
- Add a *test dataset*.

In [1]:
#hide
from nbdev.showdoc import *
from IPython.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

In [2]:
from fastai2.torch_core import *
from fastai2.data.all import *
from fastai2.learner import *
from fastai2.metrics import *

In [3]:
from timeseries.imports import *
from timeseries.utils import *
from timeseries.data import *
# from timeseries.core import *
from timeseries.models import *

## Download data

In [None]:
# export
class TSTensor(TensorBase):
    "Tensor type for time series; `vars` and `len` read the last two axes of its shape."

    @classmethod
    def create(cls, o):
        "Build a `TSTensor` from `o` by way of `To2DPlusTensor`."
        # Original author's note: the result has at least 2 dimensions and is of type float
        converted = To2DPlusTensor(o)
        return cls(converted)

    @property
    def vars(self):
        "Size of the second-to-last axis."
        return self.shape[-2]

    @property
    def len(self):
        "Size of the last axis."
        return self.shape[-1]

    def __getitem__(self, idx):
        "Index like the parent class, then pass the result through `retain_type` with `self` as reference."
        return retain_type(super().__getitem__(idx), self)

    def __repr__(self):
        dims = list(self.size())
        return f'TSTensor(size:{dims})'

    def show(self, ax=None, ctx=None, title=None, **kwargs):
        "Plot `self.T` on `ax` (or `ctx`, or a new axes built with `kwargs`) and return the axes used."
        # `ax` takes precedence over `ctx`; a new figure is only created when both are missing
        target = ctx if ax is None else ax
        if target is None:
            _, target = plt.subplots(**kwargs)
        target.plot(self.T)
        last_step = self.shape[-1] - 1
        target.axis(xmin=0, xmax=last_step)
        target.set_title(title, weight='bold')
        plt.tight_layout()
        return target

@Transform
def ToTSTensor(o:np.ndarray):
    "Turn a `np.ndarray` into a `TSTensor` by delegating to `TSTensor.create`."
    ts_tensor = TSTensor.create(o)
    return ts_tensor

In [4]:
a = np.random.rand(16, 6, 12)
ts = TSTensor.create(a)
test_eq(type(ts).__name__, 'TSTensor')
test_eq(type(ts[0]).__name__, 'TSTensor')
test_eq(type(ts[0][0]).__name__, 'TSTensor')
test_eq(isinstance(ts, torch.Tensor), True)
ts

NameError: name 'TSTensor' is not defined

In [None]:
a = torch.rand(2, 3).double()
t = ToTSTensor(a)
test_eq(list(t.shape), [2, 3])
test_eq(t.dtype, torch.float64)
test_eq(ToType(torch.float32)(t).dtype, torch.float32)

In [None]:
a = np.random.rand(2, 3).astype('float64')
t = ToTSTensor(a)
test_eq(list(t.shape), [2, 3])
test_eq(t.dtype, torch.float32)

In [None]:
a = np.random.rand(2, 3).astype('float32')

In [None]:
# %timeit TSTensor.create(a)

In [None]:
b = np.random.rand(16, 1, 12)
ts = TSTensor.create(b)
test_eq(ts.ndim, 3)
test_eq(ts[0].ndim, 2)
test_eq(ts[0][0].ndim, 1)
ts, ts[0], ts[0][0], ts[0][0][0]

In [None]:
# %timeit TSTensor.create(b)

In [None]:
b = np.random.rand(16, 12)
c = np.random.randint(0, 3, 100)
labels = L(['a', 'b', 'c'])[c]
items = itemify(b, labels)
t = ToTSTensor(items[0])
test_eq(list(t[0].data.shape), [1, 12])
tl = TfmdLists(items, ToTSTensor)
test_eq(list(tl[0][0].data.shape), [1, 12])

In [None]:
# %timeit tl[0]

In [None]:
#export
class TSTfmdDL(TfmdDL):
    "`TfmdDL` with shape helpers for time series items and a grid-based `show_batch`."

    @property
    def vars(self):
        "Size of the second-to-last axis of the first element of the first dataset item."
        return self.dataset[0][0].shape[-2]

    @property
    def len(self):
        "Size of the last axis of the first element of the first dataset item."
        return self.dataset[0][0].shape[-1]

    @delegates(plt.subplots)
    def show_batch(self, b=None, max_n=9, nrows=3, ncols=3, figsize=(12, 10), **kwargs):
        """Plot the decoded items of batch `b` on an `nrows` x `ncols` grid of axes.

        `b` defaults to `self.one_batch()`; it is decoded with
        `self.decode_batch(b, max_n=max_n)`. When `nrows` and/or `ncols` is
        None the missing dimension is derived from the number of decoded
        items. `figsize` and `kwargs` are forwarded to `plt.subplots`.
        """
        if b is None: b = self.one_batch()
        db = self.decode_batch(b, max_n=max_n)
        n_shown = max(len(db), 1)  # never request a 0-sized grid
        # Fill in whichever grid dimension the caller left open
        if nrows is None and ncols is None: nrows = math.ceil(math.sqrt(n_shown))
        if nrows is None: nrows = math.ceil(n_shown / ncols)
        if ncols is None: ncols = math.ceil(n_shown / nrows)
        fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize, **kwargs)
        # `plt.subplots` returns a bare Axes for a 1x1 grid and an array otherwise
        axs = axs.flatten() if hasattr(axs, 'flatten') else [axs]
        for tup, ax in zip(db[:nrows * ncols], axs):
            show_tuple(tup, ax=ax)
        plt.tight_layout()

@delegates(plt.subplots)
def show_tuple(tup, ax=None, **kwargs):
    "Plot `tup[0]` via its `show` method, titled with `tup[1]` or 'unlabeled' for a 1-tuple"
    has_label = len(tup) != 1
    heading = tup[1] if has_label else 'unlabeled'
    tup[0].show(title=heading, ax=ax, **kwargs)
    
    
def cycle_dl(dl):
    "Iterate once over every batch of `dl` and discard it (useful for timing a full pass)."
    # Batches are not unpacked, so unlabeled (1-element) batches work as well as (x, y) ones
    for _ in dl: pass

In [None]:
dsid = 'StarLightCurves'

In [None]:
X_train, y_train, X_valid, y_valid = get_UCR_data(dsid, path='..', verbose=False)
X = np.concatenate((X_train, X_valid))
y = np.concatenate((y_train, y_valid))
folder = 'data/UCR'
np.save(str(PATH.parent/f'{folder}/{dsid}/X.npy'), X) # cannot use pathlib.PosixPath as filename 
np.save(str(PATH.parent/f'{folder}/{dsid}/y.npy'), y)
del X, y
X = np.load(str(PATH.parent/f'{folder}/{dsid}/X.npy'), mmap_mode='r')
y = np.load(str(PATH.parent/f'{folder}/{dsid}/y.npy'), mmap_mode='r')
split_idx = (L(list(np.arange(len(X_train)))), L(list(np.arange(len(X_train), len(X)))))
X.shape, y.shape, X.__class__.__name__, X.dtype

In [None]:
def pre_process_TL(tfmdlists, process=True, **kwargs):
    "Return `tfmdlists` as is when `process` is falsy; otherwise wrap `tfmdlists[:]` in a transform-free `TfmdLists`."
    if not process:
        return tfmdlists
    # Slicing with `[:]` reads out every item before it is re-wrapped without transforms
    materialized = tfmdlists[:]
    return TfmdLists(materialized, None, **kwargs)

In [None]:
tfms = [[ToTSTensor], [Categorize()]]
# tfms = None
tls = None
kwargs = {'splits':split_idx}
pre_process = True
it = 0

items = itemify(np.array(X), np.array(y))
n_items = len(items[0])
assert (tfms is None or n_items == len(tfms)), f"n_tfms ({len(tfms)}) doesn't match n_items ({n_items})"
process = [False] * (n_items - 1) + [not pre_process] if X.__class__.__name__ == 'memmap' else [not pre_process] * (n_items)
tfms = [None] if tfms is None else [[ItemGetter(i)] + L(tfms[i]) for i in range(n_items)]
tls = L(tls if tls else [pre_process_TL(TfmdLists(items, t, **kwargs), p, **kwargs) for p, t in zip(process, tfms)])

In [None]:
tfms = [[ToTSTensor], [Categorize()]]
tfms = [[ToTSTensor]]
# tfms = None
items = itemify(X,)
# items = itemify(X,y)
n_items = len(items[0])
assert (tfms is None or n_items == len(tfms)), f"n_tfms ({len(tfms)}) doesn't match n_items ({n_items})"
process = [False] * (n_items - 1) + [not pre_process] if items[0][0].__class__.__name__ == 'memmap' else [not pre_process] * (n_items)
process

In [None]:
def _test(it):
    "Index the global `tls` with `it`: a lone list is indexed directly, several are combined."
    if len(tls) == 1: return tls[0][it]
    per_list = tuple(tl[it] for tl in tls)
    if is_indexer(it): return per_list
    # For a collection of indices, regroup the per-list results into one tuple per item
    return list(zip(*per_list))

def _test_train(it):
    "Same lookup as `_test`, but on the `.train` attribute of each list in the global `tls`."
    if len(tls) == 1: return tls[0].train[it]
    per_list = tuple(tl.train[it] for tl in tls)
    if is_indexer(it): return per_list
    # For a collection of indices, regroup the per-list results into one tuple per item
    return list(zip(*per_list))

def _test_valid(it):
    "Same lookup as `_test`, but on the `.valid` attribute of each list in the global `tls`."
    if len(tls) == 1: return tls[0].valid[it]
    per_list = tuple(tl.valid[it] for tl in tls)
    if is_indexer(it): return per_list
    # For a collection of indices, regroup the per-list results into one tuple per item
    return list(zip(*per_list))

In [None]:
_test(0), _test([0, 1])

In [None]:
_test_train(0), _test_train([0, 1])

In [None]:
_test_valid(0), _test_valid([0, 1])

In [None]:
%timeit _test(0)

In [None]:
class NumpyDatasets(FilteredBase):
    "A dataset that creates a tuple from each `tfms`, passed thru `item_tfms`"
    def __init__(self, X=None, y=None, items=None, tfms=None, tls=None, n_inp=None, dl_type=None, pre_process=True, **kwargs):
        """Build one `TfmdLists` per element of the items, or reuse `tls` when given.

        `items` defaults to `itemify(X)` / `itemify(X, y)`. `tfms`, when given,
        must hold one transform list per element of an item. `pre_process`
        decides which lists are passed through `pre_process_TL`. Remaining
        `kwargs` are forwarded to `TfmdLists` and `pre_process_TL`.
        """
        super().__init__(dl_type=dl_type)
        if tls is None:
            if items is None: items = itemify(X,) if y is None else itemify(X,y)
            n_items = len(items[0])
            assert (tfms is None or n_items == len(tfms)), f"n_tfms ({len(tfms)}) doesn't match n_items ({n_items})"
            # When the first element is a memmap, only the last list may be pre-processed;
            # otherwise `pre_process` applies to every list
            is_memmap = items[0][0].__class__.__name__ == 'memmap'
            process = ([False] * (n_items - 1) + [pre_process]) if is_memmap else [pre_process] * n_items
            tfms = [None] if tfms is None else [[ItemGetter(i)] + L(tfms[i]) for i in range(n_items)]
            tls = [pre_process_TL(TfmdLists(items, t, **kwargs), p, **kwargs) for p, t in zip(process, tfms)]
        # `tls` is used as given whenever it was passed, even if empty, so the
        # construction branch above is the only place that needs `process`/`tfms`
        self.tls = L(tls)
        # Unless given, every list but the last counts as an input (a lone list is the input)
        self.n_inp = (1 if len(self.tls)==1 else len(self.tls)-1) if n_inp is None else n_inp

    def __getitem__(self, it):
        # A lone list is indexed directly; several are indexed in parallel and,
        # for a collection of indices, regrouped into one tuple per item
        if len(self.tls) == 1: return self.tls[0][it]
        res = tuple([tl[it] for tl in self.tls])
        return res if is_indexer(it) else list(zip(*res))

    # Unknown attributes are gathered from the underlying lists in `self.tls`
    def __getattr__(self,k): return gather_attrs(self, k, 'tls')
    def __dir__(self): return super().__dir__() + gather_attr_names(self, 'tls')
    def __len__(self): return len(self.tls[0])
    def __iter__(self): return (self[i] for i in range(len(self)))
    def __repr__(self): return coll_repr(self)
    def decode(self, o, full=True): return tuple(tl.decode(o_, full=full) for o_,tl in zip(o,tuplify(self.tls, match=o)))
    def subset(self, i): return type(self)(tls=L(tl.subset(i) for tl in self.tls), n_inp=self.n_inp)
    def _new(self, items, *args, **kwargs): return super()._new(items, tfms=self.tfms, do_setup=False, **kwargs)
    def overlapping_splits(self): return self.tls[0].overlapping_splits()
    @property
    def splits(self): return self.tls[0].splits
    @property
    def split_idx(self): return self.tls[0].tfms.split_idx
    @property
    def items(self): return self.tls[0].items
    @items.setter
    def items(self, v):
        for tl in self.tls: tl.items = v

    def show(self, o, ctx=None, **kwargs):
        # Each list draws its own element of `o`, threading the same `ctx` through
        for o_,tl in zip(o,self.tls): ctx = tl.show(o_, ctx=ctx, **kwargs)
        return ctx

    def new_empty(self):
        tls = [tl._new([], split_idx=tl.split_idx) for tl in self.tls]
        return type(self)(tls=tls, n_inp=self.n_inp)

    @contextmanager
    def set_split_idx(self, i):
        "Temporarily set `split_idx` to `i` on every list's transforms."
        old_split_idx = self.split_idx
        for tl in self.tls: tl.tfms.split_idx = i
        # Restore the previous value even when the `with` body raises
        try: yield self
        finally:
            for tl in self.tls: tl.tfms.split_idx = old_split_idx

In [None]:
tfms = [[ToTSTensor], [Categorize()]]
# tfms = None
dsets = NumpyDatasets(X, y, tfms=tfms, splits=split_idx, pre_process=True)

In [None]:
dsets[0]

In [None]:
dsets.train[0]

In [None]:
%timeit dsets[0]

In [None]:
train_ds = dsets.train

In [None]:
%timeit train_ds[0]

In [None]:
valid_dl = TSTfmdDL(dsets.valid, bs=128)
xb,yb = next(iter(valid_dl))
xb.shape

In [None]:
%time cycle_dl(valid_dl)

In [None]:
tfms = [[ToTSTensor], [Categorize()]]
# tfms = None
dsets = NumpyDatasets(np.array(X), y, tfms=tfms, splits=split_idx, pre_process=False)
dls = dsets.dataloaders(bs=64, val_bs=128, num_workers=None)
valid_dl = TSTfmdDL(dsets.valid, bs=128, num_workers=None)

In [None]:
train_dl, valid_dl = dls
xb,yb = next(iter(valid_dl))
xb.shape

In [None]:
dsets[0]

In [None]:
dsets.train[0]

In [None]:
%timeit dsets[0]

In [None]:
train_ds = dsets.train

In [None]:
%timeit train_ds[0]

In [None]:
train_dl, valid_dl = dsets.dataloaders(bs=64, val_bs=128, num_workers=None)
xb,yb = next(iter(valid_dl))
xb.shape

In [None]:
%time cycle_dl(valid_dl)

In [None]:
type(dls.dataloaders())

In [None]:
%time cycle_dl(valid_dl)

In [None]:
tfms = [[ToTSTensor], [Categorize()]]
# tfms = None
dsets = NumpyDatasets(np.array(X), y, tfms=tfms, splits=split_idx, pre_process=True)
dls = TSTfmdDL(dsets, bs=64, val_bs=128, num_workers=None)
valid_dl = TSTfmdDL(dsets.valid, bs=128, num_workers=None)

In [None]:
xb,yb = next(iter(valid_dl))
xb.shape

In [None]:
%time cycle_dl(valid_dl)

In [None]:
xb,yb = next(iter(dls.valid))
xb.shape

In [None]:
%time cycle_dl(dls.valid)

In [None]:
valid_dl.show_batch()

In [None]:
class Datasets(FilteredBase):
    "A dataset that creates a tuple from each `tfms`, passed thru `item_tfms`"
    def __init__(self, items=None, tfms=None, tls=None, n_inp=None, dl_type=None, **kwargs):
        super().__init__(dl_type=dl_type)
        # One `TfmdLists` per entry of `tfms` (a single transform-free one when `tfms` is None),
        # unless ready-made `tls` are supplied
        self.tls = L(tls if tls else [TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])
        # Unless given, every list but the last counts as an input (a lone list is the input)
        self.n_inp = (1 if len(self.tls)==1 else len(self.tls)-1) if n_inp is None else n_inp

    def __getitem__(self, it):
        # Index every list with `it`; for a collection of indices regroup into one tuple per item
        res = tuple([tl[it] for tl in self.tls])
        return res if is_indexer(it) else list(zip(*res))

    # Unknown attributes are gathered from the underlying lists in `self.tls`
    def __getattr__(self,k): return gather_attrs(self, k, 'tls')
    def __dir__(self): return super().__dir__() + gather_attr_names(self, 'tls')
    def __len__(self): return len(self.tls[0])
    def __iter__(self): return (self[i] for i in range(len(self)))
    def __repr__(self): return coll_repr(self)
    def decode(self, o, full=True): return tuple(tl.decode(o_, full=full) for o_,tl in zip(o,tuplify(self.tls, match=o)))
    def subset(self, i): return type(self)(tls=L(tl.subset(i) for tl in self.tls), n_inp=self.n_inp)
    def _new(self, items, *args, **kwargs): return super()._new(items, tfms=self.tfms, do_setup=False, **kwargs)
    def overlapping_splits(self): return self.tls[0].overlapping_splits()
    @property
    def splits(self): return self.tls[0].splits
    @property
    def split_idx(self): return self.tls[0].tfms.split_idx
    @property
    def items(self): return self.tls[0].items
    @items.setter
    def items(self, v):
        for tl in self.tls: tl.items = v

    def show(self, o, ctx=None, **kwargs):
        # Each list draws its own element of `o`, threading the same `ctx` through
        for o_,tl in zip(o,self.tls): ctx = tl.show(o_, ctx=ctx, **kwargs)
        return ctx

    def new_empty(self):
        tls = [tl._new([], split_idx=tl.split_idx) for tl in self.tls]
        return type(self)(tls=tls, n_inp=self.n_inp)

    @contextmanager
    def set_split_idx(self, i):
        "Temporarily set `split_idx` to `i` on every list's transforms."
        old_split_idx = self.split_idx
        for tl in self.tls: tl.tfms.split_idx = i
        # Restore the previous value even when the `with` body raises
        try: yield self
        finally:
            for tl in self.tls: tl.tfms.split_idx = old_split_idx

#

In [None]:
# `pre_process_TL` is the helper defined earlier in this notebook (`processTL` does not exist)
tl = pre_process_TL(TfmdLists(items, ItemGetter(0)), splits=split_idx)

In [None]:
tl[0,1]

In [None]:
# ORIGINAL lazy=True
items = itemify(X,y)
tfms = [[ToTSTensor], [Categorize()]]
kwargs = {}
tls = None

assert tfms is None or len(tfms) == len(items[0]), 'len tfms == len items[0]'
tfms = [ItemGetter(i) for i in range_of(items[0])] if tfms is None else [[ItemGetter(i)] + L(tfms[i]) for i in range_of(items[0])]
tls = L(tls if tls else [TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])

def _test(it):
    "Index every list of the global `tls` with `it`, regrouping per item for a collection of indices."
    per_list = tuple(tl[it] for tl in tls)
    if is_indexer(it): return per_list
    return list(zip(*per_list))

_test(0)

In [None]:
# # ORIGINAL - UNLABELED lazy=True
# items = itemify(X)
# tfms = [ToTSTensor]
# kwargs = {}
# tls = None

# assert tfms is None or len(tfms) == len(items[0]), 'len tfms == len items[0]'
# tfms = [ItemGetter(i) for i in range_of(items[0])] if tfms is None else [[ItemGetter(i)] + L(tfms[i]) for i in range_of(items[0])]
# tls = L(tls if tls else [TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])

# def _test(it):
#     res = tuple([tl[it] for tl in tls])
#     return res if is_indexer(it) else list(zip(*res))

# _test([0,1])

In [None]:
%timeit _test(0)

In [None]:
%timeit _test([0, 1])

In [None]:
# NON-LAZY lazy=False
tfms = [[ToTSTensor], [Categorize()]]
kwargs = {}
tls = None

items = itemify(X,y)
assert tfms is None or len(tfms) == len(items[0]), 'len tfms == len items[0]'
tfms = [ItemGetter(i) for i in range_of(items[0])] if tfms is None else [[ItemGetter(i)] + L(tfms[i]) for i in range_of(items[0])]
tls = L(tls if tls else [TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))]).zip()
tls = L(zip(*tls))

def _test2(it):
    "Index each entry of the global `tls` (wrapped in `L`) with `it`, regrouping per item for a collection of indices."
    # An earlier `_test2` definition directly above was immediately overwritten by this one, so it was dropped
    res = tuple([L(tl)[it] for tl in tls])
    return res if is_indexer(it) else list(zip(*res))

_test2([0,1])

In [None]:
# NOTE(review): the original `tls[0][]` was a syntax error; index 0 is assumed to be the intended peek
tls[0][0]

In [None]:
%timeit _test2(0)

In [None]:
%timeit _test2([0,1])

In [None]:
# MIXED lazy=False in_memory=True (memmap)
tfms = [[ToTSTensor], [Categorize()]]
kwargs = {}
tls = None
lazy = True

if not tls:
    items = itemify(X,y)
    items_len = len(items[0])
    xtfms = None
    assert tfms is None or len(tfms) == len(items[0]), 'len tfms == len items[0]'
    if items[0][0].__class__.__name__ == 'memmap': # if data in memory split tfms
        lazy = False
        if tfms is not None: xtfms, tfms[0] = tfms[0], None
    tfms = [ItemGetter(i) for i in range_of(items[0])] if tfms is None else [[ItemGetter(i)] + L(tfms[i]) for i in range(items_len)]
    tls = L([TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])
    if not lazy: tls = tls.zip()

def _test3(it):
    "Wrap `tls[it]` in a `TfmdLists` using the global `xtfms`, then return its first `items_len` entries."
    wrapped = TfmdLists(tls[it], xtfms, **kwargs)
    res = tuple(wrapped[:items_len])
    if is_indexer(it): return res
    return list(res)

_test3([0,1])

In [None]:
%timeit _test3(0)

In [None]:
%timeit _test3([0,1])

In [None]:
# ALL
tfms = [[ToTSTensor], [Categorize()]]
kwargs = {}
tls = None
lazy = False

if not tls:
#     items = itemify(X,y)
    items = itemify(np.array(X), np.array(y))
    items_len = len(items[0])                
    assert tfms is None or len(tfms) == len(items[0]), 'len tfms == len items[0]'
    if items[0][0].__class__.__name__ == 'memmap': lazy = False # if data on disk split tfms
    if not lazy and tfms is not None: xtfms, tfms[0] = tfms[0], None
    tfms = [ItemGetter(i) for i in range_of(items[0])] if tfms is None else [[ItemGetter(i)] + L(tfms[i]) for i in range(items_len)]
    tls = L([TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])
#     if not lazy: # By applying this we perform all tfms- is there an alternative?
#         tls = L(tls.zip())
#                 tls = L(zip(*tls))

# def _test4(it):
#     if xtfms is not None: 
#         print(1)
#         res = tuple(TfmdLists(tls[it], xtfms, **kwargs)[:items_len])
#         return res if is_indexer(it) else list(res)
#     elif lazy:
#         print(2)
#         res = tuple([tl[it] for tl in tls])
#         return res if is_indexer(it) else list(zip(*res))
#     else:
#         print(3)
#         return tls[it] if is_indexer(it) else list(tls[it])

# _test4([0,1])

In [None]:
tfms = [[ToTSTensor], [Categorize()]]
tls = L([TfmdLists(X, None), TfmdLists(y, tfms[1])]).zip()
tls2 = L([TfmdLists(tls, tfms[0], splits=split_idx)])
tls2

In [None]:
items = itemify(np.array(X), np.array(y))
tfms = [[ToTSTensor], [Categorize()]]
tfms = [ItemGetter(i) for i in range_of(items[0])] if tfms is None else [[ItemGetter(i)] + L(tfms[i]) for i in range(items_len)]
tls0 = TfmdLists(TfmdLists(items, tfms[0]), None, splits=split_idx)
tls1 = TfmdLists(TfmdLists(items, tfms[1])[:len(items)], None, splits=split_idx)
tls  = L([tls0, tls1])

In [None]:
%timeit tls[0]

In [None]:
tls2[0].subset(0)[0,1]

In [None]:
tls2 = L(tls.zip())
tls3 = L(zip(*tls2))

In [None]:
TfmdLists(tls.zip(), None)[0]

In [None]:
if tls is None:
            if items is None:
                if y is None: items = itemify(X)
                else: items = itemify(X,y)
            items_len = len(items[0])                
            assert tfms is None or len(tfms) == len(items[0]), 'len tfms == len items[0]'
            if items[0][0].__class__.__name__ == 'memmap': lazy = False # if data on disk split tfms
            if not lazy and tfms is not None: xtfms, tfms[0] = tfms[0], None
            tfms = [ItemGetter(i) for i in range_of(items[0])] if tfms is None else [[ItemGetter(i)] + L(tfms[i]) for i in range(items_len)]
            tls = L([TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])
            if not lazy: # By applying this we perform all tfms- is there an alternative?
                tls = L(tls.zip())
#                 tls = L(zip(*tls))
        else: 
            tls = L(tls)
            items_len = len(tls[0])
        self.tls = tls
        self.xtfms = xtfms
        self.items_len = items_len
        self.lazy=lazy
        print(lazy, xtfms)

In [None]:
itemify(X,y)

In [None]:
split_idx

In [None]:
tfms = [[ToTSTensor], [Categorize()]]
# items = itemify(np.array(X), np.array(y))
dsets = TSDatasets(X, y, tfms=tfms, splits=split_idx, lazy=False)

In [None]:
dsets.splits

In [None]:
type(dsets.tls[0]), len(dsets.tls[0].subset(0))

In [None]:
valid_dl = TSTfmdDL(dsets.train, bs=128)
xb,yb = next(iter(valid_dl))
xb.shape

In [None]:
%time cycle_dl(valid_dl)

In [None]:
dls = TfmdDL(dsets, bs=64, val_bs=128)

In [None]:
xb,yb = next(iter(dls.train))
xb

In [None]:
t = L(TfmdLists(L(dsets.tls).zip(), None))
t[0]

In [None]:
t = L(TfmdLists(L(dsets.tls).zip(), ToTSTensor))
t[0]

In [None]:
%timeit t[0]

In [None]:
dsets[0, 1]

In [None]:
%timeit dsets[0]

In [None]:
train_ds = dsets.train
train_ds[0, 1]

In [None]:
%timeit train_ds[0]

In [None]:
%timeit train_ds[0]

In [None]:
train_ds = dsets.train

In [None]:
%timeit train_ds[0]

In [None]:
%timeit dsets[0]

In [None]:
%timeit dsets.train[0]

In [None]:
t = L([TfmdLists(tl, None) for tl in dsets.tls])

In [None]:
t[0].subset(0)

In [None]:
dsets.train[0]

In [None]:
tuple((Pipeline(ToTSTensor)(X)[0], TfmdLists(y, Categorize())[:len(y)][0]))

In [None]:
dsets.train[0]

In [None]:
tls = dsets.tls.zip()
tls

In [None]:
def cycle_dl(dl):
    "Iterate once over every batch of `dl` and discard it (useful for timing a full pass)."
    # Batches are not unpacked, so unlabeled (1-element) batches work as well as (x, y) ones
    for _ in dl: pass

In [None]:
FilteredBase??

In [None]:
class TfmdLists(FilteredBase, L, GetAttr):
    "A `Pipeline` of `tfms` applied to a collection of `items`"
    # Name of the attribute used as `GetAttr` default — presumably unknown attributes are looked up on `self.tfms`; confirm against fastcore
    _default='tfms'
    def __init__(self, items, tfms, use_list=None, do_setup=True, split_idx=None, train_setup=True,
                 splits=None, types=None, verbose=False):
        super().__init__(items, use_list=use_list)
        # Without explicit `splits`, use `[slice(None), []]`; every split goes through `mask2idxs`
        self.splits = L([slice(None),[]] if splits is None else splits).map(mask2idxs)
        # Reuse the pipeline of another `TfmdLists`; a ready-made `Pipeline` disables setup
        if isinstance(tfms,TfmdLists): tfms = tfms.tfms
        if isinstance(tfms,Pipeline): do_setup=False
        self.tfms = Pipeline(tfms, split_idx=split_idx)
        self.types = types
        if do_setup:
            pv(f"Setting up {self.tfms}", verbose)
            self.setup(train_setup=train_setup)

    # Copies made through `_new` keep this object's pipeline and recorded types and skip setup
    def _new(self, items, **kwargs): return super()._new(items, tfms=self.tfms, do_setup=False, types=self.types, **kwargs)
    # New object restricted to the indices of split `i`, with `split_idx=i`
    def subset(self, i): return self._new(self._get(self.splits[i]), split_idx=i)
    # Hook applied by `__getitem__` to each retrieved item: runs the pipeline
    def _after_item(self, o): return self.tfms(o)
    def __repr__(self): return f"{self.__class__.__name__}: {self.items}\ntfms - {self.tfms.fs}"
    def __iter__(self): return (self[i] for i in range(len(self)))
    # `show`, `decode` and `__call__` all delegate to the pipeline
    def show(self, o, **kwargs): return self.tfms.show(o, **kwargs)
    def decode(self, o, **kwargs): return self.tfms.decode(o, **kwargs)
    def __call__(self, o, **kwargs): return self.tfms.__call__(o, **kwargs)
    # Counts how often each index occurs across all splits and keeps the counts greater than 1
    def overlapping_splits(self): return L(Counter(self.splits.concat()).values()).filter(gt(1))

    def setup(self, train_setup=True):
        "Set up the pipeline on `self`, then record the type seen before each transform and after the last."
        self.tfms.setup(self, train_setup)
        if len(self) != 0:
            # Sample item: the first raw item, or the first raw item of the first split
            x = super().__getitem__(0) if self.splits is None else super().__getitem__(self.splits[0])[0]
            self.types = []
            for f in self.tfms.fs:
                # Prefer the transform's own `input_types` attribute over the observed type
                self.types.append(getattr(f, 'input_types', type(x)))
                x = f(x)
            self.types.append(type(x))
        # NOTE(review): with no items and `types=None` this iterates over None — confirm this case cannot occur
        types = L(t if is_listy(t) else [t] for t in self.types).concat().unique()
        self.pretty_types = '\n'.join([f'  - {t}' for t in types])

    def infer_idx(self, x):
        "Return the position of the first entry in `self.types` that `x` is an instance of."
        idx = 0
        for t in self.types:
            if isinstance(x, t): break
            idx += 1
        types = L(t if is_listy(t) else [t] for t in self.types).concat().unique()
        pretty_types = '\n'.join([f'  - {t}' for t in types])
        # `idx` equals `len(self.types)` when no entry matched
        assert idx < len(self.types), f"Expected an input of type in \n{pretty_types}\n but got {type(x)}"
        return idx

    def infer(self, x):
        "Apply to `x` the transforms from the position given by `infer_idx(x)` onward."
        return compose_tfms(x, tfms=self.tfms.fs[self.infer_idx(x):], split_idx=self.split_idx)

    def __getitem__(self, idx):
        "Retrieve raw item(s) at `idx`, then apply `_after_item` to the item or to each item of a collection."
        res = super().__getitem__(idx)
        if self._after_item is None: return res
        return self._after_item(res) if is_indexer(idx) else res.map(self._after_item)

In [None]:
class TSDatasets(FilteredBase):
    "A dataset that creates a tuple from each `tfms`, passed thru `item_tfms`"
    def __init__(self, X=None, y=None, items=None, tfms=None, tls=None, n_inp=None, dl_type=None, xtfms=None, lazy=True, **kwargs):
        """Build the underlying `TfmdLists`, or reuse `tls` when given.

        `items` defaults to `itemify(X)` / `itemify(X, y)`. `tfms`, when given,
        must hold one transform list per element of an item. With `lazy=True`
        one `TfmdLists` per element is kept and transforms run on access. With
        `lazy=False` those lists are zipped once into item tuples (which runs
        their transforms for every item) and stored in a single `TfmdLists`
        that applies `xtfms` on access. Remaining `kwargs` go to `TfmdLists`.
        """
        super().__init__(dl_type=dl_type)
        if tls is None:
            if items is None: items = itemify(X) if y is None else itemify(X,y)
            n_items = len(items[0])
            assert tfms is None or len(tfms) == n_items, 'len tfms == len items[0]'
            tfms = [ItemGetter(i) for i in range(n_items)] if tfms is None else [[ItemGetter(i)] + L(tfms[i]) for i in range(n_items)]
            tls = L([TfmdLists(items, t, **kwargs) for t in tfms])
            if not lazy:
                # Zip exactly once: zipping twice would transpose the data back into per-element lists
                tls = L([TfmdLists(tls.zip(), xtfms, **kwargs)])
        else:
            tls = L(tls)
            n_items = len(tls)
        self.tls = tls
        self.xtfms = xtfms
        self.n_items = n_items
        self.lazy=lazy
        # Unless given, every list but the last counts as an input (a lone list is the input)
        self.n_inp = (1 if len(self.tls)==1 else len(self.tls)-1) if n_inp is None else n_inp

    def __getitem__(self, it):
        if self.lazy:
            # Index every list with `it`; for a collection of indices regroup into one tuple per item
            res = tuple([tl[it] for tl in self.tls])
            return res if is_indexer(it) else list(zip(*res))
        # Non-lazy: the single list already holds one tuple per item
        res = self.tls[0][it]
        return res if is_indexer(it) else list(res)

    # Unknown attributes are gathered from the underlying lists in `self.tls`
    def __getattr__(self,k): return gather_attrs(self, k, 'tls')
    def __dir__(self): return super().__dir__() + gather_attr_names(self, 'tls')
    def __len__(self): return len(self.tls[0])
    def __iter__(self): return (self[i] for i in range(len(self)))
    def __repr__(self): return coll_repr(self)
    # NOTE(review): `decode` and `show` pair elements of `o` with `self.tls`; in non-lazy mode there is a single list — confirm intended behavior
    def decode(self, o, full=True): return tuple(tl.decode(o_, full=full) for o_,tl in zip(o,tuplify(self.tls, match=o)))
    def subset(self, i):
        # Subset each underlying list (exactly one in non-lazy mode) and keep the mode settings
        return type(self)(tls=L(tl.subset(i) for tl in self.tls),
                          n_inp=self.n_inp, xtfms=self.xtfms, lazy=self.lazy)
    def _new(self, items, *args, **kwargs): return super()._new(items, tfms=self.tfms, do_setup=False, **kwargs)
    def overlapping_splits(self): return self.tls[0].overlapping_splits()
    @property
    def splits(self): return self.tls[0].splits
    @property
    def split_idx(self): return self.tls[0].tfms.split_idx
    @property
    def items(self): return self.tls[0].items
    @items.setter
    def items(self, v):
        for tl in self.tls: tl.items = v

    def show(self, o, ctx=None, **kwargs):
        for o_,tl in zip(o,self.tls): ctx = tl.show(o_, ctx=ctx, **kwargs)
        return ctx

    def new_empty(self):
        tls = [tl._new([], split_idx=tl.split_idx) for tl in self.tls]
        # Carry over the mode settings so the empty copy indexes the same way as `self`
        return type(self)(tls=tls, n_inp=self.n_inp, xtfms=self.xtfms, lazy=self.lazy)

    @contextmanager
    def set_split_idx(self, i):
        old_split_idx = self.split_idx
        for tl in self.tls: tl.tfms.split_idx = i
        # Restore the previous value even when the `with` body raises
        try: yield self
        finally:
            for tl in self.tls: tl.tfms.split_idx = old_split_idx

    _docs=dict(
        decode="Compose `decode` of all `tuple_tfms` then all `tfms` on `i`",
        show="Show item `o` in `ctx`",
        dataloaders="Get a `DataLoaders`",
        overlapping_splits="All splits that are in more than one split",
        subset="New `Datasets` that only includes subset `i`",
        new_empty="Create a new empty version of the `self`, keeping only the transforms",
        set_split_idx="Contextmanager to use the same `Datasets` with another `split_idx`"
    )

In [None]:
class Datasets(FilteredBase):
    "A dataset that creates a tuple from each `tfms`, passed thru `item_tfms`"
    def __init__(self, items=None, tfms=None, tls=None, n_inp=None, dl_type=None, **kwargs):
        super().__init__(dl_type=dl_type)
        # One `TfmdLists` per entry of `tfms` (a single transform-free one when `tfms` is None),
        # unless ready-made `tls` are supplied
        self.tls = L(tls if tls else [TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])
        # Unless given, every list but the last counts as an input (a lone list is the input)
        self.n_inp = (1 if len(self.tls)==1 else len(self.tls)-1) if n_inp is None else n_inp

    def __getitem__(self, it):
        # Index every list with `it`; for a collection of indices regroup into one tuple per item
        res = tuple([tl[it] for tl in self.tls])
        return res if is_indexer(it) else list(zip(*res))

    # Unknown attributes are gathered from the underlying lists in `self.tls`
    def __getattr__(self,k): return gather_attrs(self, k, 'tls')
    def __dir__(self): return super().__dir__() + gather_attr_names(self, 'tls')
    def __len__(self): return len(self.tls[0])
    def __iter__(self): return (self[i] for i in range(len(self)))
    def __repr__(self): return coll_repr(self)
    def decode(self, o, full=True): return tuple(tl.decode(o_, full=full) for o_,tl in zip(o,tuplify(self.tls, match=o)))
    def subset(self, i): return type(self)(tls=L(tl.subset(i) for tl in self.tls), n_inp=self.n_inp)
    def _new(self, items, *args, **kwargs): return super()._new(items, tfms=self.tfms, do_setup=False, **kwargs)
    def overlapping_splits(self): return self.tls[0].overlapping_splits()
    @property
    def splits(self): return self.tls[0].splits
    @property
    def split_idx(self): return self.tls[0].tfms.split_idx
    @property
    def items(self): return self.tls[0].items
    @items.setter
    def items(self, v):
        for tl in self.tls: tl.items = v

    def show(self, o, ctx=None, **kwargs):
        # Each list draws its own element of `o`, threading the same `ctx` through
        for o_,tl in zip(o,self.tls): ctx = tl.show(o_, ctx=ctx, **kwargs)
        return ctx

    def new_empty(self):
        tls = [tl._new([], split_idx=tl.split_idx) for tl in self.tls]
        return type(self)(tls=tls, n_inp=self.n_inp)

    @contextmanager
    def set_split_idx(self, i):
        "Temporarily set `split_idx` to `i` on every list's transforms."
        old_split_idx = self.split_idx
        for tl in self.tls: tl.tfms.split_idx = i
        # Restore the previous value even when the `with` body raises
        try: yield self
        finally:
            for tl in self.tls: tl.tfms.split_idx = old_split_idx

In [None]:
# class TSTuple(tuple):
    
#     @delegates(plt.subplots)
#     def show(self, ax=None, **kwargs):
#         if ax is None: fig, ax = plt.subplots(**kwargs)
#         ax.plot(self[0].T)
#         ax.axis(xmin=0, xmax=self[0].shape[-1] - 1)
#         ax.set_title('unlabeled' if len(self) == 1 else self[1], weight='bold')
#         if ax is None: 
#             plt.tight_layout()
#             plt.show()

## Option 1: datablock --> dataloaders

In [None]:
# @ToTensor
# def encodes(self, o:np.ndarray): return TSTensor.create(o)

In [None]:
def TSTensorBlock():
    "Return a `TransformBlock` whose only item transform is `ToTensor`"
    block = TransformBlock(item_tfms=ToTensor)
    return block

# #export
# class TSTransformBlock():
#     "A basic wrapper that links defaults transforms for the data block API"
#     def __init__(self, type_tfms=None, item_tfms=None, batch_tfms=None, dl_type=None, dls_kwargs=None):
#         self.type_tfms  =              L(type_tfms)
#         self.item_tfms  = ToTSTensor + L(item_tfms)
#         self.batch_tfms =              L(batch_tfms)
#         self.dl_type,self.dls_kwargs = dl_type,({} if dls_kwargs is None else dls_kwargs)

In [None]:
dblock = DataBlock(blocks=(TSTensorBlock(), CategoryBlock(vocab=None, add_na=False)),
                   get_x=ItemGetter(0), get_y=ItemGetter(1), 
                   splitter=IndexSplitter(split_idx[1]))
dls = dblock.dataloaders(source=itemify(X,y), bs=64, val_bs=128)

In [None]:
xb,yb = next(iter(dls.valid))
xb.shape

In [None]:
%time cycle_dl(dls.valid)

In [None]:
# unlabeled
# udblock = DataBlock(blocks=(TSTensorBlock(type_tfms=None, item_tfms=None, batch_tfms=None)),get_x=ItemGetter(0))
# dls    = udblock.dataloaders(source=itemify(X,), bs=64, val_bs=128)

In [None]:
dblock.summary(dblock.source)

In [None]:
# _,axs = plt.subplots(nrows=3, ncols=3, figsize=(12,10))
# dls.show_batch(ctxs=axs.flatten())

In [None]:
# xb = next(iter(dls.valid))

In [None]:
xb, yb = next(iter(dls.valid))
xb.shape, yb.shape

In [None]:
%time cycle_dl(dls.valid)

## Option 2:  datasets --> dataloaders

In [None]:
# class Datasets(FilteredBase):
#     "A dataset that creates a tuple from each `tfms`, passed thru `item_tfms`"
#     def __init__(self, items=None, tfms=None, tls=None, n_inp=None, dl_type=None, **kwargs):
#         super().__init__(dl_type=dl_type)
#         self.tls = L(tls if tls else [TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])
#         self.n_inp = (1 if len(self.tls)==1 else len(self.tls)-1) if n_inp is None else n_inp

#     def __getitem__(self, it):
#         res = tuple([tl[it] for tl in self.tls])
#         return res if is_indexer(it) else list(zip(*res))

#     def __getattr__(self,k): return gather_attrs(self, k, 'tls')
#     def __dir__(self): return super().__dir__() + gather_attr_names(self, 'tls')
#     def __len__(self): return len(self.tls[0])
#     def __iter__(self): return (self[i] for i in range(len(self)))
#     def __repr__(self): return coll_repr(self)
#     def decode(self, o, full=True): return tuple(tl.decode(o_, full=full) for o_,tl in zip(o,tuplify(self.tls, match=o)))
#     def subset(self, i): return type(self)(tls=L(tl.subset(i) for tl in self.tls), n_inp=self.n_inp)
#     def _new(self, items, *args, **kwargs): return super()._new(items, tfms=self.tfms, do_setup=False, **kwargs)
#     def overlapping_splits(self): return self.tls[0].overlapping_splits()
#     @property
#     def splits(self): return self.tls[0].splits
#     @property
#     def split_idx(self): return self.tls[0].tfms.split_idx
#     @property
#     def items(self): return self.tls[0].items
#     @items.setter
#     def items(self, v):
#         for tl in self.tls: tl.items = v

#     def show(self, o, ctx=None, **kwargs):
#         for o_,tl in zip(o,self.tls): ctx = tl.show(o_, ctx=ctx, **kwargs)
#         return ctx

#     def new_empty(self):
#         tls = [tl._new([], split_idx=tl.split_idx) for tl in self.tls]
#         return type(self)(tls=tls, n_inp=self.n_inp)

#     @contextmanager
#     def set_split_idx(self, i):
#         old_split_idx = self.split_idx
#         for tl in self.tls: tl.tfms.split_idx = i
#         yield self
#         for tl in self.tls: tl.tfms.split_idx = old_split_idx

#     _docs=dict(
#         decode="Compose `decode` of all `tuple_tfms` then all `tfms` on `i`",
#         show="Show item `o` in `ctx`",
#         dataloaders="Get a `DataLoaders`",
#         overlapping_splits="All splits that are in more than one split",
#         subset="New `Datasets` that only includes subset `i`",
#         new_empty="Create a new empty version of the `self`, keeping only the transforms",
#         set_split_idx="Contextmanager to use the same `Datasets` with another `split_idx`"
#     )

In [None]:
class TSDatasets(FilteredBase):
    "A dataset that creates a tuple from each `tfms`, passed thru `item_tfms`"
    def __init__(self, X=None, y=None, items=None, tfms=None, xtfms=None, tls=None, n_inp=None, dl_type=None,  **kwargs):
        # NOTE: the third positional parameter is `items`, so `tfms` has to be passed by keyword.
        super().__init__(dl_type=dl_type)
        self.xtfms = xtfms
        # Without explicit `items`, build them from `X` and, when given, `y`
        if items is None: 
            if y is not None: items = itemify(X, y)
            else: items = itemify(X,)
#         if tfms is not None and self.xtfms is None and items[0][0].__class__.__name__ == 'memmap': 
#             self.xtfms = Pipeline([ToTSTensor] + tfms[0])
#             tfms[0] = []
#             if tfms is not None: tfms = [[ItemGetter(i)] + t for i,t in enumerate(tfms)]
        # One `TfmdLists` per entry of `tfms` (a single one with no transforms when `tfms` is None), unless `tls` is given
        self.tls = L(tls if tls else [TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])
        # Lists passed in by the caller are zipped together
        if tls is not None: self.tls = self.tls.zip()
#         if tls is not None and self.xtfms is not None: self.tls = self.tls.zip()
        # Default: every list but the last is an input (a lone list is an input)
        self.n_inp = (1 if len(self.tls)==1 else len(self.tls)-1) if n_inp is None else n_inp

    def __getitem__(self, it):
        "Index `self.tls` with `it` and return the result as a tuple"
        # NOTE(review): this indexes the collection of lists itself rather than each list at `it` — confirm this matches the layout of `self.tls`
        return tuple(self.tls[it])

    def __getattr__(self,k): return gather_attrs(self, k, 'tls')
    def __dir__(self): return super().__dir__() + gather_attr_names(self, 'tls')
    def __len__(self): return len(self.tls[0])
    def __iter__(self): return (self[i] for i in range(len(self)))
    def __repr__(self): return coll_repr(self)
    def decode(self, o, full=True): return tuple(tl.decode(o_, full=full) for o_,tl in zip(o,tuplify(self.tls, match=o)))
    def subset(self, i): return type(self)(tls=L(tl.subset(i) for tl in self.tls), xtfms=self.xtfms, n_inp=self.n_inp)
    def _new(self, items, *args, **kwargs): return super()._new(items, tfms=self.tfms, do_setup=False, **kwargs)
    def overlapping_splits(self): return self.tls[0].overlapping_splits()
    @property
    def splits(self): return self.tls[0].splits
    @property
    def split_idx(self): return self.tls[0].tfms.split_idx
    @property
    def items(self): return self.tls[0].items
    @items.setter
    def items(self, v):
        for tl in self.tls: tl.items = v

    def show(self, o, ctx=None, **kwargs):
        for o_,tl in zip(o,self.tls): ctx = tl.show(o_, ctx=ctx, **kwargs)
        return ctx

    def new_empty(self):
        tls = [tl._new([], split_idx=tl.split_idx) for tl in self.tls]
        # Forward `xtfms` as `subset` does, so the empty copy keeps the same transforms
        return type(self)(tls=tls, xtfms=self.xtfms, n_inp=self.n_inp)

    @contextmanager
    def set_split_idx(self, i):
        old_split_idx = self.split_idx
        for tl in self.tls: tl.tfms.split_idx = i
        # try/finally so the previous value is written back even when the `with` body raises
        try: yield self
        finally:
            for tl in self.tls: tl.tfms.split_idx = old_split_idx

    _docs=dict(
        decode="Compose `decode` of all `tuple_tfms` then all `tfms` on `i`",
        show="Show item `o` in `ctx`",
        dataloaders="Get a `DataLoaders`",
        overlapping_splits="All splits that are in more than one split",
        subset="New `Datasets` that only includes subset `i`",
        new_empty="Create a new empty version of the `self`, keeping only the transforms",
        set_split_idx="Contextmanager to use the same `Datasets` with another `split_idx`"
    )

In [None]:
@Transform
def ToTSTensor(x:np.ndarray):
    "Turn a numpy array into a `TSTensor` via `TSTensor.create`"
    ts = TSTensor.create(x)
    return ts

In [None]:
tfms   =  [[ItemGetter(0), ToTSTensor], [ItemGetter(1),Categorize()]]
dsets = TSDatasets(X,y,tfms=tfms,splits=split_idx)
dsets.train[0]

In [None]:
dsets.train[0,1,2,3]

In [None]:
%timeit dsets[0]

In [None]:
%timeit dsets.train[0]

In [None]:
valid_dl = TfmdDL(dsets.valid, bs=128)

In [None]:
xb,yb = next(iter(valid_dl))
xb, yb

In [None]:
train_dl, valid_dl = dls.dataloaders()

In [None]:
xb,yb = next(iter(valid_dl))
xb, yb

In [None]:
%time cycle_dl(valid_dl)

In [None]:
valid_dl[0,1]

In [None]:
tfms   =  [[add(10)], [Categorize()]]
xtfms = tfms[0]
tfms[0] = []
if tfms is not None: tfms = [[ItemGetter(i)] + t for i,t in enumerate(tfms)]
tfms

In [None]:
dsets.tls[0]

In [None]:
tfms   =  [[add(10)], [Categorize()]]
if tfms is not None: tfms = [[ItemGetter(i)] + t for i,t in enumerate(tfms)]
tfms

In [None]:
%timeit dsets[0]

In [None]:
tuple(L(dsets.tls[0, 2, 3]).map(ToTSTensor))

In [None]:
# class TSDatasets(Datasets):
    
#     @delegates(plt.subplots)
#     def show(self, idx, **kwargs):
#         for i in idx if isinstance(idx, list) else [idx]: self.decode(self[i]).show(**kwargs)
#         plt.tight_layout()
#         plt.show()
    
#     def decode(self, o, full=True): return TSTuple(tl.decode(o_, full=full) for o_,tl in zip(o,tuplify(self.tls, match=o)))

In [None]:
# #export
# class TSTfmdDL(TfmdDL): 

#     @property
#     def vars(self): return self.dataset[0][0].shape[-2]
    
#     @property
#     def len(self): return self.dataset[0][0].shape[-1]

#     @delegates(plt.subplots)
#     def show_batch(self, b=None, max_n=9, nrows=3, ncols=3, figsize=(12, 10), **kwargs):
#         if b is None: b = self.one_batch()
#         db = self.decode_batch(b, max_n=max_n)
#         if nrows is None: 
#             sqrt = math.sqrt(len(db))
#             rows = min(math.ceil(sqrt), len(db)) 
#         if ncols is None: ncols = len(db) // rnows
#         fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize,  **kwargs)
#         for tup, ax in zip(db[:nrows ** 2], [axs] if nrows == 1 else axs.flatten()): 
#             show_tuple(tup, ax=ax)
#         plt.tight_layout()

# @delegates(plt.subplots)
# def show_tuple(tup, ax=None, **kwargs):
#     "Display a timeseries plot from a tuple"
#     tup[0].show(title='unlabeled' if len(tup) == 1 else tup[1], ax=ax, **kwargs)

In [None]:
# export
class TSTensor(TensorBase):
    "Tensor subclass for time series; the last two axes are read as variables and length"

    @classmethod
    def create(cls, x): 
        "Build a `TSTensor` by passing `x` straight to the class constructor (no reshaping is applied)"
        return cls(x)

    @property
    def vars(self): return self.shape[-2]

    @property
    def len(self): return self.shape[-1]
    
    def __getitem__(self, idx):
        "Index like a regular tensor, keeping the result typed like `self` via `retain_type`"
        res = super().__getitem__(idx)
        return retain_type(res, self)

    def __repr__(self):
        if self.ndim == 2:   return f'TSTensor(vars={self.shape[0]}, len={self.shape[1]})'
        elif self.ndim > 2:  return f'TSTensor(samples={self.shape[-3]}, vars={self.shape[-2]}, len={self.shape[-1]})'
        # Fewer than 2 dims: `__repr__` must still return a str (falling through returned None and raised TypeError)
        return f'TSTensor(size:{list(self.shape)})'

    def show(self, ax=None, ctx=None, title=None, **kwargs):
        "Plot the transposed tensor on `ax` (or `ctx`); `kwargs` only go to `plt.subplots` when a new axis is created"
        ax = ifnone(ax,ctx)
        if ax is None: fig, ax = plt.subplots(**kwargs)
        ax.plot(self.T)
        ax.axis(xmin=0, xmax=self.shape[-1] - 1)
        ax.set_title(title, weight='bold')
        plt.tight_layout()
        return ax
    
@Transform
def ToTSTensor(x:np.ndarray):
    "Turn a numpy array into a `TSTensor` via `TSTensor.create`"
    ts = TSTensor.create(x)
    return ts

In [None]:
tfms   =  [[ItemGetter(0), ], [ItemGetter(1), Categorize()]]
items  =  itemify(X,y)
splits =  IndexSplitter(split_idx[1])(items)
dsets  =  Datasets(items, tfms=tfms, splits=splits)
dls    =  TfmdDL(dsets, bs=128)

In [None]:
tfms   =  [[ItemGetter(0), ], [ItemGetter(1), Categorize()]]
# `tfms` must be passed by keyword: the third positional parameter of `TSDatasets` is `items`
TSDatasets(X, y, tfms=tfms)

In [None]:
%timeit tuple(L(dsets[0]).map(ToTSTensor))

In [None]:
%timeit ToTSTensor(dsets[0])

In [None]:
items = None
tls = None

kwargs = {}
tfms   =  [[ItemGetter(0), ], [ItemGetter(1), Categorize()]]

if items is None: 
    if y is not None: items = itemify(X, y)
    else: items = itemify(X,)

tls = L(tls if tls else [TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])

res = tuple([tl[it] for tl in tls])
res if is_indexer(it) else list(zip(*res))

In [None]:
tls[0][0]

In [None]:
tls[0]

In [None]:
tls = L(tls if tls else [TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))]).zip()

In [None]:
it = 0

if is_indexer(it) : res = ToTSTensor(tls[it])
else: res = list(zip(*res))
res

In [None]:
%timeit ToTSTensor(tls[it])

In [None]:
items = itemify(X,y)
items.map(None)

## New heading

In [None]:
test_eq(items, pickle.loads(pickle.dumps(items)))

In [None]:
dls = TSTfmdDL(dsets, bs=128)
dls.vars, dls.len, dls.c

In [None]:
dls = TfmdDL(dsets, bs=128, )
xb, yb = next(iter(dls.train))
xb, yb

In [None]:
t = dls.one_batch()
t

In [None]:
%time cycle_dl(dls.valid)

In [None]:
%time cycle_dl(valid_dl)

In [None]:
from fastai2.data.all import *
dblock = DataBlock(blocks=(TSTensorBlock, CategoryBlock),
                   get_x=ItemGetter(0), get_y=ItemGetter(1), 
                   splitter=IndexSplitter(split_idx[1])
                  )
dsets1 =  dblock.datasets(itemify(X,y))
# Build the loader from the datasets created just above (was `dsets`, left over from an earlier cell)
dls1 =    TSTfmdDL(dsets1, bs=128, num_workers=4)

In [None]:
tfms   =  [[ItemGetter(0), ToTSTensor], [ItemGetter(1), Categorize()]]
items  =  itemify(X,y)
splits =  IndexSplitter(split_idx[1])(items)
# `items` goes by keyword: the first positional parameter of `TSDatasets` is `X`
dsets2  =  TSDatasets(items=items, tfms=tfms, splits=splits)
# Build the loader from the datasets created just above (was `dsets`, left over from an earlier cell)
dls2    =  TSTfmdDL(dsets2, bs=128)

In [None]:
tfms    =  [[ItemGetter(0)], [ItemGetter(1)]]
items   =  itemify(X,y)
splits  =  IndexSplitter(split_idx[1])(items)
dsets3  =  Datasets(items, tfms=tfms, splits=splits)
# Build the loader from the datasets created just above (was `dsets`, left over from an earlier cell)
dls3    =  TfmdDL(dsets3, bs=128)

In [None]:
%time cycle_dl(dls3.valid)

In [None]:
%time cycle_dl(dls2.valid)

In [None]:
%time cycle_dl(dls3.valid)

In [None]:
def unzip(tup):
    "Transpose an iterable of sequences into a list of tuples, one tuple per position"
    return [*zip(*tup)]

In [None]:
%timeit TupleGetter(0)(items)

In [None]:
tls = L([TfmdLists(items, [ItemGetter(0), ToTSTensor]), TfmdLists(items, [ItemGetter(1), Categorize()])]).zipped()

In [None]:
delegates

In [None]:
tls

In [None]:
%timeit tuple([tls[0][0], tls[1][0]])

In [None]:
tls[0][1]

In [None]:
%timeit ToTSTensor(items[0][0])

In [None]:
%timeit tls[0][0]

In [None]:
%timeit (tls[it] for tl in tls]))

In [None]:
%timeit ([tl[it] for tl in tls])

In [None]:
%timeit tuple([tl[it] for tl in tls])

In [None]:
%timeit TfmdLists(items, ItemGetter(0))

In [None]:
xb,yb = dls1.one_batch()
xb[0], yb[0]

In [None]:
xb,yb = dls2.one_batch()
xb[0], yb[0]

In [None]:
dls2.show_batch()

In [None]:
from fastai2.callback.all import *
# model =  ResNet(dls.vars, dls.c)
model  = inception_time(dls.vars, dls.c)
learn =  Learner(dls1.dataloaders(),
                model,
                loss_func=nn.CrossEntropyLoss(),
                metrics=accuracy)
learn.fit_one_cycle(25, lr_max=1e-3)

# New heading

In [None]:
# export
# class TSTuple(tuple):
    
#     @delegates(plt.subplots)
#     def show(self, ax=None, **kwargs):
#         if ax is None: fig, ax = plt.subplots(**kwargs)
#         ax.plot(self[0].T)
#         ax.axis(xmin=0, xmax=self[0].shape[-1] - 1)
#         ax.set_title('unlabeled' if len(self) == 1 else self[1], weight='bold')
#         if ax is None: 
#             plt.tight_layout()
#             plt.show()

In [None]:
#export
# class TSDatasets(Datasets):
    
#     @delegates(plt.subplots)
#     def show(self, idx, **kwargs):
#         for i in idx if isinstance(idx, list) else [idx]: self.decode(self[i]).show(**kwargs)
#         plt.tight_layout()
#         plt.show()
    
#     def decode(self, o, full=True): return TSTuple(tl.decode(o_, full=full) for o_,tl in zip(o,tuplify(self.tls, match=o)))

In [None]:
#export
class TSTfmdDL(TfmdDL): 
    "A `TfmdDL` that reads the series dimensions from its dataset and plots decoded batches on a grid"

    @property
    def vars(self):
        "Size of the second-to-last axis of the first element of the first dataset item"
        return self.dataset[0][0].shape[-2]
    
    @property
    def len(self):
        "Size of the last axis of the first element of the first dataset item"
        return self.dataset[0][0].shape[-1]

    @delegates(plt.subplots)
    def show_batch(self, b=None, max_n=9, rows=None, cols=None, figsize=(10, 10), **kwargs):
        "Plot up to `max_n` decoded samples of batch `b` (a fresh `one_batch()` when None) on a `rows` x `cols` grid"
        if b is None: b = self.one_batch()
        db = self.decode_batch(b, max_n=max_n)
        n = len(db)
        if n == 0: return  # nothing to plot; also avoids dividing by rows == 0 below
        if rows is None: rows = min(math.ceil(math.sqrt(n)), n)
        # Round up so rows * cols >= n; floor division dropped samples (e.g. n=3 gave a 2x1 grid)
        if cols is None: cols = math.ceil(n / rows)
        fig, axs = plt.subplots(rows, cols, figsize=figsize,  **kwargs)
        # `plt.subplots` returns a bare Axes for a 1x1 grid and an array of Axes otherwise
        axs = axs.flatten() if hasattr(axs, 'flatten') else [axs]
        for tup, ax in zip(db[:rows * cols], axs):
            # Plain tuples have no `show`: plot the first element and use the second (if any) as the title
            if hasattr(tup, 'show'): tup.show(ax=ax, **kwargs)
            else: tup[0].show(ax=ax, title='unlabeled' if len(tup) == 1 else tup[1])
        # Hide grid cells that did not receive a sample
        for ax in axs[n:]: ax.set_axis_off()
        plt.tight_layout()
        plt.show()

## Examples

### Datasets

I've used 2 datasets to test the new functionality.

1. A large dataset that doesn't fit in memory (using np.memmap arrays)
2. A smaller dataset that fits in memory (using regular np.ndarrays)

As you can see, the only difference is really how you load the data:

1. To create a normal array, load it with mmap_mode=None (the default value).
2. To create an array on disk (np.memmap) you'll need to select a mmap_mode. I normally use 'c' (copy-on-write: changes stay in memory and are never written back), since I don't want to modify data on disk.

In [None]:
dsid = 'StarLightCurves'

In [None]:
# Pick the data source from `dsid`; on-disk arrays are opened with mmap_mode='c'.
if dsid == 'memmap':
    X, y = (np.load(fname, mmap_mode='c') for fname in ('X_on_disk.npy', 'y_on_disk.npy'))

elif dsid == 'memmap_small':
    X, y = (np.load(fname, mmap_mode='c') for fname in ('X_on_disk_small.npy', 'y_on_disk_small.npy'))

elif dsid == 'numeric':
    # Synthetic in-memory data: every value of sample i equals i, and so does its label
    X = np.arange(100).reshape(100, 1, 1) + np.zeros((100, 5, 10))
    y = np.arange(100)

else:
    # Any other id is fetched with `get_UCR_data`; train and valid parts are concatenated,
    # saved as .npy files and reopened with mmap_mode='c'
    X_train, y_train, X_valid, y_valid = get_UCR_data(dsid, path='..', verbose=False)
    folder = 'data/UCR'
    X = np.concatenate((X_train, X_valid))
    np.save(str(PATH.parent/f'{folder}/{dsid}/X.npy'), X) # cannot use pathlib.PosixPath as filename 
    y = np.concatenate((y_train, y_valid))
    np.save(str(PATH.parent/f'{folder}/{dsid}/y.npy'), y)
    del X, y
    X = np.load(str(PATH.parent/f'{folder}/{dsid}/X.npy'), mmap_mode='c')
    y = np.load(str(PATH.parent/f'{folder}/{dsid}/y.npy'), mmap_mode='c')

# Class name of the loaded array, compared against dataset items in later cells
sel_arr_class = X.__class__.__name__

X.shape, y.shape

In [None]:
# class TSTransformBlock():
#     "A basic wrapper that links defaults transforms for the data block API"
#     def __init__(self, type_tfms=None, item_tfms=None, batch_tfms=None, dl_type=None, dls_kwargs=None):
#         self.type_tfms  =            L(type_tfms)
#         self.item_tfms  = ToTensor + L(item_tfms)
#         self.batch_tfms =            L(batch_tfms)
#         self.dl_type,self.dls_kwargs = dl_type,({} if dls_kwargs is None else dls_kwargs)
        
# def TSTensorBlock(cls=TSTensor): return TransformBlock(type_tfms=cls.create)

In [None]:
items = L(X, y).zip()
items[:3]

In [None]:
item = L(X, y).zip()[0]
item

In [None]:
ToTSTensor(ItemGetter(0)(item))

In [None]:
get_items_from_numpy = itemify

In [None]:
from fastai2.data.all import *
dblock = DataBlock(blocks=(TSTensorBlock, CategoryBlock),
                   get_x=ItemGetter(0), get_y=ItemGetter(1), 
                   splitter=RandomSplitter()
                  )
dsets = dblock.datasets(L(X, y).zip())
t = dsets[0]
dsets.show(t)

In [None]:
t = dsets[0]
type(dsets.decode(t)[1])

In [None]:
dsets.show(t)

In [None]:
dsets.train

In [None]:
dsets.valid

In [None]:
dsets.show(dsets.train[0])

In [None]:
dls = dblock.dataloaders(dsets, )

In [None]:
from fastai2.data.all import *
dblock = DataBlock(blocks=(TSTensorBlock, CategoryBlock),
                   get_items=itemify,
                   get_x=ItemGetter(0), get_y=ItemGetter(1), 
                   splitter=RandomSplitter()
                  )
dsets = dblock.datasets((X, y))
dsets.train[0]

In [None]:
dsets.decode(dsets.train[0])

In [None]:
dls = dblock.dataloaders(L(X, y).zip())
# dls.show_batch(rows=3, cols=3, sharey=True)

In [None]:
dls.dataset

In [None]:
tfms = [[ItemGetter(0), ToTSTensor], [ItemGetter(1), Categorize()]]
splits = RandomSplitter()(X)
items = L(X, y).zip()
dsets = Datasets(items, tfms=tfms, splits=splits)
t = dsets[0]
t

In [None]:
t[0].data, t[1].data

In [None]:
dsets.show(0)

### No tfms

In [None]:
tfms = [[ItemGetter(0), ], [ItemGetter(1), ]]
splits = RandomSplitter()(X)
items = Lzip(X, y)
dsets = TSDatasets(items, tfms=tfms, splits=splits)
test_eq(dsets[0], (X[0], y if y is None else y[0]))
test_eq(dsets[0][0].__class__.__name__, sel_arr_class)
dsets[0]

In [None]:
# ALL DATA
tfms = [[ItemGetter(0), ], [ItemGetter(1), ]]
splits = RandomSplitter()(X)
items = Lzip(X, y)
dsets = TSDatasets(items, tfms=tfms, splits=splits)
test_eq(dsets[0], (X[0], y if y is None else y[0]))
test_eq(dsets[0][0].__class__.__name__, sel_arr_class)
dsets[0]

In [None]:
# TRAIN
tfms = [[ItemGetter(0), ], [ItemGetter(1), ]]
splits = RandomSplitter()(X)
items = Lzip(X, y)
dsets = TSDatasets(items, tfms=tfms, splits=splits)
test_eq(dsets.train[0], (X[splits[0][0]], y if y is None else y[splits[0][0]]))
test_eq(dsets.train[0][0].__class__.__name__, sel_arr_class)
dsets.train[0]

In [None]:
# VALID
tfms = [[ItemGetter(0), ], [ItemGetter(1), ]]
splits = RandomSplitter()(X)
items = Lzip(X, y)
dsets = TSDatasets(items, tfms=tfms, splits=splits)
test_eq(dsets.valid[0], (X[splits[1][0]], y if y is None else y[splits[1][0]]))
test_eq(dsets.valid[0][0].__class__.__name__, sel_arr_class)
dsets.valid[0]

### Unlabeled

In [None]:
# UNLABELED ALL
tfms = [[ItemGetter(0), ToTSTensor], []]
splits = RandomSplitter()(X)
items = Lzip(X,)
dsets = TSDatasets(items, tfms=tfms, splits=splits)
# test_eq(dsets[0], (X[0], ))
# test_eq(dsets[0][0].__class__.__name__, sel_arr_class)
dsets[0]

In [None]:
# UNLABELED TRAIN
tfms = None
splits = RandomSplitter()(X)
items = tuple((samplify(X), ))
dsets = NumpyDatasets(items, tfms=tfms, splits=splits)
test_eq(dsets.train[0], (X[splits[0][0]], ))
test_eq(dsets.train[0][0].__class__.__name__, sel_arr_class)
dsets.train[0]

In [None]:
# UNLABELED VALID
tfms = None
splits = RandomSplitter()(X)
items = tuple((samplify(X), ))
dsets = NumpyDatasets(items, tfms=tfms, splits=splits)
test_eq(dsets.valid[0], (X[splits[1][0]], ))
test_eq(dsets.valid[0][0].__class__.__name__, sel_arr_class)
dsets.valid[0]

### Transforms & decode

In [None]:
# TFMS ALL
tfms = [[ToTSTensor], [Categorize()]]
splits = RandomSplitter()(X)
items = tuple((samplify(X), samplify(y)))
dsets = NumpyDatasets(items, tfms=tfms, splits=splits)
test_eq(dsets[0][0].__class__.__name__, 'TSTensor')
test_eq(dsets[0][1].__class__.__name__, 'TensorCategory')
dsets[0], dsets[0][0].data, dsets[0][1].data

In [None]:
# TFMS TRAIN
tfms = [[ToTSTensor], [Categorize()]]
splits = RandomSplitter()(X)
items = tuple((samplify(X), samplify(y)))
dsets = NumpyDatasets(items, tfms=tfms, splits=splits)
test_eq(dsets.train[0][0].__class__.__name__, 'TSTensor')
test_eq(dsets.train[0][1].__class__.__name__, 'TensorCategory')
test_eq(dsets.decode(dsets.train[0])[0].data, tensor(X[splits[0][0]]))
test_eq(dsets.decode(dsets.train[0])[1], str(y[splits[0][0]]))
dsets.train[0], dsets.train[0][0].data, dsets.train[0][1].data, dsets.decode(dsets.train[0])

In [None]:
# TFMS VALID
tfms = [[ToTSTensor], [Categorize(add_na=True)]]
splits = RandomSplitter()(X)
items = tuple((samplify(X), samplify(y)))
dsets = NumpyDatasets(items, tfms=tfms, splits=splits)
test_eq(dsets[0][0].__class__.__name__, 'TSTensor')
test_eq(dsets[0][1].__class__.__name__, 'TensorCategory')
if dsid != 'numeric': 
    test_eq(dsets.decode(dsets.valid[0])[0].data, tensor(X[splits[1][0]]))
    test_eq(dsets.decode(dsets.valid[0])[1], str(y[splits[1][0]]))
dsets.valid[0], dsets.valid[0][0].data, dsets.valid[0][1].data, dsets.decode(dsets.valid[0])

In [None]:
# Decode
dec_x, dec_y = dsets.decode(dsets[0])
test_eq(dec_y, str(y[0]))
dsets[0], dec_x, dec_y, type(dec_x), type(dec_y)

### Plots

In [None]:
tfms = [[ItemGetter(0), ToTSTensor], [ItemGetter(1), Categorize()]]
splits = RandomSplitter()(X)
items = Lzip(X, y)
dsets = TSDatasets(items, tfms=tfms, splits=splits)
dsets.show(idx=[0, 1])

In [None]:
dsets.train.show(3)

In [None]:
dsets.valid.show(idx=3)

### Dataloaders

In [None]:
tfms = [[ItemGetter(0), ToTSTensor], [ItemGetter(1), Categorize()]]
splits = RandomSplitter()(X)
items = itemify(X, y)
dsets = Datasets(items, tfms=tfms, splits=splits)
dls = TSTfmdDL(dsets, bs=16)
xb,yb = dls.one_batch()
(xb, yb), (xb[0], yb[0]), dls.decode_batch((xb, yb))[0]

In [None]:
train_dl = TSTfmdDL(dsets.train, bs=16)
xb,yb = train_dl.one_batch()
if dsid == 'numeric': test_eq(train_dl.decode_batch((xb, yb))[0][1], str(y[splits[0][0]]))
(xb[0], yb[0]), train_dl.decode_batch((xb, yb))[0]

In [None]:
valid_dl = TSTfmdDL(dsets.valid, bs=16)
xb,yb = valid_dl.one_batch()
(xb[0], yb[0]), valid_dl.decode_batch((xb, yb))[0]

In [None]:
dls.show_batch(max_n=9, figsize=(10,10), sharey=True)

In [None]:
train_dl.show_batch(max_n=9, figsize=(10, 10))

In [None]:
valid_dl.show_batch(max_n=9, figsize=(10, 10), sharey=True)

## Learner

In [None]:
import torch
import torch.nn as nn
from fastai2.torch_core import *
from fastai2.layers import *
from fastai2.imports import *

# InceptionTime paper: https://arxiv.org/abs/1909.04939

class AdaptiveConcatPool1d(nn.Module):
    "Concatenate the outputs of `AdaptiveMaxPool1d` and `AdaptiveAvgPool1d` along dim 1"
    def __init__(self, size=None):
        super().__init__()
        # A falsy `size` (None or 0) falls back to an output size of 1
        self.size = size if size else 1
        self.ap = nn.AdaptiveAvgPool1d(self.size)
        self.mp = nn.AdaptiveMaxPool1d(self.size)

    def forward(self, x):
        pooled_max = self.mp(x)
        pooled_avg = self.ap(x)
        # Max-pooled features come first, average-pooled second
        return torch.cat((pooled_max, pooled_avg), dim=1)


act_fn = nn.ReLU(inplace=True)
def conv(ni, nf, ks=3, stride=1, bias=False):
    "Create an `nn.Conv1d` from `ni` to `nf` channels with kernel `ks` and padding `ks//2`"
    half_kernel = ks // 2
    return nn.Conv1d(in_channels=ni, out_channels=nf, kernel_size=ks,
                     stride=stride, padding=half_kernel, bias=bias)

class Shortcut(Module):
    "Merge a shortcut with the result of the module by adding them. Adds Conv, BN and ReLU"
    def __init__(self, ni, nf, act_fn=act_fn): 
        self.act_fn=act_fn
        self.conv=conv(ni, nf, 1)
        self.bn=nn.BatchNorm1d(nf)
    # `x.orig` is passed through a 1x1 conv and batch norm before being added to `x`.
    # Use the activation stored on the instance; the bare name `act_fn` ignored the constructor argument.
    def forward(self, x): return self.act_fn(x + self.bn(self.conv(x.orig)))

class InceptionModule(Module):
    "Parallel convolutions (one per kernel size in `kss`) plus a max-pool branch, concatenated on dim 1, then BatchNorm and ReLU"
    def __init__(self, ni, nb_filters=32, kss=[39, 19, 9], bottleneck_size=32, stride=1):
        # One flag decides both whether the bottleneck exists and how many channels the convs take in;
        # the two conditions used to differ (`>0` vs `>1`), which broke `bottleneck_size=1`.
        use_bottleneck = bottleneck_size>0 and ni>1
        self.bottleneck = nn.Conv1d(ni, bottleneck_size, 1) if use_bottleneck else noop
        self.convs = nn.ModuleList([conv(bottleneck_size if use_bottleneck else ni, nb_filters, ks) for ks in kss])
        self.maxpool_bottleneck = nn.Sequential(nn.MaxPool1d(3, stride, padding=1), conv(ni, nb_filters, 1))
        # len(kss) conv branches + 1 max-pool branch, each producing nb_filters channels
        self.bn_relu = nn.Sequential(nn.BatchNorm1d((len(kss)+1)*nb_filters), nn.ReLU())
    def forward(self, x):
        bottled = self.bottleneck(x)
        # The conv branches read the bottleneck output; the max-pool branch reads the raw input
        return self.bn_relu(torch.cat([c(bottled) for c in self.convs]+[self.maxpool_bottleneck(x)], dim=1))

def inception_time(ni, nout, ks=40, depth=6, bottleneck_size=32, nb_filters=32, head=True):
    "Stack `depth` `InceptionModule`s (a `Shortcut` is appended to every third one) and, if `head`, a pooling + linear head with `nout` outputs"
    layers = []
    
    # compute kernel sizes: eg for ks=40 => kss=[39, 19, 9] 
    kss = [ks // (2**i) for i in range(3)]
    # ensure odd kss until nn.Conv1d with padding='same' is available in pytorch 1.3
    # clamp at 1: for ks < 4 the halving reaches 0, which the odd adjustment would turn into -1
    kss = [max(ksi if ksi % 2 != 0 else ksi - 1, 1) for ksi in kss]
    # channels leaving a module = (one branch per kernel size + the max-pool branch) * nb_filters
    n_ks = len(kss) + 1
    for d in range(depth):
        # Farid
      # im = SequentialEx(InceptionModule(ni if d==0 else n_ks*nb_filters, kss=kss, bottleneck_size=bottleneck_size))
        # the first module (d == 0) is built with bottleneck_size=0
        im = SequentialEx(InceptionModule(ni if d==0 else n_ks*nb_filters, kss=kss, bottleneck_size=bottleneck_size if d > 0 else 0))
        if d%3==2: im.append(Shortcut(n_ks*nb_filters, n_ks*nb_filters))      
        layers.append(im)
    # AdaptiveConcatPool1d concatenates max and avg pooling, hence 2 * n_ks * nb_filters input features
    head = [AdaptiveConcatPool1d(), Flatten(), nn.Linear(2*n_ks*nb_filters, nout)] if head else []
    return  nn.Sequential(*layers, *head)

In [None]:
tfms =   [[ItemGetter(0), ToTSTensor], [ItemGetter(1), Categorize()]]
splits = RandomSplitter()(X)
items =  Lzip(X,y)
dsets =  TSDatasets(items, tfms=tfms, splits=splits)
dls =    TSTfmdDL(dsets, bs=16)
model =  ResNet(dls.vars, dls.c)
# model  = inception_time(dls.vars, dls.c)
learn =  Learner(dls.dataloaders(),
                model,
                loss_func=nn.CrossEntropyLoss(),
                metrics=accuracy,
#                 cbs=VerboseCallback()
               )

In [None]:
train_dl, valid_dl = dls.dataloaders()
train_ds, valid_ds = dls.train, dls.valid

In [None]:
dls.show_batch()

In [None]:
xb,yb=dls.one_batch()
learn.loss_func(model(xb), yb)

In [None]:
from fastai2.callback.all import *
model =  ResNet(dls.vars, dls.c)
# model  = inception_time(dls.vars, dls.c)
learn =  Learner(dls.dataloaders(),
                model,
                loss_func=nn.CrossEntropyLoss(),
                metrics=accuracy,
#                 cbs=VerboseCallback()
               )
learn.fit_one_cycle(25, lr_max=1e-3)

In [None]:
# delegates(Learner.__init__)


# # def cnn_learner(dls, arch, loss_func=None, pretrained=True, cut=None, splitter=None,
# # y_range=None, config=None, n_in=3, n_out=None, normalize=True, **kwargs):
# def ts_learner(dls,
#                model=None,
#                #opt_func=Ranger,
#                loss_func=None,
#                cbs=None,
#                metrics=None,
#                **kwargs):
#     "Build a ts learner with default settings if None is passed"
#     n_in = dls.vars #get_n_channels(dls.train)  # data.n_channels
#     n_out = dls.c  # Number of classes

#     if model is None:
#         model = inception_time(n_in, n_out).to(device=default_device())
#     #     if opt_func is None: opt_func = Ranger
#     if loss_func is None: loss_func = LabelSmoothingCrossEntropy()
#     if cbs is None: cbs = L(cbs)
#     if metrics is None: metrics = accuracy

#     learn = Learner(dls,
#                     model,
#                     #opt_func=opt_func,
#                     loss_func=loss_func,
# #                     metrics=metrics,
# #                     **kwargs
#                    )

#     return learn

In [None]:
from fastai2.data.all import *
dblock = DataBlock(blocks=(TSTensorBlock, CategoryBlock),
                   get_x=ItemGetter(0), get_y=ItemGetter(1), 
                   splitter=RandomSplitter()
                  )
dsets = dblock.datasets(itemify(X,y))
dls =    TSTfmdDL(dsets, bs=16)
# model =  ResNet(dls.vars, dls.c)
model  = inception_time(dls.vars, dls.c)
learn =  Learner(dls.dataloaders(),
                model,
                loss_func=nn.CrossEntropyLoss(),
                metrics=accuracy,
#                 cbs=VerboseCallback()
               )

In [None]:
dls.show_batch()

In [None]:
from fastai2.callback.all import *
learn.fit_one_cycle(25, lr_max=1e-3)