In [None]:
# default_exp core

# Core

> Main NumPy and time series functions used throughout the library.

In [None]:
#hide
from nbdev.showdoc import *
from fastcore.test import *
from IPython.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

In [None]:
#export
from tsai.imports import *
# import torch
# import torch
# import torch.nn as nn
# from fastai2.imports import *
# from fastai2.torch_core import *
# from fastai2.data.core import *
# from fastai2.learner import Learner
# from fastcore.transform import *
# from fastai2.vision.data import *
# from fastai2.data.transforms import *
# from fastai2.metrics import *
from tsai.utils import *
from tsai.data import *
from tsai.models.all import *

In [None]:
dsid = 'NATOPS'
X_train, y_train, X_valid, y_valid = get_UCR_data(dsid, parent_dir='./data/UCR/', verbose=True, on_disk=True)
# Stack train and validation data; `splits` keeps the index range of each part
X = np.concatenate((X_train, X_valid))
y = np.concatenate((y_train, y_valid))
n_train = len(X_train)
train_idxs = L(list(np.arange(n_train)))
valid_idxs = L(list(np.arange(n_train, len(X))))
splits = (train_idxs, valid_idxs)
splits

Dataset: NATOPS
X_train: (180, 24, 51)
y_train: (180,)
X_valid: (180, 24, 51)
y_valid: (180,) 



((#180) [0,1,2,3,4,5,6,7,8,9...],
 (#180) [180,181,182,183,184,185,186,187,188,189...])

In [None]:
#export
class NumpyTensor(TensorBase):
    "A `TensorBase` subclass created from array-like data with `tensor`, with a `show` method to plot it"
    def __new__(cls, o, **kwargs):
        # Lists (plain or fastcore `L`) are stacked into one array before conversion
        data = np.stack(o) if isinstance(o, (list, L)) else o
        out = tensor(data)
        out.__class__ = cls
        out._meta = kwargs
        return out

    def __getitem__(self, idx):
        # Re-wrap the indexed result in the same class as `self`
        return type(self)(super().__getitem__(idx))

    def __repr__(self):
        return f'NumpyTensor(shape:{list(self.shape)})'

    def show(self, ax=None, ctx=None, title=None, title_color='black', **kwargs):
        "Plot the tensor on `ax` (or `ctx`); a new figure is created with `kwargs` when neither is given"
        # Anything that is not 2D goes through `To2DTensor` first
        data = self if self.ndim == 2 else type(self)(To2DTensor(self))
        axes = ifnone(ax, ctx)
        if axes is None: _, axes = plt.subplots(**kwargs)
        axes.plot(data.T)
        axes.axis(xmin=0, xmax=data.shape[-1] - 1)
        axes.set_title(title, weight='bold', color=title_color)
        plt.tight_layout()
        return axes

class ToNumpyTensor(Transform):
    "Transform that wraps an `np.ndarray` in a `NumpyTensor`"
    def encodes(self, o:np.ndarray):
        return NumpyTensor(o)

In [None]:
#hide
a = np.random.rand(2,3,4)
expected_shape = (2,3,4)
# Both the direct constructor and the transform must keep the array shape
test_eq(NumpyTensor(a).shape, expected_shape)
test_eq(ToNumpyTensor()(a).shape, expected_shape)

In [None]:
#export
class TSTensor(NumpyTensor):
    "A `NumpyTensor` whose last two dimensions are reported as variables and length"

    @property
    def vars(self): return self.shape[-2]

    @property
    def len(self): return self.shape[-1]

    def __repr__(self):
        # The summary depends on rank: scalar -> dtype, 1D -> len, 2D -> vars/len, 3D and above -> samples/vars/len
        rank = self.ndim
        if rank == 0: return f'TSTensor({self.dtype})'
        if rank == 1: return f'TSTensor(len:{self.shape[-1]})'
        if rank == 2: return f'TSTensor(vars:{self.shape[-2]}, len:{self.shape[-1]})'
        return f'TSTensor(samples:{self.shape[-3]}, vars:{self.shape[-2]}, len:{self.shape[-1]})'

class ToTSTensor(Transform):
    "Transform that wraps an `np.ndarray` in a `TSTensor`"
    def encodes(self, o:np.ndarray):
        return TSTensor(o)

In [None]:
a = np.random.rand(2,3,4)
expected_shape = (2,3,4)
test_eq(TSTensor(a).shape, expected_shape)
test_eq(ToTSTensor()(a).shape, expected_shape)
# Display the repr for a 3D, 2D, 1D and scalar input
tuple(TSTensor(o) for o in (a, a[0], a[0,0], a[0,0,0]))

(TSTensor(samples:2, vars:3, len:4),
 TSTensor(vars:3, len:4),
 TSTensor(len:4),
 TSTensor(torch.float32))

In [None]:
#export
class NumpyTensorBlock():
    "Block of transforms whose `item_tfms` always start with `ToNumpyTensor`"
    def __init__(self, type_tfms=None, item_tfms=None, batch_tfms=None, dl_type=None, dls_kwargs=None):
        self.type_tfms = L(type_tfms)
        # The transform class itself (not an instance) is prepended to the user's item transforms
        self.item_tfms = ToNumpyTensor + L(item_tfms)
        self.batch_tfms = L(batch_tfms)
        self.dl_type = dl_type
        self.dls_kwargs = dls_kwargs if dls_kwargs is not None else {}
        
class TSTensorBlock():
    "Block of transforms whose `item_tfms` always start with `ToTSTensor`"
    def __init__(self, type_tfms=None, item_tfms=None, batch_tfms=None, dl_type=None, dls_kwargs=None):
        self.type_tfms = L(type_tfms)
        # The transform class itself (not an instance) is prepended to the user's item transforms
        self.item_tfms = ToTSTensor + L(item_tfms)
        self.batch_tfms = L(batch_tfms)
        self.dl_type = dl_type
        self.dls_kwargs = dls_kwargs if dls_kwargs is not None else {}

In [None]:
# Each block must put its own conversion transform first in `item_tfms`
for block, expected in ((NumpyTensorBlock, 'ToNumpyTensor'), (TSTensorBlock, 'ToTSTensor')):
    test_eq(block().item_tfms[0].__name__, expected)

In [None]:
#export
class Dataset():
    "Minimal indexable pairing of inputs `X` and targets `y`"
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

    def __len__(self):
        return len(self.X)

class NumpyDataset():
    "Indexable dataset over array-like `X` and optional targets `y`, optionally casting each with `types`"
    def __init__(self, X, y=None, types=None): self.X, self.y, self.types = X, y, types

    def __getitem__(self, idx):
        # Without `y` the bare X item is returned (not a 1-tuple).
        # `types`, when given, is indexed as (x_type, y_type) and applied to the indexed values.
        xb = self.X[idx] if self.types is None else self.types[0](self.X[idx])
        if self.y is None: return xb
        yb = self.y[idx] if self.types is None else self.types[1](self.y[idx])
        return (xb, yb)

    def __len__(self): return len(self.X)

    @property
    def c(self):
        "0 when unlabeled, 1 when the first target is a float, otherwise the number of unique targets"
        if self.y is None: return 0
        # NumPy scalars such as np.float32 are not instances of Python `float`, so `np.floating` is checked too
        if isinstance(self.y[0], (float, np.floating)): return 1
        return len(np.unique(self.y))

class TSDataset():
    "Indexable dataset over `X` and optional `y` that can restrict the variables and steps returned"
    def __init__(self, X, y=None, types=None, sel_vars=None, sel_steps=None):
        self.X, self.y, self.types = X, y, types
        # No selection means "take everything" along that axis
        self.sel_vars = slice(None) if sel_vars is None else sel_vars
        self.sel_steps = slice(None) if sel_steps is None else sel_steps

    def __getitem__(self, idx):
        # The variable/step selection is applied whether or not targets are present
        xb = self.X[idx, self.sel_vars, self.sel_steps]
        if self.types is not None: xb = self.types[0](xb)
        # Without `y` the bare X item is returned (not a 1-tuple)
        if self.y is None: return xb
        yb = self.y[idx]
        if self.types is not None: yb = self.types[1](yb)
        return (xb, yb)

    def __len__(self): return len(self.X)

    def _first_x(self):
        "X part of the first item, for labeled and unlabeled datasets alike"
        item = self[0]
        return item if self.y is None else item[0]

    @property
    def c(self):
        "0 when unlabeled, 1 when the first target is a float, otherwise the number of unique targets"
        if self.y is None: return 0
        # NumPy scalars such as np.float32 are not instances of Python `float`, so `np.floating` is checked too
        if isinstance(self.y[0], (float, np.floating)): return 1
        return len(np.unique(self.y))

    @property
    def vars(self): return self._first_x().shape[-2]

    @property
    def len(self): return self._first_x().shape[-1]

In [None]:
# NumpyDataset with float targets: list indexing returns a batch and `c` is 1
a = np.random.rand(5,6,7)
b = np.random.rand(5)
ds = NumpyDataset(a,b)
xb, yb = ds[[0,4]]
test_eq(xb.shape, (2,) + a.shape[1:])
test_eq(yb.shape, (2,))
test_eq(ds.c, 1)

# TSDataset with binary integer targets: `c` counts the classes, `vars`/`len` come from the first item
a = np.random.rand(5,6,7)
b = np.random.randint(0, 2, 5)
ds = TSDataset(a,b)
test_eq(ds.c, 2)
test_eq(ds.vars, 6)
test_eq(ds.len, 7)

In [None]:
#export
class NumpyDatasets(Datasets):
    "A dataset that creates tuples from X (and y), applying `tfms` through one `TfmdLists` per element"
    _xtype, _ytype = NumpyTensor, None # Expected X and y output types (must have a show method)
    def __init__(self, X=None, y=None, items=None, tfms=None, tls=None, n_inp=None, dl_type=None, inplace=False, **kwargs):
        # `items` and `dl_type` are accepted but never read: `items` is rebuilt from X/y when `tls` is not given.
        # `kwargs` are forwarded to `TfmdLists`.
        self.inplace = inplace
        if tls is None: 
            X = itemify(X, tup_id=0)
            y = itemify(y, tup_id=0) if y is not None else y
            items = tuple((X)) if y is None else tuple((X,y))
            self.tfms = L(ifnone(tfms,[None]*len(ifnone(tls,items))))
        self.tls = L(tls if tls else [TfmdLists(item, t, **kwargs) for item,t in zip(items,self.tfms)])
        # By default every element but the last is an input (a single element is itself the input)
        self.n_inp = (1 if len(self.tls)==1 else len(self.tls)-1) if n_inp is None else n_inp
        if len(self.tls[0]) > 0: 
            # With `inplace` the transformed items are materialised once; otherwise the `TfmdLists` is indexed lazily.
            # type(tl[0]).__name__ == 'memmap' is added to avoid loading in memory larger than RAM datasets
            self.ptls = L([tl if not self.inplace else tl[:] if type(tl[0]).__name__ == 'memmap' else stack(tl[:]) for tl in self.tls])
            # Output type per element: the class-level type when set, else the item's own tensor class, else `tensor`
            self.types = [ifnone(_typ, type(tl[0]) if isinstance(tl[0], torch.Tensor) else tensor) for tl,_typ in zip(self.tls, [self._xtype, self._ytype])]
    
    def __getitem__(self, it):
        "Index every element with `it` and cast each result to its output type"
        return tuple([typ(ptl[it]) for ptl,typ in zip(self.ptls,self.types)])
    
    def subset(self, i): return type(self)(tls=L(tl.subset(i) for tl in self.tls), n_inp=self.n_inp, inplace=self.inplace, tfms=self.tfms)
    
    def _new(self, X, *args, y=None, **kwargs): 
        items = ifnoneelse(y,tuple((X)),tuple((X, y)))
        return super()._new(items, tfms=self.tfms, do_setup=False, **kwargs)
    
    def show_at(self, idx, **kwargs):
        self.show(self[idx], **kwargs)
        plt.show()

    @property
    def items(self): return tuple([tl.items for tl in self.tls])
    @items.setter
    def items(self, vs):
        # Assign each new collection of items to its matching `TfmdLists`
        for tl,v in zip(self.tls, vs): tl.items = v


class TSDatasets(NumpyDatasets):
    "A `NumpyDatasets` that returns X as `TSTensor` and can restrict the variables and steps of X"
    _xtype, _ytype = TSTensor, None # Expected X and y output types (torch.Tensor - default - or subclass)
    def __init__(self, X=None, y=None, items=None, sel_vars=None, sel_steps=None, tfms=None, tls=None, n_inp=None, dl_type=None, 
                 inplace=False, **kwargs):
        # No selection means "take everything" along that axis
        self.sel_vars = ifnone(sel_vars, slice(None))
        self.sel_steps = ifnone(sel_steps,slice(None))
        # The remaining setup (tfms, tls, n_inp, ptls, types) is the same as in the parent constructor
        super().__init__(X=X, y=y, items=items, tfms=tfms, tls=tls, n_inp=n_inp, dl_type=dl_type, inplace=inplace, **kwargs)
    
    def __getitem__(self, it):
        "Like the parent, but the first element is sliced with `sel_vars` and `sel_steps` on its last two dimensions"
        return tuple([typ(ptl[it])[...,self.sel_vars, self.sel_steps] if i==0 else typ(ptl[it]) for i,(ptl,typ) in enumerate(zip(self.ptls,self.types))])
    
    def subset(self, i): return type(self)(tls=L(tl.subset(i) for tl in self.tls), n_inp=self.n_inp, 
                                           inplace=self.inplace, tfms=self.tfms, sel_vars=self.sel_vars, sel_steps=self.sel_steps)
    @property
    def vars(self): return self[0][0].shape[-2]
    @property
    def len(self): return self[0][0].shape[-1]

In [None]:
# Stacking the itemified targets must give back the original array
y_items = itemify(y, tup_id=0)
test_eq(np.stack(y_items), y)

In [None]:
# The training subset must have the right length with and without `inplace`, and with real transforms
for tfms, inplace in ((None, False), (None, True), ([add(1), Categorize()], True)):
    dsets = TSDatasets(X, y, tfms=tfms, splits=splits, inplace=inplace)
    test_eq(len(dsets.train), len(X_train))

In [None]:
#export
def add_ds(dsets, X, y=None, test_items=None, rm_tfms=None, with_labels=False):
    """Create test datasets from X (and y) using validation transforms of `dsets`

    `dsets` must be a `Datasets` (including `NumpyDatasets`/`TSDatasets`) or a `TfmdLists`;
    anything else raises an `Exception`. `test_items` is accepted but not used, and
    `with_labels` is always recomputed from `y` (labels are used exactly when `y` is given).
    `rm_tfms` is the number of leading transforms to drop; when None it is inferred with `infer_idx`.
    """
    items = ifnoneelse(y,tuple((X,)),tuple((X, y)))
    with_labels = ifnoneelse(y,False,True) 
    # `NumpyDatasets` and `TSDatasets` both derive from `Datasets`, so one check covers all three
    if isinstance(dsets, Datasets):
        tls = dsets.tls if with_labels else dsets.tls[:dsets.n_inp]
        # split_idx=1 makes the new lists use the validation behaviour of the transforms
        new_tls = L([tl._new(item, split_idx=1) for tl,item in zip(tls, items)])
        if rm_tfms is None: rm_tfms = [tl.infer_idx(get_first(item)) for tl,item in zip(new_tls, items)]
        else:               rm_tfms = tuplify(rm_tfms, match=new_tls)
        for i,j in enumerate(rm_tfms): new_tls[i].tfms.fs = new_tls[i].tfms.fs[j:]
        if isinstance(dsets, NumpyDatasets):
            # Only `TSDatasets` sets `sel_vars`/`sel_steps`; a plain `NumpyDatasets` has neither attribute
            sel = dict(sel_vars=dsets.sel_vars, sel_steps=dsets.sel_steps) if isinstance(dsets, TSDatasets) else {}
            return dsets.__class__(tls=new_tls, n_inp=dsets.n_inp, inplace=dsets.inplace, tfms=dsets.tfms, **sel)
        return Datasets(tls=new_tls)
    elif isinstance(dsets, TfmdLists):
        new_tl = dsets._new(items, split_idx=1)
        if rm_tfms is None: rm_tfms = dsets.infer_idx(get_first(items))
        new_tl.tfms.fs = new_tl.tfms.fs[rm_tfms:]
        return new_tl
    else: raise Exception(f"This method requires using the fastai library to assemble your data.Expected a `Datasets` or a `TfmdLists` but got {dsets.__class__.__name__}")

@patch
def add_test(self:NumpyDatasets, X, y=None, test_items=None, rm_tfms=None, with_labels=False):
    "Build a new dataset from `X` (and `y`) by calling `add_ds` on this dataset"
    options = dict(y=y, test_items=test_items, rm_tfms=rm_tfms, with_labels=with_labels)
    return add_ds(self, X, **options)

@patch
def add_unlabeled(self:NumpyDatasets, X, test_items=None, rm_tfms=None, with_labels=False):
    "Build a new dataset from `X` alone by calling `add_ds` on this dataset with `y=None`"
    options = dict(y=None, test_items=test_items, rm_tfms=rm_tfms, with_labels=with_labels)
    return add_ds(self, X, **options)

In [None]:
dsets = TSDatasets(X, y, tfms=[None, Categorize()], splits=splits, inplace=True)
# Labeled and unlabeled test sets must both have one item per sample
n_expected = len(X_train)
test_eq(len(dsets.add_test(X_train, y_train)), n_expected)
test_eq(len(dsets.add_unlabeled(X_train)), n_expected)

In [None]:
#export
class NumpyDataLoader(TfmdDL):
    "`TfmdDL` that builds each batch with a single indexing call on the dataset"
    do_item = noops
    # The whole list of batch indices `b` is handed to the dataset at once
    def create_batch(self, b): return self.dataset[b]

    @delegates(plt.subplots)
    def show_batch(self, b=None, ctxs=None, max_n=9, nrows=3, ncols=3, figsize=(16, 10), **kwargs):
        "Plot up to `max_n` decoded samples of batch `b`; a batch is drawn with `one_batch` when `b` is None"
        # Only draw a new batch when the caller did not supply one
        if b is None: b = self.one_batch()
        db = self.decode_batch(b, max_n=max_n)
        if figsize is None: figsize = (ncols*6, max_n//ncols*4)
        if ctxs is None: ctxs = get_grid(min(len(db), nrows*ncols), nrows=None, ncols=ncols, figsize=figsize, **kwargs)
        for i,ctx in enumerate(ctxs):
            show_tuple(db[i], ctx=ctx)

    @delegates(plt.subplots)
    def show_results(self, b, preds, ctxs=None, max_n=9, nrows=3, ncols=3, figsize=(16, 10), **kwargs):
        "Plot decoded samples of `b` titled with true and predicted targets (green when equal, red otherwise)"
        t = self.decode_batch(b, max_n=max_n)
        # Decode the predictions by pairing them with the same inputs
        p = self.decode_batch((b[0],preds), max_n=max_n)
        if figsize is None: figsize = (ncols*6, max_n//ncols*4)
        if ctxs is None: ctxs = get_grid(min(len(t), nrows*ncols), nrows=None, ncols=ncols, figsize=figsize, **kwargs)
        for i,ctx in enumerate(ctxs): 
            title = f'True: {t[i][1]}\nPred: {p[i][1]}'
            color = 'green' if t[i][1] == p[i][1] else 'red'
            t[i][0].show(ctx=ctx, title=title, title_color=color)

@delegates(plt.subplots)
def show_tuple(tup, **kwargs):
    "Plot the first element of a decoded tuple, titled with its second element or 'unlabeled' for a 1-tuple"
    sample = tup[0]
    title = tup[1] if len(tup) != 1 else 'unlabeled'
    sample.show(title=title, **kwargs)
    
class TSDataLoader(NumpyDataLoader):
    "`NumpyDataLoader` that reports the number of variables and the length of its first sample"
    @property
    def vars(self):
        first_x = self.dataset[0][0]
        return first_x.shape[-2]

    @property
    def len(self):
        first_x = self.dataset[0][0]
        return first_x.shape[-1]

In [None]:
bs = 64
dsets = TSDatasets(X, y, tfms=[add(1), Categorize()], splits=splits, inplace=True)
# One training batch must be (bs, vars, len)
train_dl = TSDataLoader(dsets.train, bs=bs)
xb,yb = train_dl.one_batch()
test_eq(xb.shape, (bs, *X.shape[-2:]))

In [None]:
#export
# Names of the keyword arguments that `NumpyDataLoaders.from_dsets` wraps in a `Pipeline`
_batch_tfms = ('after_item','before_batch','after_batch')

class NumpyDataLoaders(DataLoaders):
    "`DataLoaders` whose loaders are `NumpyDataLoader`s and whose X block is `NumpyTensorBlock`"
    _xblock = NumpyTensorBlock
    _dl_type = NumpyDataLoader 
    # NOTE: the `device` default is evaluated once, when this class is defined
    def __init__(self, *loaders, path='.', device=default_device()):
        self.loaders,self.path = list(loaders),Path(path)
        self.device = device
        
    @classmethod
    @delegates(DataLoaders.from_dblock)
    def from_numpy(cls, X, y=None, splitter=None, valid_pct=0.2, seed=0, item_tfms=None, batch_tfms=None, **kwargs):
        "Create timeseries dataloaders from arrays (X and y, unless unlabeled)"
        # Without an explicit splitter, a seeded random split holds out `valid_pct` of the items
        if splitter is None: splitter = RandomSplitter(valid_pct=valid_pct, seed=seed)
        getters = [ItemGetter(0), ItemGetter(1)] if y is not None else [ItemGetter(0)]
        dblock = DataBlock(blocks=(cls._xblock, CategoryBlock),
                           getters=getters,
                           splitter=splitter,
                           item_tfms=item_tfms,
                           batch_tfms=batch_tfms)

        source = itemify(X) if y is None else itemify(X,y)
        return cls.from_dblock(dblock, source, **kwargs)

    @classmethod
    def from_dsets(cls, *ds, path='.',  bs=64, device=None, **kwargs):
        "Create one `_dl_type` loader per dataset in `ds`; only the first is shuffled and drops its last batch by default"
        default = (True,) + (False,) * (len(ds)-1)
        defaults = {'shuffle': default, 'drop_last': default}
        for nm in _batch_tfms:
            if nm in kwargs: kwargs[nm] = Pipeline(kwargs[nm])
        # Broadcast every keyword to one value per dataset, then regroup them into one dict per dataset
        kwargs = merge(defaults, {k: tuplify(v, match=ds) for k,v in kwargs.items()})
        kwargs = [{k: v[i] for k,v in kwargs.items()} for i in range_of(ds)]
        # A single batch size is repeated for every dataset
        if not is_listy(bs): bs = [bs]
        if len(bs) != len(ds): bs = bs * len(ds)
        assert len(ds) == len(kwargs) == len(bs)
        if device is None: device = default_device()
        return cls(*[cls._dl_type(d, bs=b, **k) for d,k,b in zip(ds, kwargs, bs)], path=path, device=device)

class TSDataLoaders(NumpyDataLoaders):
    "`NumpyDataLoaders` that uses `TSTensorBlock` as X block and `TSDataLoader` as loader type"
    _xblock, _dl_type = TSTensorBlock, TSDataLoader

In [None]:
bs = 64
dsets = TSDatasets(X, y, tfms=[add(1), Categorize()], splits=RandomSplitter(valid_pct=.3)(y), inplace=True)
train_dl = TSDataLoader(dsets.train, bs=bs)
valid_dl = TSDataLoader(dsets.valid, bs=bs*2)
dls = TSDataLoaders(train_dl, valid_dl)
# A batch holds `batch_size` samples unless its split has fewer
for dl, split, batch_size in ((dls.train, dsets.train, bs), (dls.valid, dsets.valid, bs*2)):
    xb,yb = dl.one_batch()
    test_eq(xb.shape, (min(batch_size, len(split)), X.shape[-2], X.shape[-1]))

In [None]:
#export
# NOTE: `save_all` and `load_learner_all` are defined again in a later cell of this notebook, and the
# later definitions are the ones bound after a top-to-bottom run. This copy writes and reads the same
# file names as the later one (the dataloaders file is '<dls_fname>.pth').
def save_all(self:Learner, path='export', dls_fname='dls', model_fname='model', learner_fname='learner'):
    "Save the dataloaders, the model (through `Learner.save`) and the exported learner under `path`"
    path = Path(path)
    # Create the folder (and any missing parents); an existing folder is fine
    os.makedirs(path, exist_ok=True)

    # Save the dls
    torch.save(self.dls, path/f'{dls_fname}.pth')

    # Saves the model along with optimizer
    self.model_dir = path
    self.save(model_fname)

    # Export learn without the items and the optimizer state for inference
    self.export(path/f'{learner_fname}.pkl')
    
    print('Learner saved:')
    print(f"path          = '{path}'")
    print(f"dls_fname     = '{dls_fname}'")
    print(f"model_fname   = '{model_fname}.pth'")
    print(f"learner_fname = '{learner_fname}.pkl'")
    
# Expose `save_all` as a `Learner` method
Learner.save_all = save_all
    
    
def load_learner_all(path='export', dls_fname='dls', model_fname='model', learner_fname='learner', cpu=True):
    "Load the exported learner from `path`, then its saved model and its dataloaders"
    path = Path(path)
    learn = load_learner(path/f'{learner_fname}.pkl', cpu=cpu)
    learn.load(f'{model_fname}')
    # NOTE: `torch.load` unpickles the file, so only load folders you trust
    dls = torch.load(path/f'{dls_fname}.pth')
    learn.dls = dls
    return learn

In [None]:
#export
def save_all(self, path='export', dls_fname='dls', model_fname='model', learner_fname='learner'):
    """Save the dataloaders, the model and the exported learner under `path`.

    Writes `<dls_fname>.pth` with `torch.save`, sets `self.model_dir` to `path` and calls
    `self.save(model_fname)`, then calls `self.export` with `<path>/<learner_fname>.pkl`.
    A summary of the names used is printed at the end.
    """
    path = Path(path)
    # Create the folder (and any missing parents); an existing folder is fine
    os.makedirs(path, exist_ok=True)

    # Save the dls
    torch.save(self.dls, path/f'{dls_fname}.pth')

    # Saves the model along with optimizer
    self.model_dir = path
    self.save(model_fname)

    # Export learn without the items and the optimizer state for inference
    self.export(path/f'{learner_fname}.pkl')
    
    print('Learner saved:')
    print(f"path          = '{path}'")
    print(f"dls_fname     = '{dls_fname}'")
    print(f"model_fname   = '{model_fname}.pth'")
    print(f"learner_fname = '{learner_fname}.pkl'")
    
# Expose `save_all` as a `Learner` method so it can be called as `learn.save_all(...)`
Learner.save_all = save_all
    
    
def load_learner_all(path='export', dls_fname='dls', model_fname='model', learner_fname='learner', cpu=True):
    "Load the exported learner from `path`, then its saved model, and attach the saved dataloaders"
    root = Path(path)
    learner = load_learner(root/f'{learner_fname}.pkl', cpu=cpu)
    learner.load(f'{model_fname}')
    # The dataloaders are read back from '<dls_fname>.pth'
    learner.dls = torch.load(root/f'{dls_fname}.pth')
    return learner

In [None]:
# End-to-end example: build datasets and dataloaders, then train InceptionTime for one cycle.
# No transform is applied to X; y goes through `Categorize`.
tfms  = [None, [Categorize()]]
dsets = TSDatasets(X, y, tfms=tfms, splits=splits, inplace=True)
train_dl = TSDataLoader(dsets.train, bs=64, shuffle=True, drop_last=True, num_workers=0)
valid_dl = TSDataLoader(dsets.valid, bs=128, num_workers=0)
dls   = TSDataLoaders(train_dl, valid_dl, device=default_device())
model = InceptionTime(dls.vars, dls.c)
learn = Learner(dls, model, metrics=accuracy)
# NOTE(review): the stored output below lists 3 epochs although 1 is requested here; re-run the cell to refresh it
learn.fit_one_cycle(1, lr_max=1e-3)

epoch,train_loss,valid_loss,accuracy,time
0,1.763685,1.789343,0.166667,00:05
1,1.390087,1.75412,0.161111,00:05
2,1.119945,1.723962,0.155556,00:05


In [None]:
# Round trip: save everything with the default names, drop the in-memory learner, then rebuild it from disk
learn.save_all()
del learn
learn = load_learner_all()

export
Learner saved:
path          = 'export'
dls_fname     = 'dls'
model_fname   = 'model.pth'
learner_fname = 'learner.pkl'


In [None]:
#hide
# Save this notebook and convert the notebooks to library modules (see the output below).
# The imports live in this cell because it is hidden and only used when exporting.
from save_nb import *
from nbdev.export import *
save_nb()
notebook2script()
last_saved(10)

<IPython.core.display.Javascript object>


Current notebook saved.

Converted 000_utils.ipynb.
Converted 001_data.ipynb.
Converted 002_core.ipynb.
Converted 100_layers.ipynb.
Converted 101_ResNet.ipynb.
Converted 102_InceptionTime.ipynb.
Converted index.ipynb.

Correct conversion!
Total elapsed time 0 s
10-04-2020 13:13:34
