In [None]:
#default_exp labeller.core

In [None]:
#export
from fastai2.basics import *
from lazylabel.basics import *
from functools import wraps

# Labeller
> `Labeller` wraps `subscribe` and saves the value returned by each wrapped function in an attribute called `labels` on the original object.

In [None]:
#export
class UniqueList(L):
    "An `L` whose `append` skips items that are already present."
    def append(self, o):
        "Append `o` unless it is already in `self.items`."
        if o in self.items: return
        super().append(o)

In [None]:
#export
class Labeller:
    "Registers labelling functions on transforms via `subscribe` and stores their results in `x.labels`."
    # NOTE(review): a second `Labeller` class is defined later in this notebook and, being
    # defined afterwards, is the one the name refers to once every cell has run.
    def __init__(self):
        self.lfs_order = UniqueList()
        self.subs = L()

    def __call__(self, tfm):
        "Decorator form of `register_func`: `@labeller(tfm)`."
        return lambda f: self.register_func(tfm, f)

    def register_func(self, tfm, f):
        "Subscribe the label-recording wrapper of `f` to `tfm`; returns whatever the subscription call returns."
        self.lfs_order.clear()
        subscription = subscribe(tfm, self.lfs_order)
        self.subs.append(subscription)
        labelled_f = self._add_label(f)
        return subscription(labelled_f)

    def register_funcs(self, tfm, fs):
        "Call `register_func` for every function in `fs`."
        for func in L(fs):
            self.register_func(tfm, func)

    def reset(self):
        "Cancel all subscriptions, then empty `subs` and `lfs_order`."
        for subscription in self.subs:
            subscription.cancel()
        self.subs.clear()
        self.lfs_order.clear()

    def listen(self, v):
        "Set the `listen` attribute of every subscription to `v`."
        for subscription in self.subs:
            subscription.listen = v

    def _add_label(self, f):
        "Wrap `f` so its result is appended to `x.labels`; the wrapper returns `x`."
        @wraps(f)
        def _labelled(x):
            label = f(x)
            # `add_attr` receives an empty list for `labels`; presumably it only applies when `x` has none yet
            x = add_attr(x, 'labels', [])
            x.labels.append(label)
            return x
        return _labelled

In [None]:
#export
class Labeller:
    "Registers labelling functions on transforms through `tfm.listen`; each one appends its result to `x.labels`."
    # NOTE(review): `find` reads `self.lfs_order`, which this class never sets; the notebook
    # currently assigns it by hand before calling `find`.
    def __init__(self): self.subs = L()

    def __call__(self, tfm, *pre):
        "Decorator form of `register_func`: `@labeller(tfm, *pre)`."
        def _inner(f): return self.register_func(tfm, f, *pre)
        return _inner

    def register_func(self, tfm, f, *pre):
        "Attach `f` to `tfm` via `tfm.listen(*pre)`, store the resulting subscription in `subs` and return it."
        sub = tfm.listen(*pre)(self._add_label(f))
        self.subs.append(sub)
        return sub

    def register_funcs(self, tfm, fs, *pre):
        "Register every function in `fs` on `tfm`, forwarding the same `pre` arguments to each registration."
        for f in L(fs): self.register_func(tfm, f, *pre)

    def _add_label(self, f):
        "Wrap `f` so that calling the wrapper with `x` appends `f(x)` to `x.labels`."
        @wraps(f)
        # `x.labels` must already exist; the wrapper itself returns `None`
        def _inner(x): x.labels.append(f(x))
        return _inner

Test the labeller with arbitrary transforms.

In [None]:
# Label names returned by the labelling functions in the tests below
ABSTAIN = 'abstain'
CAT1 = 'neg1'
CAT2 = 'neg2'
CAT3 = 'intdiv1'
vocab = [ABSTAIN, CAT1, CAT2]

In [None]:
@Transform
def neg(x:Tensor):
    "Negate `x`; the `Tensor` annotation is what the transform dispatches on."
    return -x
class IntDiv(Transform):
    "Transform that floor-divides `int` inputs by two."
    def encodes(self, x:int):
        return x // 2

In [None]:
# Fresh labeller and transform instance shared by the test cells below
labeller, int_div = Labeller(), IntDiv()

In [None]:
def labeller_cat1(x):
    "Label every input as `CAT1`."
    return CAT1

def labeller_cat2(x):
    "Label every input as `CAT2`."
    return CAT2

# Attach both labelling functions to the `neg` transform
labeller.register_funcs(neg, [labeller_cat1, labeller_cat2])

In [None]:
@labeller(int_div)
def labeller_cat3(x):
    "Label every input as `CAT3`."
    return CAT3

In [None]:
# `neg` is annotated for `Tensor`, so both functions registered on it should label a tensor input
pipe = Pipeline(neg)
test_eq(pipe(tensor(2)).labels, [CAT1, CAT2])
# Disabled check: the `Labeller` defined last above does not create `lfs_order`
# test_eq(labeller.lfs_order, ['labeller_cat1', 'labeller_cat2'])

In [None]:
# An `int` does not match `neg`'s `Tensor` annotation, so no labels are expected here
test_eq(pipe(2).labels, [])

Labelling functions should only be applied based on type dispatch of the transforms.

In [None]:
# Of the two transforms only `IntDiv.encodes` is annotated for `int`, so only its labelling function should fire
pipe = Pipeline([neg, int_div])
test_eq(pipe(2).labels, [CAT3])
# test_eq(labeller.lfs_order, ['labeller_cat3']) # TODO: failing, old funcs still in the list

Reset subscriptions

In [None]:
@patch
def reset(self:Labeller):
    "Cancel every subscription held in `self.subs`, then empty the list."
    for subscription in self.subs:
        subscription.cancel()
    self.subs.clear()

In [None]:
# After `reset` has cancelled the subscriptions, the same tensor input should come back without labels
labeller.reset()
test_eq(pipe(tensor(2)).labels, [])

# Find

Find samples with specific labels

In [None]:
#export
@patch
def _find(self:Labeller, dl, lfs_idxs, lbl_idxs, reduction=operator.and_):
    """Positions of the samples in `dl` whose label columns match the requested label indices.

    For each batch, column `i` of the inputs is compared with label index `x` for every
    pair `(i, x)` in `zip(lfs_idxs, lbl_idxs)`; the per-pair masks are folded with `reduction`.
    Batch-local positions are offset by the number of samples already seen.
    """
    matches,total = [],0
    # Iterate unshuffled; presumably so the returned positions follow the dataset order
    old_shuffle, dl.shuffle = dl.shuffle, False
    try:
        for b in dl:
            xb,_ = split_batch(dl, b)
            masks = [xb[:,i]==x for i,x in zip(lfs_idxs,lbl_idxs)]
            mask = reduce(reduction, masks)
            idxs = np.array(mask2idxs(mask))
            matches.extend(idxs+total)
            total += find_bs(xb)
    finally:
        # Restore the caller's shuffle setting even when a batch raises
        dl.shuffle = old_shuffle
    return matches

In [None]:
#export
@patch
def find(self:Labeller, dl, vocab, lfs, lbls, reduction=operator.and_):
    "Translate `lfs` (via `self.lfs_order`) and `lbls` (via `CategoryMap(vocab)`) to indices and delegate to `_find`."
    category_map = CategoryMap(vocab)
    # Position of each labelling function inside `lfs_order`
    lf_positions = []
    for lf in lfs:
        lf_positions.append(self.lfs_order.index(lf))
    # Index of each requested label inside the category map
    label_codes = []
    for lbl in lbls:
        label_codes.append(category_map.o2i[lbl])
    return self._find(dl, lf_positions, label_codes, reduction)

In [None]:
# Eight samples with two columns each; presumably column i holds the label index produced by labelling function i
x = tensor([[0, 0], [0, 1], [0, 2], [1, 0], [0, 2], [1, 0], [1, 2], [0, 0]])
dset = Datasets(x)
dls = DataLoaders.from_dsets(dset, bs=2, drop_last=False)

In [None]:
labeller = Labeller()
# `lfs_order` is assigned by hand: `find` reads it, but the `Labeller` defined last above never sets it
labeller.lfs_order = [0,1]

In [None]:
# NOTE(review): leftover interactive source lookup; no later cell depends on it, so it can be removed
Categorize??

In [None]:
# NOTE(review): the saved output of this cell (`1`) matches the integer `vocab` assigned in a later cell,
# not the string `vocab` defined earlier, so this cell appears to have been run out of order
Categorize(vocab).vocab[1]

1

In [None]:
vocab = [0,1,2]
# Only the sample at position 6, `[1, 2]`, has label 1 in column 0 and label 2 in column 1
idxs = labeller.find(dls.train, vocab=vocab, lfs=[0,1], lbls=[1,2])
# NOTE(review): the saved output shows this check failing (`[]` returned) alongside worker pickling errors
test_eq(idxs, [6])

Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/lgvaz/anaconda3/envs/dl/lib/python3.7/multiprocessing/queues.py", line 236, in _feed
    obj = _ForkingPickler.dumps(obj)
  File "/home/lgvaz/anaconda3/envs/dl/lib/python3.7/multiprocessing/queues.py", line 236, in _feed
    obj = _ForkingPickler.dumps(obj)
  File "/home/lgvaz/anaconda3/envs/dl/lib/python3.7/multiprocessing/reduction.py", line 51, in dumps
    cls(buf, protocol).dump(obj)
_pickle.PicklingError: Can't pickle <class 'lazylabel.core.ProxyTuple'>: attribute lookup ProxyTuple on lazylabel.core failed
  File "/home/lgvaz/anaconda3/envs/dl/lib/python3.7/multiprocessing/reduction.py", line 51, in dumps
    cls(buf, protocol).dump(obj)
_pickle.PicklingError: Can't pickle <class 'lazylabel.core.ProxyTuple'>: attribute lookup ProxyTuple on lazylabel.core failed
Traceback (most recent call last):
  File "/home/lgvaz/anaconda3/envs/dl/lib/python3.7/multiprocessing/queues.py", line 236, in _feed
    

AssertionError: ==:
[]
[6]

## Tasks labels helper

Extract the `labels` from a `TfmdLists`.

In [None]:
#export
def tasks_labels(tls, vocab, splits=None, lazy=False):
    "Build a `TfmdLists` over `tls` that reads each item's `labels` and encodes them with `MultiCategorize(vocab)`."
    tfms = [AttrGetter('labels'), MultiCategorize(vocab)]
    tasks = TfmdLists(tls, tfms, splits=splits)
    # With `lazy=True` the list is returned without calling `cache()`
    if lazy: return tasks
    tasks.cache()
    return tasks

## Export -

In [None]:
from nbdev.export import notebook2script
# Run nbdev's notebook-to-module conversion; the saved output lists the notebooks it converted
notebook2script()

Converted 00_core.ipynb.
Converted 02_labeller.core.ipynb.
Converted 02a_labeller.metrics.ipynb.
Converted 03_model.majority_label_voter.ipynb.
Converted 05_text.core.ipynb.
Converted 06_text.labellers.ipynb.
Converted index.ipynb.
