In [None]:
#default_exp labeller.metrics

In [None]:
#export
from fastai2.basics import *
from pigboat.basics import *
from pigboat.labeller.core import Labeller

# Labeller metrics
> Utility metrics like `Coverage`

In [None]:
#export
class LabelMetric:
    "Base class for labeller metrics: `reset` the state, `accumulate` batches, then read `value`."
    def reset(self):
        "Clear any accumulated state; subclasses must override."
        raise NotImplementedError
    def accumulate(self, xb):
        "Update the state with one batch `xb`; subclasses must override."
        raise NotImplementedError
    @property
    def value(self):
        "Final value of the metric; subclasses must override."
        # Was `raise not ImplementedError`, which fails with a NameError instead
        # of signalling an abstract member.
        raise NotImplementedError
    @property
    def name(self):
        "Display name of the metric; defaults to the class name."
        return self.__class__.__name__

In [None]:
#export
class ValidLabelMetric(LabelMetric):
    "Base class for metrics whose `accumulate` takes targets `yb` alongside `xb`."
    def accumulate(self, xb, yb):
        # Abstract: concrete metrics provide the two-argument update.
        raise NotImplementedError

In [None]:
#export
class Coverage(LabelMetric):
    "Per column of `xb`: number of non-zero entries divided by the number of items seen."
    def reset(self):
        self.total = 0
        self.count = 0
    def accumulate(self, xb):
        #TODO: Hardcoded 0 for abstain, can be wrong
        self.total += find_bs(xb)
        # Entries different from 0 are treated as actual votes
        voted = xb != 0
        self.count += voted.sum(axis=0)
    @property
    def value(self):
        "Ratio of votes to items as a plain Python list, one entry per column."
        ratio = self.count.float() / self.total
        return ratio.tolist()

In [None]:
#export
class Polarity(LabelMetric):
    "Per column of `xb`: number of distinct non-zero values seen across all accumulated batches."
    def reset(self): self._unique = None
    def accumulate(self, xb):
        # One set of distinct values per column of the batch
        bpol = L(set(t.unique().tolist()) for t in xb.unbind(dim=1))
        if self._unique is None: self._unique = bpol
        else:
            for i in range_of(bpol): self._unique[i].update(bpol[i])
    @property
    def value(self): return self.unique.map(len)

    @property
    def unique(self):
        "Distinct values per column with the abstain value removed."
        # Build fresh sets: the previous shallow `copy()` shared the set objects,
        # so reading this property silently edited the accumulated state.
        return self._unique.map(self._discard_abstain)
    def _discard_abstain(self, o):
        "Return a new set equal to `o` without the abstain value."
        return o - {0} #TODO: abstain

In [None]:
#export
class CountCorrect(ValidLabelMetric):
    "Per column of `xb`: number of non-abstain entries equal to the target `yb`."
    def reset(self): self.count = 0
    def accumulate(self, xb, yb):
        # Skip abstain entries (hardcoded 0, as in `CountIncorrect`) so an abstain
        # is never reported as a correct vote when the target happens to be 0.
        self.count += ((xb!=0)&(xb==yb)).sum(dim=0) # TODO: abstain
    @property
    def value(self): return self.count
    @property
    def name(self): return 'Correct'

In [None]:
#export
class CountIncorrect(CountCorrect):
    "Per column of `xb`: number of non-zero entries that differ from the target `yb`."
    def accumulate(self, xb, yb):
        voted = xb != 0 # TODO: abstain
        mismatch = xb != yb
        self.count += (voted & mismatch).sum(dim=0)
    @property
    def name(self):
        return 'Incorrect'

In [None]:
#export
class LabelAccuracy(ValidLabelMetric):
    "Per column of `xb`: matches with `yb` among non-abstain entries divided by the number of non-abstain entries."
    def reset(self): self.count,self.total = 0,0
    def accumulate(self, xb, yb):
        voted = xb!=0 # TODO: abstain
        self.total += voted.sum(dim=0)
        # Restrict matches to the same entries the denominator counts; otherwise an
        # abstain matching a 0 target inflates the numerator and the ratio can exceed 1.
        self.count += (voted&(xb==yb)).sum(dim=0)
    @property
    def value(self):
        "Tensor of accuracies; a column with no votes divides 0 by 0."
        return self.count.float()/self.total
    @property
    def name(self): return 'Accuracy'

In [None]:
#export
# Metric classes appended to the user-supplied metrics in `Labeller.__init__`
defaults.labeller_metrics = [
    Coverage,
    Polarity,
    LabelAccuracy,
    CountCorrect,
    CountIncorrect,
]

In [None]:
#export
# Keep a handle on the un-patched initializer. If this cell is run again,
# `Labeller.__init__` is already the patched version; reuse the original it
# recorded, otherwise the patched init would call itself and recurse forever.
_old_labeller_init = getattr(Labeller.__init__, '_unpatched_init', Labeller.__init__)
@patch
def __init__(self:Labeller, abstain='abstain', metrics=None):
    "Run the original init, then set `self.metrics` to `metrics` followed by `defaults.labeller_metrics`."
    _old_labeller_init(self, abstain=abstain)
    self.metrics = L(instantiate(o) for o in L(metrics)+L(defaults.labeller_metrics))
# Record the original on the patched function so a re-run can find it
Labeller.__init__._unpatched_init = _old_labeller_init

In [None]:
#export
def _split(dl, b):
    i = getattr(dl, 'n_inp', 1 if len(b)==1 else len(b)-1)
    return b[:i],b[i:]

In [None]:
#export
@patch
def summary(self:Labeller, dl):
    "Run every metric in `self.metrics` over `dl` and return a `DataFrame` with one column per metric, indexed by `self.func_order`."
    metrics = self.metrics
    for m in metrics: m.reset()
    for batch in dl:
        xb,yb = map(detuplify, _split(dl, batch))
        for m in metrics:
            if isinstance(m, ValidLabelMetric):
                # Target-dependent metrics are only fed when the batch carries targets
                if yb is not None: m.accumulate(xb, yb.view(-1,1)) # Safe to add dim in yb?
            else:
                m.accumulate(xb)
    # `yb` is the one left over from the last batch: without targets, drop the
    # metrics that need them before building the table.
    if yb is None: metrics = metrics.filter(lambda o: not isinstance(o, ValidLabelMetric))
    columns = dict(metrics.map(lambda o: (o.name, o.value)))
    return pd.DataFrame(columns, index=self.func_order)

In [None]:
# Toy data: 8 rows and 2 columns of integer codes, where 0 is treated as abstain by the metrics
x = tensor([[0, 0], [0, 1], [0, 2], [1, 0], [0, 2], [1, 0], [1, 2], [0, 0]])
# No targets in this dataset; batches of 2, keeping the last batch
dset = Datasets(x)
dls = DataLoaders.from_dsets(dset, bs=2, drop_last=False)

In [None]:
# Labeller with the default metrics only
labeller = Labeller()
# Set by hand here; `summary` uses it as the index of the returned DataFrame
labeller.func_order = [0,1]

In [None]:
# Batches have no targets, so the metrics deriving from `ValidLabelMetric` are filtered out
summary = labeller.summary(dls.train)
summary

Unnamed: 0,Coverage,Polarity
0,0.375,1
1,0.5,2


In [None]:
#hide
# Expected: column 0 is non-zero in 3 of 8 rows with values {1}; column 1 in 4 of 8 rows with values {1, 2}
df = pd.DataFrame({'Coverage': [0.375, 0.5], 'Polarity': [1, 2]})
pd.testing.assert_frame_equal(df, summary)

If the data has labels, the summary also reports the label-dependent metrics (`Accuracy`, `Correct` and `Incorrect`).

In [None]:
# Targets for the last three rows of `x`
y = tensor([1, 1, 1])
# Each item is an (x row, y) pair; the two ItemGetter pipelines pick element 0 and element 1 of the pair
dset = Datasets(L(zip(x[-3:], y)), tfms=[[ItemGetter(0)], [ItemGetter(1)]])
dls = DataLoaders.from_dsets(dset, bs=2, drop_last=False)

In [None]:
# Same call as before; the batches now carry targets, so the target-dependent metrics are accumulated as well
summary = labeller.summary(dls.train)
summary

Unnamed: 0,Coverage,Polarity,Accuracy,Correct,Incorrect
0,0.666667,1,1.0,2,0
1,0.333333,1,0.0,0,1


## Export -

In [None]:
# nbdev export step; the recorded output lists the notebooks it converted
from nbdev.export import notebook2script
notebook2script()

Converted 00_core.ipynb.
Converted 01_data.transforms.ipynb.
Converted 02_labeller.core.ipynb.
Converted 02a_labeller.metrics.ipynb.
Converted 03_model.majority_label_voter.ipynb.
Converted 05_text.core.ipynb.
Converted 06_text.labellers.ipynb.
Converted Untitled-Copy1.ipynb.
Converted index.ipynb.
Converted resume-Copy1.ipynb.
Converted resume.ipynb.
Converted rx_transform.ipynb.
Converted rx_transform2-Copy1.ipynb.
Converted rx_transform2.ipynb.
