In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import ipywidgets as widgets
from PIL import Image
from IPython.display import display
from IPython.display import clear_output
from extras import *

Using TensorFlow backend.


## UI

In [2]:
class Labeler():
    """Interactive ipywidgets UI for hand-labeling images as positive or negative.

    Every page shows up to ``page_size`` randomly chosen images that have no
    vote yet, each with a ':)' and a ':(' button. Votes are kept as image
    indexes in the ``possitives`` and ``negatives`` sets; the (misspelled)
    attribute names are part of the public interface because code outside the
    class reads and assigns them.
    """

    def __init__(self, imgs_array, page_size=3):
        """Set up empty vote sets and the two vote counters.

        imgs_array: indexable, sized collection of images; ``imgs_array[i]``
            is what gets rendered for image index ``i``.
        page_size: maximum number of images shown per page.
        """
        self.imgs_array = imgs_array
        self.page_size = page_size

        self.possitives = set([])
        self.negatives = set([])
        self.current_page_widget = None
        self.possitives_widget = widgets.IntText(description='Yays:', value=0)
        self.negatives_widget = widgets.IntText(description='Nays:', value=0)
        # Placeholder so that the first show() call has something to close.
        # No page is built here: pages are only created when they are shown.
        self.widget = widgets.Widget()

    def show(self):
        """Close the page currently on screen and display a freshly sampled one."""
        self.widget.close()
        next_button = widgets.Button(description='More!', button_style='info')
        next_button.on_click(self.next_page)
        self.widget = widgets.VBox([
            self.get_vote_widget_row(),
            next_button,
            widgets.HBox([self.possitives_widget, self.negatives_widget])
        ])
        display(self.widget)

    def next_page(self, btn):
        """Click handler of the 'More!' button: replace the page with a new one."""
        self.show()

    def get_image_widget(self, img_array, width='340px'):
        """Return an Image widget showing ``img_array`` at the given CSS width.

        The bytes come from ``jpeg_bytes_from_nparray``, which is not defined
        in this notebook (presumably provided by ``from extras import *``).
        """
        wimg = widgets.Image(
            value=jpeg_bytes_from_nparray(img_array),
            width=width,
        )

        return wimg

    def possitive_clicked(self, btn):
        """Click handler of ':)': record ``btn.img_id`` as positive.

        A previous negative vote for the same image is withdrawn first, so an
        image is never in both sets.
        """
        if btn.img_id in self.negatives:
            self.negatives.remove(btn.img_id)
        self.possitives.add(btn.img_id)
        self.update_possitive_negative_count()

    def negative_clicked(self, btn):
        """Click handler of ':(': record ``btn.img_id`` as negative.

        A previous positive vote for the same image is withdrawn first, so an
        image is never in both sets.
        """
        if btn.img_id in self.possitives:
            self.possitives.remove(btn.img_id)
        self.negatives.add(btn.img_id)
        self.update_possitive_negative_count()

    def update_possitive_negative_count(self):
        """Refresh the 'Yays' / 'Nays' counters from the sizes of the vote sets."""
        self.possitives_widget.value = len(self.possitives)
        self.negatives_widget.value = len(self.negatives)

    def get_vote_widget(self, img_array, img_id, width):
        """Return one voting tile: the image with a ':)' and a ':(' button below it.

        ``img_id`` is attached to both buttons so the click handlers know which
        image the vote belongs to.
        """
        wimg = self.get_image_widget(img_array, width)
        btn_yes = widgets.Button(
            description=':)',
            disabled=False,
            button_style='success',
            tooltip=':)'
        )
        btn_yes.img_id = img_id
        btn_yes.on_click(self.possitive_clicked)

        btn_no = widgets.Button(
            description=':(',
            disabled=False,
            button_style='danger',
            tooltip=':('
        )
        btn_no.img_id = img_id
        btn_no.on_click(self.negative_clicked)

        return widgets.VBox([
            wimg,
            widgets.HBox([
                    btn_yes, btn_no
            ], layout=widgets.Layout(justify_content='space-around'))
        ], layout=widgets.Layout(width=width, padding='10px 0px 10px 0px', background='#0f0', justify_content='flex-end'))

    def _sample_unlabeled_indexes(self):
        """Return up to ``page_size`` distinct random indexes of images without a vote.

        Fewer indexes (possibly none) are returned when fewer unlabeled images
        remain, instead of raising as sampling without replacement would.
        """
        # len() counts images; ``.size`` would count every element of an
        # N-dimensional image stack and yield out-of-range indexes.
        n_samples = len(self.imgs_array)
        labeled = set(self.possitives) | set(self.negatives)
        labeled_indexes = np.array(list(labeled), dtype=np.int64)
        unlabeled_indexes = np.setdiff1d(np.arange(n_samples), labeled_indexes)

        n_pick = min(self.page_size, len(unlabeled_indexes))
        if n_pick <= 0:
            # Nothing left to label (or an empty page was requested).
            return unlabeled_indexes[:0]
        return np.random.choice(unlabeled_indexes, n_pick, replace=False)

    def get_vote_widget_row(self):
        """Return a wrapping flex row of voting tiles for a new random page."""
        indexes = self._sample_unlabeled_indexes()

        return widgets.Box(
            [self.get_vote_widget(self.imgs_array[i], i, '300px') for i in indexes],
            layout=widgets.Layout(display='flex', flex_flow='row wrap', justify_content='space-around', align_items='flex-end')
        )

# Go!

### In case of relabeling (resume from previously saved labels)

In [3]:
# df = pd.read_pickle('labeled_data.pkl')
# pos_idx = np.where(df.toni_y == 1)
# neg_idx = np.where(df.toni_y == 0)
# lab = Labeler(df.original.as_matrix(), page_size=9)
# lab.possitives = set(pos_idx[0])
# lab.negatives = set(neg_idx[0])
# lab.show()

### Otherwise: label the raw data from scratch

In [4]:
# Load the dataset to label; its 'original' column is what the Labeler below is fed.
# NOTE(review): unpickling can execute arbitrary code - only load a file you trust.
df = pd.read_pickle('raw_data.pkl')

In [6]:
# Series.as_matrix() is deprecated and was removed in pandas 1.0; .values
# yields the same ndarray on both old and new pandas versions.
lab = Labeler(df.original.values, page_size=6)
lab.show()

## Store labels

In [9]:
# Start from an all-unlabeled column so that re-running the cell is idempotent.
df['mariona_y'] = None
# Write through a single positional indexer on the frame itself: the chained
# form `df.mariona_y.iloc[...] = x` assigns into an intermediate Series, which
# triggers SettingWithCopyWarning and is not guaranteed to update `df`.
label_col = df.columns.get_loc('mariona_y')
df.iloc[list(lab.possitives), label_col] = 1.0
df.iloc[list(lab.negatives), label_col] = 0.0

## Save dataframe

In [10]:
# Persist the dataframe together with the label column; this overwrites any
# existing file of the same name.
df.to_pickle('labeled_data.pkl')

In [12]:
# How many images received each label (unlabeled rows are left out).
df['mariona_y'].dropna().value_counts()

0.0    320
1.0    185
Name: mariona_y, dtype: int64

In [13]:
# Ratio of positive to negative labels, derived from the label column instead
# of hand-copied counts so it cannot go stale when the labels change.
label_counts = df['mariona_y'].value_counts()
n_pos = label_counts.get(1.0, 0)
n_neg = label_counts.get(0.0, 0)
n_pos / float(n_neg) if n_neg else float('nan')

0.578125