In [1]:
import pandas as pd

In [2]:
# Sample documents to label; each string looks like "<title> – <artist>".
docs = [
    'Tiger Teeth – WALK THE MOON',
    'Wolf Like Me – TV On The Radio',
    'Duck – Zeedym',
    'Lone Wolf and Cub – Thundercat'
]

In [3]:
# The candidate classes offered for every document.
classes = ['about_tiger', 'about_wolf', 'about_duck']

# Use input

The crudest solution: show the user the docs one by one and, at each step, ask for a label for the current doc.

In [4]:
from IPython.display import clear_output
import re

In [5]:
def get_label(doc, labels, multi=False):
    """Interactively ask the user to choose a label (or labels) for one document.

    The cell output is cleared, a numbered legend of ``labels`` is shown and
    the user types the number of the wanted label. The question is repeated
    until the answer is valid.

    Args:
        doc: The document text shown in the prompt.
        labels: Iterable of candidate labels; each is addressed by its
            zero-based position.
        multi: When true, the answer is a comma-separated list of numbers and
            a list of labels is returned. Whitespace around numbers, empty
            items (e.g. a trailing comma) and repeated numbers are tolerated;
            repeats are collapsed, keeping first-seen order.

    Returns:
        The selected label, or a list of selected labels when ``multi`` is true.

    Raises:
        ValueError: If ``labels`` is empty, since no answer could ever be valid.
    """
    mapper = {str(i): label for i, label in enumerate(labels)}
    if not mapper:
        raise ValueError("labels must not be empty")

    legend = "\n".join(f'{index}\t{label}' for index, label in mapper.items())
    if multi:
        legend += "\nInput comma-separated list for multiple labels"
    prompt = f'Select class for "{doc}"\n{legend}\n({"/".join(mapper.keys())})?'

    # Loop rather than recurse, so repeated mistakes cannot exhaust the stack.
    while True:
        clear_output()
        response = input(prompt).strip()
        if multi:
            keys = [key.strip() for key in response.split(",")]
            keys = [key for key in keys if key]
            # Accept only when every key is known; an unknown key re-prompts
            # instead of raising KeyError.
            if keys and all(key in mapper for key in keys):
                return [mapper[key] for key in dict.fromkeys(keys)]
        elif response in mapper:
            # Membership test (not truthiness) so falsy labels are still returned.
            return mapper[response]

In [6]:
# Prompt for one class per document; this cell blocks on input() once for every doc.
labels = [get_label(doc, classes) for doc in docs]

Select class for "Lone Wolf and Cub – Thundercat"
0	about_tiger
1	about_wolf
2	about_duck
(0/1/2)?1


In [7]:
# Pair each document with the label chosen for it.
pd.DataFrame(list(zip(docs, labels)), columns=['docs', 'labels'])

Unnamed: 0,docs,labels
0,Tiger Teeth – WALK THE MOON,about_tiger
1,Wolf Like Me – TV On The Radio,about_wolf
2,Duck – Zeedym,about_duck
3,Lone Wolf and Cub – Thundercat,about_wolf


In [8]:
# Same prompt loop, but each answer may name several classes.
# Note: this rebinds `labels` from a list of strings to a list of lists.
labels = [get_label(doc, classes, multi=True) for doc in docs]

Select class for "Lone Wolf and Cub – Thundercat"
0	about_tiger
1	about_wolf
2	about_duck
Input comma-separated list for multiple labels
(0/1/2)?0,1


In [9]:
# Pair each document with its list of chosen labels.
pd.DataFrame(list(zip(docs, labels)), columns=['docs', 'labels'])

Unnamed: 0,docs,labels
0,Tiger Teeth – WALK THE MOON,"[about_tiger, about_wolf]"
1,Wolf Like Me – TV On The Radio,"[about_tiger, about_duck]"
2,Duck – Zeedym,[about_duck]
3,Lone Wolf and Cub – Thundercat,"[about_tiger, about_wolf]"


# Use ipython widgets

Something similar to ordinary spreadsheet software: each row represents a doc and has a control next to it for selecting one or multiple classes.

In [10]:
import pandas as pd
import ipywidgets as widgets
import time
from IPython.display import display
from IPython.display import display_html, clear_output

In [11]:
class CheckBoxGroup:
    """A set of checkboxes, one per option, whose value is the list of ticked options."""

    def __init__(self, options):
        # Each option gets its own unchecked, enabled checkbox captioned with the option.
        self.value_mapper = {}
        for option in options:
            self.value_mapper[option] = widgets.Checkbox(
                value=False,
                description=option,
                disabled=False,
            )
        self.elements = list(self.value_mapper.values())

    @property
    def value(self):
        """Options whose checkbox is currently ticked, in option order."""
        ticked = []
        for option, checkbox in self.value_mapper.items():
            if checkbox.value:
                ticked.append(option)
        return ticked

    def render(self):
        """Return the list of checkbox widgets to place in a layout."""
        return self.elements

class RadioButtonsWrapper:
    """Single-choice selector exposing the same `value` / `render()` API as CheckBoxGroup."""

    def __init__(self, options):
        radio = widgets.RadioButtons(options=options, disabled=False)
        self.elements = radio

    @property
    def value(self):
        """The option currently selected in the underlying widget."""
        selected = self.elements.value
        return selected

    def render(self):
        """Return the radio widget wrapped in a one-item list."""
        return [self.elements]


        
def display_docs(docs, labels, multi=False):
    """Show one row per document with a label selector next to it.

    Args:
        docs: Documents to label; each is shown in a Label widget.
        labels: Candidate labels offered in every row.
        multi: Use checkboxes (several labels per doc) instead of radio
            buttons (one label per doc).

    Returns:
        A zero-argument function that, when called, reads the selectors and
        returns a DataFrame with columns ``docs`` and ``labels``.
    """
    selector_cls = CheckBoxGroup if multi else RadioButtonsWrapper
    value_holders = []
    rows = []
    for doc in docs:
        caption = widgets.Label(doc)
        holder = selector_cls(labels)
        value_holders.append(holder)

        row = widgets.HBox([caption, *holder.render()])
        row.layout.display = 'flex'
        # The caption may grow to fill the row; every control keeps a fixed 100px basis.
        caption.layout.flex = '1 0 100px'
        for control in holder.render():
            control.layout.flex = '0 0 100px'
        rows.append(row)

    display(widgets.VBox(rows))

    def get_response():
        # Selections are read at call time, not when the table was displayed.
        selections = [holder.value for holder in value_holders]
        return pd.DataFrame(list(zip(docs, selections)), columns=['docs', 'labels'])

    return get_response

In [12]:
# Render one row of radio buttons per document; keep the returned reader for the next cell.
get_response = display_docs(docs, classes)

VBox(children=(HBox(children=(Label(value='Tiger Teeth – WALK THE MOON', layout=Layout(flex='1 0 100px')), Rad…

In [13]:
# Read the selections made in the widget above into a DataFrame.
get_response()

Unnamed: 0,docs,labels
0,Tiger Teeth – WALK THE MOON,about_tiger
1,Wolf Like Me – TV On The Radio,about_tiger
2,Duck – Zeedym,about_duck
3,Lone Wolf and Cub – Thundercat,about_wolf


In [14]:
# Same table with checkboxes, so several classes can be ticked per document.
get_response = display_docs(docs, classes, multi=True)

VBox(children=(HBox(children=(Label(value='Tiger Teeth – WALK THE MOON', layout=Layout(flex='1 0 100px')), Che…

In [15]:
# Read the ticked checkboxes into a DataFrame; each `labels` cell holds a list.
get_response()

Unnamed: 0,docs,labels
0,Tiger Teeth – WALK THE MOON,"[about_tiger, about_wolf]"
1,Wolf Like Me – TV On The Radio,[about_wolf]
2,Duck – Zeedym,[about_duck]
3,Lone Wolf and Cub – Thundercat,"[about_tiger, about_wolf]"


# Using [ipyannotate](https://github.com/natasha/ipyannotate)

In [16]:
from ipyannotate import annotate
from ipyannotate.buttons import ValueButton, NextButton, BackButton

# One value button per class, given as (icon, value, keyboard shortcut),
# followed by the navigation buttons.
buttons = [
    ValueButton(icon=icon, value=value, shortcut=shortcut)
    for icon, value, shortcut in (
        ("🐯", "about_tiger", "s"),
        ("🐺", "about_wolf", "w"),
        ("🦆", "about_duck", "d"),
    )
]
buttons += [BackButton(), NextButton()]

annotation = annotate(docs, buttons=buttons)
annotation

Annotation(canvas=OutputCanvas(), progress=Progress(atoms=[<ipyannotate.progress.Atom object at 0x114844978>, …

In [17]:
# Inspect the tasks and the value currently assigned to each document.
annotation.tasks

[Task(output='Tiger Teeth – WALK THE MOON', value=about_tiger),
 Task(output='Wolf Like Me – TV On The Radio', value=about_wolf),
 Task(output='Duck – Zeedym', value=about_duck),
 Task(output='Lone Wolf and Cub – Thundercat', value=about_wolf)]

> If you need multiple labels per row, pass `multi=True` to `annotate()`. Note that you will then need to navigate between samples manually (with the Back/Next buttons), because in single-label mode the widget moved on to the next sample automatically after a label was assigned.

In [18]:
# One value button per class, given as (icon, value, keyboard shortcut),
# followed by the navigation buttons.
buttons = [
    ValueButton(icon=icon, value=value, shortcut=shortcut)
    for icon, value, shortcut in (
        ("🐯", "about_tiger", "s"),
        ("🐺", "about_wolf", "w"),
        ("🦆", "about_duck", "d"),
    )
]
buttons += [BackButton(), NextButton()]

# multi=True lets several values be attached to the same document.
annotation = annotate(docs, buttons=buttons, multi=True)
annotation

Annotation(canvas=OutputCanvas(), progress=Progress(atoms=[<ipyannotate.progress.Atom object at 0x1148e3a20>, …

In [19]:
# Inspect the tasks; in multi mode each task's value is a set of labels.
annotation.tasks

[MultiTask(output='Tiger Teeth – WALK THE MOON', value={'about_tiger', 'about_duck'}),
 MultiTask(output='Wolf Like Me – TV On The Radio', value={'about_wolf'}),
 MultiTask(output='Duck – Zeedym', value={'about_duck'}),
 MultiTask(output='Lone Wolf and Cub – Thundercat', value={'about_tiger', 'about_wolf'})]