In [1]:
# !pip install pymongo

In [2]:
from pymongo import MongoClient, DESCENDING
from pathlib import Path
import datetime
import json
from pydash import omit, find_index
from functools import partial
from bson import json_util
import os

from IPython.display import display, Image, JSON
import ipywidgets as widgets
from ipywidgets import HBox, VBox, Button, ButtonStyle, AppLayout, Layout, Style

from lib.PersistentSet import PersistentSet

In [3]:
# Working directories and files, all relative to the notebook's working directory.
images_dir = Path('./images')
gallery_dir = Path('./gallery')
handmade_dir = Path('./handmade')
label_dir = handmade_dir/'labels'
# parents=True so a fresh checkout without ./handmade does not fail here.
label_dir.mkdir(parents=True, exist_ok=True)
# File in which the ids currently open for labeling are persisted.
current_ids_filepath = Path('./labeling_current_ids.json')

In [4]:
# MongoDB connection. Host and port default to the values this notebook has
# always used; MONGO_HOST / MONGO_PORT let it run elsewhere without editing the cell.
mongo = MongoClient(os.environ.get('MONGO_HOST', '172.17.0.1'), int(os.environ.get('MONGO_PORT', '27017')))
db = mongo['bad-vis']
# Collections used below.
posts = db['posts']
visimages = db['visimages']
vislabels = db['vislabels']

# Backup labels

In [5]:
# Keep the previous export around under a timestamped name before a new one is written.
previous_export = label_dir/'labels.json'
if previous_export.is_file():
    backup_stamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    os.rename(previous_export, label_dir/f"labels_{backup_stamp}.json")

In [6]:
# Export every label document (without its '_id' field) to labels.json.
# The context manager guarantees the file is flushed and closed when the cell ends.
with open(label_dir/'labels.json', 'w') as labels_file:
    json.dump([omit(l, '_id') for l in vislabels.find()], labels_file, default=json_util.default)

# Labeling

In [7]:
# Shared output area: update_label() inside make_label_box prints each saved,
# non-empty remark here.
remarks_output = widgets.Output()

In [8]:
# Render the shared remarks log in this cell.
remarks_output

Output()

In [41]:
# Pixel height of the image pane; the labeling box is rendered 10px taller.
box_height = 800

def make_label_box (image_id, next_callback=None, prev_callback=None):
    """Build the labeling widget for a single image.

    The widget has three panes: the image on the left, the label form and
    save buttons in the center, and links / tags / currently selected labels
    on the right.

    Reads the module-level names `visimages`, `vislabels`, `labelOptions`,
    `label_img_ids` and `remarks_output`, which must exist when this is called.

    Args:
        image_id: value of the 'image_id' field identifying the image.
        next_callback: optional callable taking `image_id`; when given, a
            "Save -> Next" button is added that saves and then calls it.
        prev_callback: optional callable taking `image_id`; when given, a
            "Save -> Prev" button is added that saves and then calls it.

    Returns:
        An `AppLayout` widget ready to be displayed.

    Raises:
        ValueError: if no document with this `image_id` exists in `visimages`.
    """
    visimage = visimages.find_one({'image_id': image_id})
    if visimage is None:
        # Fail with a readable message instead of a TypeError on the first subscript.
        raise ValueError(f"no visimage found for image_id {image_id!r}")
    vislabel = vislabels.find_one({'image_id': image_id}) or {'image_id': image_id, 'labels': [], 'remarks': ''}

    # Working copy of the selected labels; the toggle buttons mutate it and
    # update_label() writes it back. Tolerates a stored document without 'labels'.
    labels = set(vislabel.get('labels', []))

    output = widgets.Output()

    layoutArgs = {
        'padding': '10px',
        'margin': '5px',
        'border': '3px solid lightblue'
    }
    ckb_layout = Layout(
        width='120px'
    )
    # Column-wrapping box sized to hold `row` buttons per column in `col` columns.
    ckb_box_layout = lambda row, col: Layout(
        flex_flow='column wrap',
        height=f"{row*30+20}px",
        width=f"{col*125}px",
        align_content='flex-start'
    )

    form_item_layout = Layout(**layoutArgs)
    image_box_layout = Layout(height=f'{box_height}px', justify_content='center', **layoutArgs)

    # Read the image through a context manager so the file handle is closed.
    with open(visimage['image_path'], 'rb') as image_file:
        image_bytes = image_file.read()
    imageBox = HBox([widgets.Image(value=image_bytes, width=600, height=box_height, layout=Layout(object_position='center center', object_fit='contain'))], layout=image_box_layout)

    link = widgets.HTML(value=f"<a href='{visimage['url']}' target='_blank'>{visimage['image_id']}</a>")
    image_link = widgets.HTML(value=f"<a href='http://vpn2d.mydev:7000/projects/bad-vis-browser/{visimage['image_path']}' target='_blank'>{visimage['image_path']}</a>")

    description = widgets.Output()
    with description:
        # Zero-based position of this image in label_img_ids (-1 if absent) and the total.
        print(f"{find_index(label_img_ids, lambda x: x == image_id)} / {len(label_img_ids)}")
        print(f"tags: {len(visimage['tags'])}")
        for t in visimage['tags']:
            print(t)

#     autoLabelCkbs = [widgets.Checkbox(value=False, description=f"{l}", disabled=True, indent=False) for l in sorted(visimage['labels']['auto'])]
#     autoLabels = VBox([widgets.Label(value=f"Auto Labels: {len(visimage['labels']['auto'])}")] + autoLabelCkbs, layout=form_item_layout)

    currentLabelsOutput = widgets.Output()
    currentLabels = VBox([currentLabelsOutput], layout=form_item_layout)
    def show_current_labels ():
        """Re-print the sorted selection into the "current labels" pane."""
        currentLabelsOutput.clear_output()
        with currentLabelsOutput:
            for l in sorted(labels):
                print(l)

    infoBox = VBox([link, image_link, description, currentLabels, output])
#     infoBox = VBox([link, description, autoLabels, currentLabels, output])

    def on_ckb_change (change):
        """Mirror a toggle button's new value into `labels` and its styling."""
        ckb = change['owner']
        # The tooltip carries the full "category:name" label.
        labelName = ckb.tooltip
        if change['new']:
            labels.add(labelName)
            ckb.icon = 'check'
            ckb.button_style = 'success'
        else:
            # discard: un-toggling must never fail, even if the label is already gone.
            labels.discard(labelName)
            ckb.icon = ''
            ckb.button_style = ''
        show_current_labels()

    # One toggle button per label option, indexed by category and by full name.
    labelFormCkbsByCat = {}
    labelFormCkbsByName = {}
    for cat, names in labelOptions.items():
        ckbs = []
        for n in names:
            name = f"{cat}:{n}"
            ckb = widgets.ToggleButton(value=False, description=n.capitalize(), tooltip=name, icon='', button_style='', disabled=False, indent=False, layout=ckb_layout)
            ckb.observe(on_ckb_change, names='value')
            labelFormCkbsByName[name] = ckb
            ckbs.append(ckb)
        labelFormCkbsByCat[cat] = ckbs
    remarks = widgets.Textarea(value=vislabel.get('remarks', ''), layout=Layout(height='180px', width='240px'), disabled=False)
    # The remarks box is laid out like one more category.
    labelFormCkbsByCat['remarks'] = [remarks]

    n_rows = 10

    def category_box (cat):
        """Return the titled, bordered box holding all widgets of one category."""
        members = labelFormCkbsByCat[cat]
        # ceil(len / n_rows) columns for buttons; the remarks box always gets two.
        n_cols = -(len(members)//-n_rows) if cat != 'remarks' else 2
        return VBox([widgets.Label(value=f"{cat.capitalize()}")] + [VBox(members, layout=ckb_box_layout(row=n_rows, col=n_cols))], layout=form_item_layout)

    labelForm = VBox([
        HBox([category_box(cat) for cat in row])
        for row in [['form', 'fault'], ['data', 'layout', 'metaphor', 'media', 'flag', 'remarks']]
    ])

    def update_label ():
        """Upsert the current labels and remarks for this image and log the save."""
        # One timestamp so createdAt and updatedAt agree on first insert.
        now = datetime.datetime.utcnow()
        vislabels.find_one_and_update({'image_id': image_id}, {
            '$set': {
                'labels': sorted(labels),
                'remarks': remarks.value,
                'updatedAt': now
            },
            '$setOnInsert': {'createdAt': now}
        }, upsert=True)
        with remarks_output:
            if remarks.value:
                print(f"{remarks.value}")
        with output:
            print(f"{visimage['short_image_id']} updated")

    btns = []

    if prev_callback:
        updateAndPrevBtn = widgets.Button(description="Save -> Prev", button_style="info")
        def update_and_prev (btn):
            update_label()
            prev_callback(image_id)
        updateAndPrevBtn.on_click(update_and_prev)
        btns.append(updateAndPrevBtn)

    updateBtn = widgets.Button(description="Save", button_style="success")
    def update (btn):
        update_label()
    updateBtn.on_click(update)
    btns.append(updateBtn)

    if next_callback:
        updateAndNextBtn = widgets.Button(description="Save -> Next", button_style="info")
        def update_and_next (btn):
            update_label()
            next_callback(image_id)
        updateAndNextBtn.on_click(update_and_next)
        btns.append(updateAndNextBtn)

    btnBox = HBox(btns)

    panelBoxLayout = Layout(
        justify_content= 'flex-start',
        align_items= 'flex-start',
        align_content= 'flex-start',
    )
    panelBox = VBox([labelForm, btnBox], layout=panelBoxLayout)

#     for ckb in autoLabelCkbs:
#         widgets.link((ckb, 'value'), (labelFormCkbsByName[ckb.description], 'value'))

    # Pre-select the stored labels. Iterate over a snapshot because setting
    # .value fires on_ckb_change, which touches `labels`. A stored label with no
    # matching button is skipped here but stays in `labels`, so it is saved back
    # unchanged.
    for l in sorted(labels):
        stored_ckb = labelFormCkbsByName.get(l)
        if stored_ckb is not None:
            stored_ckb.value = True
    # Make sure the pane also lists labels that have no button.
    show_current_labels()

    with output:
        print('ready')

    pane_widths = [2, 4, 1]
    return AppLayout(left_sidebar=imageBox, center=panelBox, right_sidebar=infoBox, pane_widths=pane_widths, height=f'{box_height+10}px')

In [10]:
# Ids of the images currently open for labeling: reload the persisted set when
# its file exists, otherwise start an empty one bound to that file.
if os.path.isfile(current_ids_filepath):
    current_ids = PersistentSet.load_set(current_ids_filepath)
else:
    current_ids = PersistentSet()
    current_ids.set_file(current_ids_filepath)

In [11]:
# All image documents, highest popularity_score first. A stable ascending sort
# followed by a reversal is kept so documents with equal scores stay in the
# same relative order as before.
visImages = sorted(visimages.find(), key=lambda img: img['popularity_score'])[::-1]

def label_images ():
    """Yield the image_id of every image in `visImages` that has no label document yet.

    The label lookup happens lazily, one query per image, at the moment the
    generator reaches that image.
    """
    for img in visImages:
        existing_label = vislabels.find_one({'image_id': img['image_id']})
        if existing_label is None:
            yield img['image_id']

# Navigation order for labeling: every image id, in the order of visImages.
label_img_ids = [img['image_id'] for img in visImages]

In [34]:
# One fixed-height output area per id in current_ids; each gets its own Layout.
cell_outputs = [
    widgets.Output(layout=Layout(height=f'{box_height+30}px'))
    for _ in range(len(current_ids))
]

In [42]:
# Label vocabulary, grouped by category. make_label_box creates one toggle
# button per entry and stores a selected entry as the string "category:name"
# (e.g. 'form:barchart', 'fault:axis:label').
# NOTE(review): several names look misspelled ('guagechart', 'sunbrust',
# 'occulusion'). They are part of the stored label strings, so do not correct
# them here without also migrating the labels already saved in `vislabels`.
labelOptions = {
    # Chart type.
    'form': [
        'barchart', 'linechart', 'scatterplot', 'dotplot', 'barcodechart', 'areachart', 'histogram',
        'nodelink', 'radarchart', 'bubblechart', 'dumbbellplot',
        'piechart', 'donutchart',
        'pyramid', 'venn', 'choropleth', 'flowmap', 'map', 'chernoffface',
        'table', 'heatmap', 'treemap',
        'boxplot', 'violinplot', 'parallelcoor', 'streamgraph',
        'pictogram', 'guagechart', 'sankeydiagram', 'chorddiagram', 'sunbrust',
        'unknown'
    ],
    # Kind of data shown.
    'data': [
        'categorical', 'quantitative', 'indexvalue', 'accumulated',
        'percentage', 'probability',
        'ordinal', 'ranking', 'sequential',
        'timeseries', 'cyclic',
        'geospatial', 'flow',
        'network', 'tree',
        'set', 'bitmap',
        'text', 'multivariate',
    ],
#     'encoding': [
#         'position', 'position(unaligned)', 'length', 'area', 'tilt',
#         'curvature', 'region', 'motion', 'shape',
#         'depth', 'volume',
#         'luminance', 'saturation', 'hue'
#     ],
#     'mark': [
#         'line', 'rectangle', 'point'
#     ],
    'layout': [
        'circular', 'infographics', 'stacked', 'map', 'juxtaposition', 'overlay', 'mixed'
    ],
    'metaphor': [
        'pictograph', 'periodictable', 'gear', 'clock'
    ],
    'media': [
        'inreallife', 'printed', 'handdrawn', 'tv', 'ads', 'news', 'NSFW'
    ],
    # Problems found in the visualization; "x:y" entries refine the broader "x".
    'fault': [
        'percentage', 'percentage:sum', 'percentage:encoding',
        'label', 'description',
        'axis', 'axis:label', 'axis:flipped', 'axis:truncated', 'axis:double', # axis:dual
        'legend',
        'color', 'color:over12',
        'scale', 'scale:log', 'scale:inconsistent', 'binning',
        'area',
        'picto:distortion', 'picto:area',
        'position', 'itemorder',
        'connection',
        '3d', 'animation',
        'cluttering', 'occulusion',
        'data:selective', 'data:questionable', 'data:missingvalues', 'data:redundant',
        'index:comparison', # index:crossbasiscomparison
        'parody', 'faultylogic', 'missingcontext',
        'confirmationbias', 'chartjunk', 'betteralternative',
        'faultystatistics', 'invalidcomparison',
        'map:population', 'invalidencoding',
        'unreadable', 'visuallyawful'
    ],
    # Review / bookkeeping markers.
    'flag': [
        'needreview', 'invalid', 'notbad', 'starred'
    ]
}

In [43]:
def prev_id (image_id):
    """Pick the image a cell should show when moving backwards from `image_id`.

    Walks towards the start of `label_img_ids`, skipping every id that is
    already in `current_ids` (i.e. shown in some cell). On success the cell's
    entry in `current_ids` is moved from `image_id` to the chosen id.

    Returns:
        The chosen id, or `image_id` unchanged when every id between it and
        the start of the list is already taken.
    """
    i = image_id
    while i in current_ids:
        if i == label_img_ids[0]:
            break
        i = label_img_ids[max(0, find_index(label_img_ids, lambda x: x == i) - 1)]
    if i in current_ids:
        # Reached the start without finding a free id. Stay put: switching to an
        # id another cell already shows would duplicate it and drop this cell's
        # own entry from current_ids.
        return image_id
    current_ids.persist_remove(image_id)
    current_ids.persist_add(i)
    return i

def next_id (image_id):
    """Pick the image a cell should show when moving forwards from `image_id`.

    Walks towards the end of `label_img_ids`, skipping every id that is
    already in `current_ids` (i.e. shown in some cell). On success the cell's
    entry in `current_ids` is moved from `image_id` to the chosen id.

    Returns:
        The chosen id, or `image_id` unchanged when every id between it and
        the end of the list is already taken.
    """
    last_index = len(label_img_ids) - 1
    i = image_id
    while i in current_ids:
        if i == label_img_ids[-1]:
            break
        # Clamp to the last valid index (the list length itself is out of range).
        i = label_img_ids[min(last_index, find_index(label_img_ids, lambda x: x == i) + 1)]
    if i in current_ids:
        # Reached the end without finding a free id. Stay put: switching to an
        # id another cell already shows would duplicate it and drop this cell's
        # own entry from current_ids.
        return image_id
    current_ids.persist_remove(image_id)
    current_ids.persist_add(i)
    return i

def prev_img (output, image_id):
    """Re-render `output` with the image chosen by prev_id(image_id)."""
    target_id = prev_id(image_id)
    show_img(target_id, output)

def next_img (output, image_id):
    """Re-render `output` with the image chosen by next_id(image_id)."""
    target_id = next_id(image_id)
    show_img(target_id, output)

def show_img (image_id, output=None):
    """Render the labeling box for `image_id` inside `output`.

    Args:
        image_id: id of the image to show.
        output: the `widgets.Output` to draw into. When omitted, a new output
            area is created and displayed in the current cell, so the
            documented default no longer fails on `None.clear_output()`.
    """
    if output is None:
        output = widgets.Output()
        display(output)
    output.clear_output()
    with output:
        # The prev/next buttons re-render into this same output area.
        display(make_label_box(image_id, next_callback=partial(next_img, output), prev_callback=partial(prev_img, output)))

# Draw one labeling box per open id, each into its own pre-allocated output area.
# list(...) takes a snapshot of the set before rendering starts.
for i, current_id in enumerate(list(current_ids)):
    target_output = cell_outputs[i]
    show_img(current_id, target_output)

In [37]:
# Stack all labeling boxes vertically and display them in this cell.
VBox(cell_outputs)

VBox(children=(Output(layout=Layout(height='830px'), outputs=({'output_type': 'display_data', 'data': {'text/p…

In [16]:
# visImages = [i for i in visimages.find()]
# visImages.sort(key=lambda x: x['popularity_score'])
# visImages.reverse()

# def label_images ():
#     for i in visImages:
#         visLabel = vislabels.find_one({'image_id': i['image_id']})
#         if visLabel == None:
#             yield i['image_id']

# cell_output = widgets.Output()
# label_img_ids = [i for i in label_images()]
# current = {'image_id': label_img_ids[0]}

In [17]:
# def prev_img (image_id):
#     show_img(label_img_ids[max(0, find_index(label_img_ids, lambda x: x == image_id) - 1)])

# def next_img (image_id):
#     show_img(label_img_ids[min(len(label_img_ids), find_index(label_img_ids, lambda x: x == image_id) + 1)])

# def show_img (image_id):
#     current['image_id'] = image_id
#     cell_output.clear_output()
#     with cell_output:
#         display(make_label_box(image_id, next_callback=next_img, prev_callback=prev_img))

# show_img(current['image_id'])

In [18]:
# cell_output

In [19]:
# label_img = label_images()

In [20]:
# for i in range(3):
#     try:
#         next_image_id = next(label_img)
#     except StopIteration:
#         print('StopIteration')
#         break

#     display(make_label_box(next_image_id))

In [21]:
# def make_colored_text (color, text):
#     return r'\(\color{' + color + '} {' + text + '}\)'