In [1]:
# !pip install pymongo
# !pip install termcolor

In [2]:
import datetime
import html
# import json
import json
import os
from functools import partial
from pathlib import Path

import ipywidgets as widgets
import simplejson
from bson import json_util
from IPython.display import display, Image, JSON
from ipywidgets import HBox, VBox, Box, Button, ButtonStyle, AppLayout, Layout, Style
from pydash import omit, find_index, merge, group_by
from pymongo import MongoClient, DESCENDING
from termcolor import colored

from lib.PersistentSet import PersistentSet

In [3]:
# Filesystem locations used by the notebook, relative to its working directory.
images_dir = Path('../images')
gallery_dir = Path('../')
handmade_dir = Path('./handmade')
label_dir = handmade_dir/'labels'
# parents=True also creates ./handmade when it is missing; without it mkdir
# raises FileNotFoundError on a fresh checkout.
label_dir.mkdir(parents=True, exist_ok=True)
# File handed to PersistentSet to remember which image ids are being labeled.
current_ids_filepath = Path('./tmp/labeling_current_ids.json')

In [4]:
# Database handles shared by every cell below.
# NOTE(review): 172.17.0.1 is presumably the Docker bridge gateway — confirm for your setup.
mongo = MongoClient(host='172.17.0.1', port=27017)
db = mongo['bad-vis']
posts, imagemeta, imagelabel, vislabels = (
    db[collection_name]
    for collection_name in ('posts', 'imagemeta', 'imagelabel', 'vislabels')
)

In [5]:
# Base URL prepended to the image links that make_info_box renders.
image_url_prefix = 'http://vpn2d.mydev:7000'
# image_url_prefix = 'http://localhost:7000'

# Backup labels

In [6]:
# Move an existing labels.json aside under a timestamped name so the next
# cell can write a fresh export without overwriting the previous one.
current_labels_file = label_dir/'labels.json'
if os.path.isfile(current_labels_file):
    backup_stamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    os.rename(current_labels_file, label_dir/f"labels_{backup_stamp}.json")

In [7]:
# Export every vislabels document (minus Mongo's _id) to labels.json.
# The context manager guarantees the file is flushed and closed once the dump is done.
with open(label_dir/'labels.json', 'w') as labels_file:
    json.dump([omit(l, '_id') for l in vislabels.find()], labels_file, default=json_util.default)

# Maintenance

In [8]:
# for label in vislabels.find():
#     visimage = imagelabel.find_one({'image_id': label['image_id']})
#     if not visimage:
# #         print(f"missing {label['image_id']}")
#         duplicated = [i for i in imagelabel.find({'duplicated_images': label['image_id']})]
#         if len(duplicated) > 1:
#             pass
# #             print(f"found more than one duplicated {label['image_id']} {duplicated}")
#         if not duplicated:
# #             pass
#             print(f"missing in duplicated either {label['image_id']}")
#             print(json.dumps(omit(label, '_id'), default=datetime.datetime.isoformat))
#         else:
#             duplicated = duplicated[0]
# #             print(f"found in duplicated {label['image_id']} {duplicated['image_id']}")

In [9]:
# for label in vislabels.find():
#     if 'tumblr' in label['image_id']:
#         continue

#     visimage = imagelabel.find_one({'image_id': label['image_id']})
#     if not visimage:
#         visimage = imagelabel.find_one({'duplicated_images': label['image_id']})

#     if not visimage:
#         print(f"missing {label['image_id']}")
#     else:
#         label['image_id'] = visimage['image_id']
#         label['phash'] = visimage['phash']
#         vislabels.update_one({'_id': label['_id']}, {'$set': {'image_id': label['image_id'], 'phash': label['phash']}})

# Labeling

In [10]:
# Shared output area: update_label (inside make_label_box) prints each saved, non-empty remark here.
remarks_output = widgets.Output()

In [11]:
# Display the shared remarks log as this cell's output.
remarks_output

Output()

In [12]:
# Base height in px for the labeling UI; the image box, the label box and the
# per-image output cells all derive their heights from it.
box_height = 1200

In [13]:
def find_post (image_id):
    """Return the `posts` document referenced by the image's `imagemeta` entry.

    The image is looked up in `imagemeta` by `image_id`; its `post_id` is then
    used to query `posts`. A missing `imagemeta` entry raises TypeError because
    the lookup result is subscripted directly.
    """
    meta = imagemeta.find_one({'image_id': image_id})
    return posts.find_one({'post_id': meta['post_id']})

In [14]:
def find_label (image_id):
    """Return the first `vislabels` document for an image or any of its duplicates.

    Returns None when the image is not in `imagelabel` or when no label
    document matches. A warning is printed if more than one label document
    matches the image together with its `duplicated_images`.
    """
    image = imagelabel.find_one({'image_id': image_id})
    if not image:
        return None

    candidate_ids = [image_id, *image['duplicated_images']]
    matches = list(vislabels.find({'image_id': {'$in': candidate_ids}}))
    if len(matches) > 1:
        print(f"labels warning: more than 1 label {candidate_ids} {matches}")
    if not matches:
        return None
    return matches[0]

In [15]:
def find_image (image_id):
    """Return the `imagelabel` document whose own id or `duplicated_images` matches `image_id`."""
    by_own_id = {'image_id': image_id}
    by_duplicate = {'duplicated_images': image_id}
    return imagelabel.find_one({'$or': [by_own_id, by_duplicate]})

In [16]:
def find_images_by_tags (tags=None, limit=-1):
    """Return image documents whose label document carries all of `tags`.

    Args:
        tags: iterable of label tags that must all be present; None or empty
            matches every label document.
        limit: maximum number of label documents to scan; -1 means no limit.

    Returns:
        A list of `imagelabel` documents (resolved through find_image), each
        extended with a 'labels' key copied from its label document. Label
        documents whose image cannot be resolved are skipped.
    """
    # `tags` defaults to None rather than a shared mutable list.
    query = {'labels': {'$all': list(tags)}} if tags else {}
    cursor = vislabels.find(query)
    if limit != -1:
        cursor = cursor.limit(limit)

    images = []
    for label in cursor:
        image = find_image(label['image_id'])
        if image:
            images.append({**image, 'labels': label['labels']})
    return images

## Image Box

In [17]:
def make_image_box (visImage, width=600, height=800, layoutArgs=None):
    """Build the widget box that shows the image being labeled.

    Args:
        visImage: image document; its 'labelling_path' is read relative to `gallery_dir`.
        width: width passed to the Image widget (previously ignored in favour of a hard-coded 600).
        height: height passed to the Image widget.
        layoutArgs: extra keyword arguments for the enclosing box Layout.

    Returns:
        (imageBox, boxItems): the HBox wrapping the image, and a dict exposing
        the Image widget under the key 'image'.
    """
    layoutArgs = layoutArgs or {}

    # Read the bytes through a context manager so the file handle is closed.
    with open(gallery_dir/visImage['labelling_path'], 'rb') as image_file:
        image_bytes = image_file.read()

    image = widgets.Image(value=image_bytes, width=width, height=height, layout=Layout(object_position='center center', object_fit='contain'))
    boxItems = {
        'image': image
    }

    # The box height is tied to the notebook-level box_height, not to `height`.
    image_box_layout = Layout(height=f'{box_height-500}px', justify_content='center', **layoutArgs)
    imageBox = HBox([image], layout=image_box_layout)

    return (imageBox, boxItems)

## Info Box

In [18]:
def make_link (url, text):
    """Return an HTML widget with a link to `url`, captioned `text`, opening in a new tab.

    Both values are HTML-escaped: they come from stored post/image documents,
    and a quote or angle bracket in them would otherwise break the markup.
    """
    safe_url = html.escape(str(url), quote=True)
    safe_text = html.escape(str(text))
    return widgets.HTML(value=f"<a href='{safe_url}' target='_blank'>{safe_text}</a>")

def make_info_box (visImage, layoutArgs={}):
    """Build the information panel shown under the image.

    The panel stacks: a description (position in `label_img_ids`, popularity
    score, phash, tags), an output for the current labels, a status output,
    a link to the image's post, links to the posts of its duplicates, and a
    link to the image file itself.

    Returns:
        (infoBox, outputs): the VBox, and a dict with the three Output widgets
        under 'description', 'currentLabelsOutput' and 'output'.
    """
    def post_link (image_id):
        # Link to the post an image came from, captioned with the image id.
        return make_link(find_post(image_id)['url'], image_id)

    output = widgets.Output()

    link = post_link(visImage['image_id'])
    dp_links = [post_link(dp_image_id) for dp_image_id in visImage['duplicated_images']]
    image_link = make_link(f"{image_url_prefix}/projects/bad-vis-images/{visImage['labelling_path']}", visImage['labelling_path'])

    description = widgets.Output(layout=Layout(**layoutArgs))
    with description:
        # Every lookup below is evaluated while the Output widget is capturing.
        position = label_img_ids.index(visImage['image_id'])
        print(f"{position} / {len(label_img_ids)}")
        print(f"popularity_score: {visImage['popularity_score']}")
        print(f"phash: {visImage['phash']}")
        print(f"tags: {len(visImage['tags'])}")
        for tag in visImage['tags']:
            print(tag)

    currentLabelsOutput = widgets.Output(layout=Layout(**layoutArgs))

    outputs = dict(
        description=description,
        currentLabelsOutput=currentLabelsOutput,
        output=output,
    )

    children = [description, currentLabelsOutput, output, link]
    children.extend(dp_links)
    children.append(image_link)
    infoBox = VBox(children)

    return (infoBox, outputs)

## Checkboxes

In [19]:
def ckb_box_layout (rows, cols, width):
    """Return a column-wrapping Layout sized for `rows` x `cols` buttons.

    Height is 32px per row; width is `width` + 5px per column.
    """
    box_height_px = f"{rows*32}px"
    box_width_px = f"{cols*(width + 5)}px"
    return Layout(
        flex_flow='column wrap',
        height=box_height_px,
        width=box_width_px,
        align_content='flex-start'
    )

def make_ckbs (title, options, n_rows=10, layoutArgs={}, on_ckb_change=None, grouping=False, sorting=True, ckb_width=100):
    """Build a bordered group of toggle buttons, one per option.

    Args:
        title: currently unused.
        options: list of dicts with 'tag', 'name' and 'count' (and optionally 'subcategory').
        n_rows: buttons per column before wrapping.
        layoutArgs: Layout keyword arguments for the enclosing form; its
            'border' (default '3px solid lightblue') is recoloured.
        on_ckb_change: optional callback receiving the traitlets change dict.
        grouping: when true, options are split into one column block per 'subcategory'.
        sorting: when true, options are ordered by the reverse of an ascending sort on 'count'.
        ckb_width: button width in px.

    Returns:
        (form, ckbsByName): the VBox, and a dict mapping each option tag to its ToggleButton.
        The form border is orange while nothing is selected and lightblue otherwise.
    """
    def recolor (border, color):
        # Replace the last space-separated token of the border string with `color`.
        return border.replace(border.split(' ')[-1], color)

    toggle_layout = Layout(
        width=f"{ckb_width}px"
    )

    form_layout = Layout(**layoutArgs)
    form_layout.border = recolor(layoutArgs.get('border', '3px solid lightblue'), 'orange')

    ckbsByName = {}
    subcat_boxes = []

    def ckb_handler (change):
        toggle = change['owner']
        if change['new']:
            toggle.icon, toggle.button_style = 'check', 'success'
        else:
            toggle.icon, toggle.button_style = '', ''
        if on_ckb_change:
            on_ckb_change(change)
        # `form` is bound further down, before any button can fire this handler.
        any_selected = any(t.value for t in ckbsByName.values())
        form.layout.border = recolor(form.layout.border, 'lightblue' if any_selected else 'orange')

    if grouping:
        grouped = group_by(options, lambda o: o.get('subcategory', ''))
    else:
        grouped = {'': options}

    for _subcategory, group_options in grouped.items():
        if sorting:
            # Ascending stable sort, then reversed: ties end up in reverse input order.
            ordered_options = sorted(group_options, key=lambda x: x['count'])[::-1]
        else:
            ordered_options = group_options

        toggles = []
        for option in ordered_options:
            tag = option['tag']
            toggle = widgets.ToggleButton(value=False, description=option['name'], tooltip=tag, icon='', button_style='', disabled=False, indent=False, layout=toggle_layout)
            toggle.observe(ckb_handler, names='value')
            toggles.append(toggle)
            ckbsByName[tag] = toggle

        n_cols = -(len(toggles) // -n_rows)  # ceiling division
        subcat_boxes.append(VBox(toggles, layout=ckb_box_layout(rows=n_rows, cols=n_cols, width=ckb_width)))

    form = VBox([HBox(subcat_boxes)], layout=form_layout)

    return (form, ckbsByName)

def make_remarks (n_rows=10, layoutArgs={}):
    """Build the free-text remarks box.

    Returns:
        (form, remarks): a VBox holding a 'Remarks' label and the Textarea,
        and the Textarea itself. The form border is orange while the text is
        empty and lightblue once it has content.
    """
    def recolor (border, color):
        # Replace the last space-separated token of the border string with `color`.
        return border.replace(border.split(' ')[-1], color)

    form_layout = Layout(**layoutArgs)
    form_layout.border = recolor(layoutArgs.get('border', '3px solid lightblue'), 'orange')

    remarks = widgets.Textarea(value='', layout=Layout(height=f"{n_rows*30-20}px", width='240px'), disabled=False)

    def remarks_handler (change):
        has_text = bool(remarks.value and remarks.value != '')
        # `form` is bound below, before the textarea can fire this handler.
        form.layout.border = recolor(form.layout.border, 'lightblue' if has_text else 'orange')

    remarks.observe(remarks_handler, names='value')

    heading = widgets.Label(value='remarks'.capitalize())
    form = VBox([heading, remarks], layout=form_layout)
    return (form, remarks)

def make_ckb_box (options, n_rows=10, layoutArgs={}, on_ckb_change=None):
    """Assemble all label categories plus the remarks box into one panel.

    Args:
        options: dict with the keys 'form', 'layout', 'data', 'domain',
            'media', 'flag', 'fault' and 'effect', each a list of option dicts.
        n_rows: rows per column for the categories that do not fix their own.
        layoutArgs: Layout keyword arguments forwarded to every category box.
        on_ckb_change: callback forwarded to every toggle button.

    Returns:
        (panel, boxItemsByName, boxesByName): the VBox, a dict mapping
        'remarks' and every option tag to its widget, and a dict mapping each
        category name (and 'remarks') to its box.
    """
    def category (name, rows, **kwargs):
        # One bordered toggle group for a single label category.
        return make_ckbs(name, options[name], rows, layoutArgs, on_ckb_change, **kwargs)

    formCkbsBox, formCkbsByName = category('form', 7, grouping=True, ckb_width=140)
    layoutCkbsBox, layoutCkbsByName = category('layout', 7)
    dataCkbsBox, dataCkbsByName = category('data', n_rows, sorting=False)
    domainCkbsBox, domainCkbsByName = category('domain', n_rows)
    mediaCkbsBox, mediaCkbsByName = category('media', n_rows)
    flagCkbsBox, flagCkbsByName = category('flag', n_rows)

    faultCkbsBox, faultCkbsByName = category('fault', 10, grouping=True, ckb_width=180)
    effectCkbsBox, effectCkbsByName = category('effect', n_rows)

    remarksBox, remarks = make_remarks(n_rows, layoutArgs=layoutArgs)

    # Later dicts win on duplicate keys, so the merge order is significant.
    boxItemsByName = {}
    for items in ({'remarks': remarks}, formCkbsByName, faultCkbsByName, dataCkbsByName, domainCkbsByName, layoutCkbsByName, effectCkbsByName, mediaCkbsByName, flagCkbsByName):
        boxItemsByName.update(items)

    boxesByName = dict(
        remarks=remarksBox,
        form=formCkbsBox,
        layout=layoutCkbsBox,
        data=dataCkbsBox,
        domain=domainCkbsBox,
        media=mediaCkbsBox,
        flag=flagCkbsBox,
        fault=faultCkbsBox,
        effect=effectCkbsBox,
    )

    top_row = HBox([formCkbsBox, layoutCkbsBox])
    middle_row = HBox([dataCkbsBox, domainCkbsBox, mediaCkbsBox, flagCkbsBox, effectCkbsBox, remarksBox])
    bottom_row = HBox([faultCkbsBox])

    return (VBox([top_row, middle_row, bottom_row]), boxItemsByName, boxesByName)

## Buttons

In [20]:
def make_btn_box (on_update=None, on_prev=None, on_next=None):
    """Build the row of navigation / save buttons.

    Args:
        on_update: zero-argument callback run by the 'Save' buttons.
        on_prev: zero-argument callback; when given, 'Prev' and 'Save -> Prev' are added.
        on_next: zero-argument callback; when given, 'Save -> Next' and 'Next' are added.

    Returns:
        (box, btnItems): the HBox of buttons, and a dict with the keys
        'updateBtn', 'updateAndPrevBtn' and 'updateAndNextBtn'. The latter two
        are None when the corresponding callback was not supplied (previously
        this case raised UnboundLocalError).
    """
    def run (*callbacks):
        # Click handler that invokes each supplied callback in order, skipping missing ones.
        def handler (btn):
            for callback in callbacks:
                if callback:
                    callback()
        return handler

    btns = []
    updateAndPrevBtn = None
    updateAndNextBtn = None

    if on_prev:
        prevBtn = widgets.Button(description='Prev', button_style='warning', layout=Layout(width='60px'))
        prevBtn.on_click(run(on_prev))
        btns.append(prevBtn)

        updateAndPrevBtn = widgets.Button(description='Save -> Prev', button_style='info')
        updateAndPrevBtn.on_click(run(on_update, on_prev))
        btns.append(updateAndPrevBtn)

    updateBtn = widgets.Button(description='Save', button_style='success')
    updateBtn.on_click(run(on_update))
    btns.append(updateBtn)

    if on_next:
        updateAndNextBtn = widgets.Button(description='Save -> Next', button_style='info')
        updateAndNextBtn.on_click(run(on_update, on_next))
        btns.append(updateAndNextBtn)

        nextBtn = widgets.Button(description='Next', button_style='warning', layout=Layout(width='60px'))
        nextBtn.on_click(run(on_next))
        btns.append(nextBtn)

    btnItems = {
        'updateBtn': updateBtn,
        'updateAndPrevBtn': updateAndPrevBtn,
        'updateAndNextBtn': updateAndNextBtn
    }

    return (HBox(btns, layout=Layout(height='34px')), btnItems)

## Label Box

In [21]:
def make_label_box (image_id, options, next_callback=None, prev_callback=None):
    """Build the complete labeling UI for one image.

    Args:
        image_id: id of the image in `imagelabel`.
        options: label options per category, forwarded to make_ckb_box.
        next_callback: optional callable invoked with `image_id` by the "next" buttons.
        prev_callback: optional callable invoked with `image_id` by the "prev" buttons.

    Returns:
        An AppLayout with the image and info panel on the left and the label
        controls in the centre. When the image does not exist, a Label widget
        carrying the "Not found" message is returned instead, so callers can
        still display the result.
    """
    visImage = imagelabel.find_one({'image_id': image_id})
    if not visImage:
        # Nothing to render: every box below needs the image document.
        return widgets.Label(value=f"Not found: {image_id}")
    vislabel = find_label(image_id)

    labels = set(vislabel['labels']) if vislabel else set()

    layoutArgs = {
        'padding': '10px',
        'margin': '5px',
        'border': '3px solid lightblue'
    }

    imageBox, _imageBoxItems = make_image_box(visImage, width=600, height=box_height, layoutArgs=layoutArgs)

    infoBox, infoBoxItems = make_info_box(visImage, layoutArgs=layoutArgs)

    # Stored labels that no longer correspond to any control; they are shown
    # in red and left out when saving.
    invalidLabels = set()
    def show_current_labels ():
        currentLabelsOutput = infoBoxItems['currentLabelsOutput']
        currentLabelsOutput.clear_output()
        with currentLabelsOutput:
            for l in sorted(list(labels)):
                if l in boxItemsByName:
                    print(l)
                else:
                    invalidLabels.add(l)
                    print(colored(l, 'red'))

    def on_ckb_change (change):
        ckb = change['owner']
        labelName = ckb.tooltip
        if change['new']:
            labels.add(labelName)
        else:
            # discard: un-toggling a tag that is not tracked must not raise.
            labels.discard(labelName)
        show_current_labels()

    ckbBox, boxItemsByName, _boxesByName = make_ckb_box(options, n_rows=7, layoutArgs=layoutArgs, on_ckb_change=on_ckb_change)

    remarks = boxItemsByName['remarks']

    def update_label ():
        now = datetime.datetime.utcnow()
        # Update the existing label document (possibly stored under a
        # duplicate's id) or create one for this image.
        vislabels.find_one_and_update({'image_id': vislabel['image_id'] if vislabel else visImage['image_id']}, {
            '$set': {
                'phash': visImage['phash'],
                'labels': sorted(list(labels - invalidLabels)),
                'remarks': remarks.value,
                'updatedAt': now
            },
            '$setOnInsert': {'createdAt': now}
        }, upsert=True)
        with remarks_output:
            if remarks.value:
                print(f"{remarks.value}")
        with infoBoxItems['output']:
            print(f"{visImage['short_image_id']} updated")
        show_current_labels()

    def on_update ():
        update_label()

    def on_prev ():
        if prev_callback:
            prev_callback(image_id)

    def on_next ():
        if next_callback:
            next_callback(image_id)

    btnBox, _btnBoxItems = make_btn_box(on_update=on_update, on_prev=on_prev, on_next=on_next)

    panelBoxLayout = Layout(
        justify_content= 'flex-start',
        align_items= 'flex-start',
        align_content= 'flex-start',
    )
    panelBox = VBox([ckbBox, btnBox], layout=panelBoxLayout)

    # Restore the stored selection. Setting `.value` fires on_ckb_change, which
    # touches `labels`, so iterate over a snapshot rather than the live set.
    for l in list(labels):
        if l in boxItemsByName:
            boxItemsByName[l].value = True

    # Label documents without a 'remarks' field fall back to an empty text.
    remarks.value = (vislabel.get('remarks') or '') if vislabel else ''

    # Populate the current-labels panel and `invalidLabels` up front; otherwise
    # both stay empty until the first checkbox event, and a save made before
    # then would write the invalid labels back.
    show_current_labels()

    with infoBoxItems['output']:
        print('ready')

    leftBox = VBox([imageBox, infoBox])

    pane_widths = [2, 4, 1]
    return AppLayout(left_sidebar=leftBox, center=panelBox, pane_widths=pane_widths, height=f'{box_height+10}px')

## Label Images

In [22]:
# Every image document, ordered by the reverse of an ascending sort on popularity_score.
visImages = sorted(imagelabel.find(), key=lambda x: x['popularity_score'])[::-1]

def label_images ():
    """Yield, in `visImages` order, the ids of images with no `vislabels` document under that exact id."""
    for visImage in visImages:
        if vislabels.find_one({'image_id': visImage['image_id']}) is None:
            yield visImage['image_id']

# Image ids in display order; used for position display and prev/next navigation.
label_img_ids = [visImage['image_id'] for visImage in visImages]

In [23]:
# Index into label_img_ids to jump to; -1 keeps whatever ids were tracked last time.
goto_img_idx = -1

# Ids of the images currently shown in the labeling cells.
# NOTE(review): PersistentSet is project code; presumably it mirrors the set to
# current_ids_filepath — confirm against lib/PersistentSet.
if os.path.isfile(current_ids_filepath):
    current_ids = PersistentSet.load_set(current_ids_filepath)
else:
    current_ids = PersistentSet()
    current_ids.set_file(current_ids_filepath)

if goto_img_idx != -1:
    tracked_ids = list(current_ids)
    # On a first run nothing is tracked yet, so there is nothing to replace;
    # indexing [0] unconditionally raised IndexError in that case.
    if tracked_ids:
        current_ids.persist_remove(tracked_ids[0])
    current_ids.persist_add(label_img_ids[goto_img_idx])

# One output area per tracked image id.
cell_outputs = [widgets.Output(layout=Layout(height=f'{box_height+30}px')) for _ in range(len(current_ids))]

In [24]:
# Load the label categories and index their options by category tag.
# The context manager closes the file once it has been parsed.
with open('tmp/labelOptions.json') as label_options_file:
    labelOptions = {c['tag']:c['options'] for c in json.load(label_options_file)}
# JSON(labelOptions)

# GUI

In [25]:
# Maximum number of label documents make_gallery requests per query.
gallery_limit = 50
# Height of the gallery AppLayout in px (the inner output area is 20px shorter).
gallery_height = 1000

def make_filter_box (options, on_filter_change=None, layoutArgs=None):
    """Build the column of filter checkboxes, grouped by category and subcategory.

    Args:
        options: dict mapping a category name to a list of option dicts with
            'tag', 'name' and 'count' (and optionally 'subcategory').
        on_filter_change: optional callback receiving the list of currently
            selected tags after every change.
        layoutArgs: Layout keyword arguments for the box; None means none.

    Returns:
        A VBox with one HTML heading per (category, subcategory) followed by its checkboxes.
    """
    # The declared default is None, which cannot be unpacked with ** directly.
    layout = Layout(**(layoutArgs or {}))

    labels = set()
    def ckb_handler (change):
        tag = change['owner'].tag
        if change['new']:
            labels.add(tag)
        else:
            # discard: un-ticking a tag that is not tracked must not raise.
            labels.discard(tag)
        if on_filter_change:
            on_filter_change(list(labels))

    ckbs = []
    for cat, opts in options.items():
        grouped = group_by(opts, lambda o: o.get('subcategory', ''))
        # Distinct loop name so the `options` parameter is not shadowed mid-iteration.
        for subcat, subcat_options in grouped.items():
            ckbs.append(widgets.HTML(value=f"<b>{cat.upper()}</b> {subcat.capitalize()}"))
            for o in subcat_options:
                ckb = widgets.Checkbox(value=False, description=f"{o['name']} ({o['count']})", disabled=False, indent=False)
                # The tag is stored on the widget so the handler can read it back.
                ckb.tag = o['tag']
                ckb.observe(ckb_handler, names='value')
                ckbs.append(ckb)

    return VBox(ckbs, layout=layout)

def make_gallery_box (images, on_select=None, layoutArgs=None):
    """Build a wrapping grid of thumbnails, each with a button that selects the image.

    Args:
        images: image documents providing 'labelling_thumbnail_path',
            'image_name' and 'image_id'.
        on_select: optional callback receiving the clicked image's id.
        layoutArgs: accepted for signature parity with the other builders; not used here.

    Returns:
        A flex Box holding one VBox (thumbnail + button) per image.
    """
    def click_handler (btn):
        # The image id travels in the button tooltip.
        image_id = btn.tooltip
        if on_select:
            on_select(image_id)

    image_boxes = []
    for i in images:
        # Read each thumbnail through a context manager so its handle is
        # closed; a gallery page otherwise leaves one open file per image.
        with open(f"../{i['labelling_thumbnail_path']}", 'rb') as thumbnail_file:
            thumbnail_bytes = thumbnail_file.read()
        image_box = widgets.Image(value=thumbnail_bytes, width=140, height=140, layout=Layout(width='140px', height='140px'))
        btn = widgets.Button(description=i['image_name'], tooltip=i['image_id'])
        btn.on_click(click_handler)
        image_boxes.append(VBox([image_box, btn]))

    return Box(image_boxes,
               layout=Layout(display='flex', flex_flow='row wrap'))

def make_gallery (options, select_callback=None):
    """Build the filterable thumbnail gallery.

    Args:
        options: label options per category, used to build the filter checkboxes.
        select_callback: optional callable receiving the id of a clicked image.
            When omitted, the image is opened in the first labeling cell
            (`cell_outputs[0]`) via show_img. The parameter was previously ignored.

    Returns:
        An AppLayout with the filter box on the left and the gallery in the centre.
    """
    layoutArgs = {
        'padding': '10px',
        'margin': '5px',
        'border': '3px solid lightblue'
    }

    def select_handler (image_id):
        if select_callback:
            select_callback(image_id)
        else:
            show_img(image_id, cell_outputs[0])

    galleryOutput = widgets.Output(layout=Layout(**layoutArgs, height=f"{gallery_height-20}px"))

    def render (tags):
        # Query first, then swap the displayed gallery for the new result set.
        images = find_images_by_tags(tags=tags, limit=gallery_limit)
        galleryOutput.clear_output()
        with galleryOutput:
            display(make_gallery_box(images, on_select=select_handler, layoutArgs=layoutArgs))

    # Initial, unfiltered view.
    render([])

    filterBox = make_filter_box(options, on_filter_change=render, layoutArgs=layoutArgs)

    pane_widths = [1, 5, 1]
    return AppLayout(left_sidebar=filterBox, center=galleryOutput, pane_widths=pane_widths, height=f"{gallery_height}px")

In [26]:
# Render the gallery; clicking a thumbnail opens it in the first labeling cell.
make_gallery(labelOptions)

AppLayout(children=(VBox(children=(HTML(value='<b>FAULT</b> Data'), Checkbox(value=False, description='Selecti…

In [27]:
def prev_id (image_id):
    """Return the closest earlier id in `label_img_ids` that is not currently tracked.

    Walks backwards from `image_id`, skipping ids already in `current_ids`,
    and stops at the first entry of the list at the latest. The tracked set is
    updated: `image_id` is dropped (if tracked) and the result is added.

    Unlike the earlier loop, this always moves at least one step when possible,
    even if `image_id` itself is not in `current_ids` (e.g. after picking an
    image from the gallery). An id unknown to `label_img_ids` is returned unchanged.
    """
    i = image_id
    if image_id in label_img_ids:
        # Track the position instead of calling index() again on every step.
        idx = label_img_ids.index(image_id)
        while idx > 0:
            idx -= 1
            i = label_img_ids[idx]
            if i not in current_ids:
                break
    # Only remove what is actually tracked.
    # NOTE(review): whether persist_remove tolerates a missing element is not visible here.
    if image_id in current_ids:
        current_ids.persist_remove(image_id)
    current_ids.persist_add(i)
    return i

def next_id (image_id):
    """Return the closest later id in `label_img_ids` that is not currently tracked.

    Walks forwards from `image_id`, skipping ids already in `current_ids`,
    and stops at the last entry of the list at the latest. The tracked set is
    updated: `image_id` is dropped (if tracked) and the result is added.

    Unlike the earlier loop, this always moves at least one step when possible,
    even if `image_id` itself is not in `current_ids` (e.g. after picking an
    image from the gallery). An id unknown to `label_img_ids` is returned unchanged.
    """
    i = image_id
    if image_id in label_img_ids:
        # Track the position instead of calling index() again on every step,
        # and bound it by the last valid index (len - 1, not len).
        idx = label_img_ids.index(image_id)
        last_idx = len(label_img_ids) - 1
        while idx < last_idx:
            idx += 1
            i = label_img_ids[idx]
            if i not in current_ids:
                break
    # Only remove what is actually tracked.
    # NOTE(review): whether persist_remove tolerates a missing element is not visible here.
    if image_id in current_ids:
        current_ids.persist_remove(image_id)
    current_ids.persist_add(i)
    return i

def prev_img (output, image_id):
    """Show the image that prev_id selects for `image_id` in `output`."""
    target_id = prev_id(image_id)
    show_img(target_id, output)

def next_img (output, image_id):
    """Show the image that next_id selects for `image_id` in `output`."""
    target_id = next_id(image_id)
    show_img(target_id, output)

def show_img (image_id, output=None):
    """Replace the content of `output` with the labeling UI for `image_id`.

    The prev/next buttons of the new UI are bound to this same `output`.
    """
    go_next = partial(next_img, output)
    go_prev = partial(prev_img, output)

    output.clear_output()
    with output:
        # Built inside the context so anything it prints lands in `output`.
        label_box = make_label_box(image_id, options=labelOptions, next_callback=go_next, prev_callback=go_prev)
        display(label_box)

# Render each tracked image into its own output cell.
for i, current_id in enumerate(list(current_ids)):
    show_img(current_id, cell_outputs[i])

In [28]:
# Stack the labeling output cells (one per tracked image id) vertically.
VBox(cell_outputs)

VBox(children=(Output(layout=Layout(height='1230px')),))

In [29]:
# imageId = 'tumblr/wtf-viz/60472192241_0'
# imageId = 'reddit/dataisugly/c6rqc6_0'
# make_label_box(imageId, labelOptions)

In [30]:
# # replace labels
# current_label = 'fault:percentage'
# new_label = ''

# for l in vislabels.find():
#     if len([l for l in l['labels'] if current_label == l]) > 0:
#         ori_length = len(l['labels'])
#         print(len(l['labels']), l['_id'], l['image_id'], l['labels'])

#         l['labels'] = [l for l in l['labels'] if current_label != l]
# #         if new_label not in l['labels']:
# #             l['labels'].append(new_label)

#         new_length = len(l['labels'])
#         print(new_length - ori_length, new_label in l['labels'], len(l['labels']), l['labels'])

# #         vislabels.update_one({'_id': l['_id']}, {'$set': {'labels': l['labels']}})
# #     break