In [1]:
from pathlib import Path
from functools import partial
import datetime
import os

import json
from bson import json_util
import ipywidgets as widgets
from IPython.display import JSON

from pymongo import MongoClient
import pydash as _
import pandas as pd

In [2]:
images_dir = Path("../images")
gallery_dir = Path("../")
handmade_dir = Path("./handmade")
label_dir = handmade_dir / "labels"
label_dir.mkdir(exist_ok=True)

In [3]:
mongo_uri = json.load(open('./credentials/mongodb_credentials.json'))['uri']
mongo = MongoClient(mongo_uri)
db = mongo["bad-vis"]
imagelabel = db["imagelabel"]
visclean = db["visclean"]
visclean.create_index('image_id', unique=True)

'image_id_1'

## Save Labels

In [4]:
if os.path.isfile(label_dir / "cleaning_labels.json"):
    os.rename(
        label_dir / "chart_types.json",
        label_dir / f"chart_types_{datetime.datetime.now().strftime('%Y%m%d%H%M%S')}.json",
    )

In [5]:
json.dump(
    [_.omit(l, "_id") for l in visclean.find()],
    open(label_dir / "chart_types.json", "w"),
    default=json_util.default,
)

## Load dataset

In [6]:
df_classifier_results = pd.read_csv('./tmp/targets_classified.csv')
df_classifier_results['image_name'] = df_classifier_results['filepath'].str.extract(r'w\/(.+?)\.')
df_classifier_results

Unnamed: 0,source,filepath,result,image_name
0,targets,../datasets/badvis/labelling/preview/2l5nix_0.jpg,other,2l5nix_0
1,targets,../datasets/badvis/labelling/preview/iv1xj9_0.jpg,bar,iv1xj9_0
2,targets,../datasets/badvis/labelling/preview/133942386...,circle,133942386291_0
3,targets,../datasets/badvis/labelling/preview/997384124...,map,99738412430_0
4,targets,../datasets/badvis/labelling/preview/1mgcoa_0.jpg,bar,1mgcoa_0
...,...,...,...,...
8892,targets,../datasets/badvis/labelling/preview/dq9mu7_0.png,bar,dq9mu7_0
8893,targets,../datasets/badvis/labelling/preview/ab453y_0.png,other,ab453y_0
8894,targets,../datasets/badvis/labelling/preview/8dwi6p_0.png,other,8dwi6p_0
8895,targets,../datasets/badvis/labelling/preview/bt9wvk_0.png,line,bt9wvk_0


In [7]:
df_classifier_results['result'].value_counts()

bar       2878
circle    1809
other     1631
line      1167
map        838
area       299
graph      130
point      102
grid        35
box          7
text         1
Name: result, dtype: int64

In [8]:
chart_type = 'bar'
df_tagging = df_classifier_results[df_classifier_results['result'] == chart_type]
default_tag = chart_type
print(len(df_tagging))

2878


In [9]:
def make_image_box(image, width=1000, height=800, layoutArgs={}):
    #     image = widgets.Image(value=open(gallery_dir/image['labelling_path'], 'rb').read(), width=600, layout=Layout(object_position='center center', object_fit='contain'))
    image = widgets.Image(
        value=open(gallery_dir / image["labelling_path"], "rb").read(),
        width=width,
        height=height,
        layout=widgets.Layout(object_position="center center", object_fit="contain"),
    )
    boxItems = {"image": image}

    image_box_layout = widgets.Layout(justify_content="center", **layoutArgs)
    image_box_layout = widgets.Layout(
        height="500px", justify_content="center", **layoutArgs
    )
    imageBox = widgets.HBox([image], layout=image_box_layout)

    return (imageBox, boxItems)

def make_cleaning_box (image_name, default_tag=None):
    visImage = imagelabel.find_one({'image_name': image_name})
    if not visImage:
        print(image_name)
        return
    visClean = visclean.find_one({'image_name': image_name})
    
    btns_by_name = {}
    btns = []
    output = widgets.Output()
    
    btn_configs = [
        {'name': 'multipleviews', 'description': 'MultipleViews', 'tag': 'multipleviews', 'style': 'info'},
        {'name': 'infographics', 'description': 'Infographics', 'tag': 'infographics', 'style': 'info'},
        {'name': 'bar', 'description': 'Bar', 'tag': 'bar'},
        {'name': 'line', 'description': 'Line', 'tag': 'line'},
        {'name': 'circle', 'description': 'Circle', 'tag': 'circle'},
        {'name': 'map', 'description': 'Map', 'tag': 'map'},
        {'name': 'point', 'description': 'Point', 'tag': 'invalid'},
        {'name': 'area', 'description': 'Area', 'tag': 'area'},
        {'name': 'grid', 'description': 'Grid', 'tag': 'grid'},
        {'name': 'graph', 'description': 'Graph', 'tag': 'graph'},
        {'name': 'box', 'description': 'Box', 'tag': 'box'},
        {'name': 'text', 'description': 'Text', 'tag': 'text'},
        {'name': 'invalid', 'description': 'Invalid', 'tag': 'invalid', 'style': 'warning'},
    ]
    
    def update_label (tag):
        visclean.find_one_and_update({'image_id': visImage['image_id']}, {'$set': {'tag': tag}})
        
    def create_label (default_tag=None):
        visclean.insert_one({
            'image_id': visImage['image_id'],
            'image_name': visImage['image_name'],
            'labelling_path': visImage['labelling_path'],
            'phash': visImage['phash'],
            'auto': default_tag,
            'checked': True,
            "updatedAt": datetime.datetime.utcnow(),
            "createdAt": datetime.datetime.utcnow()
        })
    
    def on_click (btn_config, btn):
        btn.button_style = 'success'
        update_label(btn_config['tag'])
        with output:
            print(btn_config['description'], 'clicked')
    
    for btn_config in btn_configs:
        btn = widgets.Button(description=btn_config['description'], button_style=btn_config.get('style', ''))
        btns_by_name[btn_config['name']] = btn
        btns.append(btn)
        btn.on_click(partial(on_click, btn_config))
    
    # create_label(default_tag)
    with output:
        print('default:', default_tag)
        if visClean:
            print('Cleaning label exists')
            print('createdAt', visClean['createdAt'])
            print('updatedAt', visClean['updatedAt'])
            print('auto', visClean['auto'])
            if 'tag' in visClean: print('tag', visClean['tag'])
            
    imageBox, boxItems = make_image_box(visImage)

    return widgets.HBox([
        imageBox,
        widgets.VBox(btns),
        output
    ])

In [10]:
examined = 0
charts = df_tagging.iterrows()

In [11]:
showed = 0
for index, chart in charts:
    label = visclean.find_one({'image_name': chart['image_name']})
    if not label:
        display(make_cleaning_box(chart['image_name'], default_tag=default_tag))
        showed += 1
    examined += 1
    if showed >= 100:
        print(examined, '/', len(df_tagging_charts))
        break

kspyrw_0


None

4oy2x9_0


None

2qvicf_1


None