In [None]:
%matplotlib inline

print('Loading libraries... Please wait.')

from IPython.display import display, clear_output
import ipywidgets as widgets
import json
import random
import sys
from collections import Counter
from sklearn.metrics import confusion_matrix, accuracy_score

from esper.prelude import *
from esper.widget import *
import esper.face_embeddings as face_embeddings


# Shared `style=` argument passed to every ipywidgets control created in this
# notebook (the three action buttons and the faces-per-page slider).
WIDGET_STYLE_ARGS = {'description_width': 'initial'}


def query_faces(ids):
    """Return a values() queryset for the faces whose id is in `ids`.

    Each row is a dict holding the face id, its bounding box, the frame
    number, the video id / fps, and the min / max frame of the face's shot.
    """
    columns = (
        'id', 'bbox_y1', 'bbox_y2', 'bbox_x1', 'bbox_x2',
        'frame__number', 'frame__video__id', 'frame__video__fps',
        'shot__min_frame', 'shot__max_frame',
    )
    matching = Face.objects.filter(id__in=ids)
    return matching.values(*columns)


def query_sample(qs, n):
    """Return the first `n` rows of `qs` after ordering it by '?'."""
    reordered = qs.order_by('?')
    return reordered[:n]


def query_faces_result(faces, expand_bbox=0.05):
    """Build the payload handed to esper_widget (replaces qs_to_result).

    Args:
        faces: iterable of dict rows with the keys selected by query_faces.
        expand_bbox: amount added to every side of each bounding box; the
            result is clamped to the range [0, 1].

    Returns:
        A dict with 'type' 'Face', 'count' 0 and one 'flat' result entry per
        input face, in input order.
    """
    def _to_entry(row):
        shot_start = row.get('shot__min_frame')
        shot_end = row.get('shot__max_frame')
        if shot_start is None or shot_end is None:
            # No complete shot information: show the face's own frame.
            frame_no = row['frame__number']
        else:
            # Otherwise show the frame at the midpoint of the shot.
            frame_no = int((shot_start + shot_end) / 2)

        bbox = {
            'id': row['id'],
            'background': False,
            'type': 'bbox',
            'bbox_y1': max(row['bbox_y1'] - expand_bbox, 0),
            'bbox_y2': min(row['bbox_y2'] + expand_bbox, 1),
            'bbox_x1': max(row['bbox_x1'] - expand_bbox, 0),
            'bbox_x2': min(row['bbox_x2'] + expand_bbox, 1),
        }
        element = {
            'objects': [bbox],
            'min_frame': frame_no,
            'video': row['frame__video__id'],
        }
        return {'type': 'flat', 'label': '', 'elements': [element]}

    entries = [_to_entry(row) for row in faces]
    return {'type': 'Face', 'count': 0, 'result': entries}


def simple_random_sampler(n):
    """Default sampler: delegate to face_embeddings.sample for `n` items."""
    sampled = face_embeddings.sample(n)
    return sampled


def get_nn_sampler(seed_ids, k=10, stride=25):
    """Create a sampler that draws faces near a random subset of `seed_ids`.

    Args:
        seed_ids: sequence of face ids to seed the search with.
        k: maximum number of seed ids averaged on each call.
        stride: forwarded to face_embeddings.knn as its `sample` argument.

    Returns:
        A function taking `n`. On each call it picks up to `k` random seeds,
        averages them with face_embeddings.mean, and returns the first item
        of each pair produced by face_embeddings.knn (presumably the face
        id -- TODO confirm against face_embeddings).
    """
    def _nn_sampler(n):
        seed_count = min(k, len(seed_ids))
        chosen_seeds = random.sample(seed_ids, seed_count)
        centroid = face_embeddings.mean(chosen_seeds)
        neighbors = face_embeddings.knn(targets=[centroid], k=n, sample=stride)
        return [face_id for face_id, _ in neighbors]

    return _nn_sampler


def print_labeling_status():
    """Print how many entries of HAND_LABELS carry each label value.

    A label value of 1 is reported as 'positive'; any other value is
    reported as 'negative'.
    """
    print('Current hand labels:')
    for label, total in Counter(HAND_LABELS.values()).items():
        name = 'positive' if label == 1 else 'negative'
        print('  {}: {}'.format(name, total))


def compute_distance_to_positive_labels(ids):
    """Map each id in `ids` to its face_embeddings.dist value.

    The targets are the ids labeled 1 in HAND_LABELS, restricted to those
    for which face_embeddings.exists reports a truthy value.
    """
    ids = list(ids)
    labeled_positive = [
        face_id for face_id, label in HAND_LABELS.items() if label == 1]
    has_embedding = face_embeddings.exists(labeled_positive)
    pos_ids = [
        face_id for face_id, present in zip(labeled_positive, has_embedding)
        if present]
    distances = face_embeddings.dist(ids, target_ids=pos_ids)
    return dict(zip(ids, distances))
        

def label_random_faces(n, sampler=simple_random_sampler, order_by='random'):
    """Show a labeling widget for `n` sampled faces plus three action buttons.

    Args:
        n: number of faces requested from `sampler`.
        sampler: callable taking `n` and returning face ids.
        order_by: 'random' shuffles the faces; 'positive' sorts them in
            ascending order of the value returned by
            compute_distance_to_positive_labels. Any other value prints a
            warning to stderr and falls back to shuffling.

    "Save labels" stores 1 in HAND_LABELS for the widget's selected faces
    and 0 for its ignored faces, then loads a fresh batch. "Refresh" loads a
    fresh batch without saving. "Dismiss" only clears the output.
    """
    print('Loading {} faces'.format(n))
    faces = list(query_faces(sampler(n)))

    if order_by == 'positive':
        dist_by_id = compute_distance_to_positive_labels(
            [face['id'] for face in faces])
        faces.sort(key=lambda face: dist_by_id[face['id']])
    else:
        if order_by != 'random':
            print('Unknown sort order: use random instead', file=sys.stderr)
        random.shuffle(faces)

    selection_widget = esper_widget(
        query_faces_result(faces),
        disable_playback=True, jupyter_keybindings=True, disable_captions=True,
        crop_bboxes=True, results_per_page=faces_per_page())

    def _make_button(description, button_style, handler):
        # All three buttons share layout and style; only text, color and
        # click handler differ.
        button = widgets.Button(
            layout=widgets.Layout(width='auto'),
            style=WIDGET_STYLE_ARGS,
            description=description,
            disabled=False,
            button_style=button_style
        )
        button.on_click(handler)
        return button

    def on_submit(b):
        # Read the selection before clearing the output.
        yes_ids = [faces[idx]['id'] for idx in selection_widget.selected]
        no_ids = [faces[idx]['id'] for idx in selection_widget.ignored]
        clear_output()
        HAND_LABELS.update(dict.fromkeys(yes_ids, 1))
        HAND_LABELS.update(dict.fromkeys(no_ids, 0))

        print('Added {} positive and {} negative labels.'.format(
              len(yes_ids), len(no_ids)))
        print_labeling_status()
        label_random_faces(n, sampler, order_by)

    def on_refresh(b):
        clear_output()
        label_random_faces(n, sampler, order_by)

    def on_dismiss(b):
        clear_output()
        print('Dismissed widget. Re-run cell to get it back.')

    submit_button = _make_button('Save labels', 'danger', on_submit)
    refresh_button = _make_button('Refresh (w/o saving)', '', on_refresh)
    dismiss_button = _make_button('Dismiss widget (w/o saving)', '', on_dismiss)

    display(widgets.HBox([submit_button, refresh_button, dismiss_button]))
    display(selection_widget)


# Page size used as the slider's initial value and as the fallback in
# faces_per_page().
DEFAULT_FACES_PER_PAGE = 100

# Slider that lets the user choose how many faces each widget page shows.
_faces_per_page_slider = widgets.IntSlider(
    description='Faces per widget page:',
    style=WIDGET_STYLE_ARGS,
    value=DEFAULT_FACES_PER_PAGE,
    min=25, max=250, step=25,
    orientation='horizontal',
    readout=True, readout_format='d',
    continuous_update=False,
    disabled=False
)
def faces_per_page():
    """Return the number of faces to show per widget page.

    Reads the current value of the faces-per-page slider and falls back to
    DEFAULT_FACES_PER_PAGE if the slider cannot be read.
    """
    try:
        return _faces_per_page_slider.value
    except Exception:
        # Best-effort fallback, but unlike a bare `except:` this does not
        # swallow KeyboardInterrupt / SystemExit.
        return DEFAULT_FACES_PER_PAGE

# Signals that the imports and definitions in this cell have finished running.
print('Done!')

# Render the slider; faces_per_page() reads its current value.
display(_faces_per_page_slider)

In [None]:
# Create the label store only if it does not exist yet, so re-running this
# cell keeps the labels collected so far.
try:
    HAND_LABELS
except NameError:
    HAND_LABELS = {}

# Labeling

`label_random_faces()` will sample faces from the dataset.

<b>Keys:</b>
 - To label a <b>yes</b> face, press '['. To label all faces on a page, press '{' (i.e., shift + '[').
 - To label a <b>no</b> face, press ']'. To label all faces on a page, press '}' (i.e., shift + ']').
 - To expand an image, press '=', and press again to shrink it.
 - To label ALL previously unlabeled faces up to and including the current face as <b>yes</b>, press '?' (i.e., shift + '/').
 - Do not highlight if unsure.

Once you are satisfied with your selections, press the <b>Save labels</b> button to add the labels to `HAND_LABELS`. Re-run the code cell below as needed.

Note: the bounding boxes shown have been expanded by 0.05 on every side (in bounding-box coordinates, clamped to the range [0, 1]).


In [None]:
# Label one page of faces drawn with the default sampler.
label_random_faces(
    faces_per_page(),  # number of faces to show (exactly 1 page)
    sampler=simple_random_sampler,
    # order_by: 'random' shuffles the page; 'positive' sorts it in ascending
    # order of the value from compute_distance_to_positive_labels.
    order_by='positive'
)

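Sample some faces, but bias the sample towards positives: the sampler returns the nearest neighbors of the mean embedding of up to 10 randomly chosen positive hand labels.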

In [None]:
# Label one page of faces drawn by the nearest-neighbor sampler, seeded with
# every face currently labeled 1 in HAND_LABELS.
# NOTE(review): with no positive labels yet the seed list is empty -- confirm
# how face_embeddings.mean / knn behave in that case.
label_random_faces(
    faces_per_page(),  # number of faces to show (exactly 1 page)
    sampler=get_nn_sampler(
        [a for a, b in HAND_LABELS.items() if b == 1],
        stride=10
    ),
    order_by='positive'
)

# Show labeled counts

In [None]:
# Print the number of positive and negative hand labels collected so far.
print_labeling_status()

# Saving labels

In [None]:
def save_hand_labels(filename, overwrite=False):
    """Write HAND_LABELS to `filename` as JSON.

    Args:
        filename: path of the file to write.
        overwrite: when False (the default), refuse to replace an existing
            file; when True, truncate and rewrite it.

    Raises:
        FileExistsError: if `filename` already exists and `overwrite` is
            False. This is a subclass of Exception, so callers catching
            Exception keep working.
    """
    # Exclusive-create mode ('x') makes "check that the file is absent" and
    # "create it" a single step, so there is no window between an existence
    # check and the open, and no dependency on `os` being in scope.
    mode = 'w' if overwrite else 'x'
    try:
        with open(filename, mode) as f:
            json.dump(HAND_LABELS, f)
    except FileExistsError:
        raise FileExistsError('File already exists!') from None
    print('Saved:', filename)

def load_hand_labels(filename):
    """Load hand labels from a JSON file.

    JSON object keys are always strings, so integer face ids written with
    json.dump come back as str. Keys that parse as integers are converted
    back to int so the result can be used interchangeably with labels keyed
    by integer face id; any other key is kept unchanged.

    Args:
        filename: path of the JSON file to read.

    Returns:
        The decoded labels. If the file holds a JSON object, a dict with
        integer-looking keys restored to int; otherwise the decoded value
        as-is.
    """
    def _restore_id(key):
        try:
            return int(key)
        except (TypeError, ValueError):
            return key

    with open(filename) as f:
        labels = json.load(f)
    if not isinstance(labels, dict):
        return labels
    return {_restore_id(face_id): label for face_id, label in labels.items()}

In [None]:
# Persist the current labels. Raises if the file already exists; pass
# overwrite=True to replace it.
save_hand_labels('/app/data/black.250.random.json')