In [1]:
%matplotlib inline

print('Loading libraries... Please wait.')

from IPython.display import display, clear_output
import ipywidgets as widgets
import json
import random
import sys
import numpy as np
from collections import Counter
from sklearn.metrics import confusion_matrix, accuracy_score
from tqdm import tqdm

from esper.prelude import *
from esper.widget import *
import esper.face_embeddings as face_embeddings


# Style kwargs shared by the Button and IntSlider widgets created in this cell.
WIDGET_STYLE_ARGS = {'description_width': 'initial'}


def query_faces(ids):
    """Look up the faces with the given ids.

    Returns the ``values()`` queryset of ``Face`` rows whose id is in ``ids``,
    restricted to the bounding box, frame, video and shot fields listed below.
    """
    fields = (
        'id', 'bbox_y1', 'bbox_y2', 'bbox_x1', 'bbox_x2',
        'frame__number', 'frame__video__id', 'frame__video__fps',
        'shot__min_frame', 'shot__max_frame',
    )
    return Face.objects.filter(id__in=ids).values(*fields)


def query_sample(qs, n):
    """Return the first ``n`` entries of ``qs`` after ordering it by ``'?'``."""
    shuffled = qs.order_by('?')
    return shuffled[:n]


def query_faces_result(faces, expand_bbox=0.05):
    """Build the widget result payload for a sequence of face rows.

    Stands in for ``qs_to_result``. Each face becomes one 'flat' group holding
    a single element with a single bbox object. The bbox is grown by
    ``expand_bbox`` on every side and clamped to the [0, 1] range.

    The frame shown for a face is the midpoint of its shot when both shot
    bounds are present, otherwise the face's own frame number.
    """
    def _frame_for(row):
        shot_start = row.get('shot__min_frame')
        shot_end = row.get('shot__max_frame')
        if shot_start is None or shot_end is None:
            return row['frame__number']
        return int((row['shot__min_frame'] + row['shot__max_frame']) / 2)

    def _group_for(row):
        frame = _frame_for(row)
        bbox = {
            'id': row['id'],
            'background': False,
            'type': 'bbox',
            'bbox_y1': max(row['bbox_y1'] - expand_bbox, 0),
            'bbox_y2': min(row['bbox_y2'] + expand_bbox, 1),
            'bbox_x1': max(row['bbox_x1'] - expand_bbox, 0),
            'bbox_x2': min(row['bbox_x2'] + expand_bbox, 1),
        }
        element = {
            'objects': [bbox],
            'min_frame': frame,
            'video': row['frame__video__id'],
        }
        return {'type': 'flat', 'label': '', 'elements': [element]}

    groups = [_group_for(row) for row in faces]
    return {'type': 'Face', 'count': 0, 'result': groups}


def print_labeling_status():
    """Print how many entries of HAND_LABELS carry each label value.

    A value equal to 1 is reported as 'positive'; any other value is reported
    as 'negative'.
    """
    print('Current hand labels:')
    tally = Counter(HAND_LABELS.values())
    if not tally:
        print('  no hand labels...')
        return
    for score in tally:
        kind = 'positive' if score == 1 else 'negative'
        print('  {}: {}'.format(kind, tally[score]))


def compute_distance_to_positive_labels(ids):
    """Map each id in ``ids`` to its distance from the positively labeled faces.

    Positives are the HAND_LABELS entries whose score is close to 1 and for
    which ``face_embeddings.exists`` reports an embedding. When there are none,
    a notice is written to stderr and every id is mapped to 0.
    """
    ids = list(ids)
    labeled_positive = [
        face_id for face_id, score in HAND_LABELS.items() if np.isclose(score, 1)
    ]
    has_embedding = face_embeddings.exists(labeled_positive)
    pos_ids = [
        face_id for face_id, present in zip(labeled_positive, has_embedding)
        if present
    ]
    if pos_ids:
        dists = face_embeddings.dist(ids, target_ids=pos_ids)
    else:
        print('No positive labels to order by yet...', file=sys.stderr)
        dists = [0] * len(ids)
    return dict(zip(ids, dists))
        

def label_faces(n=None, sampler=None, order_by=None):
    """Render one batch of faces in the labeling widget, with action buttons.

    Args:
        n: number of faces to request from the sampler; defaults to the
            current ``faces_per_page()`` value.
        sampler: callable taking ``n`` and returning face ids; defaults to
            ``simple_random_sampler``.
        order_by: callable taking the list of face rows and returning them in
            display order; defaults to ``random_order``.

    "Save labels" stores the widget's selected faces in HAND_LABELS as 1 and
    its ignored faces as 0, then loads another batch with the same arguments.
    "Refresh" loads another batch without saving. "Dismiss" clears the output.
    """
    if n is None:
        n = faces_per_page()
    print('Loading {} faces'.format(n))

    # Use the caller's sampler when given, otherwise sample uniformly.
    pick_ids = sampler if sampler else simple_random_sampler
    face_ids = pick_ids(n)
    if len(face_ids) == 0:
        print('Sampler returned no faces.')

    faces = list(query_faces(face_ids))
    del face_ids

    arrange = order_by if order_by else random_order
    faces = arrange(faces)

    selection_widget = esper_widget(
        query_faces_result(faces),
        disable_playback=True, jupyter_keybindings=True, disable_captions=True,
        crop_bboxes=True, results_per_page=faces_per_page(),
        show_inline_metadata=False)

    def _make_button(text, color, handler):
        # All three buttons share layout and style; only text, color and
        # click handler differ.
        button = widgets.Button(
            layout=widgets.Layout(width='auto'),
            style=WIDGET_STYLE_ARGS,
            description=text,
            disabled=False,
            button_style=color
        )
        button.on_click(handler)
        return button

    def on_submit(b):
        # Read the selections before the output (and the widget) is cleared.
        yes_ids = [faces[idx]['id'] for idx in selection_widget.selected]
        no_ids = [faces[idx]['id'] for idx in selection_widget.ignored]
        clear_output()
        for face_id in yes_ids:
            HAND_LABELS[face_id] = 1
        for face_id in no_ids:
            HAND_LABELS[face_id] = 0

        print('Added {} positive and {} negative labels.'.format(
              len(yes_ids), len(no_ids)))
        print_labeling_status()
        label_faces(n, sampler, order_by)

    def on_refresh(b):
        clear_output()
        label_faces(n, sampler, order_by)

    def on_dismiss(b):
        clear_output()
        print('Dismissed widget. Re-run cell to get it back.')

    submit_button = _make_button('Save labels', 'danger', on_submit)
    refresh_button = _make_button('Refresh (w/o saving)', '', on_refresh)
    dismiss_button = _make_button('Dismiss widget (w/o saving)', '', on_dismiss)

    display(widgets.HBox([submit_button, refresh_button, dismiss_button]))
    display(selection_widget)


# Initial slider value; also the fallback returned by faces_per_page().
DEFAULT_FACES_PER_PAGE = 200
# Slider for the number of faces per widget page (25 to 250 in steps of 25).
# Its value is read through faces_per_page().
_faces_per_page_slider = widgets.IntSlider(
    value=DEFAULT_FACES_PER_PAGE,
    style=WIDGET_STYLE_ARGS,
    min=25,
    max=250,
    step=25,
    description='Faces per widget page:',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d'
)
def faces_per_page():
    """Return the number of faces per page currently set on the slider.

    Falls back to ``DEFAULT_FACES_PER_PAGE`` when the slider value cannot be
    read (for example, when the slider has not been created).
    """
    try:
        return _faces_per_page_slider.value
    except Exception:
        # Best-effort fallback. Unlike a bare ``except:``, this does not
        # swallow KeyboardInterrupt or SystemExit.
        return DEFAULT_FACES_PER_PAGE

print('Done!')

# Show the slider as this cell's output so the page size can be adjusted.
display(_faces_per_page_slider)

Loading libraries... Please wait.
Done!


IntSlider(value=200, continuous_update=False, description='Faces per widget page:', max=250, min=25, step=25, …

In [2]:
# Keep any labels already present in this kernel session; only create an
# empty dict the first time this cell runs.
if 'HAND_LABELS' not in globals():
    HAND_LABELS = {}

# Get existing hand labels

The `HAND_LABELS` variable stores all current labels in a dictionary of {face_id: score}, where the score is 0 or 1.
You can load existing hand labels from a JSON file on disk or from FaceTags in the database.

In [3]:
def load_hand_labels_json(filename):
    """Read hand labels from a JSON file, converting the keys back to ints."""
    with open(filename) as fp:
        raw = json.load(fp)
    labels = {}
    for key, score in raw.items():
        labels[int(key)] = score
    return labels

def load_hand_labels_database(labeler_name, tag_name):
    """Read hand labels from FaceTag rows for the named labeler and tag.

    Returns a dict mapping face id to the stored score.
    """
    labeler = Labeler.objects.get(name=labeler_name)
    tag = Tag.objects.get(name=tag_name)
    rows = FaceTag.objects.filter(
        labeler=labeler, tag=tag
    ).values('face__id', 'score')
    return {row['face__id']: row['score'] for row in rows}

Run the cell below to load existing progress.

In [None]:
# HAND_LABELS = load_hand_labels_json('/app/data/black.1k.random.json')

In [None]:
# LABELER_NAME = 'race:black:labeler=james'
# TAG_NAME = 'black'
# HAND_LABELS = load_hand_labels_database(LABELER_NAME, TAG_NAME)

# Sampling policy

This defines which faces are presented to the user. A sampling callback (one of the functions below) can be passed to the labeling workflow.

In [4]:
def simple_random_sampler(n):
    """Return the ``n`` face ids drawn by ``face_embeddings.sample``."""
    sampled_ids = face_embeddings.sample(n)
    return sampled_ids


def build_nn_sampler(seed_ids, k=10, stride=25):
    """Build a sampler returning faces 'close' to the seed ids.

    Args:
        seed_ids: collection of face ids to draw seeds from. Any sized
            iterable works (list, tuple, set, dict keys).
        k: maximum number of seed ids averaged on each call.
        stride: forwarded to ``face_embeddings.knn`` as ``sample``.

    Returns:
        A function ``sampler(n)`` returning the ids from
        ``face_embeddings.knn`` around the mean embedding of up to ``k``
        randomly chosen seeds.
    """
    def _nn_sampler(n):
        # random.sample() requires a sequence; sets and dict views are
        # rejected on Python 3.11+. Materializing per call also picks up
        # ids added to seed_ids after the sampler was built.
        pool = list(seed_ids)
        sample_ids = random.sample(pool, min(k, len(pool)))
        mean_emb = face_embeddings.mean(sample_ids)
        neighbors = face_embeddings.knn(targets=[mean_emb], k=n, sample=stride)
        # Each neighbor is an (id, distance) pair; only the id is returned.
        return [face_id for face_id, _dist in neighbors]
    return _nn_sampler


def build_prev_labeled_sampler(other_labeler_names, tag_name):
    """Build a sampler over faces already tagged by the named labelers.

    The returned function takes ``n`` and returns up to ``n`` face ids that
    have a FaceTag for ``tag_name`` from one of ``other_labeler_names`` and
    are not yet present in HAND_LABELS.
    """
    matching_labelers = Labeler.objects.filter(name__in=other_labeler_names)
    labeler_ids = [labeler.id for labeler in matching_labelers]
    assert len(labeler_ids) == len(other_labeler_names), 'Not all labelers are valid!'
    tag = Tag.objects.get(name=tag_name)

    def _prev_labeled_sampler(n):
        # Faces labeled in this session are skipped.
        already_labeled = list(HAND_LABELS)
        candidates = FaceTag.objects.filter(
            labeler__id__in=labeler_ids, tag=tag
        ).exclude(face__id__in=already_labeled)
        rows = candidates[:n].values('face__id')
        return [row['face__id'] for row in rows]

    return _prev_labeled_sampler

print('Loaded default samplers')

Loaded default samplers


# Ordering policy

The ordering function defines the order in which faces are presented in the labeling widget. The following cell implements various ordering callbacks that can be used.

In [5]:
def random_order(faces):
    """Shuffle ``faces`` in place and return that same list."""
    random.shuffle(faces)
    return faces


def likely_positives_first(faces):
    """Sort ``faces`` in place, nearest to the labeled positives first.

    Distances come from ``compute_distance_to_positive_labels``; a face with
    no distance entry is treated as distance 0. Returns the same list.
    """
    ids = [face['id'] for face in faces]
    dist_by_id = compute_distance_to_positive_labels(ids)

    def _distance(face):
        return dist_by_id.get(face['id'], 0)

    faces.sort(key=_distance)
    return faces


def build_other_labeler_order_func(other_labeler_names, tag_name):
    """Build an ordering function driven by other labelers' scores.

    The returned function sorts ``faces`` in place so that the faces with the
    highest summed FaceTag score (over the named labelers, for ``tag_name``)
    come first, and returns the same list. Faces without any such tag count
    as 0.
    """
    matching_labelers = Labeler.objects.filter(name__in=other_labeler_names)
    labeler_ids = [labeler.id for labeler in matching_labelers]
    assert len(labeler_ids) == len(other_labeler_names), 'Not all labelers are valid!'
    tag = Tag.objects.get(name=tag_name)

    def _other_labeler_order(faces):
        ids = [face['id'] for face in faces]
        tag_rows = FaceTag.objects.filter(
            face__id__in=ids, labeler__id__in=labeler_ids, tag=tag
        ).values('face__id', 'score')

        total_score = Counter()
        for row in tag_rows:
            total_score[row['face__id']] += row['score']

        def _descending_score(face):
            return -total_score[face['id']]

        faces.sort(key=_descending_score)
        return faces

    return _other_labeler_order

print('Loaded default order by functions')

Loaded default order by functions


# Labeling

`label_faces()` will sample faces from the dataset.

<b>Keys:</b>
 - To label a <b>yes</b> face, press '['. To label all faces on a page as <b>yes</b>, press '{' (i.e., shift + '[').
 - To label a <b>no</b> face, press ']'. To label all faces on a page as <b>no</b>, press '}' (i.e., shift + ']').
 - To expand an image, press '=', and press again to shrink it.
 - To label ALL previously unlabeled faces up to and including the current face as <b>yes</b>, press '?' (i.e., shift + '/').
 - Do not highlight if unsure.

Once you are satisfied with your selections, press <b>save labels</b> to add the labels to HAND_LABELS. Re-run this cell as needed.

Note: each bounding box is expanded by 0.05 on every side (in the same units as the stored bbox coordinates) and clamped to the [0, 1] range.

## Label randomly sampled faces. (Below)

In [15]:
# Sample faces at random and show those nearest to the labeled positives first.
label_faces(
    sampler=simple_random_sampler,
    order_by=likely_positives_first
)

Dismissed widget. Re-run cell to get it back.


## Label faces already labeled by other labelers. (Below)

In [14]:
# Labelers whose existing FaceTags are sampled and used for ordering.
LABELER_NAMES = ['race:black:labeler=james']
# Tag whose FaceTags are looked up for those labelers.
TAG_NAME = 'black'
label_faces(
    sampler=build_prev_labeled_sampler(
        LABELER_NAMES, TAG_NAME),
    order_by=build_other_labeler_order_func(
        LABELER_NAMES, TAG_NAME)
)

Dismissed widget. Re-run cell to get it back.


# Show labeled counts

In [13]:
print_labeling_status()

Current hand labels:
  no hand labels...


# Saving hand labels

In [6]:
def save_hand_labels_json(filename, overwrite=False):
    """Save HAND_LABELS to a local JSON file.

    Args:
        filename: path of the file to write.
        overwrite: when False (the default), refuse to replace an existing
            file.

    Raises:
        FileExistsError: if ``filename`` exists and ``overwrite`` is False.
            This is a subclass of ``Exception``, so handlers written for the
            previous generic exception still catch it.
    """
    # Local import: no explicit `import os` is visible in this notebook, so
    # the name would otherwise have to leak in through a star-import.
    import os

    if not overwrite and os.path.exists(filename):
        raise FileExistsError('File already exists!')
    with open(filename, 'w') as f:
        # JSON object keys are always strings, so integer face ids are
        # written as strings here.
        json.dump(HAND_LABELS, f)
    print('Saved:', filename)
    
def save_hand_labels_database(labeler_name, tag_name):
    """Save HAND_LABELS to the Esper database as FaceTag rows.

    The labeler and tag are created if they do not exist yet. For each
    (face id, score) pair in HAND_LABELS, a FaceTag for that face, labeler
    and tag is created, or its score is overwritten if one already exists.

    Args:
        labeler_name: name of the Labeler to attribute the labels to.
        tag_name: name of the Tag the labels are stored under.
    """
    labeler, created = Labeler.objects.get_or_create(name=labeler_name)
    print('Created labeler:' if created else 'Using labeler:', labeler.name)
    tag, created = Tag.objects.get_or_create(name=tag_name)
    print('Created tag:' if created else 'Using tag:', tag.name)
    new_count = 0
    update_count = 0
    for face_id, score in tqdm(HAND_LABELS.items(), desc='Saving labels'):
        face_tag, created = FaceTag.objects.get_or_create(
            face__id=face_id, labeler=labeler, tag=tag,
            # The lookup uses face__id, so the face is also given explicitly
            # for the case where a new row is created.
            defaults={'face_id': face_id, 'score': score}
        )
        if created:
            new_count += 1
        else:
            # Existing label: overwrite the stored score with the current one.
            face_tag.score = score
            face_tag.save()
            update_count += 1
    print('Saved {} new hand labels (and updated {} existing labels)'.format(
          new_count, update_count))

Run the cell below to save labels.

In [None]:
# save_hand_labels_json('/app/data/black.1k.random.json')

In [None]:
# LABELER_NAME = 'race:black:labeler=james'
# TAG_NAME = 'black'
# save_hand_labels_database(LABELER_NAME, TAG_NAME)