In [1]:
from collections import Counter, defaultdict
from tqdm import tqdm

import esper.face_embeddings as face_embed
from esper.widget import esper_widget, qs_to_result

# --- Tunable parameters ------------------------------------------------------
DEBUG = False          # print per-video diagnostics inside process_video
PROB_THRESH = 0.9      # source identities must have probability above this
MIN_LABEL_THRESH = 5   # a name must occur more than this many times in a video
L2_THRESH = 0.5        # embedding distance below which a label is propagated

# --- Videos to process -------------------------------------------------------
# For a quick single-video run, swap the queryset for an explicit list, e.g.
#   videos = [Video.objects.get(
#       path='tvnews/videos/MSNBC_20110110_150000_Jansing_and_Co..mp4')]
videos = Video.objects.all()

# --- Labelers and frame sampler ----------------------------------------------
# Source labeler whose identities are propagated.
rekognition_labeler = Labeler.objects.get(name='face-identity-rekognition')

# Destination labeler; its name records the distance threshold in use.
rekognition_labeler_augmented, created_labeler = Labeler.objects.get_or_create(
    name='face-identity-rekognition:augmented-l2-dist={}'.format(L2_THRESH))
if created_labeler:
    print('Created labeler:', rekognition_labeler_augmented.name)

sampler_3s = FrameSampler.objects.get(name='3s')

def process_video(video):
    """Propagate Rekognition face identities to unlabeled faces of one video.

    Steps, all restricted to frames of ``video`` sampled by ``sampler_3s``:

    1. Load identities from ``rekognition_labeler`` whose probability exceeds
       ``PROB_THRESH``.
    2. Keep only names that occur more than ``MIN_LABEL_THRESH`` times.
    3. For every face that has an embedding but no such label, count the
       labeled faces lying within ``L2_THRESH`` of it, grouped by identity.
    4. Assign the identity with the most votes and store it as a
       ``FaceIdentity`` under ``rekognition_labeler_augmented`` with
       ``probability=0.5``.

    Args:
        video: the Video whose faces should be processed.

    Returns:
        None. New labels, if any, are written with a single ``bulk_create``.
    """
    if DEBUG: print('Video:', video.path)

    # Always check for existing augmented labels. The previous version skipped
    # this check while ``created_labeler`` was True; that flag stays True for
    # the lifetime of the kernel, so re-running the driver loop after an
    # interruption inserted duplicate labels for already-processed videos.
    if FaceIdentity.objects.filter(
        labeler=rekognition_labeler_augmented,
        face__frame__sampler=sampler_3s,
        face__frame__video=video
    ).exists():
        return

    face_idents = list(FaceIdentity.objects.filter(
        face__frame__video=video, face__frame__sampler=sampler_3s,
        labeler=rekognition_labeler, probability__gt=PROB_THRESH
    ).select_related())
    if DEBUG: print('Found {} face identities'.format(len(face_idents)))

    if len(face_idents) == 0:
        return

    all_faces = list(Face.objects.filter(
        frame__video=video, frame__sampler=sampler_3s
    ).select_related())
    if DEBUG: print('Total {} faces'.format(len(all_faces)))

    # Only faces with a stored embedding can take part in distance queries.
    all_faces_ids = [x.id for x in all_faces]
    face_ids_with_embs = {
        i for i, ex in
        zip(all_faces_ids, face_embed.exists(all_faces_ids)) if ex
    }

    # Names seen often enough in this video to be worth propagating.
    counts = Counter(fi.identity.name for fi in face_idents)
    names_to_prop = {
        name for name, count in counts.items() if count > MIN_LABEL_THRESH
    }
    if DEBUG: print('Propagating {} names'.format(len(names_to_prop)))

    # face id -> identity id for the trusted seed labels.
    labeled_ids = {
        fi.face.id : fi.identity.id for fi in face_idents
        if fi.face.id in face_ids_with_embs and fi.identity.name in names_to_prop
    }
    unlabeled_ids = [
        f.id for f in all_faces
        if f.id in face_ids_with_embs and f.id not in labeled_ids
    ]
    if len(labeled_ids) == 0 or len(unlabeled_ids) == 0:
        return

    # Filter out unlabeled ids with no candidates.
    # NOTE(review): presumably face_embed.dist yields, per queried id, the
    # smallest distance to any of target_ids -- confirm against its API.
    unlabeled_ids_with_candidates = [
        face_id for face_id, dist in zip(
            unlabeled_ids,
            face_embed.dist(unlabeled_ids, target_ids=list(labeled_ids))
        )
        if dist < L2_THRESH
    ]

    # unlabeled face id -> Counter(identity id -> number of close seed faces)
    new_labels = defaultdict(Counter)
    for seed_face_id, identity_id in labeled_ids.items():
        for face_id, dist in zip(
            unlabeled_ids_with_candidates,
            face_embed.dist(unlabeled_ids_with_candidates, target_ids=[seed_face_id])
        ):
            if dist < L2_THRESH:
                new_labels[face_id][identity_id] += 1

    if DEBUG:
        # Only needed for the diagnostic message, so build it lazily.
        still_unlabeled = [i for i in unlabeled_ids if i not in new_labels]
        print('Found {} labels, {} are still unlabeled'.format(
              len(new_labels), len(still_unlabeled)))

    new_identity_labels = []
    num_conflicts = 0
    for face_id, identity_counter in new_labels.items():
        # More than one identity voted for this face; majority wins below.
        if len(identity_counter) > 1:
            num_conflicts += 1
        identity_id = identity_counter.most_common(1)[0][0]
        new_identity_labels.append(FaceIdentity(
            labeler=rekognition_labeler_augmented, face_id=face_id,
            probability=0.5, identity_id=identity_id
        ))
    if DEBUG and num_conflicts > 0:
        print('Found {} conflicts in {}'.format(num_conflicts, video.path))
    if len(new_identity_labels) > 0:
        FaceIdentity.objects.bulk_create(new_identity_labels)

Created labeler: face-identity-rekognition:augmented-l2-dist=0.5


In [2]:
# Run label propagation one video at a time; tqdm reports overall progress.
videos_with_progress = tqdm(videos)
for video in videos_with_progress:
    process_video(video)

100%|███████████████████████████████████████████████████| 257031/257031 [28:20:28<00:00, 75.31it/s]


In [None]:
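# NOTE: stale debugging snippet. `new_labels` is a local variable of
# process_video and is not defined at notebook scope; its counters are keyed
# by identity id, not by name, so the name test below would never match.
# esper_widget(qs_to_result(Face.objects.filter(id__in=[
#         l for l in new_labels if 'michael grimm' in new_labels[l]
#     ]), limit=200),
#     jupyter_keybindings=True)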

In [None]:
# new_labels