<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc" style="margin-top: 1em;"><ul class="toc-item"><li><span><a href="#James-Comey-(static-images)" data-toc-modified-id="James-Comey-(static-images)-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>James Comey (static images)</a></span></li><li><span><a href="#James-Comey-(live-widget)" data-toc-modified-id="James-Comey-(live-widget)-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>James Comey (live widget)</a></span></li></ul></div>

In [None]:
from esper.prelude import *
from esper.widget import *
from esper.plot_util import *
from query.models import *
from collections import defaultdict
from datetime import datetime
import random

In [None]:
def get_shots_with_two_named_people(person1, person2, identity_threshold=0.8):
    """Return the ids of shots in which both named people are identified.

    A person counts as present in a shot when a FaceIdentity row whose
    identity name equals the lower-cased name, with probability strictly
    above ``identity_threshold``, references a face belonging to that shot.

    Returns:
        set of shot ids common to both people (rows without a shot id are
        ignored).
    """
    shot_ids_per_person = []
    for name in (person1, person2):
        rows = FaceIdentity.objects.filter(
            identity__name=name.lower(), probability__gt=identity_threshold
        ).values('face__shot__id')
        shot_ids_per_person.append({
            row['face__shot__id'] for row in rows
            if row['face__shot__id'] is not None
        })
    first_person_shots, second_person_shots = shot_ids_per_person
    return first_person_shots & second_person_shots

def montage_shots_by_time(shot_ids, agg_fn, n_per_time, n_cols):
    """Build one tiled montage of shot frames per time bucket.

    Args:
        shot_ids: iterable of Shot ids to include.
        agg_fn: maps a video's ``time`` to a hashable bucket key (for
            example the first day of its month).
        n_per_time: maximum number of frames tiled per bucket.
        n_cols: number of columns passed to ``tile_images``.

    Returns:
        dict mapping bucket key -> tuple of
        (montage image, shots shown in the montage, number of shots in the
        bucket, total duration of the bucket's shots in seconds).
    """
    ordered_shots = list(Shot.objects.filter(
        id__in=list(shot_ids)
    ).select_related('video').order_by('video__time'))

    # Bucket the shots by aggregated video time; each bucket keeps the
    # query's ordering by video time.
    time_to_shots = defaultdict(list)
    for shot in ordered_shots:
        time_to_shots[agg_fn(shot.video.time)].append(shot)

    def choose_best_images(imgs, shots, n, resample=(30, 20)):
        """Greedily pick up to ``n`` mutually dissimilar images.

        Starts from the first image and repeatedly adds the candidate whose
        distance to its nearest already-chosen image is largest, comparing
        down-sampled thumbnails. Returns (chosen images, matching shots).
        """
        if len(imgs) <= n:
            return imgs, shots

        # Thumbnails are cast to float before differencing. Frames are
        # presumably uint8 (TODO confirm against load_frame); subtracting
        # unsigned integer arrays wraps around modulo 256, which silently
        # corrupts the distances used to rank candidates.
        candidates = [
            (im, cv2.resize(im, resample).astype(np.float64), shot)
            for im, shot in zip(imgs, shots)
        ]
        sample = [candidates.pop(0)]
        while len(sample) < n:
            max_min_dist = None
            next_sample_idx = None
            for i, cand in enumerate(candidates):
                # Distance from this candidate to the closest chosen image.
                min_dist = min(np.linalg.norm(s[1] - cand[1]) for s in sample)
                if max_min_dist is None or min_dist > max_min_dist:
                    max_min_dist = min_dist
                    next_sample_idx = i
            sample.append(candidates.pop(next_sample_idx))
        return [s[0] for s in sample], [s[2] for s in sample]

    def tile_helper(bucket_shots):
        """Load, normalise and tile the middle frames of one bucket's shots."""
        imgs = [
            load_frame(shot.video, int((shot.max_frame + shot.min_frame) / 2), [])
            for shot in bucket_shots
        ]
        # Resize every frame to the first frame's (width, height) so the
        # frames can be compared and tiled together.
        shape = (imgs[0].shape[1], imgs[0].shape[0])
        imgs = [cv2.resize(img, shape) for img in imgs]
        imgs, img_shots = choose_best_images(imgs, bucket_shots, n_per_time)
        return (
            tile_images(imgs, cols=n_cols, blank_value=255),
            img_shots,
            len(bucket_shots),
            sum((s.max_frame - s.min_frame) / s.video.fps for s in bucket_shots)
        )

    times = list(time_to_shots.keys())
    shots_per_time = [time_to_shots[t] for t in times]
    images = par_for(tile_helper, shots_per_time)
    return dict(zip(times, images))

def compare_by_time(person, hosts, agg_fn, n_per_time=20, n_cols=5):
    """Print and display, per time bucket, montages of `person` with each host.

    For every host, the shots in which both `person` and the host are
    identified are bucketed with `agg_fn` and turned into one montage per
    bucket. Buckets are then visited in sorted order; for each host that has
    shots in the bucket, a summary line is printed and the montage is shown.
    """
    montages_by_host = {}
    for host in hosts:
        print('Searching for {} and {}'.format(host, person))
        joint_shot_ids = get_shots_with_two_named_people(person, host)
        montages_by_host[host] = montage_shots_by_time(
            joint_shot_ids, agg_fn, n_per_time, n_cols
        )

    # Every bucket key that occurs for at least one host.
    all_times = {
        t for host_montages in montages_by_host.values() for t in host_montages
    }

    for t in sorted(all_times):
        print(t)
        for host in hosts:
            host_montages = montages_by_host[host]
            if t not in host_montages:
                continue
            im, im_shots, num_shots, seconds = host_montages[t]
            shown_ids = [s.id for s in im_shots]
            print('{} - {} shots - {:0.2f} min - Showing shots: {}'.format(
                host, num_shots, seconds / 60, shown_ids))
            imshow(im)
            plt.show()
                
def agg_by_day(t):
    """Truncate `t` to midnight of its day (returns a naive datetime)."""
    return datetime(t.year, t.month, t.day)


def agg_by_month(t):
    """Truncate `t` to the first day of its month (returns a naive datetime)."""
    return datetime(t.year, t.month, 1)


def agg_by_year(t):
    """Truncate `t` to January 1st of its year (returns a naive datetime)."""
    return datetime(t.year, 1, 1)

In [None]:
# Names of the hosts whose shots are compared below.
hosts = [
    "Sean Hannity",
    "Bill O'Reilly",
    "Tucker Carlson",
    "Rachel Maddow",
    "Chris Matthews",
    "Wolf Blitzer",
    "Lawrence O'Donnell",
]

# James Comey (static images)

In [None]:
# Show month-by-month montages of the shots in which James Comey is
# identified together with each host in `hosts`.
compare_by_time('James Comey', hosts, agg_by_month)

# James Comey (live widget)

In [None]:
def get_shots_with_person_and_hosts(person, hosts, identity_threshold=0.8, filter_kwargs=None):
    """Return ids of shots containing `person` and at least one of `hosts`.

    Args:
        person: name of the person; matched lower-cased.
        hosts: iterable of host names; matched lower-cased, any one suffices.
        identity_threshold: a FaceIdentity row counts only when its
            probability is strictly greater than this value.
        filter_kwargs: optional extra keyword filters applied to both
            FaceIdentity queries. ``None`` (the default) means no extra
            filters.

    Returns:
        list of shot ids (rows without a shot id are ignored).
    """
    # Copy instead of using a shared mutable default, and accept None.
    extra_filters = dict(filter_kwargs) if filter_kwargs else {}

    def shot_ids_matching(**name_filter):
        # Shot ids of faces matching the name filter above the threshold.
        rows = FaceIdentity.objects.filter(
            probability__gt=identity_threshold, **name_filter, **extra_filters
        ).values('face__shot__id')
        return {
            row['face__shot__id'] for row in rows
            if row['face__shot__id'] is not None
        }

    shots_with_person = shot_ids_matching(identity__name=person.lower())
    shots_with_hosts = shot_ids_matching(
        identity__name__in=[h.lower() for h in hosts]
    )
    return list(shots_with_person & shots_with_hosts)

def subsample_by_video(shots, n, seed=None):
    """Keep at most `n` shots per video.

    Args:
        shots: shot ids to choose from.
        n: maximum number of shot ids kept for any single video.
        seed: optional seed. When given, sampling uses a private
            ``random.Random(seed)`` so the same inputs always yield the same
            sample; when ``None`` the module-level ``random`` generator is
            used.

    Returns:
        list of shot ids, grouped by ascending video id. Videos with at most
        `n` shots contribute all their shots in ascending id order; larger
        ones contribute a random sample of size `n`.
    """
    rng = random if seed is None else random.Random(seed)

    video_id_to_shots = defaultdict(list)
    for row in Shot.objects.filter(id__in=shots).values('id', 'video__id'):
        video_id_to_shots[row['video__id']].append(row['id'])

    sample = []
    for video_id in sorted(video_id_to_shots):
        # Sort so the sampler's input does not depend on the (unspecified)
        # row order returned by the query.
        video_shots = sorted(video_id_to_shots[video_id])
        if len(video_shots) <= n:
            sample.extend(video_shots)
        else:
            sample.extend(rng.sample(video_shots, n))
    return sample

def reorder_shots(shots):
    """Return the given shot ids ordered by canonical show name, then video time."""
    ordered_shots = Shot.objects.filter(id__in=shots).order_by(
        'video__show__canonical_show__name', 'video__time'
    )
    ordered_ids = []
    for shot in ordered_shots:
        ordered_ids.append(shot.id)
    return ordered_ids

# Candidate shots: James Comey together with any of the hosts, capped at
# 3 shots per video, ordered by canonical show name and then video time.
candidate_shots = reorder_shots(
    subsample_by_video(get_shots_with_person_and_hosts('James Comey', hosts), 3)
)

# Show the candidates in the same order as `candidate_shots`, so that the
# positions selected in the widget can be used to index that list.
widget = esper_widget(
    qs_to_result(
        Shot.objects.filter(id__in=candidate_shots),
        custom_order_by_id=candidate_shots,
        limit=len(candidate_shots),
    )
)
print('Select shots to include in the montage.')
widget

In [None]:
# Translate the positions selected in the widget back into shot ids
# (each entry of `widget.selected` is used as an index into `candidate_shots`).
selected_shots = [candidate_shots[i] for i in widget.selected]

In [None]:
def group_shots_by_identity(shots, names, identity_threshold=None):
    """Group shot ids by which of the named identities appear in them.

    Args:
        shots: shot ids to consider.
        names: identity names to look for; matched lower-cased.
        identity_threshold: optional. When given, only FaceIdentity rows with
            probability strictly greater than this value are counted. The
            default ``None`` applies no probability filter.

    Returns:
        defaultdict(list) mapping each identity name (as returned by the
        query) to the shot ids it appears in. Each shot id is listed at most
        once per identity, in first-seen order, even when several
        FaceIdentity rows match the same shot.
    """
    filters = {
        'identity__name__in': [name.lower() for name in names],
        'face__shot__id__in': shots,
    }
    if identity_threshold is not None:
        filters['probability__gt'] = identity_threshold

    identity_to_shots = defaultdict(list)
    seen = set()
    rows = FaceIdentity.objects.filter(**filters).values(
        'face__shot__id', 'identity__name'
    )
    for row in rows:
        key = (row['identity__name'], row['face__shot__id'])
        # Several rows can match the same (identity, shot) pair; keep the
        # pair once so list lengths reflect distinct shots.
        if key in seen:
            continue
        seen.add(key)
        identity_to_shots[row['identity__name']].append(row['face__shot__id'])
    return identity_to_shots

# One labelled result group per host, containing the selected shots in which
# that host is identified.
results = []
for name, shots in group_shots_by_identity(selected_shots, hosts).items():
    host_shots_qs = Shot.objects.filter(id__in=shots)
    host_result = qs_to_result(host_shots_qs, limit=len(shots))
    results.append((name, host_result))
esper_widget(group_results(results))

In [None]:
# Print the ids of the shots chosen in the widget.
print(selected_shots)

In [None]:
def montage_shots(shots, cols=5):
    """Tile the middle frame of every given shot into one image.

    Shots are taken in order of video time. Every frame is resized to the
    first frame's dimensions before tiling into `cols` columns.
    """
    frames = []
    for shot in Shot.objects.filter(id__in=shots).order_by('video__time'):
        middle_frame = int((shot.max_frame + shot.min_frame) / 2)
        frames.append(load_frame(shot.video, middle_frame, []))

    # cv2.resize takes (width, height), i.e. the reverse of shape[:2].
    target_size = (frames[0].shape[1], frames[0].shape[0])
    resized_frames = [cv2.resize(frame, target_size) for frame in frames]
    return tile_images(resized_frames, cols=cols, blank_value=255)
# Display one montage per host, built from the selected shots in which that
# host is identified.
for name, shots in group_shots_by_identity(selected_shots, hosts).items():
    print(name)
    host_montage = montage_shots(shots)
    imshow(host_montage)
    plt.show()

In [None]:
# Group a hard-coded list of shot ids by host. The ids are presumably a saved
# copy of an earlier `selected_shots` printout — TODO confirm.
group_shots_by_identity([45188880, 48495239, 10806744, 60303840, 41194177, 44591079, 29310912, 37307674, 22151574, 44746669, 6062987, 584292, 28005192, 584307, 44419012, 27039495, 47459051, 45164767, 49952789, 59278225, 59278241, 41072250, 28088277, 58423117, 32191124, 19482668, 5848914, 11292838, 54541370, 8589521, 50722074, 22458768, 25510239, 10714080, 10713245, 2526491, 113621, 113168, 57554026, 52691455, 33194816, 1678041, 21838447, 23198934, 44426599, 157794], hosts)