# Interview Query

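In this notebook, we develop a query that finds Bernie Sanders interviews from scratch on a dev set of videos, measure its precision/recall/F1 against the labeled interviews, and then run the same query on a held-out test set.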

In [1]:
# Video ids used while developing the query.
dev_set = [
    38275, 42756, 52945, 34642, 19959, 37170,
    55711, 45698, 20380, 3952, 20450, 52749,
    13927, 16215, 57384, 8859, 41725, 10323,
    33541, 38420, 23184, 19882, 17458, 34359,
]
# Video ids the finished query is evaluated on at the end of the notebook.
test_set = [
    54377, 26386, 5281, 763, 9499, 24847,
    13247, 29001, 9480, 9215, 27188, 13058,
    32996, 6185, 36755, 13993, 4143, 3730,
    15916, 529, 11579, 48140, 41480, 16693,
]

In [2]:
from app.models import Video, Face, FaceIdentity, LabeledInterview
from rekall import Interval, IntervalSet, IntervalSetMapping, Bounds3D
from rekall.stdlib import ingest
from rekall.predicates import *
# from app.captions import *
import math
import numpy as np
import random

In [3]:
from esperlib.widget import vgrid_widget
from vgrid import VideoBlockFormat
from app.models import Video

# Sanity check: render a single dev-set video in the grid widget.
video_meta = [
    Video.objects.filter(id=38275).all()[0].for_vgrid(),
]
vgrid_widget(video_meta=video_meta, vis_format=VideoBlockFormat(video_meta=video_meta))

VGridWidget(vgrid_spec={'interval_blocks': [{'video_id': 38275, 'interval_sets': [{'interval_set': [{'bounds':…

# Load Interviews

In [4]:
def get_fps_map(vids):
    """Return a ``{video id: fps}`` dict for the ``Video`` rows whose id is in ``vids``."""
    fps_by_video = {}
    for video in Video.objects.filter(id__in=vids):
        fps_by_video[video.id] = video.fps
    return fps_by_video

def frame_second_conversion(c, mode='f2s'):
    """Convert the ``t1``/``t2`` bounds of every interval in ``c`` between frames and seconds.

    Args:
        c: Collection exposing ``get_grouped_intervals()``, which returns a
            mapping from video id to an interval set supporting ``map``.
        mode: ``'f2s'`` divides ``t1``/``t2`` by the video's fps (frames to
            seconds); ``'s2f'`` multiplies them by it (seconds to frames).

    Returns:
        An ``IntervalSetMapping`` over the same video ids whose intervals are
        copies of the originals with rescaled ``t1``/``t2``. Both directions
        truncate the result with ``int()``.

    Raises:
        ValueError: if ``mode`` is neither ``'f2s'`` nor ``'s2f'``.
    """
    # Validate before touching the database so a typo fails fast and clearly
    # (previously an unknown mode surfaced as an UnboundLocalError on `fn`).
    if mode not in ('f2s', 's2f'):
        raise ValueError(
            "mode must be 'f2s' or 's2f', got {!r}".format(mode))

    grouped = c.get_grouped_intervals()
    fps_map = get_fps_map(set(grouped.keys()))

    def second_to_frame(fps):
        def map_fn(intrvl):
            # Copy both the interval and its bounds so the input is left untouched.
            i2 = intrvl.copy()
            curr_bounds = intrvl['bounds'].copy()
            curr_bounds['t1'] = int(curr_bounds['t1'] * fps)
            curr_bounds['t2'] = int(curr_bounds['t2'] * fps)
            i2['bounds'] = curr_bounds
            return i2
        return map_fn

    def frame_to_second(fps):
        def map_fn(intrvl):
            # Copy both the interval and its bounds so the input is left untouched.
            i2 = intrvl.copy()
            curr_bounds = intrvl['bounds'].copy()
            curr_bounds['t1'] = int(curr_bounds['t1'] / fps)
            curr_bounds['t2'] = int(curr_bounds['t2'] / fps)
            i2['bounds'] = curr_bounds
            return i2
        return map_fn

    fn = frame_to_second if mode == 'f2s' else second_to_frame

    # Each video is converted with its own frame rate.
    output = {
        vid: intervals.map(fn(fps_map[vid]))
        for vid, intervals in grouped.items()
    }
    return IntervalSetMapping(output)

def frame_to_second_collection(c):
    """Shorthand for ``frame_second_conversion`` in frames-to-seconds (``'f2s'``) mode."""
    converted = frame_second_conversion(c, 'f2s')
    return converted

def second_to_frame_collection(c):
    """Shorthand for ``frame_second_conversion`` in seconds-to-frames (``'s2f'``) mode."""
    converted = frame_second_conversion(c, 's2f')
    return converted

In [5]:
# Hand-labeled interviews for every dev-set video.
gt_interviews_all = LabeledInterview.objects.filter(video_id__in=dev_set).all()

In [6]:
# Turn the labeled rows into intervals spanning `start`..`end`; the payload
# carries the guest/interviewer fields the later filters rely on.
interviews_ism = ingest.ism_from_django_qs(
    gt_interviews_all,
    bounds_schema={'t1': 'start', 't2': 'end'},
    with_payload=lambda row: {
        field: getattr(row, field)
        for field in ('original', 'guest1', 'guest2', 'interviewer1', 'interviewer2')
    },
)

In [7]:
# Ground truth for the query: guest1 is Bernie Sanders, there is no second
# guest, and the `original` flag is set.
bernie_interviews = interviews_ism.filter(payload_satisfies(
    lambda payload: (
        payload['guest1'] == 'bernie sanders'
        and payload['guest2'] is None
        and payload['original']
    )
))

In [8]:
# Widget metadata for every dev-set video, in dev_set order.
video_meta = [Video.objects.get(id=video_id).for_vgrid() for video_id in dev_set]

In [9]:
# Check that the metadata list follows dev_set order (the first id should be 38275).
video_meta[0].id

38275

In [10]:
# One interval per video covering 0 .. num_frames / fps, i.e. its whole duration in seconds.
all_videos = IntervalSetMapping({
    meta.id: IntervalSet([
        Interval(Bounds3D(0, meta.num_frames / meta.fps)),
    ])
    for meta in video_meta
})

In [11]:
# Show every labeled interview next to the single-guest Bernie Sanders ones.
# `video_meta` and `bernie_interviews` were already built from `dev_set` and
# `interviews_ism` in earlier cells, so reuse them rather than issuing one
# query per video and re-deriving the same filter.
vgrid_widget(
    video_meta=video_meta,
    vis_format=VideoBlockFormat(imaps=[
        ('all interviews', interviews_ism),
        ('bernie interviews', bernie_interviews),
    ])
)

VGridWidget(vgrid_spec={'interval_blocks': [{'video_id': 45698, 'interval_sets': [{'interval_set': [{'bounds':…

In [12]:
# Faces identified as Bernie Sanders in the dev videos. Each face is a
# single-frame interval (min_frame == max_frame) with its bounding box and
# the video's fps attached.
bernie_qs = (
    FaceIdentity.objects
    .filter(
        identity__name='bernie sanders',
        face__frame__video_id__in=dev_set,
    )
    .annotate(
        min_frame=F('face__frame__number'),
        max_frame=F('face__frame__number'),
        video_id=F('face__frame__video_id'),
        bbox_x1=F('face__bbox_x1'),
        bbox_y1=F('face__bbox_y1'),
        bbox_x2=F('face__bbox_x2'),
        bbox_y2=F('face__bbox_y2'),
        fps=F('face__frame__video__fps'),
    )
)

In [13]:
# Takes about 20 seconds to run!
# Keep only faces on frames whose number is a multiple of floor(3 * fps);
# the fps is carried in the interval payload.
bernie_intervals = ingest.ism_from_django_qs(
    bernie_qs,
    bounds_schema={
        't1': 'min_frame',
        't2': 'max_frame',
        'x1': 'bbox_x1',
        'x2': 'bbox_x2',
        'y1': 'bbox_y1',
        'y2': 'bbox_y2',
        'payload': 'fps',
    },
).filter(
    lambda face: (face['t1'] % math.floor(face['payload'] * 3)) == 0
)

In [14]:
# Faces flagged as hosts in the dev videos. Unlike the Bernie faces, each
# interval spans the min/max frame of the face's shot.
hosts_qs = (
    FaceIdentity.objects
    .filter(
        face__frame__video_id__in=dev_set,
        face__is_host=True,
    )
    .annotate(
        min_frame=F('face__shot__min_frame'),
        max_frame=F('face__shot__max_frame'),
        video_id=F('face__frame__video_id'),
        bbox_x1=F('face__bbox_x1'),
        bbox_y1=F('face__bbox_y1'),
        bbox_x2=F('face__bbox_x2'),
        bbox_y2=F('face__bbox_y2'),
        fps=F('face__frame__video__fps'),
    )
)

In [15]:
# Takes about 4 and a half minutes to run!
# Host intervals are kept in full; the every-floor(3 * fps)-frames filter
# applied to the Bernie faces is not applied here.
host_intervals = ingest.ism_from_django_qs(
    hosts_qs,
    bounds_schema={
        't1': 'min_frame',
        't2': 'max_frame',
        'x1': 'bbox_x1',
        'x2': 'bbox_x2',
        'y1': 'bbox_y1',
        'y2': 'bbox_y2',
        'payload': 'fps',
    },
)

In [16]:
# Compare the raw Bernie/host face intervals with the labeled Bernie interviews.
# `video_meta` and `bernie_interviews` were already built from `dev_set` and
# `interviews_ism` in earlier cells, so reuse them rather than issuing one
# query per video and re-deriving the same filter.
vgrid_widget(
    video_meta=video_meta,
    vis_format=VideoBlockFormat(
        imaps=[
            ('all', all_videos),
            ('bernie interviews', bernie_interviews),
            ('bernie', frame_to_second_collection(bernie_intervals).dilate(1.5)),
            ('host', frame_to_second_collection(host_intervals).dilate(1.5)),
        ]
    )
)

VGridWidget(vgrid_spec={'interval_blocks': [{'video_id': 45698, 'interval_sets': [{'interval_set': [{'bounds':…

# Develop Query

In [17]:
# Pair every Bernie face with the host intervals it overlaps (window=0).
# The merged interval takes its bounds from
# Bounds3D.intersect_time_span_space and keeps the Bernie interval as payload.
bernie_with_host = bernie_intervals.join(
    host_intervals,
    predicate=overlaps(),
    merge_op=lambda bernie, host: Interval(
        Bounds3D.intersect_time_span_space(bernie['bounds'], host['bounds']),
        bernie,
    ),
    window=0,
)

In [18]:
# Inspect the "Bernie with host" join against the labeled Bernie interviews.
# `video_meta` and `bernie_interviews` were already built from `dev_set` and
# `interviews_ism` in earlier cells, so reuse them rather than issuing one
# query per video and re-deriving the same filter.
vgrid_widget(
    video_meta=video_meta,
    vis_format=VideoBlockFormat(
        imaps=[
            ('bernie with host', frame_to_second_collection(bernie_with_host)),
            ('bernie interviews', bernie_interviews),
            ('bernie', frame_to_second_collection(bernie_intervals).dilate(1.5)),
            ('host', frame_to_second_collection(host_intervals).dilate(1.5)),
        ]
    )
)

VGridWidget(vgrid_spec={'interval_blocks': [{'video_id': 52945, 'interval_sets': [{'interval_set': [{'bounds':…

In [19]:
# "Bernie with host" intervals converted from frames to seconds, then dilated by 1.5.
bernie_with_host_seconds = frame_to_second_collection(bernie_with_host).dilate(1.5)

In [20]:
# Bernie on screen (in seconds, dilated by 1.5) minus the time he shares with a host.
bernie_alone_seconds = (
    frame_to_second_collection(bernie_intervals)
    .dilate(1.5)
    .minus(bernie_with_host_seconds)
)

In [21]:
# Inspect "Bernie alone" next to "Bernie with host" and the raw face intervals.
# `video_meta` was already built from `dev_set` in an earlier cell, so reuse
# it rather than issuing one query per video again.
vgrid_widget(
    video_meta=video_meta,
    vis_format=VideoBlockFormat(
        imaps=[
            ('bernie with host', frame_to_second_collection(bernie_with_host)),
            ('bernie alone', bernie_alone_seconds),
            ('bernie', frame_to_second_collection(bernie_intervals).dilate(1.5)),
            ('host', frame_to_second_collection(host_intervals).dilate(1.5)),
        ]
    )
)

VGridWidget(vgrid_spec={'interval_blocks': [{'video_id': 52945, 'interval_sets': [{'interval_set': [{'bounds':…

In [22]:
# Build the interview candidates (inputs are in seconds):
#   1. coalesce the "Bernie with host" intervals after dilate(5), then undo the
#      dilation with dilate(-5) -- presumably this bridges nearby intervals;
#      confirm against the rekall docs;
#   2. join them with the "Bernie alone" intervals (coalesced the same way) that
#      overlap or lie within max_dist=5 before/after, keeping the span of each pair;
#   3. union that with the coalesced "Bernie with host" intervals, coalesce
#      once more, and keep only results passing filter_size(min_size=60).
interview_candidates = bernie_with_host_seconds.dilate(5).coalesce(
    ('t1', 't2'),
    Bounds3D.span
).dilate(-5).join(
    bernie_alone_seconds.dilate(5).coalesce(
        ('t1', 't2'),
        Bounds3D.span
    ).dilate(-5),
    predicate = or_pred(
        before(max_dist = 5),
        after(max_dist = 5),
        overlaps()
    ),
    # The payload is dropped here: only the spanned bounds are kept.
    merge_op = lambda i1, i2: Interval(i1['bounds'].span(i2['bounds'])),
    window = 10
).union(
    # Same expression as the left side of the join above.
    bernie_with_host_seconds.dilate(5).coalesce(
        ('t1', 't2'),
        Bounds3D.span
    ).dilate(-5)
).dilate(5).coalesce(
    ('t1', 't2'),
    Bounds3D.span
).dilate(-5).filter_size(min_size = 60)

In [23]:
# Compare the predicted interview candidates with the labeled Bernie interviews.
# `video_meta` and `bernie_interviews` were already built from `dev_set` and
# `interviews_ism` in earlier cells, so reuse them rather than issuing one
# query per video and re-deriving the same filter.
vgrid_widget(
    video_meta=video_meta,
    vis_format=VideoBlockFormat(
        imaps=[
            ('interview candidates', interview_candidates),
            ('bernie interviews', bernie_interviews),
        ]
    )
)

VGridWidget(vgrid_spec={'interval_blocks': [{'video_id': 52945, 'interval_sets': [{'interval_set': [{'bounds':…

# Convert to segments for precision/recall/F1

In [24]:
# Segment length; used as seconds once the frame marks are converted below.
interval = 30
segs_dict = {}
for video_id in dev_set:
    video = Video.objects.get(id=video_id)
    # Frame distance between two consecutive segment marks for this video.
    stride = math.floor(video.fps * 3) * (interval / 3)
    segs_dict[video_id] = IntervalSet([
        Interval(Bounds3D(frame, frame), video.fps)
        for frame in range(0, video.num_frames)
        if frame % stride == 0
    ])

# Convert the single-frame marks to seconds and dilate each by half a segment.
segments = frame_to_second_collection(
    IntervalSetMapping(segs_dict)
).dilate(interval / 2)

In [25]:
# Baseline labeling: every segment carries payload 0.
segments_all_negative = segments.map(
    lambda seg: Interval(seg['bounds'], 0)
)

In [26]:
# Ground-truth positives: segments overlapping a labeled Bernie interview get
# payload 1. `bernie_interviews` (built in an earlier cell from
# `interviews_ism`) is exactly the filter this cell used to re-derive inline.
interview_segments = segments.filter_against(
    bernie_interviews,
    predicate=overlaps()
).map(
    lambda seg: Interval(seg['bounds'], 1)
)

# Full labeling: the all-negative segments minus the positives, plus the positives.
interview_labels = segments_all_negative.minus(
    interview_segments
).union(interview_segments)

print(interview_segments.size())
print(interview_labels.size())

{52945: 22, 34642: 22, 38275: 16, 42756: 23}
{45698: 124, 38275: 123, 42756: 124, 33541: 123, 34359: 124, 52749: 245, 23184: 123, 52945: 124, 34642: 124, 10323: 123, 38420: 124, 16215: 63, 8859: 123, 20380: 124, 55711: 124, 20450: 123, 13927: 124, 57384: 123, 19882: 124, 17458: 123, 3952: 123, 37170: 124, 19959: 124, 41725: 123}


In [27]:
# Predicted positives: segments overlapping an interview candidate get payload 1.
interview_prediction_segments = segments.filter_against(
    interview_candidates, predicate=overlaps()
).map(
    lambda seg: Interval(seg['bounds'], 1)
)

# Full prediction: the all-negative segments minus the positives, plus the positives.
interview_prediction_labels = (
    segments_all_negative
    .minus(interview_prediction_segments)
    .union(interview_prediction_segments)
)

print(interview_prediction_segments.size())
print(interview_prediction_labels.size())

{52945: 22, 34642: 22, 38275: 17, 42756: 22}
{45698: 124, 38275: 123, 42756: 124, 33541: 123, 34359: 124, 52749: 245, 23184: 123, 52945: 124, 34642: 124, 10323: 123, 38420: 124, 16215: 63, 8859: 123, 20380: 124, 55711: 124, 20450: 123, 13927: 124, 57384: 123, 19882: 124, 17458: 123, 3952: 123, 37170: 124, 19959: 124, 41725: 123}


In [28]:
# Pair each predicted segment with the ground-truth segment that has equal
# bounds and tag the pair as 'tp', 'tn', 'fp' or 'fn'.
prediction_labels = interview_prediction_labels.join(
    interview_labels,
    predicate=equal(),
    merge_op=lambda pred, truth: Interval(
        pred['bounds'],
        (
            # Prediction agrees with the label.
            'tp' if pred['payload'] == 1 else
            'tn' if pred['payload'] == 0 else
            'fn'
        ) if pred['payload'] == truth['payload'] else (
            # Prediction disagrees with the label.
            'fp' if pred['payload'] == 1 else 'fn'
        )
    )
)

In [29]:
# Per-video count of segments tagged 'tp'.
prediction_labels.filter(
    payload_satisfies(lambda outcome: outcome == 'tp')
).size()

{34642: 22, 38275: 16, 42756: 22, 52945: 22}

In [30]:
# Per-video count of segments tagged 'tn'.
prediction_labels.filter(
    payload_satisfies(lambda outcome: outcome == 'tn')
).size()

{3952: 123,
 8859: 123,
 10323: 123,
 13927: 124,
 16215: 63,
 17458: 123,
 19882: 124,
 19959: 124,
 20380: 124,
 20450: 123,
 23184: 123,
 33541: 123,
 34359: 124,
 34642: 102,
 37170: 124,
 38275: 106,
 38420: 124,
 41725: 123,
 42756: 101,
 45698: 124,
 52749: 245,
 52945: 102,
 55711: 124,
 57384: 123}

In [31]:
# Per-video count of segments tagged 'fp'.
prediction_labels.filter(
    payload_satisfies(lambda outcome: outcome == 'fp')
).size()

{38275: 1}

In [32]:
# Per-video count of segments tagged 'fn'.
prediction_labels.filter(
    payload_satisfies(lambda outcome: outcome == 'fn')
).size()

{42756: 1}

In [33]:
def precision_recall_f1(pred_labels):
    """Compute precision, recall and F1 from outcome-tagged segments.

    Args:
        pred_labels: Collection whose interval payloads are one of ``'tp'``,
            ``'tn'``, ``'fp'`` or ``'fn'``. It must support
            ``filter(predicate)`` and ``size()``, where ``size()`` returns a
            dict of per-key counts.

    Returns:
        The tuple ``(precision, recall, f1, tp, tn, fp, fn)``. A ratio whose
        denominator is zero is reported as ``0.0`` instead of raising.
    """
    def count(outcome):
        # Count on the argument, not on the notebook-level `prediction_labels`;
        # size() yields per-key counts, which are summed into one total.
        matching = pred_labels.filter(payload_satisfies(lambda p: p == outcome))
        return sum(matching.size().values())

    tp = count('tp')
    tn = count('tn')
    fp = count('fp')
    fn = count('fn')

    precision = tp / (tp + fp) if (tp + fp) else 0.0
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    f1 = (
        2 * precision * recall / (precision + recall)
        if (precision + recall) else 0.0
    )

    return (precision, recall, f1, tp, tn, fp, fn)

In [34]:
# Dev-set metrics, returned as (precision, recall, f1, tp, tn, fp, fn).
precision_recall_f1(prediction_labels)

(0.9879518072289156, 0.9879518072289156, 0.9879518072289156, 82, 2942, 1, 1)

# Run on the test set

## Load Data

In [35]:
# NOTE: this cell repeats the dev-set loading cells with `test_set` and rebinds
# the same notebook-level names (gt_interviews_all, interviews_ism,
# bernie_interviews, video_meta, all_videos, bernie_qs, bernie_intervals,
# hosts_qs, host_intervals). After it runs, the dev-set values are gone.

# Hand-labeled interviews for the test videos.
gt_interviews_all = LabeledInterview.objects.filter(
    video_id__in=test_set
).all()
interviews_ism = ingest.ism_from_django_qs(
    gt_interviews_all,
    bounds_schema={ 't1': 'start', 't2': 'end' },
    with_payload=lambda row: {
        'original': row.original,
        'guest1': row.guest1,
        'guest2': row.guest2,
        'interviewer1': row.interviewer1,
        'interviewer2': row.interviewer2
    }
)
# Ground truth: guest1 is Bernie Sanders, no second guest, `original` flag set.
bernie_interviews = interviews_ism.filter(
    payload_satisfies(lambda p: (
        p['guest1'] == 'bernie sanders' and
#                 p['interviewer1'] == 'jake tapper' and
        p['guest2'] is None and
#         p['interviewer2'] is None and
        p['original']
    ))
)
# Widget metadata and one whole-video interval (0 .. num_frames / fps) per test video.
video_meta = [
    Video.objects.get(id=vid).for_vgrid()
    for vid in test_set
]
all_videos = IntervalSetMapping({
    vm.id: IntervalSet([Interval(Bounds3D(0, vm.num_frames / vm.fps))])
    for vm in video_meta
})
# Bernie Sanders faces: single-frame intervals (min_frame == max_frame).
bernie_qs = FaceIdentity.objects.filter(
    identity__name='bernie sanders',
    face__frame__video_id__in=test_set
).annotate(
    min_frame=F('face__frame__number'),
    max_frame=F('face__frame__number'),
    video_id=F('face__frame__video_id'),
    bbox_x1=F('face__bbox_x1'),
    bbox_y1=F('face__bbox_y1'),
    bbox_x2=F('face__bbox_x2'),
    bbox_y2=F('face__bbox_y2'),
    fps=F('face__frame__video__fps')
)
# Takes about 20 seconds to run!
# Only faces on frames whose number is a multiple of floor(3 * fps) are kept.
bernie_intervals = ingest.ism_from_django_qs(
    bernie_qs,
    bounds_schema={
        't1': 'min_frame',
        't2': 'max_frame',
        'x1': 'bbox_x1',
        'x2': 'bbox_x2',
        'y1': 'bbox_y1',
        'y2': 'bbox_y2',
        'payload': 'fps'
    }
).filter(lambda intrvl: (intrvl['t1'] % math.floor(intrvl['payload'] * 3)) == 0)
# Host faces: each interval spans the min/max frame of the face's shot.
hosts_qs = FaceIdentity.objects.filter(
    face__frame__video_id__in=test_set,
    face__is_host=True
).annotate(
    min_frame=F('face__shot__min_frame'),
    max_frame=F('face__shot__max_frame'),
    video_id=F('face__frame__video_id'),
    bbox_x1=F('face__bbox_x1'),
    bbox_y1=F('face__bbox_y1'),
    bbox_x2=F('face__bbox_x2'),
    bbox_y2=F('face__bbox_y2'),
    fps=F('face__frame__video__fps')
)
# Takes about 4 and a half minutes to run!
host_intervals = ingest.ism_from_django_qs(
    hosts_qs,
    bounds_schema={
        't1': 'min_frame',
        't2': 'max_frame',
        'x1': 'bbox_x1',
        'x2': 'bbox_x2',
        'y1': 'bbox_y1',
        'y2': 'bbox_y2',
        'payload': 'fps'
    }
)#.filter(lambda intrvl: (intrvl['t1'] % math.floor(intrvl['payload'] * 3)) == 0)

## Run Query

In [36]:
# NOTE: this cell repeats the dev-set query steps on the test-set intervals
# loaded above and rebinds bernie_with_host, bernie_with_host_seconds,
# bernie_alone_seconds and interview_candidates.

# Pair every Bernie face with the host intervals it overlaps (window=0); the
# merged interval keeps the Bernie interval as payload.
bernie_with_host = bernie_intervals.join(
    host_intervals,
    predicate = overlaps(),
    merge_op = lambda i1, i2: Interval(
        Bounds3D.intersect_time_span_space(i1['bounds'], i2['bounds']),
        i1
    ),
    window=0
)
# Convert to seconds and dilate by 1.5.
bernie_with_host_seconds = frame_to_second_collection(bernie_with_host).dilate(1.5)
# Bernie on screen minus the time he shares with a host.
bernie_alone_seconds = frame_to_second_collection(
    bernie_intervals
).dilate(1.5).minus(
    bernie_with_host_seconds
)
# Coalesced "with host" intervals joined with coalesced "alone" intervals that
# overlap or lie within max_dist=5 before/after, unioned with the coalesced
# "with host" intervals, coalesced again and filtered with min_size=60.
interview_candidates = bernie_with_host_seconds.dilate(5).coalesce(
    ('t1', 't2'),
    Bounds3D.span
).dilate(-5).join(
    bernie_alone_seconds.dilate(5).coalesce(
        ('t1', 't2'),
        Bounds3D.span
    ).dilate(-5),
    predicate = or_pred(
        before(max_dist = 5),
        after(max_dist = 5),
        overlaps()
    ),
    merge_op = lambda i1, i2: Interval(i1['bounds'].span(i2['bounds'])),
    window = 10
).union(
    bernie_with_host_seconds.dilate(5).coalesce(
        ('t1', 't2'),
        Bounds3D.span
    ).dilate(-5)
).dilate(5).coalesce(
    ('t1', 't2'),
    Bounds3D.span
).dilate(-5).filter_size(min_size = 60)

## Convert to Segments

In [37]:
# NOTE: this cell repeats the dev-set segment/label/metric cells for
# `test_set`, rebinding the same notebook-level names.

# Mark every (floor(3 * fps) * interval / 3)-th frame of each test video.
interval = 30
segs_dict = {}
for video_id in test_set:
    video = Video.objects.get(id=video_id)
    iset = IntervalSet([
        Interval(Bounds3D(i, i), video.fps)
        for i in range(0, video.num_frames) if (i % (
            math.floor(video.fps * 3) * (interval / 3)
        )) == 0
    ])
    segs_dict[video_id] = iset
    
# Convert the marks to seconds and dilate each by half a segment.
segments = frame_to_second_collection(IntervalSetMapping(segs_dict)).dilate(interval / 2)
# Baseline labeling: every segment carries payload 0.
segments_all_negative = segments.map(
    lambda intrvl: Interval(intrvl['bounds'], 0)
)
# Ground-truth positives: segments overlapping a labeled Bernie interview get payload 1.
interview_segments = segments.filter_against(
    interviews_ism.filter(
        payload_satisfies(lambda p: (
            p['guest1'] == 'bernie sanders' and
#                 p['interviewer1'] == 'jake tapper' and
            p['guest2'] is None and
#             p['interviewer2'] is None and
            p['original']
        ))
    ), predicate=overlaps()
).map(
    lambda intrvl: Interval(intrvl['bounds'], 1)
)

# Full ground-truth labeling: negatives minus positives, plus positives.
interview_labels = segments_all_negative.minus(
    interview_segments
).union(interview_segments)

# Predicted positives: segments overlapping an interview candidate get payload 1.
interview_prediction_segments = segments.filter_against(
    interview_candidates,
    predicate = overlaps()
).map(lambda intrvl: Interval(intrvl['bounds'], 1))

# Full prediction labeling: negatives minus positives, plus positives.
interview_prediction_labels = segments_all_negative.minus(
    interview_prediction_segments
).union(interview_prediction_segments)

# Tag each predicted segment against the equal-bounds ground-truth segment
# as 'tp', 'tn', 'fp' or 'fn'.
prediction_labels = interview_prediction_labels.join(
    interview_labels,
    predicate = equal(),
    merge_op = lambda i1, i2: Interval(
        i1['bounds'],
        'tp' if i1['payload'] == i2['payload'] and i1['payload'] == 1 else
        'tn' if i1['payload'] == i2['payload'] and i1['payload'] == 0 else
        'fp' if i1['payload'] != i2['payload'] and i1['payload'] == 1 else
        'fn'
    )
)

# Test-set metrics, returned as (precision, recall, f1, tp, tn, fp, fn).
precision_recall_f1(prediction_labels)

(0.9217391304347826, 0.9906542056074766, 0.954954954954955, 106, 3446, 9, 1)