<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc" style="margin-top: 1em;"><ul class="toc-item"><li><span><a href="#Load-data" data-toc-modified-id="Load-data-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Load data</a></span></li><li><span><a href="#Test-single-video-with-rekall" data-toc-modified-id="Test-single-video-with-rekall-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Test single video with rekall</a></span></li><li><span><a href="#Test-single-video-with-haotian's-algorithm" data-toc-modified-id="Test-single-video-with-haotian's-algorithm-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Test single video with haotian's algorithm</a></span></li><li><span><a href="#Test-videos-with-ground-truth" data-toc-modified-id="Test-videos-with-ground-truth-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Test videos with ground truth</a></span></li><li><span><a href="#Process-labeled-commercial-groundtruth" data-toc-modified-id="Process-labeled-commercial-groundtruth-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Process labeled commercial groundtruth</a></span></li><li><span><a href="#Detect-commercial-in-all-videos" data-toc-modified-id="Detect-commercial-in-all-videos-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Detect commercial in all videos</a></span></li><li><span><a href="#Collect-commercial-result" data-toc-modified-id="Collect-commercial-result-7"><span class="toc-item-num">7&nbsp;&nbsp;</span>Collect commercial result</a></span></li><li><span><a href="#Check-commercial-result" data-toc-modified-id="Check-commercial-result-8"><span class="toc-item-num">8&nbsp;&nbsp;</span>Check commercial result</a></span></li></ul></div>

In [None]:
from query.models import Video
from esper.prelude import esper_widget
from rekall.interval_list import IntervalList
from esper.rekall import *
from esper.commercial_detect import detect_commercial, visualize_commercial
from esper.commercial_detect_rekall import detect_commercial_rekall, solve_parallel

import os
import pickle
import random

# Notebook setup: render matplotlib figures inline and reload edited project
# modules automatically before each cell runs.
# `run_line_magic(name, args)` replaces the deprecated `InteractiveShell.magic`.
get_ipython().run_line_magic('matplotlib', 'inline')
get_ipython().run_line_magic('reload_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')

# Load data

In [None]:
# Load the precomputed inputs used by the experiments below.
# Each file is read inside a `with` block so its handle is closed right away
# instead of lingering until garbage collection.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
with open('/app/data/black_frame_all.pkl', 'rb') as pkl_file:
    # Indexed by video id further down; the values are handed to the detectors
    # as `blackframe_list`.
    black_frame_dict = pickle.load(pkl_file)
# NOTE(review): 'addtional' is the spelling of the path as referenced here;
# presumably it matches the file on disk, so it is left untouched.
with open('/app/data/addtional_field.pkl', 'rb') as pkl_file:
    additional_field = pickle.load(pkl_file)
with open('/app/data/commercial_gt.pkl', 'rb') as pkl_file:
    # commercial_gt['all'][video_id] is iterated as (start, end) pairs below.
    commercial_gt = pickle.load(pkl_file)

In [None]:
# Pick the sample broadcast used by the single-video experiments: the first
# video whose stored path contains this recording name.
sample_matches = Video.objects.filter(path__contains='CNNW_20110513_150000_CNN_Newsroom')
video = sample_matches[0]
video_name = video.item_name()

# Test single video with rekall

In [None]:
# Run the rekall-based detector on the sample video with debug=True; the
# debugging cell that follows indexes `result` by signal name
# ('black', 'arrow', 'commercials_raw', 'lowercase', 'blank', 'commercials').
result = detect_commercial_rekall(
    video,
    '/app/data/subs/aligned/'+video_name+'.word.srt',
    blackframe_list=black_frame_dict[video.id],
    debug=True,
    verbose=False,
)

In [None]:
# Debug commercial blocks
def intrvllists_time2fid(intrvllists, fps):
    """Rescale an interval list from time units to frame indices.

    Args:
        intrvllists: Object exposing ``get_intervals()``; every interval it
            yields must provide ``start`` and ``end`` attributes
            (presumably in seconds -- confirm against the detector output).
        fps: Frame rate used as the scale factor. The previous version
            ignored this argument and read the notebook-global ``video.fps``.

    Returns:
        An ``IntervalList`` built from ``(start * fps, end * fps, 0)`` triples,
        one per input interval, in the original order.
    """
    return IntervalList([
        (interval.start * fps, interval.end * fps, 0)
        for interval in intrvllists.get_intervals()
    ])

# Stack each intermediate detector signal as its own colored track, converted
# to frame indices, so the pipeline can be inspected in the esper widget.
commercial_intrvllists = intrvllists_to_result(
    {video.id: intrvllists_time2fid(result['black'], video.fps)},
    color='black')

# Remaining debug signals, added in display order as (result key, track color).
for track_key, track_color in [
        ('arrow', 'black'),
        ('commercials_raw', 'orange'),
        ('lowercase', 'gray'),
        ('blank', 'blue'),
        ('commercials', 'red')]:
    add_intrvllists_to_result(
        commercial_intrvllists,
        {video.id: intrvllists_time2fid(result[track_key], video.fps)},
        color=track_color)

# Ground-truth labels are plain (start, end) pairs, so they are scaled to
# frames here rather than through intrvllists_time2fid.
gt_frame_spans = IntervalList([
    (start*video.fps, end*video.fps, 0)
    for (start, end) in commercial_gt['all'][video.id]
])
add_intrvllists_to_result(commercial_intrvllists,
                          {video.id: gt_frame_spans},
                          color='green')
esper_widget(commercial_intrvllists)

# Test single video with haotian's algorithm

In [None]:
# The legacy detector takes a plain description dict instead of the Video row.
video_desp = {
    'fps': video.fps,
    'frame_w': video.width,
    'frame_h': video.height,
    'video_length': video.num_frames/video.fps,
    'video_frames': video.num_frames,
}
commercial_list = detect_commercial(
    video_desp,
    '/app/data/subs10/'+video_name,
    blackframe_list=black_frame_dict[video.id],
)
# Echo the detected blocks as the cell output.
commercial_list

# Test videos with ground truth 

In [None]:
# Run the rekall detector on every labeled video and collect, keyed by video
# name, both our output (com_dict_us) and the labels (com_dict_gt).
com_dict_us = {}
com_dict_gt = {}
for video_id, com_gt in sorted(commercial_gt['all'].items()):
    video = Video.objects.filter(id=video_id)[0]
    video_name = video.item_name()
    transcript_path = "/app/data/subs/aligned/" + video_name + '.word.srt'
    print(video_name)
    # Only videos with both black-frame data and an aligned transcript on disk
    # can be processed; the rest are skipped (their name is still printed).
    if video_id in black_frame_dict and os.path.exists(transcript_path):
        com_us = detect_commercial_rekall(
            video,
            transcript_path,
            blackframe_list=black_frame_dict[video_id],
            debug=False,
        )
        com_dict_us[video_name] = com_us
        com_dict_gt[video_name] = com_gt

In [None]:
# Visualize commercials as a strip graph: ground-truth labels (com_dict_gt)
# against the rekall detector output (com_dict_us), both keyed by video name.
visualize_commercial(com_dict_gt, com_dict_us)

# Process labeled commercial groundtruth

In [None]:
# Reload the stored ground truth before editing it; the `with` block closes
# the file handle as soon as the pickle has been read.
with open('/app/data/commercial_gt.pkl', 'rb') as pkl_file:
    commercial_gt = pickle.load(pkl_file)

In [None]:
from esper.util import time2second


def _clock_to_seconds(clock):
    """Convert an 'H:M:S' string to seconds via time2second."""
    parts = clock.split(':')
    return time2second((int(parts[0]), int(parts[1]), int(parts[2])))


# Parse hand-labeled commercial spans. Each CSV row is
#   video_id,H:M:S-H:M:S,H:M:S-H:M:S,...
# and becomes com_gt[video_id] = [(start, end), ...] via time2second.
com_gt = {}
with open('/app/data/commercial_gt.csv') as gt_file:
    for line in gt_file:
        # Blank lines carry no label and would crash on int('').
        if not line.strip():
            continue
        # Strip only the line terminator: slicing off the last character
        # unconditionally truncates a final row that has no trailing newline.
        columns = line.rstrip('\r\n').split(',')
        video_id = int(columns[0])
        # Fail fast if the CSV references a video id that cannot be looked up.
        video = Video.objects.filter(id=video_id)[0]

        com_gt[video_id] = []
        for cell in columns[1:]:
            if cell == '':
                continue  # empty cell left by trailing commas
            span = cell.split('-')
            start = _clock_to_seconds(span[0])
            end = _clock_to_seconds(span[1])
            com_gt[video_id].append((start, end))
com_gt

In [None]:
# Fold the '10y' labels into the combined 'all' set; when a video id appears
# in both, the '10y' entry wins.
# The assignment below is left disabled, so '10y' must already be present in
# the loaded pickle:
# commercial_gt['10y'] = com_gt
merged_gt = dict(commercial_gt['all'])
merged_gt.update(commercial_gt['10y'])
commercial_gt['all'] = merged_gt

In [None]:
# Persist the updated ground truth. Writing inside `with` guarantees the file
# is flushed and closed, so the pickle on disk is complete before later reads.
with open('/app/data/commercial_gt.pkl', 'wb') as pkl_file:
    pickle.dump(commercial_gt, pkl_file)

# Detect commercial in all videos

In [None]:
# Inputs for the full run: black-frame data, results computed so far, and
# per-video metadata. Files are read inside `with` blocks so no handle leaks.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
with open('/app/data/black_frame_all.pkl', 'rb') as pkl_file:
    black_frame_dict = pickle.load(pkl_file)
with open('/app/result/commercial/commercial_dict.pkl', 'rb') as pkl_file:
    # Videos already present here are skipped when the work list is built.
    result_dict = pickle.load(pkl_file)
with open('/app/data/addtional_field_all.pkl', 'rb') as pkl_file:
    # Read as additional_field[video_id]['aligned_transcript'] below.
    additional_field = pickle.load(pkl_file)
videos = Video.objects.all()

In [None]:
# Build the work list for the parallel run: one (video metadata, black frames)
# pair per video that has black-frame data, has an aligned transcript, and has
# no stored result yet.
param_list = []
for video in videos:
    if video.id not in black_frame_dict:
        continue
    if not additional_field[video.id]['aligned_transcript']:
        continue
    if video.id in result_dict:
        continue
    video_meta = {
        'id': video.id,
        'video_name': video.item_name(),
        'fps': video.fps,
        'num_frames': video.num_frames,
    }
    param_list.append((video_meta, black_frame_dict[video.id]))

In [None]:
# Launch detection over the whole work list. The __main__ guard keeps the call
# from re-running if this code is imported rather than executed directly.
if __name__ == "__main__":
    solve_parallel(
        param_list,
        res_dict_path='/app/result/commercial/commercial_dict.pkl',
        nthread=32,
        use_process=True,
    )

# Collect commercial result

In [None]:
# Merge the partial result files into the main result dictionary; entries in
# later shards overwrite earlier ones with the same key.
# NOTE(review): 64 shard files are read here while the detection cell passes
# nthread=32 -- confirm how many dict_<i>.pkl files solve_parallel writes.
with open('/app/result/commercial/commercial_dict.pkl', 'rb') as pkl_file:
    res_dict = pickle.load(pkl_file)
for i in range(64):
    with open('/app/result/commercial/dict_{}.pkl'.format(i), 'rb') as pkl_file:
        res_dict_tmp = pickle.load(pkl_file)
    # Update in place instead of rebuilding the whole dictionary per shard.
    res_dict.update(res_dict_tmp)

In [None]:
# Write the merged results back. The `with` block flushes and closes the file
# so the pickle is complete on disk before it is read again.
with open('/app/result/commercial/commercial_dict.pkl', 'wb') as pkl_file:
    pickle.dump(res_dict, pkl_file)

# Check commercial result

In [None]:
# Load the final per-video results for inspection; the `with` block closes the
# file handle as soon as the pickle has been read.
with open('/app/result/commercial/commercial_dict.pkl', 'rb') as pkl_file:
    commercial_dict = pickle.load(pkl_file)

In [None]:
# Draw a random subset of up to 100 videos to spot-check.
# random.sample needs a sequence: passing dict.keys() directly raises TypeError
# on Python 3.11+, so the keys are listed first. Capping the sample size avoids
# a ValueError when fewer than 100 results exist.
sample_size = min(100, len(commercial_dict))
commercial_dict_small = {
    video_id: commercial_dict[video_id]
    for video_id in random.sample(list(commercial_dict), sample_size)
}

In [None]:
# Show the sampled commercial blocks in the esper widget. The stored blocks are
# (start, end) pairs; both ends are multiplied by the video's fps to obtain the
# frame positions given to IntervalList.
commercial_intrvllists = {}
video_ids = []
for video_id in sorted(commercial_dict_small):
    video = Video.objects.filter(id=video_id)[0]
    frame_spans = [
        (start*video.fps, end*video.fps, 0)
        for (start, end) in commercial_dict[video_id]
    ]
    video_ids.append(video.id)
    commercial_intrvllists[video.id] = IntervalList(frame_spans)

esper_widget(intrvllists_to_result(commercial_intrvllists))