In [2]:
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import subprocess
from skimage.measure import compare_ssim as ssim
import glob
import numpy as np
import re

In [3]:
import tqdm
import scenedetect
import os

In [4]:
# using PySceneDetect to establish shot boundaries
def shot_detector(video_path, scene_path, threshold=50):
    """Run the PySceneDetect command line tool on one video.

    Executes ``scenedetect --input <video_path> detect-content --threshold
    <threshold> list-scenes -o <scene_path>``.

    Args:
        video_path: path of the video file handed to ``--input``.
        scene_path: value handed to ``list-scenes -o``.
        threshold: integer handed to ``detect-content --threshold``
            (defaults to 50, the value that used to be hard-coded).

    Returns:
        None. The results are whatever files ``scenedetect`` writes.

    Raises:
        FileNotFoundError: if the ``scenedetect`` executable is not on PATH.
        subprocess.CalledProcessError: if ``scenedetect`` exits with a
            non-zero status.
    """
    # An argument list (no shell involved) keeps paths that contain spaces or
    # shell metacharacters intact, which an interpolated command string does not.
    cmd = [
        'scenedetect', '--input', str(video_path),
        'detect-content', '--threshold', '%d' % threshold,
        'list-scenes', '-o', str(scene_path),
    ]
    subprocess.run(cmd, check=True)

In [5]:
# aligning the subtitles with the shot boundaries
def aligner(scenes, subs, noise):
    """Attach subtitle text to every detected shot.

    Args:
        scenes: DataFrame with one row per shot. The columns 'Start Frame',
            'End Frame', 'Start Timecode' and 'End Timecode' are read.
        subs: DataFrame of subtitles. The columns 'startTime' and 'text' are
            read. The frame is NOT modified; the start times in seconds are
            kept in a local Series instead of being written back as columns.
        noise: indexable of four frame numbers, in this order: end of the
            recap, start of the intro, end of the intro, start of the sneak
            peek.

    Returns:
        A copy of ``scenes`` with an extra 'text' column. For each shot the
        value is:
          * False when the shot starts at or before the recap end, starts or
            ends inside the intro, or ends at or after the sneak-peek start;
          * False when no subtitle starts inside the shot;
          * the subtitle texts joined by a single space when more than one
            subtitle starts inside the shot;
          * the single subtitle text when exactly one starts inside the shot,
            unless that text is upper-case (``str.isupper``), in which case
            False.
    """
    recap, intro_starts, intro_ends, sneak = noise[0], noise[1], noise[2], noise[3]

    # Whole-second subtitle start times, index-aligned with ``subs``.
    sub_starts = subs['startTime'].apply(to_seconds_converter)

    # Collect one value per shot and build the column once, rather than
    # growing a DataFrame row by row (which copies the frame on every insert).
    texts = []
    for _, row in scenes.iterrows():
        start_frame = row['Start Frame']
        end_frame = row['End Frame']
        start_time = to_seconds_converter(row['Start Timecode'])
        end_time = to_seconds_converter(row['End Timecode'])

        in_noise = (
            start_frame <= recap
            or intro_starts <= start_frame <= intro_ends
            or intro_starts <= end_frame <= intro_ends
            or end_frame >= sneak
        )
        if in_noise:
            texts.append(False)
            continue

        # A subtitle belongs to the shot in which it starts: [start, end).
        in_shot = (sub_starts >= start_time) & (sub_starts < end_time)
        matched = subs.loc[in_shot, 'text']

        if len(matched) > 1:
            texts.append(' '.join(matched))
        elif len(matched) == 1:
            only_text = matched.iloc[0]
            # Test the text itself; str() keeps a non-string value such as
            # NaN from raising here.
            texts.append(False if str(only_text).isupper() else only_text)
        else:
            texts.append(False)

    alignment_df = scenes.copy()
    alignment_df['text'] = texts
    return alignment_df

In [98]:
# cleaning the subtitle data
def preprocess_text(text):
    """Clean one subtitle string.

    The text is passed through the ``sub`` method of three objects named
    ``a``, ``b`` and ``c`` (in that order), which are expected to exist at
    module level; they are not defined in this cell -- presumably compiled
    ``re`` patterns, TODO confirm. Afterwards the text is split on single
    spaces and every token whose first two characters are upper-case
    (``str.isupper``) is dropped.

    Args:
        text: the raw subtitle string.

    Returns:
        The remaining tokens joined by single spaces with leading/trailing
        spaces stripped, or the string 'False' when fewer than two tokens
        remain.
    """
    cleaned = c.sub(' ', b.sub('ok', a.sub('\\1\\2 ', text)))

    kept_tokens = []
    for token in cleaned.split(' '):
        if token[:2].isupper():
            continue
        kept_tokens.append(token)

    if len(kept_tokens) >= 2:
        return ' '.join(kept_tokens).strip(' ')
    return 'False'

In [8]:
# the function to detect recaps, intro song, and sneak peeks in the video data.
# The shots that fall into the time stamps that are associated with these three parts of the episode are excluded.
def recap_intro_sneak(subs, video_path):
    """Find the frames that delimit the recap, the intro and the sneak peek.

    Frames of the video are compared (via ``mse``) with three reference
    images read from 'gold/recap.jpg', 'gold/intro.jpg' and
    'gold/sneakpeek.jpg'.

    Args:
        subs: subtitle DataFrame of the episode. The first row whose 'text'
            starts with '*de tijd' is located and the value in its third
            column (position 2) is converted to milliseconds with
            ``duration_converter``; that value is used as the intro start
            time (presumably the subtitle start time -- verify against the
            column order of the subtitle file).
        video_path: path of the video file to scan.

    Returns:
        A 4-tuple of frame numbers:
          * the last frame matching the recap reference, searched on every
            13th frame between 28 s and 124 s;
          * the first frame at or after the intro start time, minus 25;
          * the last frame matching the intro reference, searched on every
            5th frame between 28 s and 32 s after the intro start time;
          * the last frame matching the sneak-peek reference, searched on
            every 5th frame from 1,200,000 ms onward.

    Raises:
        FileNotFoundError: if one of the reference images cannot be read.
        IndexError: if no subtitle starts with '*de tijd'.
        ValueError: if any of the four markers was not found in the video.
    """
    references = {
        'gold/recap.jpg': cv2.imread('gold/recap.jpg'),
        'gold/intro.jpg': cv2.imread('gold/intro.jpg'),
        'gold/sneakpeek.jpg': cv2.imread('gold/sneakpeek.jpg'),
    }
    # cv2.imread signals an unreadable file by returning None instead of raising.
    for image_path, image in references.items():
        if image is None:
            raise FileNotFoundError('could not read reference image %s' % image_path)
    gold_recap = references['gold/recap.jpg']
    gold_intro = references['gold/intro.jpg']
    gold_sneakpeek = references['gold/sneakpeek.jpg']

    # The intro start time does not depend on the current frame, so it is
    # looked up once instead of on every iteration of the frame loop.
    intro_start = duration_converter(subs[subs['text'].str.startswith('*de tijd')].values[0][2])

    recap_stops = intro_id = intro_stop = sneakpeek_start = None
    cap = cv2.VideoCapture(video_path)
    try:
        while True:
            success, frame = cap.read()
            if not success or frame is None:
                # End of stream: there is no frame left to compare.
                break
            frameId = int(cap.get(cv2.CAP_PROP_POS_FRAMES))
            current_time = cap.get(cv2.CAP_PROP_POS_MSEC)

            if 28000 <= current_time <= 124000:
                if frameId % 13 == 0 and mse(frame, gold_recap):
                    recap_stops = frameId

            # Take the first frame at or after the intro start. Requiring an
            # exact timestamp match fails whenever the subtitle time does not
            # coincide with a frame timestamp.
            if intro_id is None and current_time >= intro_start:
                intro_id = frameId

            if intro_start + 28000 <= current_time <= intro_start + 32000:
                if frameId % 5 == 0 and mse(frame, gold_intro):
                    intro_stop = frameId

            if current_time >= 1200000:
                if frameId % 5 == 0 and mse(frame, gold_sneakpeek):
                    sneakpeek_start = frameId
    finally:
        # Release the capture even when a comparison raises.
        cap.release()

    found = (
        ('recap end', recap_stops),
        ('intro start', intro_id),
        ('intro end', intro_stop),
        ('sneak peek start', sneakpeek_start),
    )
    missing = [label for label, value in found if value is None]
    if missing:
        raise ValueError('%s: could not detect %s' % (video_path, ', '.join(missing)))

    # NOTE(review): the 25-frame offset looks like one second at 25 fps -- confirm.
    return (recap_stops, intro_id - 25, intro_stop, sneakpeek_start)

In [9]:
# The MSE function to calculate the similarity between prototypical recap, sneak peek, intro song frames
# and the current frame
def mse(imageA, imageB, threshold=550):
    """Tell whether two BGR images are similar by mean squared error.

    Both images are converted to grayscale; the squared per-pixel differences
    are summed and divided by the number of pixels. The lower the error, the
    more similar the images are.

    Args:
        imageA: first image, as accepted by ``cv2.cvtColor(..., COLOR_BGR2GRAY)``.
        imageB: second image; must have the same dimensions as ``imageA``.
        threshold: the images count as similar when the error is strictly
            below this value (defaults to 550, the value that used to be
            hard-coded).

    Returns:
        True when the mean squared error is below ``threshold``, else False.
        Note that the error itself is not returned.

    Raises:
        ValueError: if either image is None or the dimensions differ.
    """
    if imageA is None or imageB is None:
        raise ValueError('mse() needs two images, got None')

    grayA = cv2.cvtColor(imageA, cv2.COLOR_BGR2GRAY)
    grayB = cv2.cvtColor(imageB, cv2.COLOR_BGR2GRAY)
    if grayA.shape != grayB.shape:
        # Fail with an explicit message instead of relying on (or being
        # silently satisfied by) NumPy broadcasting.
        raise ValueError('image dimensions differ: %s vs %s' % (grayA.shape, grayB.shape))

    err = np.sum((grayA.astype("float") - grayB.astype("float")) ** 2)
    err /= float(grayA.shape[0] * grayA.shape[1])

    # A mean of squares is never negative, so only the upper bound is tested.
    return bool(err < threshold)

In [7]:
# joining it all together: path_video - is the path to our mp4 GTST files, path_subtitles - the path to the subtitles
# path_scenes and path_aligner - are folders for the metadata to be saved to.
def main_preprocessor(path_video, path_subtitles, path_scenes, path_aligner):
    """Run shot detection, noise detection and subtitle alignment per episode.

    For every ``*.mp4`` file in ``path_video`` whose episode id (the file
    name up to the first underscore) is not in ``filtering``:
      1. ``shot_detector`` writes the scene list below ``path_scenes/<episode id>``;
      2. the scene list ``<video name>-Scenes.csv`` is read back;
      3. ``recap_intro_sneak`` locates recap, intro and sneak peek;
      4. ``aligner`` attaches the subtitles and the result is written to
         ``path_aligner/<episode id>.csv``.

    ``filtering`` is not defined in this cell; it is expected to exist at
    module level as a container of episode ids to skip.

    Args:
        path_video: folder containing the mp4 files.
        path_subtitles: folder containing 'gtst_episodes_5491_5710.csv'.
        path_scenes: folder below which the scene lists are written.
        path_aligner: folder the aligned CSV files are written to.

    Returns:
        The string 'the job is done'.
    """
    # The subtitle file is the same for every episode: read it once, and fail
    # before any (slow) shot detection if it is missing.
    subtitles = pd.read_csv('%s/gtst_episodes_5491_5710.csv' % path_subtitles, header=0, sep=';')

    for filepath in glob.iglob('%s/*.mp4' % path_video):
        file_name = os.path.basename(filepath)
        episode_id = file_name.split('_')[0]
        video_name = os.path.splitext(file_name)[0]
        if episode_id in filtering:
            continue

        scene_path = os.path.join(path_scenes, episode_id)
        print('starting shot detection')
        shot_detector(filepath, scene_path)

        # The subtitle file identifies an episode by the same id with its
        # last character replaced by 'T'.
        replaced_id = episode_id[:-1] + 'T'
        episode_subs = subtitles.loc[subtitles['tapeId'] == replaced_id]

        list_scenes_path = '%s/%s-Scenes.csv' % (scene_path, video_name)
        print(list_scenes_path)
        # header=1: the column names are on the second line of the scene list.
        list_scenes = pd.read_csv(list_scenes_path, header=1, sep=',')

        print('starting noise detection')
        noise = recap_intro_sneak(episode_subs, filepath)

        print('starting alignment')
        aligned_df = aligner(list_scenes, episode_subs, noise)
        aligned_df.to_csv('%s/%s.csv' % (path_aligner, episode_id))
        print('the episode is complete')
    return 'the job is done'

In [None]:
# Run the whole preprocessing pipeline: videos are read from 'video', subtitles from 'subs',
# scene lists go below 'list_scenes' and the aligned CSV files are written to 'alignment'.
main_preprocessor('video', 'subs', 'list_scenes', 'alignment')

In [6]:
# getting the log data containing the information about start/end points of the shots and the cleaned subtitle.
# For every shot of the GTST season. This log file is called 'cleaned_subs_log.csv' in our repository
def concatination():
    """Concatenate the per-episode alignment files into one DataFrame.

    For every ``*.mp4`` file in '/Volumes/2TB' (except the episode ids
    '248108H1' and '248139H1') the file 'alignment/<episode id>.csv' is read
    and extended with three columns: 'tapeId' (the episode id), 'episode_nr'
    (the first distinct 'episode_nr' value of the matching rows in
    'subs/gtst_episodes_5491_5710.csv') and 'video_path' (the mp4 path).

    The initial column set is taken from ``new_test``, which is not defined
    in this cell and is expected to be a DataFrame at module level.

    Returns:
        One DataFrame holding the rows of all episodes with a fresh
        0..n-1 index.

    Raises:
        IndexError: if the subtitle file has no row for an episode.
    """
    subtitles = pd.read_csv('subs/gtst_episodes_5491_5710.csv', header=0, sep=';')
    video_path = '/Volumes/2TB'
    alignment_path = 'alignment'
    excluded_ids = ('248108H1', '248139H1')

    columns = new_test.columns.tolist()
    columns.append('episode_nr')
    columns.append('video_path')

    # Gather all frames and concatenate once; concatenating inside the loop
    # re-copies every previously collected row on each iteration.
    frames = [pd.DataFrame(columns=columns)]
    for filepath in glob.iglob('%s/*.mp4' % video_path):
        episode_id = os.path.basename(filepath).split('_')[0]
        if episode_id in excluded_ids:
            continue

        # The subtitle file identifies an episode by the same id with its
        # last character replaced by 'T'.
        replaced_id = episode_id[:-1] + 'T'
        episode_nr = subtitles.loc[subtitles['tapeId'] == replaced_id, 'episode_nr']

        aligned_df = pd.read_csv('%s/%s.csv' % (alignment_path, episode_id), index_col=0, header=0)
        aligned_df['tapeId'] = episode_id
        aligned_df['episode_nr'] = episode_nr.unique()[0]
        aligned_df['video_path'] = filepath
        frames.append(aligned_df)

    return pd.concat(frames, ignore_index=True, sort=False)

In [111]:
# Build the combined per-shot table from all per-episode alignment files.
result = concatination()

In [115]:
# Store the row index as an explicit 'index' column and save the table as 'log_data.csv';
# shot_cutter reads that file and uses the 'index' column to name the cut shots.
result['index'] = result.index
result.to_csv('log_data.csv', header=True, index=False)

In [112]:
# Preview the first rows of the combined table.
result.head()

Unnamed: 0,Scene Number,Start Frame,Start Timecode,Start Time (seconds),End Frame,End Timecode,End Time (seconds),Length (frames),Length (timecode),Length (seconds),text,tapeId,episode_nr,video_path
0,1,0,00:00:00.000,0.0,184,00:00:07.360,7.36,184,00:00:07.360,7.36,False,241919H1,5491,/Volumes/2TB/241919H1_Goede_Tijden_Slechte_Tij...
1,2,184,00:00:07.360,7.36,221,00:00:08.840,8.84,37,00:00:01.480,1.48,False,241919H1,5491,/Volumes/2TB/241919H1_Goede_Tijden_Slechte_Tij...
2,3,221,00:00:08.840,8.84,320,00:00:12.800,12.8,99,00:00:03.960,3.96,False,241919H1,5491,/Volumes/2TB/241919H1_Goede_Tijden_Slechte_Tij...
3,4,320,00:00:12.800,12.8,363,00:00:14.520,14.52,43,00:00:01.720,1.72,False,241919H1,5491,/Volumes/2TB/241919H1_Goede_Tijden_Slechte_Tij...
4,5,363,00:00:14.520,14.52,434,00:00:17.360,17.36,71,00:00:02.840,2.84,False,241919H1,5491,/Volumes/2TB/241919H1_Goede_Tijden_Slechte_Tij...


In [29]:
# cut the whole episode files into shots according to the log file
def shot_cutter():
    """Cut every logged shot out of its episode file with ffmpeg.

    Reads 'log_data.csv' and, for each row whose 'text' is not the string
    'False' and whose 'index' (as a string) is not in ``filtering``, runs
    ``ffmpeg -i <video_path> -c copy -ss <Start Timecode> -to <End Timecode>
    /Volumes/Personal/shots/<index>.mp4``.

    ``filtering`` is not defined in this cell; it is expected to exist at
    module level as a container of ids to skip.

    Returns:
        None.

    Raises:
        FileNotFoundError: if the ``ffmpeg`` executable is not on PATH.
    """
    log_df = pd.read_csv('log_data.csv', header=0)
    write_path = '/Volumes/Personal/shots'
    for _, row in log_df.iterrows():
        if row['text'] == 'False' or str(row['index']) in filtering:
            continue
        # An argument list (no shell involved) keeps paths that contain spaces
        # or shell metacharacters intact.
        cmd = [
            'ffmpeg', '-i', str(row['video_path']), '-c', 'copy',
            '-ss', str(row['Start Timecode']), '-to', str(row['End Timecode']),
            '%s/%d.mp4' % (write_path, row['index']),
        ]
        # Best effort: the exit status is not checked, so one shot that fails
        # to cut does not stop the remaining ones.
        subprocess.run(cmd)

### Some time converters

In [10]:
# to ms
def duration_converter(time_string, include_hours=False):
    """Convert an ``HH:MM:SS[.fff]`` time string to milliseconds.

    Args:
        time_string: value whose ``str()`` has three ':'-separated fields;
            the last one may carry a decimal fraction.
        include_hours: when False (the default) the first field is ignored
            and only minutes, seconds and the fraction are counted, as this
            function has always done. Pass True to add the hours as well.
            NOTE(review): the default is kept because source timecodes may
            carry an hour offset that callers rely on being dropped -- confirm.

    Returns:
        The duration in milliseconds as an int.

    Raises:
        IndexError: if the string has fewer than three ':'-separated fields.
        ValueError: if a field is not numeric.
    """
    fields = str(time_string).split(':')
    minutes = int(fields[1])
    whole_seconds, _, fraction = fields[2].partition('.')
    seconds = int(whole_seconds)
    # The fraction is decimal: right-pad so that '.5' means 500 ms rather
    # than 5 ms, and cut anything beyond millisecond precision. A missing
    # fraction counts as 0 ms.
    millis = int((fraction + '000')[:3]) if fraction else 0

    total = minutes * 60000 + seconds * 1000 + millis
    if include_hours:
        total += int(fields[0]) * 3600000
    return total

In [11]:
def to_seconds_converter(time_string, include_hours=False):
    """Convert an ``HH:MM:SS[.fff]`` time string to whole seconds.

    Any fractional part of the seconds field is discarded (truncated, not
    rounded).

    Args:
        time_string: value whose ``str()`` has three ':'-separated fields.
        include_hours: when False (the default) the first field is ignored
            and only minutes and seconds are counted, as this function has
            always done. Pass True to add the hours as well.

    Returns:
        The number of seconds as an int.

    Raises:
        IndexError: if the string has fewer than three ':'-separated fields.
        ValueError: if a field is not numeric.
    """
    fields = str(time_string).split(':')
    minutes = int(fields[1])
    seconds = int(fields[2].split('.')[0])

    total = minutes * 60 + seconds
    if include_hours:
        total += int(fields[0]) * 3600
    return total

In [12]:
# from ms to mm:ss.fff
def duration_to_string(ms):
    """Format a duration in milliseconds as ``mm:ss.fff``.

    Minutes and seconds are zero-padded to two digits and the milliseconds to
    three, so that e.g. 61005 ms reads '01:01.005' and cannot be mistaken for
    1 minute 1.5 seconds. Minutes are not wrapped at 60.

    Args:
        ms: non-negative duration in milliseconds.

    Returns:
        The formatted string.
    """
    total_seconds, millis = divmod(ms, 1000)
    minutes, seconds = divmod(total_seconds, 60)
    return '%02d:%02d.%03d' % (minutes, seconds, millis)

In [4]:
# Load the semicolon-separated file 'new_log.csv' (first line = header) into `subs`.
# NOTE(review): no later cell visible here reads this variable -- confirm it is still needed.
subs = pd.read_csv('new_log.csv', header=0, sep=';')

## Mute the audio cue

In [None]:
# Write a muted copy of every shot in 'shots/' whose numeric file name is at most 8561
# to 'muted/<number>.mp4'.
# mute_audio is defined in the cell below; that cell has to be run before this one.
video_path = 'shots/*.mp4'
for video in glob.iglob(video_path):
    # The shot number is the file name up to its first dot.
    video_index = int(video.split('/')[-1].split('.')[0])
    if video_index > 8561:
        continue
    new_path = 'muted/%d.mp4' % video_index
    mute_audio(video, new_path)
    print(video_index)

In [None]:
def mute_audio(video_path, new_path):
    """Write a copy of a video without its audio track using ffmpeg.

    Runs ``ffmpeg -i <video_path> -an -vcodec copy <new_path>``: ``-an``
    drops the audio and ``-vcodec copy`` copies the video stream without
    re-encoding.

    Args:
        video_path: path of the input video.
        new_path: path of the muted output video.

    Returns:
        None.

    Raises:
        FileNotFoundError: if the ``ffmpeg`` executable is not on PATH.
    """
    # An argument list (no shell involved) keeps paths that contain spaces or
    # shell metacharacters intact.
    cmd = ['ffmpeg', '-i', str(video_path), '-an', '-vcodec', 'copy', str(new_path)]
    # Best effort: the exit status is not checked.
    subprocess.run(cmd)