In [35]:
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import subprocess
from skimage.measure import compare_ssim as ssim
import glob
import numpy as np
import re

In [2]:
import tqdm
import scenedetect
import os

In [4]:
# using PySceneDetect to establish shot boundaries
def shot_detector(video_path, scene_path):
    cmd = 'scenedetect --input %s detect-content --threshold %d list-scenes -o %s' % (video_path, 50, scene_path)
    !{cmd}

In [5]:
# aligning the subtitles with the shot boundaries
def aligner(scenes, subs, noise):
    recap = noise[0]
    intro_starts = noise[1]
    intro_ends = noise[2]
    sneak = noise[3]
    subs['SecondStart'] = subs['startTime'].apply(lambda x: to_seconds_converter(x))
    subs['SecondEnd'] = subs['endTime'].apply(lambda x: to_seconds_converter(x))
    columns = scenes.columns.tolist()
    columns.append('text')
    alignment_df = pd.DataFrame(columns=columns)
    for index, row in scenes.iterrows():
        new_line = row.values
        start_frame = row['Start Frame']
        end_frame = row['End Frame']
        start_time = to_seconds_converter(row['Start Timecode'])
        end_time = to_seconds_converter(row['End Timecode'])
        if start_frame <= recap or (start_frame >= intro_starts and start_frame <= intro_ends) or \
        (end_frame >= intro_starts and end_frame <= intro_ends) or end_frame >= sneak:
            sub_text = False
        else:
            # <= end_time
            sub_result = subs.loc[(subs['SecondStart'] >= start_time) & (subs['SecondStart'] < end_time)]
            if len(sub_result) > 1:
                sub_text = ' '.join(sub_result['text'])
            elif len(sub_result) == 0:
                sub_text = False
            else:
                if not sub_result['text'].to_string().isupper():
                    sub_text = sub_result['text']
                else:
                    sub_text = False
        
        new_line = np.append(new_line, sub_text)
        alignment_df.loc[index] = new_line
    return alignment_df

In [98]:
# cleaning the subtitle data
def preprocess_text(text):
    new_text = a.sub('\\1\\2 ', text)
    new_text = b.sub('ok', new_text)
    new_text = c.sub(' ', new_text)
    final_text = [word for word in new_text.split(' ') if not word[:2].isupper()]
    if len(final_text) < 2:
        return 'False'
    return ' '.join(final_text).strip(' ')

In [8]:
# the function to detect recaps, intro song, and sneak peeks in the video data. 
# The shots that fall into the time stamps that are associated with these three parts of the episode are excluded.
def recap_intro_sneak(subs, video_path):
    gold_recap = cv2.imread('gold/recap.jpg')
    gold_intro = cv2.imread('gold/intro.jpg')
    gold_sneakpeek = cv2.imread('gold/sneakpeek.jpg')
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(5)
    success = True
    while success:
        success, frame = cap.read()
        frameId = int(cap.get(1))
        current_time = cap.get(0)
        
        #current_time <= 32000; 64000
        #if current_time >= 28000 and current_time <= 64000:
        if current_time >= 28000 and current_time <= 124000:
            if frameId % 13 == 0:
            #if frameId % 6 == 0:
                if mse(frame, gold_recap):
                    recap_stops = frameId
                    #print(frameId)
        
        intro_start = duration_converter(subs[subs['text'].str.startswith('*de tijd')].values[0][2])
        
        if current_time == intro_start:
            intro_id = frameId
        
        if current_time >= intro_start + 28000 and current_time <= intro_start + 32000:
            if frameId % 5 == 0:
                if mse(frame, gold_intro):
                    intro_stop = frameId
                    #print(frameId)
        
        if current_time >= 1200000:
            if frameId % 5 == 0:
                if frame is not None:
                    if mse(frame, gold_sneakpeek):
                        sneakpeek_start = frameId 
    cap.release()
    return (recap_stops, intro_id - 25, intro_stop, sneakpeek_start)

In [9]:
# The MSE function to calculate the similarity between prototypical recap, sneak peed, intro song frames 
#and the current frame
def mse(imageA, imageB): 
    # the 'Mean Squared Error' between the two images is the
    # sum of the squared difference between the two images;
    # NOTE: the two images must have the same dimension
    imageA = cv2.cvtColor(imageA, cv2.COLOR_BGR2GRAY)
    imageB = cv2.cvtColor(imageB, cv2.COLOR_BGR2GRAY)
    err = np.sum((imageA.astype("float") - imageB.astype("float")) ** 2)
    err /= float(imageA.shape[0] * imageA.shape[1])
    
    if err >= 0 and err < 550:
        return True
    return False 
    # return the MSE, the lower the error, the more "similar"
    # the two images are
    # return err

In [7]:
# joining it all together: path_video - is the path to our mp4 GTST files, path_subtitles - the path to the subtitles
# path_scenes and path_aligner - are folders for the metadata to be saved to.
def main_preprocessor(path_video, path_subtitles, path_scenes, path_aligner):
    
    for filepath in glob.iglob('%s/*.mp4' % path_video):
        episode_id = filepath.split('/')[-1].split('_')[0]
        video_name = filepath.split('/')[-1][:-4]
        if episode_id not in filtering:
            scene_path = os.path.join(path_scenes, episode_id)
            print('starting shot detection')
            scenes = shot_detector(filepath, scene_path)
            subtitles = pd.read_csv('%s/gtst_episodes_5491_5710.csv' % path_subtitles, header=0, sep=';')
            replaced_id = episode_id[:-1] + 'T'
            episode_subs = subtitles.loc[subtitles['tapeId']  == replaced_id]
            list_scenes_path = '%s/%s-Scenes.csv' % (scene_path, video_name)
            print(list_scenes_path)
            list_scenes = pd.read_csv(list_scenes_path, header=1, sep=',')
            print('starting noise detection')
            noise = recap_intro_sneak(episode_subs, filepath)
            print('starting alignment')
            aligned_df = aligner(list_scenes, episode_subs, noise)
            aligned_df.to_csv('%s/%s.csv' % (path_aligner, episode_id))
            print('the episode is complete')
    return 'the job is done'

In [57]:
main_preprocessor('video', 'subs', 'list_scenes', 'alignment')

starting shot detection
[PySceneDetect] Loaded 1 video, framerate: 25.00 FPS, resolution: 1920 x 1080
[PySceneDetect] Downscale factor set to 6, effective resolution: 320 x 180
[PySceneDetect] Scene list CSV file name format:
  $VIDEO_NAME-Scenes.csv
[PySceneDetect] Scene list output directory set:
  list_scenes/248171H1
[PySceneDetect] Detecting scenes...
100%|████████████████████████████████| 33983/33983 [03:50<00:00, 147.55frames/s]
[PySceneDetect] Processed 33983 frames in 230.3 seconds (average 147.55 FPS).
[PySceneDetect] Detected 286 scenes, average shot length 4.8 seconds.
[PySceneDetect] Writing scene list to CSV file:
  list_scenes/248171H1/248171H1_Goede_Tijden_Slechte_Tijden_S39_E5707-Scenes.csv
[PySceneDetect] Scene List:
-----------------------------------------------------------------------
 | Scene # | Start Frame |  Start Time  |  End Frame  |   End Time   |
-----------------------------------------------------------------------
 |      1  |           0 | 00:00:00.000 

  00:00:05.360,00:00:07.160,00:00:08.560,00:00:12.000,00:00:13.840,00:00:15.960,00:00:19.120,00:00:22.280,00:00:23.160,00:00:24.880,00:00:27.320,00:00:36.160,00:00:51.040,00:00:55.920,00:01:00.240,00:01:07.480,00:01:11.440,00:01:24.160,00:01:29.800,00:01:31.640,00:01:34.200,00:01:35.560,00:01:37.760,00:01:39.920,00:01:41.920,00:01:43.720,00:01:45.480,00:01:49.120,00:01:51.120,00:01:53.120,00:01:54.840,00:01:56.840,00:01:58.480,00:02:00.400,00:02:02.120,00:02:03.840,00:02:05.280,00:02:14.520,00:02:16.320,00:02:24.360,00:02:26.040,00:02:32.040,00:02:36.560,00:02:43.040,00:02:47.200,00:02:49.920,00:02:52.160,00:03:01.120,00:03:07.280,00:03:11.720,00:03:15.680,00:03:17.720,00:03:20.440,00:03:26.120,00:03:35.640,00:03:40.720,00:03:51.600,00:03:53.920,00:03:56.960,00:03:59.120,00:04:08.280,00:04:15.960,00:04:19.280,00:04:24.440,00:04:28.320,00:04:41.360,00:04:46.080,00:04:51.600,00:04:57.000,00:04:58.720,00:05:00.080,00:05:01.920,00:05:07.200,00:05:10.840,00:05:17.120,00:05:24.320,00:05:29.0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys


the episode is complete
starting shot detection
[PySceneDetect] Loaded 1 video, framerate: 25.00 FPS, resolution: 1920 x 1080
[PySceneDetect] Downscale factor set to 6, effective resolution: 320 x 180
[PySceneDetect] Scene list CSV file name format:
  $VIDEO_NAME-Scenes.csv
[PySceneDetect] Scene list output directory set:
  list_scenes/248093H1
[PySceneDetect] Detecting scenes...
100%|████████████████████████████████| 34189/34189 [03:50<00:00, 148.48frames/s]
[PySceneDetect] Processed 34189 frames in 230.3 seconds (average 148.48 FPS).
[PySceneDetect] Detected 271 scenes, average shot length 5.0 seconds.
[PySceneDetect] Writing scene list to CSV file:
  list_scenes/248093H1/248093H1_Goede_Tijden_Slechte_Tijden_S39_E5629-Scenes.csv
[PySceneDetect] Scene List:
-----------------------------------------------------------------------
 | Scene # | Start Frame |  Start Time  |  End Frame  |   End Time   |
-----------------------------------------------------------------------
 |      1  |    

[PySceneDetect] Comma-separated timecode list:
  00:00:06.640,00:00:12.000,00:00:14.280,00:00:19.320,00:00:19.920,00:00:20.840,00:00:21.640,00:00:22.600,00:00:23.280,00:00:25.000,00:00:26.200,00:00:27.920,00:00:28.840,00:00:33.600,00:00:34.920,00:00:38.400,00:00:42.640,00:00:45.080,00:00:47.640,00:00:49.160,00:00:51.080,00:00:53.680,00:00:57.120,00:01:00.080,00:01:01.320,00:01:07.440,00:01:13.360,00:01:15.520,00:01:17.520,00:01:19.320,00:01:22.960,00:01:24.720,00:01:26.720,00:01:28.720,00:01:30.720,00:01:32.440,00:01:34.080,00:01:36.000,00:01:37.720,00:01:39.440,00:01:40.880,00:01:59.880,00:02:09.360,00:02:23.520,00:02:28.680,00:02:34.440,00:02:36.600,00:02:40.160,00:02:50.440,00:03:02.800,00:03:07.080,00:03:11.800,00:03:32.440,00:03:42.320,00:03:46.560,00:03:49.520,00:03:52.680,00:03:56.280,00:04:04.800,00:04:15.440,00:04:21.240,00:04:27.360,00:04:31.360,00:04:33.080,00:04:37.200,00:04:41.640,00:04:48.880,00:04:50.800,00:04:55.760,00:04:59.160,00:05:03.160,00:05:05.480,00:05:13.120,00

list_scenes/248105H1/248105H1_Goede_Tijden_Slechte_Tijden_S39_E5641-Scenes.csv
starting noise detection
starting alignment
the episode is complete
starting shot detection
[PySceneDetect] Loaded 1 video, framerate: 25.00 FPS, resolution: 1920 x 1080
[PySceneDetect] Downscale factor set to 6, effective resolution: 320 x 180
[PySceneDetect] Scene list CSV file name format:
  $VIDEO_NAME-Scenes.csv
[PySceneDetect] Scene list output directory set:
  list_scenes/242043H1
[PySceneDetect] Detecting scenes...
100%|████████████████████████████████| 34371/34371 [04:09<00:00, 137.74frames/s]
[PySceneDetect] Processed 34371 frames in 249.5 seconds (average 137.74 FPS).
[PySceneDetect] Detected 347 scenes, average shot length 4.0 seconds.
[PySceneDetect] Writing scene list to CSV file:
  list_scenes/242043H1/242043H1_Goede_Tijden_Slechte_Tijden_S38_E5615-Scenes.csv
[PySceneDetect] Scene List:
-----------------------------------------------------------------------
 | Scene # | Start Frame |  Start Ti

list_scenes/242043H1/242043H1_Goede_Tijden_Slechte_Tijden_S38_E5615-Scenes.csv
starting noise detection
starting alignment
the episode is complete
starting shot detection
[PySceneDetect] Loaded 1 video, framerate: 25.00 FPS, resolution: 1920 x 1080
[PySceneDetect] Downscale factor set to 6, effective resolution: 320 x 180
[PySceneDetect] Scene list CSV file name format:
  $VIDEO_NAME-Scenes.csv
[PySceneDetect] Scene list output directory set:
  list_scenes/248140H1
[PySceneDetect] Detecting scenes...
100%|████████████████████████████████| 33793/33793 [04:08<00:00, 136.09frames/s]
[PySceneDetect] Processed 33793 frames in 248.3 seconds (average 136.09 FPS).
[PySceneDetect] Detected 335 scenes, average shot length 4.0 seconds.
[PySceneDetect] Writing scene list to CSV file:
  list_scenes/248140H1/248140H1_Goede_Tijden_Slechte_Tijden_S39_E5676-Scenes.csv
[PySceneDetect] Scene List:
-----------------------------------------------------------------------
 | Scene # | Start Frame |  Start Ti

list_scenes/248140H1/248140H1_Goede_Tijden_Slechte_Tijden_S39_E5676-Scenes.csv
starting noise detection
starting alignment
the episode is complete
starting shot detection
[PySceneDetect] Loaded 1 video, framerate: 25.00 FPS, resolution: 1920 x 1080
[PySceneDetect] Downscale factor set to 6, effective resolution: 320 x 180
[PySceneDetect] Scene list CSV file name format:
  $VIDEO_NAME-Scenes.csv
[PySceneDetect] Scene list output directory set:
  list_scenes/248174H1
[PySceneDetect] Detecting scenes...
100%|████████████████████████████████| 33088/33088 [03:57<00:00, 139.05frames/s]
[PySceneDetect] Processed 33088 frames in 238.0 seconds (average 139.05 FPS).
[PySceneDetect] Detected 279 scenes, average shot length 4.7 seconds.
[PySceneDetect] Writing scene list to CSV file:
  list_scenes/248174H1/248174H1_Goede_Tijden_Slechte_Tijden_S39_E5710-Scenes.csv
[PySceneDetect] Scene List:
-----------------------------------------------------------------------
 | Scene # | Start Frame |  Start Ti

list_scenes/248174H1/248174H1_Goede_Tijden_Slechte_Tijden_S39_E5710-Scenes.csv
starting noise detection
starting alignment
the episode is complete
starting shot detection
[PySceneDetect] Loaded 1 video, framerate: 25.00 FPS, resolution: 1920 x 1080
[PySceneDetect] Downscale factor set to 6, effective resolution: 320 x 180
[PySceneDetect] Scene list CSV file name format:
  $VIDEO_NAME-Scenes.csv
[PySceneDetect] Scene list output directory set:
  list_scenes/248155H1
[PySceneDetect] Detecting scenes...
100%|████████████████████████████████| 34144/34144 [03:56<00:00, 144.31frames/s]
[PySceneDetect] Processed 34144 frames in 236.6 seconds (average 144.31 FPS).
[PySceneDetect] Detected 326 scenes, average shot length 4.2 seconds.
[PySceneDetect] Writing scene list to CSV file:
  list_scenes/248155H1/248155H1_Goede_Tijden_Slechte_Tijden_S39_E5691-Scenes.csv
[PySceneDetect] Scene List:
-----------------------------------------------------------------------
 | Scene # | Start Frame |  Start Ti

list_scenes/248155H1/248155H1_Goede_Tijden_Slechte_Tijden_S39_E5691-Scenes.csv
starting noise detection
starting alignment
the episode is complete
starting shot detection
[PySceneDetect] Loaded 1 video, framerate: 25.00 FPS, resolution: 1920 x 1080
[PySceneDetect] Downscale factor set to 6, effective resolution: 320 x 180
[PySceneDetect] Scene list CSV file name format:
  $VIDEO_NAME-Scenes.csv
[PySceneDetect] Scene list output directory set:
  list_scenes/248136H1
[PySceneDetect] Detecting scenes...
100%|████████████████████████████████| 33410/33410 [03:40<00:00, 151.37frames/s]
[PySceneDetect] Processed 33410 frames in 220.7 seconds (average 151.37 FPS).
[PySceneDetect] Detected 353 scenes, average shot length 3.8 seconds.
[PySceneDetect] Writing scene list to CSV file:
  list_scenes/248136H1/248136H1_Goede_Tijden_Slechte_Tijden_S39_E5672-Scenes.csv
[PySceneDetect] Scene List:
-----------------------------------------------------------------------
 | Scene # | Start Frame |  Start Ti

list_scenes/248136H1/248136H1_Goede_Tijden_Slechte_Tijden_S39_E5672-Scenes.csv
starting noise detection
starting alignment
the episode is complete
starting shot detection
[PySceneDetect] Loaded 1 video, framerate: 25.00 FPS, resolution: 1920 x 1080
[PySceneDetect] Downscale factor set to 6, effective resolution: 320 x 180
[PySceneDetect] Scene list CSV file name format:
  $VIDEO_NAME-Scenes.csv
[PySceneDetect] Scene list output directory set:
  list_scenes/248161H1
[PySceneDetect] Detecting scenes...
100%|████████████████████████████████| 33736/33736 [03:04<00:00, 183.07frames/s]
[PySceneDetect] Processed 33736 frames in 184.3 seconds (average 183.06 FPS).
[PySceneDetect] Detected 303 scenes, average shot length 4.5 seconds.
[PySceneDetect] Writing scene list to CSV file:
  list_scenes/248161H1/248161H1_Goede_Tijden_Slechte_Tijden_S39_E5697-Scenes.csv
[PySceneDetect] Scene List:
-----------------------------------------------------------------------
 | Scene # | Start Frame |  Start Ti

list_scenes/248161H1/248161H1_Goede_Tijden_Slechte_Tijden_S39_E5697-Scenes.csv
starting noise detection
starting alignment
the episode is complete


'the job is done'

In [6]:
# getting the log data containing the information about start/end points of the shots and the cleaned subtitle.
# For every shot of the GTST season. This log file is called 'cleaned_subs_log.csv' in our repository
def concatination():
    subtitles = pd.read_csv('subs/gtst_episodes_5491_5710.csv', header=0, sep=';')
    video_path = '/Volumes/2TB'
    alignment_path = 'alignment'
    columns = new_test.columns.tolist()
    columns.append('episode_nr')
    columns.append('video_path')
    resulting_df = pd.DataFrame(columns=columns)
    for filepath in glob.iglob('%s/*.mp4' % video_path):
        episode_id = filepath.split('/')[-1].split('_')[0]
        video_name = filepath.split('/')[-1]
        replaced_id = episode_id[:-1] + 'T'
        s = subtitles.loc[subtitles['tapeId']  == replaced_id]
        episode_nr = s['episode_nr']
        if episode_id != '248108H1' and episode_id != '248139H1':
            aligned_df = pd.read_csv('%s/%s.csv' % (alignment_path, episode_id), index_col=0, header=0)
            aligned_df['tapeId'] = episode_id
            aligned_df['episode_nr'] = episode_nr.unique()[0]
            aligned_df['video_path'] = filepath
            resulting_df = pd.concat([resulting_df, aligned_df], ignore_index=True, sort=False)
    return resulting_df 

In [111]:
result = concatination()

In [115]:
result['index'] = result.index
result.to_csv('log_data.csv', header=True, index=False)

In [112]:
result.head()

Unnamed: 0,Scene Number,Start Frame,Start Timecode,Start Time (seconds),End Frame,End Timecode,End Time (seconds),Length (frames),Length (timecode),Length (seconds),text,tapeId,episode_nr,video_path
0,1,0,00:00:00.000,0.0,184,00:00:07.360,7.36,184,00:00:07.360,7.36,False,241919H1,5491,/Volumes/2TB/241919H1_Goede_Tijden_Slechte_Tij...
1,2,184,00:00:07.360,7.36,221,00:00:08.840,8.84,37,00:00:01.480,1.48,False,241919H1,5491,/Volumes/2TB/241919H1_Goede_Tijden_Slechte_Tij...
2,3,221,00:00:08.840,8.84,320,00:00:12.800,12.8,99,00:00:03.960,3.96,False,241919H1,5491,/Volumes/2TB/241919H1_Goede_Tijden_Slechte_Tij...
3,4,320,00:00:12.800,12.8,363,00:00:14.520,14.52,43,00:00:01.720,1.72,False,241919H1,5491,/Volumes/2TB/241919H1_Goede_Tijden_Slechte_Tij...
4,5,363,00:00:14.520,14.52,434,00:00:17.360,17.36,71,00:00:02.840,2.84,False,241919H1,5491,/Volumes/2TB/241919H1_Goede_Tijden_Slechte_Tij...


In [29]:
# cut the whole episode files into shots according to the log file
def shot_cutter():
    log_df = pd.read_csv('log_data.csv', header=0)
    write_path = '/Volumes/Personal/shots'
    for index, row in log_df.iterrows():
        if row['text'] != 'False' and str(row['index']) not in filtering:
            cmd = 'ffmpeg -i %s -c copy -ss %s -to %s %s/%d.mp4' % (row['video_path'], row['Start Timecode'], row['End Timecode'], 
                                                                           write_path, row['index'])
            !{cmd}

### Some time converters

In [10]:
# to ms
def duration_converter(time_string):
    hms = str(time_string).split(':')
    m = int(hms[1])
    s = int(hms[2].split('.')[0])
    ms = int(hms[2].split('.')[1])
    return m * 60000 + s * 1000 + ms

In [11]:
def to_seconds_converter(time_string):
    hms = str(time_string).split(':')
    m = int(hms[1])
    s = int(hms[2].split('.')[0])
    # ms = int(hms[2].split('.')[1])
    return m * 60 + s

In [12]:
# from ms to mm:ss.fff
def duration_to_string(ms):
    last = ms % 1000
    seconds = (ms // 1000) % 60
    minutes = (ms // 1000) // 60
    return '%d:%d.%d' % (minutes, seconds, last)

In [74]:
subs = pd.read_csv('new_log.csv', header=0, sep=';')
subs

Unnamed: 0,Scene Number,Start Frame,Start Timecode,Start Time (seconds),End Frame,End Timecode,End Time (seconds),Length (frames),Length (timecode),Length (seconds),text,tapeId,episode_nr,video_path,index
0,1.0,0.0,00:00:00.000,0.00,184.0,00:00:07.360,7.36,184.0,00:00:07.360,7.36,False,241919H1,5491,/Volumes/2TB/241919H1_Goede_Tijden_Slechte_Tij...,0
1,2.0,184.0,00:00:07.360,7.36,221.0,00:00:08.840,8.84,37.0,00:00:01.480,1.48,False,241919H1,5491,/Volumes/2TB/241919H1_Goede_Tijden_Slechte_Tij...,1
2,3.0,221.0,00:00:08.840,8.84,320.0,00:00:12.800,12.80,99.0,00:00:03.960,3.96,False,241919H1,5491,/Volumes/2TB/241919H1_Goede_Tijden_Slechte_Tij...,2
3,4.0,320.0,00:00:12.800,12.80,363.0,00:00:14.520,14.52,43.0,00:00:01.720,1.72,False,241919H1,5491,/Volumes/2TB/241919H1_Goede_Tijden_Slechte_Tij...,3
4,5.0,363.0,00:00:14.520,14.52,434.0,00:00:17.360,17.36,71.0,00:00:02.840,2.84,False,241919H1,5491,/Volumes/2TB/241919H1_Goede_Tijden_Slechte_Tij...,4
5,6.0,434.0,00:00:17.360,17.36,490.0,00:00:19.600,19.60,56.0,00:00:02.240,2.24,False,241919H1,5491,/Volumes/2TB/241919H1_Goede_Tijden_Slechte_Tij...,5
6,7.0,490.0,00:00:19.600,19.60,524.0,00:00:20.960,20.96,34.0,00:00:01.360,1.36,False,241919H1,5491,/Volumes/2TB/241919H1_Goede_Tijden_Slechte_Tij...,6
7,8.0,524.0,00:00:20.960,20.96,590.0,00:00:23.600,23.60,66.0,00:00:02.640,2.64,False,241919H1,5491,/Volumes/2TB/241919H1_Goede_Tijden_Slechte_Tij...,7
8,9.0,590.0,00:00:23.600,23.60,615.0,00:00:24.600,24.60,25.0,00:00:01.000,1.00,False,241919H1,5491,/Volumes/2TB/241919H1_Goede_Tijden_Slechte_Tij...,8
9,10.0,615.0,00:00:24.600,24.60,687.0,00:00:27.480,27.48,72.0,00:00:02.880,2.88,False,241919H1,5491,/Volumes/2TB/241919H1_Goede_Tijden_Slechte_Tij...,9


## Mute the audio cue

In [None]:
video_path = 'shots/*.mp4'
for video in glob.iglob(video_path):
    video_index = int(video.split('/')[-1].split('.')[0])
    if video_index <= 8561:
        new_path = 'muted/%d.mp4'% video_index
        mute_audio(video, new_path)
        print(video_index)

In [None]:
def mute_audio(video_path, new_path):
    cmd = 'ffmpeg -i %s -an -vcodec copy %s' % (video_path, new_path)
    !{cmd}