# Video timeslots

## With the movie selected, retrieve the timestamps from the subtitle file

In [1]:
import pandas as pd
import pysrt
from io import BytesIO

In [2]:
# Load the pickled films table. The cells below read its 'Film' and 'Subs'
# columns.
# NOTE(review): unpickling can execute arbitrary code, so only load a
# films.pkl that comes from a trusted source.
df = pd.read_pickle('films.pkl')

In [3]:
# Take the subtitle text of the first row whose 'Film' is 'The Town', then
# drop its first three characters (the header).
# NOTE(review): what those three characters are is not visible here -- confirm.
town_rows = df.loc[df['Film'] == 'The Town']
raw_subs = town_rows['Subs'].iloc[0]
subtitles = raw_subs[3:]

In [4]:
# Parse the subtitle text into subtitle items; the loop in the next cell
# reads each item's .text, .start and .end.
subs = pysrt.from_string(subtitles)

In [5]:
# One "start --> end" string for every subtitle whose text contains "look".
# The match is a case-insensitive substring test, so "looking" or "Looks"
# qualify as well.
timeslots = [
    ' '.join((str(sub.start), "-->", str(sub.end)))
    for sub in subs
    if "look" in sub.text.lower()
]

In [6]:
# Save the draft timeslots, one per line and without a trailing newline, so
# they can be corrected by hand.
draft_text = '\n'.join(timeslots)
with open("look_draft.txt", "w") as draft_file:
    draft_file.write(draft_text)

## With an edited timestamp file, clip each scene and append them together

Some of the clips sit right next to each other, or are incorrectly placed to match the audio. This is a manual correction step that is not easily automated.

In [7]:
import subprocess

In [8]:
# Parse the hand-edited timestamp file into two parallel lists: start[i] and
# end[i] are the boundaries of clip i. Every non-blank line must have the
# form "<start> --> <end>".
start = []
end = []

with open("look.txt", "r") as f:
    lines = f.readlines()

for line in lines:
    # A blank line (easy to leave behind when editing by hand) contains no
    # '-->' and would abort the unpacking below with a ValueError. Skipped
    # lines do not consume a clip index.
    if not line.strip():
        continue
    a, b = line.split('-->')
    # The subtitle timestamps use ',' before the milliseconds; ffmpeg's time
    # syntax expects '.'.
    start.append(a.strip().replace(',', '.'))
    end.append(b.strip().replace(',', '.'))

In [9]:
# Source video the clips are cut from; handed to ffmpeg as the '-i' input.
video = 'The Town.mp4'

Since each clip is re-encoded rather than copied from the source streams, this step will take a while.

In [10]:
# Index of the first timeslot to encode: the clip loop below iterates over
# range(resume, len(start)). Set to 0 to encode every clip from scratch.
# Clips below this index are not re-created, yet clips.txt later lists every
# index from 0, so those files must already exist in clips/.
resume = 48

In [11]:
# Cut one re-encoded clip per timeslot into clips/<i>.mp4, starting at index
# `resume` so an interrupted run can continue without redoing finished clips.
for i in range(resume, len(start)):
    # ffmpeg applies options to the next file named on the command line, so
    # the codec flags have to come before the output path; placed after it
    # they are trailing options and are not applied to the clip.
    command = ['ffmpeg', '-i', video, '-ss', start[i], '-to', end[i],
               '-c:v', 'h264', '-c:a', 'aac', 'clips/{}.mp4'.format(i)]
    print(i, start[i], end[i])
    # Pass the argument list directly instead of using shell=True: with a
    # shell on POSIX only 'ffmpeg' is executed and the remaining list items
    # become arguments of the shell itself, so nothing is encoded.
    result = subprocess.run(command)
    if result.returncode != 0:
        # Report the failure but keep going, as the loop did before.
        print('ffmpeg exited with code {} for clip {}'.format(result.returncode, i))

To append the clips together, FFmpeg needs to read the list of clips from a text file. Here we can copy the already-encoded streams of each clip instead of re-encoding, which is a significant performance improvement.

In [12]:
# Write the list file for ffmpeg's concat step: one "file '<path>'" line per
# clip index, in order, covering every index from 0 up to len(start) - 1.
clip_entries = ["file 'clips/{}.mp4'\n".format(idx) for idx in range(len(start))]
with open('clips.txt', 'w') as list_file:
    list_file.writelines(clip_entries)

In [13]:
# Join the clips listed in clips.txt into output.mp4 using ffmpeg's concat
# input format; '-c copy' copies the existing streams instead of re-encoding.
command = ['ffmpeg', '-f', 'concat', '-i', 'clips.txt', '-c', 'copy', 'output.mp4']
# Pass the argument list directly instead of using shell=True: with a shell
# on POSIX only 'ffmpeg' is executed and every other argument is dropped.
subprocess.run(command)