# Video timeslots

## With the movie selected, retrieve the timestamps from the subtitle file

In [29]:
import pandas as pd
import pysrt
from io import BytesIO

In [2]:
# Load the films table from a local pickle; later cells read its 'Film' and 'Subs' columns.
# NOTE(review): unpickling can execute arbitrary code — only load a pickle file you trust.
df = pd.read_pickle('films.pkl')

In [21]:
# Take the subtitle text stored for 'The Town' (first matching row) and drop
# its first three characters — the "header".
# NOTE(review): presumably a byte-order-mark remnant; confirm against the data.
is_the_town = df['Film'] == 'The Town'
subtitles = df.loc[is_the_town, 'Subs'].iloc[0][3:]

In [36]:
# Parse the subtitle text with pysrt; the items are iterated below via their .text, .start and .end attributes.
subs = pysrt.from_string(subtitles)

In [68]:
# One "start --> end" string for every subtitle whose text contains "look"
# (case-insensitive substring match, so "looking", "looks", ... also count).
timeslots = [
    ' '.join((str(cue.start), "-->", str(cue.end)))
    for cue in subs
    if "look" in cue.text.lower()
]

In [72]:
# Save the matched time ranges, one per line (no trailing newline), so they
# can be corrected by hand before clipping.
looks_text = '\n'.join(timeslots)
with open("looks.txt", "w") as out_file:
    out_file.write(looks_text)

## With an edited timestamp file, clip each scene and append them together

Some of the clips sit right next to each other, or are incorrectly placed to match the audio. This is a manual correction step that is not easily automated.

In [9]:
import subprocess

In [10]:
# Parse the hand-edited timestamp file into two parallel lists of clip start
# and end times. Every non-blank line is expected to have the form
# "HH:MM:SS,mmm --> HH:MM:SS,mmm".
# NOTE(review): this reads "look.txt" while the earlier cell writes
# "looks.txt" — presumably the manually corrected copy; confirm the file name.
start = []
end = []

with open("look.txt", "r") as f:
    lines = f.readlines()

for line in lines:
    # A hand-edited file easily picks up blank lines; skip them rather than
    # letting the two-way unpack below raise ValueError.
    if not line.strip():
        continue
    a, b = line.split('-->')
    # Subtitle timestamps use a comma before the milliseconds; ffmpeg's time
    # syntax uses a period.
    start.append(a.strip().replace(',', '.'))
    end.append(b.strip().replace(',', '.'))

In [11]:
# Source video the clips are cut from; passed to ffmpeg as the -i input below.
video = 'The Town.mp4'

Each clip is re-encoded rather than copied from the source streams, so this step will take a while.

In [13]:
# Index of the first clip to cut. Raise it to continue a partially completed
# run without redoing the clips before that index.
resume = 0

In [14]:
# Cut one re-encoded clip per (start, end) pair into clips/<index>.mp4,
# beginning at index `resume`.
# Assumes the clips/ directory already exists — ffmpeg does not create it.
# Raises subprocess.CalledProcessError as soon as ffmpeg fails, so a bad clip
# stops the run; the last printed index is the value to put in `resume`.
for i in range(resume, len(start)):
    # ffmpeg applies options to the NEXT file named on the command line, so the
    # codec flags have to precede the output path; placed after it they are
    # ignored as trailing options.
    # -y overwrites an existing (possibly partial) clip so a resumed run does
    # not stop at ffmpeg's overwrite prompt.
    command = ['ffmpeg', '-y', '-i', video, '-ss', start[i], '-to', end[i],
               '-c:v', 'h264', '-c:a', 'aac', 'clips/{}.mp4'.format(i)]
    print(i, start[i], end[i])
    # Pass the argument list directly (no shell=True): with a list and
    # shell=True, POSIX runs only 'ffmpeg' and drops every other argument.
    subprocess.run(command, check=True)

0 00:02:12.549 00:02:16.804
1 00:03:49.938 00:03:52.191
2 00:06:31.641 00:06:34.565
3 00:10:34.717 00:10:36.719
4 00:15:30.513 00:15:34.859
5 00:15:49.198 00:15:50.370
6 00:15:56.038 00:15:57.881
7 00:19:10.107 00:19:11.905
8 00:22:42.778 00:22:45.530
9 00:30:00.048 00:30:03.643
10 00:39:23.611 00:39:25.579
11 00:40:51.282 00:40:54.832
12 00:41:28.027 00:41:31.076
13 00:43:01.245 00:43:03.373
14 00:44:15.820 00:44:18.573
15 00:49:34.680 00:49:38.560
16 00:51:55.863 00:51:59.618
17 00:54:07.911 00:54:10.790
18 00:59:20.223 00:59:23.147
19 01:02:45.887 01:02:47.855
20 01:02:51.977 01:02:53.650
21 01:02:58.316 01:03:01.115
22 01:03:01.278 01:03:06.917
23 01:03:13.581 01:03:16.801
24 01:03:29.973 01:03:32.317
25 01:03:55.457 01:04:00.088
26 01:04:09.179 01:04:13.104
27 01:05:42.355 01:05:44.778
28 01:15:03.582 01:15:07.257
29 01:15:26.689 01:15:30.614
30 01:18:13.000 01:18:15.358
31 01:21:31.963 01:21:36.013
32 01:26:54.827 01:26:57.330
33 01:27:25.000 01:27:27.322
34 01:28:15.616 01:28:20

FFmpeg needs to read from a file to append the clips together. Here we can reuse the encoding from each clip for a significant performance improvement.

In [15]:
# Build the list file for the concatenation step: one "file '<path>'" line per
# clip, in clip order.
clip_entries = ["file 'clips/{}.mp4'\n".format(idx) for idx in range(len(start))]
with open('clips.txt', 'w') as list_file:
    list_file.writelines(clip_entries)

In [16]:
# Join the clips listed in clips.txt into output.mp4. '-c copy' reuses the
# streams already encoded in each clip instead of encoding again.
command = ['ffmpeg', '-f', 'concat', '-i', 'clips.txt', '-c', 'copy', 'output.mp4']
# Pass the argument list directly (no shell=True): with a list and shell=True,
# POSIX runs only 'ffmpeg' and drops every other argument. check=True raises
# subprocess.CalledProcessError if ffmpeg exits with a non-zero status.
subprocess.run(command, check=True)

CompletedProcess(args=['ffmpeg', '-f', 'concat', '-i', 'clips.txt', '-c', 'copy', 'output.mp4'], returncode=0)