# Make subclips from 4 camera recordings

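Copy this notebook into a folder that contains exactly one recording session. The folder should contain one video file per camera, named after the camera angle (`front.MOV`, `below.mp4`, `left.mp4`, `right.mp4`), and a tab-separated `timestamps.txt` file in the following format: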

    WORD    START   END
    free.MP4	44732.4020864583	44732.4021164352
    help.MP4	44732.4024060185	44732.4024640046


#### Import Libraries  & define functions to concatenate frames for inline display

In [None]:
# One-time setup via Homebrew: install the ffmpeg binary.
# NOTE(review): presumably needed by moviepy to decode/encode the videos — skip if already installed.
!brew install ffmpeg

In [1]:
import csv
import glob
import os

import cv2
import moviepy.editor as mpy
import numpy as np
import pandas as pd
from IPython.display import Video, clear_output, display
from PIL import Image
from tqdm.auto import tqdm


def concatFrames(front, below, left, right,height=400, inter=cv2.INTER_AREA):
    """Scale four frames to a common height and join them side by side.

    Each frame is resized to ``height`` rows; its new width is derived from
    ``frame.shape[1] / frame.shape[0]`` so the aspect ratio is kept (truncated
    to an integer number of pixels).

    Args:
        front, below, left, right: image arrays exposing ``.shape`` as
            (rows, cols, ...); they are concatenated in this order.
        height: target height in pixels of every resized frame.
        inter: OpenCV interpolation flag passed to ``cv2.resize``.

    Returns:
        The result of ``cv2.hconcat`` over the four resized frames.
    """
    scaled = []
    for frame in (front, below, left, right):
        rows, cols = frame.shape[0], frame.shape[1]
        new_width = int(height/rows*cols)
        scaled.append(cv2.resize(frame, (new_width, height), interpolation=inter))

    return cv2.hconcat(scaled)

def get_frames(vid, t, first_frame, step_size, count=8, height=300):
    """Return a horizontal strip of ``count`` numbered frames taken from ``vid``.

    Frames are sampled at ``t + f / vid.fps`` for
    ``f = first_frame, first_frame + step_size, ...`` and each one has its
    frame offset ``f`` drawn onto it, so neighbouring frames can be told apart.

    Args:
        vid: clip object providing ``get_frame(time)``, ``fps`` and ``size``
            (assumed to be (width, height) — TODO confirm against moviepy).
        t: base time passed to ``vid.get_frame``.
        first_frame: frame offset of the first sampled frame.
        step_size: frame offset between consecutive samples.
        count: number of frames in the strip.
        height: height in pixels of the returned strip.

    Returns:
        The concatenated frames, resized with ``cv2.INTER_AREA`` so that the
        strip is ``height`` pixels tall.
    """
    vid_width, vid_height = vid.size[0], vid.size[1]
    # label size grows with the video width (12 at a width of 1080)
    font_scale = int(vid_width/1080*12)

    labelled = []
    for frame_num in range(first_frame, first_frame+step_size*count, step_size):
        # copy so the text is not drawn onto the array returned by the clip
        frame = vid.get_frame(t + frame_num/vid.fps).copy()
        labelled.append(cv2.putText(
            frame, str(frame_num), (200,300), cv2.FONT_HERSHEY_TRIPLEX,
            font_scale, (125,63,250), 5,
        ))

    strip = cv2.hconcat(labelled)
    strip_width = int(count*height/vid_height*vid_width)
    return cv2.resize(strip, (strip_width, height), interpolation=cv2.INTER_AREA)

#### Paths

In [None]:
personNumber  = '255'
# sessionNumber = '4'
# personName    = ''
cameras = ['front', 'below', 'left', 'right']

# the source videos are read from the current working directory
videosFolder = os.path.abspath(os.curdir)
# the cut clips go to ../../PSL_Clips/HFAD_Book1/Person<personNumber>
clipsFolder  = os.path.abspath(os.path.join(
    os.curdir,'..','..','PSL_Clips','HFAD_Book1','Person'+str(personNumber)
))
if not os.path.exists(clipsFolder):
    print('clipsFolder does not exist. making one...')
    # exist_ok guards against the folder appearing between the check and the call
    os.makedirs(clipsFolder, exist_ok=True)

# timestamps.txt format (tab-separated):
# WORD    START   END
# ح.MP4	44732.4020864583	44732.4021164352
# حج.MP4	44732.4024060185	44732.4024640046
timestamps = pd.read_csv(os.path.join(videosFolder, 'timestamps.txt'), sep='\t')
if timestamps.empty:
    raise ValueError('timestamps.txt contains no rows')
# START of the first row; the other cells measure every sign relative to it
first_timestamp = timestamps.at[0,'START']
# print('got', len(timestamps), 'timestamps')
timestamps

#### read videos

In [None]:
# DEFINE PATHS
# one file per camera, in the order front, below, left, right
videoPaths = [
    os.path.join(videosFolder,filename)
    for filename in ["front.MOV", "below.mp4", "left.mp4", "right.mp4"]
]
# explicit error instead of `assert`: asserts are stripped under -O and do not say which file is missing
missingPaths = [vp for vp in videoPaths if not os.path.exists(vp)]
if missingPaths:
    raise FileNotFoundError('missing video file(s): ' + ', '.join(missingPaths))

# LOAD
videos = []
for p in videoPaths:
    vid = mpy.VideoFileClip( p )
    # fix bug in moviepy: vertical videos have wrong shape
    if vid.rotation in (90, 270):
        vid = vid.resize(vid.size[::-1])
        vid.rotation = 0
    videos.append(vid)

# TRANSFORM VIDEO
# fix bug in moviepy: rotate with +0.0001 and then crop
videos[1] = videos[1].add_mask().rotate( 90.0001) .crop(x1=1,y1=1,x2=videos[1].size[1]+1,y2=videos[1].size[0]+1)
# videos[2].fx(mpy.vfx.speedx, 1)
# videos[3].set_fps(30)
# videos[1]=videos[1].fx(mpy.vfx.mirror_x)
# img_clip = mpy.ImageClip('../../../img6.png').set_pos(('left', 'top'))
# clip = mpy.CompositeVideoClip([clip, img_clip.set_duration(clip.duration)])

# SANITY CHECK
# show the middle frame of the first two videos side by side; each video is
# sampled at its OWN midpoint so a shorter second video cannot be read past its end
display(Image.fromarray(cv2.hconcat([
    cv2.resize(vid.get_frame(vid.duration/2),(225,400),interpolation=cv2.INTER_AREA)
    for vid in videos[:2]
])))
print(videoPaths)
print([vid.duration for vid in videos])
print([vid.fps      for vid in videos])
print([vid.size     for vid in videos])

#### Determine the synchronising point in every video

In [None]:
# Synchronisation time of every camera: the time at 4 frames before the dark
# frame (when lights were turned off, measured with Avidemux) plus an
# adjustment given in frames, which is divided by that video's fps.
sync_time = [
    dark_time + adjust_frames / videos[cam].fps
    for cam, (dark_time, adjust_frames) in enumerate([
        (7.873,  0),  # front
        (45.981, 0),  # below
        (4.477,  0),  # left
        (20.885, 0),  # right
    ])
]

In [None]:
# SYNC CHECK
# one strip of consecutive frames per camera, each starting at that camera's
# sync time; the strips are stacked vertically for visual comparison
front_frames, below_frames, left_frames, right_frames = [
    get_frames(videos[cam], sync_time[cam], first_frame=0, step_size=1)
    for cam in range(4)
]

display(Image.fromarray(np.vstack([front_frames, below_frames, left_frames, right_frames])))

#### Display Start & End Time of any sign & test subclip

In [None]:
# Offset from a camera's sync time to the start of the first sign:
#   start_of_first_sign - dark_frame + adjustment
# (the adjustment is the value produced by the "startTime_delta ADJUSTMENT" cell further down)
start_TimeDelta = (43.471 - 20.885) - 0.1926153846153846

In [None]:
# SANITY CHECK
signNum = 10

# For the START and then the END of sign `signNum`, grab one frame per camera.
# 24*60*60 scales the timestamp difference (NOTE(review): timestamps look like
# fractional days — confirm) before it is added to the camera's sync time.
startframes, endframes = [
    [
        videos[cam].get_frame(
            cam_sync + start_TimeDelta + 24*60*60*(timestamps.at[signNum, column] - first_timestamp)
        )
        for cam, cam_sync in enumerate(sync_time)
    ]
    for column in ('START', 'END')
]

print(timestamps.at[signNum,'WORD'])
# top row: start frames of all cameras, bottom row: end frames
display(Image.fromarray(cv2.vconcat([
    concatFrames(*frames, height=400) for frames in (startframes, endframes)
])))

In [None]:
# Write a sample of test clips from one camera so the start/end alignment can
# be checked by eye before cutting everything.
cameraNum = 0 # 0,1,2,3 #front, below, left, right
# stride between sampled signs; max(1, ...) keeps range() valid when timestamps is empty
clipStep = max(1, round(2*len(timestamps)**0.5))

try:
    for clipNum in tqdm(range(0, len(timestamps), clipStep)): # [5,25,45,65,85]: #
        test_clip = videos[cameraNum].subclip(  sync_time[cameraNum]+start_TimeDelta + 24*60*60*(timestamps['START'].values[clipNum]-first_timestamp) ,
                                                sync_time[cameraNum]+start_TimeDelta + 24*60*60*(timestamps['END'  ].values[clipNum]-first_timestamp) )
        try:
            print(clipNum, timestamps.at[clipNum,'WORD'], f'{test_clip.duration:.2f} sec')

            test_clip.write_videofile(videosFolder+"/test_clip"+str(clipNum)+timestamps.at[clipNum,'WORD']+".mp4", audio=False, threads = 8, verbose=False,logger=None) #test_clip.ipython_display()
        finally:
            # close the subclip even if writing fails or the run is interrupted
            test_clip.close()

        clear_output(wait=True)

except KeyboardInterrupt:
    # deliberate: interrupting the cell simply stops writing further test clips
    pass

In [None]:
# startTime_delta ADJUSTMENT (delay at start - delay at end)
# half of the mean of the listed delays; the displayed value is the
# adjustment term used in start_TimeDelta
np.array([
    -0.3, -0.434, -0.234, -0.6, -0.6, -0.201, -0.7,
    -0.402, -0.334, -0.367, -0.367, -0.268, -0.201,
]).mean() / 2

### Loop All

In [None]:
# find index of a word: show the row(s) whose WORD column equals the given file name
timestamps.loc[timestamps["WORD"] == "کیسے.MP4"]

In [None]:
# check word at index: show the row at position 12 as a one-row table
timestamps.iloc[[12], :]

In [None]:
# Cut every sign out of every camera recording and write it to clipsFolder as
# <word>_person<personNumber>_<camera>.mp4
signNumber_tqdm = tqdm(range(len(timestamps)))
for signNum in signNumber_tqdm:
    # offsets of this sign relative to the first sign, scaled by 24*60*60
    # (NOTE(review): timestamps look like fractional days — confirm)
    signStart = 24*60*60*(timestamps.at[signNum,'START']-first_timestamp)
    signEnd   = 24*60*60*(timestamps.at[signNum,'END'  ]-first_timestamp)
    word = timestamps['WORD'].values[signNum].replace('.MP4','')

    for cameraNum, camera in enumerate(cameras):
        filename = f"{word}_person{personNumber}_{camera}.mp4"
        signNumber_tqdm.set_description(filename)

        # refuse to overwrite an existing clip; checked before the subclip is
        # created so nothing is left open when we abort
        filepath = os.path.join(clipsFolder, filename)
        if os.path.exists(filepath):
            raise FileExistsError(filename+' already exists!')

        clip = videos[cameraNum].subclip(
            sync_time[cameraNum]+start_TimeDelta+ signStart ,
            sync_time[cameraNum]+start_TimeDelta+ signEnd
        )
        try:
            clip.write_videofile(filepath, audio=False, threads=10, verbose=False,logger=None)
        finally:
            # close the subclip even when encoding fails
            clip.close()

In [None]:
# close every source video once all clips have been written
# (plain loop: a list comprehension would only build a throwaway list of None)
for vid in videos:
    vid.close()

# verify start and end frames of clips & recut

In [37]:
base_dir = '/Volumes/GoogleDrive/Other computers/My Laptop/Pakistani Sign Language Translation with 3D Pose Estimation and Attention Models/Datasets/Video_Data/PSL_Clips/HFAD-Book1/person151'

# Unique labels (the part of the file name before the first underscore) of the
# .mp4 files directly in base_dir and of those in its person951 sub-folder.
labels151, labels951 = (
    list({clip_name.split('_')[0] for clip_name in os.listdir(folder) if '.mp4' in clip_name})
    for folder in (base_dir, base_dir+'/person951')
)
# counts of both label sets and the labels they have in common
len(labels151), len(labels951), set(labels151).intersection(labels951)

(586, 227, set())

In [22]:
# Write one contact-sheet JPEG per label, laid out as:
# cam1_start, cam2_start, cam3_start, cam4_start
# cam1_mid,   cam2_mid,   cam3_mid,   cam4_mid
# cam1_end,   cam2_end,   cam3_end,   cam4_end

def _read_key_frames(path):
    """Return the first, the middle and the third-from-last frame of the video at *path*.

    The capture is always released, also when reading fails.

    Raises:
        IOError: if any of the three frames could not be read.
    """
    cap = cv2.VideoCapture(path)
    try:
        _, first_frame = cap.read()
        frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        cap.set(cv2.CAP_PROP_POS_FRAMES, int(frame_count/2))
        _, mid_frame = cap.read()
        # clamp at 0 so clips shorter than 3 frames do not seek to a negative position
        cap.set(cv2.CAP_PROP_POS_FRAMES, max(frame_count-3, 0))
        _, last_frame = cap.read()
    finally:
        cap.release()

    if first_frame is None or mid_frame is None or last_frame is None:
        raise IOError(f'could not read frames from {path}')
    return first_frame, mid_frame, last_frame


for word in (bar:=tqdm(labels951[:])):
    sheet_path = f'../../../951/{word}.jpg'
    # skip labels whose sheet was already written by an earlier run
    if os.path.exists(sheet_path):
        continue

    try:
        # rows[0] = start frames, rows[1] = middle frames, rows[2] = end frames
        rows = ([], [], [])
        for cam in ['front', 'below', 'left', 'right']:
            filename = f'{word}_person151_{cam}.mp4'
            bar.set_description(filename)

            frames = _read_key_frames(os.path.join(base_dir, "person951", filename))
            if cam != 'below':
                # resize 0.333x by keeping every third pixel
                frames = [frame[::3,::3] for frame in frames]

            for row, frame in zip(rows, frames):
                row.append(frame)

        # join the cameras horizontally, keep pixel rows 64..511 of each strip,
        # then stack the three strips vertically
        final = np.concatenate(
            [np.concatenate(row, axis=1)[64:512] for row in rows],
            axis=0,
        )
        final = cv2.cvtColor(final, cv2.COLOR_BGR2RGB)
        Image.fromarray(final).save(sheet_path)

    except Exception as e:
        # best effort: report the label and the reason, then go on with the next label
        print('error', word, repr(e))

آندھی_person151_right.mp4: 100%|██████████| 291/291 [00:06<00:00, 42.57it/s]    


In [166]:
# Re-cut the four camera clips of one label, trimming `start` seconds from the
# beginning and `end_` seconds from the end; results are written to ../../../temp/
label = 'بےوقوف'
start = 0.35 # sec after start
end_ = 0 # sec before end
for cam in (bar:=tqdm(['front', 'below', 'left', 'right'])):
    filename = f'{label}_person151_{cam}.mp4'
    bar.set_description(filename)
    fpath = os.path.join(base_dir, "person951", filename)

    clip = mpy.VideoFileClip(fpath)
    subclip = None
    try:
        subclip = clip.subclip(start, clip.duration-end_)
        subclip.write_videofile(f'../../../temp/{filename}', audio=False, threads=10, verbose=False,logger=None)
    finally:
        # close both clips even when trimming or encoding fails
        clip.close()
        if subclip is not None:
            subclip.close()

بےوقوف_person151_right.mp4: 100%|██████████| 4/4 [00:11<00:00,  2.93s/it]
