In [None]:
import json
import os
import shutil
import subprocess
import tempfile
from multiprocessing import Pool

import cv2
import ffmpeg
import numpy as np
from scipy.io import wavfile
from scipy.signal import fftconvolve


#### Load the videos

In [None]:
# Recordings that are synchronized against each other below; the first entry
# serves as the reference signal in synchronize_videos.
video_files = [
    '../Videos/131_0721_Ben_caru+SG_attack_cam_A.mp4',
    '../Videos/131_0721_Ben_caru+SG_attack_cam_B.mp4',
    '../Videos/131_0721_Ben_caru+SG_attack_cam_C.mp4',
]

#### Synchronize the videos

In [None]:
def parse_metadata(video_file):
    """Read the frame rate and audio parameters of a video with ``ffprobe``.

    The ffprobe JSON report is read directly from the process' stdout, so no
    temporary file is written next to the video, and the command is passed as
    an argument list so paths containing spaces or shell metacharacters work.

    Parameters
    ----------
    video_file : str
        Path to the video file.

    Returns
    -------
    dict
        ``{'fps': float, 'sample_rate': int, 'channels': int}`` taken from the
        first video stream (``r_frame_rate``) and the first audio stream.

    Raises
    ------
    subprocess.CalledProcessError
        If ffprobe exits with a non-zero status (e.g. unreadable file).
    ValueError
        If the file has no video stream or no audio stream.
    """
    probe = subprocess.run(['ffprobe', '-v', 'quiet', '-print_format', 'json',
                            '-show_format', '-show_streams', video_file],
                           stdout=subprocess.PIPE, check=True)
    metadata = json.loads(probe.stdout)
    streams = metadata.get('streams', [])
    # Select streams by type rather than by position: the container does not
    # guarantee that the video stream comes first and the audio stream second.
    video_stream = next((s for s in streams if s.get('codec_type') == 'video'), None)
    audio_stream = next((s for s in streams if s.get('codec_type') == 'audio'), None)
    if video_stream is None or audio_stream is None:
        raise ValueError('{} must contain a video and an audio stream'.format(video_file))
    # r_frame_rate is a fraction string such as '30000/1001'
    fps = np.array(video_stream['r_frame_rate'].split('/')).astype(int)
    channels = int(audio_stream['channels'])
    sample_rate = int(audio_stream['sample_rate'])
    fps = fps[0] / fps[1]
    return {'fps': fps,
            'sample_rate': sample_rate,
            'channels': channels}

def extract_audio(video_file, start, duration):
    """Decode a section of a video's audio track into a mono signal.

    ffmpeg converts the audio to 16-bit PCM WAV inside a private temporary
    directory (so an existing ``<video>.wav`` next to the source is never
    overwritten or deleted), which is then read with ``scipy.io.wavfile``.

    Parameters
    ----------
    video_file : str
        Path to the video file.
    start : int or float
        Start position passed to ffmpeg's ``-ss``.
    duration : int or float
        Length passed to ffmpeg's ``-t``.

    Returns
    -------
    numpy.ndarray
        1-D signal. Multi-channel audio is averaged over channels; mono audio
        is returned as read.

    Raises
    ------
    subprocess.CalledProcessError
        If ffmpeg exits with a non-zero status.
    AssertionError
        If sample rate or channel count of the decoded audio differ from the
        values reported by ``parse_metadata``.
    """
    metadata = parse_metadata(video_file)

    with tempfile.TemporaryDirectory() as temp_dir:
        temp_file = os.path.join(temp_dir, 'audio.wav')
        # Argument list instead of a shell string: paths are passed verbatim.
        subprocess.run(['ffmpeg', '-y', '-v', 'quiet', '-i', video_file, '-vn',
                        '-c:a', 'pcm_s16le', '-ss', str(start), '-t', str(duration),
                        temp_file],
                       check=True)
        sample_rate, signal = wavfile.read(temp_file)
    # wavfile.read returns a 1-D array for mono audio, so shape[1] does not exist there.
    n_channels = 1 if signal.ndim == 1 else signal.shape[1]
    assert (sample_rate == metadata['sample_rate']) and (n_channels == metadata['channels']), \
        'audio stream did not match video metadata'
    if n_channels > 1:
        signal = signal.mean(axis=1)
    return signal

def compute_offset(audio_1, audio_2, sample_rate):
    """Estimate the time shift between two signals from their cross-correlation.

    Convolving ``audio_1`` with the time-reversed ``audio_2`` yields the full
    cross-correlation; the distance of its maximum from the zero-lag index is
    the shift in samples.

    Returns
    -------
    float
        Shift in seconds: the position of the shared content in ``audio_2``
        minus its position in ``audio_1``.
    """
    reversed_second = audio_2[::-1]
    cross_correlation = fftconvolve(audio_1, reversed_second, mode='full')
    peak_index = np.argmax(cross_correlation)
    # index of the zero-lag term in the 'full' output (equals audio_2.size - 1)
    zero_lag_index = (2 * audio_2.size - 1) // 2
    return (zero_lag_index - peak_index) / sample_rate

def check_input(args, video_files, default=30):
    """Expand per-video arguments to lists with one entry per video file.

    Parameters
    ----------
    args : iterable
        Arguments to normalize (e.g. ``[start, duration]``). Each entry may be
        ``None``, a scalar number (Python or NumPy), or a list/tuple holding
        one value per video file.
    video_files : list
        The video files; only its length is used.
    default : int or float, optional
        Value substituted for ``None`` entries. The fallback of 30 is kept
        from the original implementation, where its origin is undocumented;
        pass an explicit value if 30 is not appropriate.

    Returns
    -------
    list of list
        One list of length ``len(video_files)`` per entry of ``args``.

    Raises
    ------
    AssertionError
        If an entry is of an unsupported type or has the wrong length.
    """
    n_videos = len(video_files)
    checked_args = []
    for arg in args:
        if arg is None:
            arg = [default] * n_videos
        elif isinstance(arg, (float, int, np.integer, np.floating)):
            # NumPy scalars (e.g. np.int64) are not instances of int, so list them explicitly
            arg = [arg] * n_videos
        elif isinstance(arg, tuple):
            arg = list(arg)
        assert isinstance(arg, list) and (len(arg) == n_videos), \
            'start and duration must be either None, or of type int, float or list with length of video files'
        checked_args.append(arg)
    return checked_args

def cut_video(video_file, offset):
    """Write ``<name>_cut<ext>``: the video re-encoded starting at ``offset``.

    The video is encoded with libx264 (crf 18, preset ultrafast) and the audio
    track is dropped (``-an``). The command is passed to ffmpeg as an argument
    list, so paths with spaces or shell metacharacters are handled correctly.

    Parameters
    ----------
    video_file : str
        Path to the source video.
    offset : int or float
        Start position passed to ffmpeg's ``-ss``.

    Returns
    -------
    bool
        Always ``True``.

    Raises
    ------
    subprocess.CalledProcessError
        If ffmpeg exits with a non-zero status (previously ignored silently).
    """
    base, extension = os.path.splitext(video_file)
    cut_file = base + '_cut' + extension
    subprocess.run(['ffmpeg', '-y', '-v', 'quiet', '-i', video_file, '-ss', str(offset),
                    '-c:v', 'libx264', '-an', '-crf', '18', '-preset', 'ultrafast',
                    cut_file],
                   check=True)
    return True

def copy_video(video_file):
    """Copy a video unchanged to ``<name>_cut<ext>`` next to the original.

    Uses ``shutil.copy`` instead of shelling out to ``cp``, which works on any
    platform and with paths that contain spaces or shell metacharacters.

    Parameters
    ----------
    video_file : str
        Path to the source video.

    Returns
    -------
    bool
        Always ``True``.

    Raises
    ------
    OSError
        If the source cannot be read or the destination cannot be written
        (a failing ``cp`` was previously ignored silently).
    """
    base, extension = os.path.splitext(video_file)
    cut_file = base + '_cut' + extension
    shutil.copy(video_file, cut_file)
    return True

def synchronize_videos(video_files, start=None, duration=None, dry_run=False):
    """Align videos of the same scene using their audio tracks.

    The audio of the first video is cross-correlated with the audio of every
    other video to estimate the relative offsets. The offsets are shifted so
    the smallest one is zero and printed. Unless ``dry_run`` is set, each
    video is then written to ``<name>_cut<ext>``: videos whose offset is less
    than one frame are copied, all others are cut at their offset.

    Parameters
    ----------
    video_files : list of str
        Paths of the videos; the first one is the reference.
    start : None, int, float or list, optional
        Validated by ``check_input`` but currently not used: audio is always
        extracted from position 0 (the original author noted that other start
        values produced empty wav files).
    duration : None, int, float or list, optional
        Seconds of audio used for the correlation, per video or as one value
        for all videos.
    dry_run : bool, optional
        If ``True``, only compute and print the offsets; no files are written.

    Returns
    -------
    bool
        Always ``True``.

    Raises
    ------
    AssertionError
        If no video is given, or if frame rates or audio sample rates differ
        between the videos.
    """
    assert len(video_files) > 0, 'at least one video file is required'
    metadata = [parse_metadata(video_file) for video_file in video_files]
    assert len({meta['fps'] for meta in metadata}) == 1, \
        'videos must have the same frame rate'
    # This check previously compared the frame rates a second time.
    assert len({meta['sample_rate'] for meta in metadata}) == 1, \
        'audio streams must have the same sample rate'
    start, duration = check_input([start, duration], video_files)
    # Start = 0, otherwise wavfiles are empty
    signals = [extract_audio(video_file, start=0, duration=d)
               for video_file, d in zip(video_files, duration)]
    reference = signals[0]
    sample_rate = metadata[0]['sample_rate']
    offsets = [0] + [compute_offset(reference, sig, sample_rate) for sig in signals[1:]]
    # shift so that the smallest offset is zero and all cut positions are non-negative
    offsets = np.array(offsets) - np.min(offsets)
    for video_file, offset in zip(video_files, offsets):
        print('{}: offset {:.3f} s'.format(video_file, offset))
    if dry_run:
        return True
    for idx, video_file in enumerate(video_files):
        if offsets[idx] * metadata[idx]['fps'] < 1:
            # less than one frame to remove: keep the video as it is
            copy_video(video_file)
        else:
            cut_video(video_file, offsets[idx])
    return True

In [None]:
# duration: seconds of audio per video used to estimate the offsets
# dry_run=True computes the offsets without writing any video files
# ffmpeg needs to be installed (this is only tested on linux systems)

synchronize_videos(video_files=video_files, start=None, duration=180, dry_run=False)

### Extract one frame every second

In [None]:
def extract_frames(video_file, sampling_rate, output_directory, start_frame=0):
    """Save every ``sampling_rate``-th frame of a video as a JPEG image.

    Frames ``start_frame``, ``start_frame + sampling_rate``, ... are written to
    ``<output_directory>/<video name>/<video name>_<frame index>.jpg``. The
    video is decoded sequentially until reading fails, so the result does not
    depend on the frame count reported by the container.

    Parameters
    ----------
    video_file : str
        Path to the video file.
    sampling_rate : int
        Step between extracted frames, in frames (must be >= 1).
    output_directory : str
        Parent directory; a sub-directory named after the video is created.
    start_frame : int, optional
        Index of the first frame to extract.

    Returns
    -------
    bool
        Always ``True``.

    Raises
    ------
    ValueError
        If ``sampling_rate`` is smaller than 1.
    IOError
        If the video cannot be opened.
    """
    if sampling_rate < 1:
        raise ValueError('sampling_rate must be at least 1, got {}'.format(sampling_rate))
    base_name = os.path.splitext(os.path.basename(video_file))[0]
    output_directory = os.path.join(output_directory, base_name)
    cap = cv2.VideoCapture(video_file)
    try:
        if not cap.isOpened():
            raise IOError('could not open video file {}'.format(video_file))
        # Every caller creates the directory (exist_ok makes this safe when several
        # worker processes start at once); previously only start_frame == 0 did,
        # so other workers could try to write before the directory existed.
        os.makedirs(output_directory, exist_ok=True)
        frame_idx = 0
        while True:
            ret, img = cap.read()
            if not ret:
                # end of stream or decoding error: there is no image to write
                break
            if frame_idx >= start_frame and (frame_idx - start_frame) % sampling_rate == 0:
                cv2.imwrite(os.path.join(output_directory,
                                         '{}_{:05d}.jpg'.format(base_name, frame_idx)),
                            img)
            frame_idx += 1
    finally:
        cap.release()
    return True

class FrameSampler:
    """Extract every ``sampling_rate``-th frame of a video, optionally in parallel.

    With ``n_jobs`` workers, worker ``k`` extracts the frames
    ``k * sampling_rate + m * (sampling_rate * n_jobs)`` for ``m = 0, 1, ...``,
    so together the workers cover every multiple of ``sampling_rate`` exactly
    once. Each worker calls ``extract_frames`` on the whole video.

    Parameters
    ----------
    video_file : str
        Path to the video file.
    sampling_rate : int
        Step between extracted frames, in frames.
    output_directory : str
        Directory passed on to ``extract_frames``.
    n_jobs : int, optional
        Number of worker processes. Values that are not positive integers
        select ``os.cpu_count()`` (or 1 if that is unavailable).
    """

    def __init__(self, video_file, sampling_rate, output_directory, n_jobs=-1):
        # Check the type first so a non-integer value never reaches the comparison.
        if isinstance(n_jobs, int) and n_jobs > 0:
            self.n_jobs = n_jobs
        else:
            self.n_jobs = os.cpu_count() or 1  # cpu_count() may return None
        self.video_file = video_file
        self.sampling_rate = sampling_rate
        self.output_directory = output_directory

    def run(self):
        """Run the extraction and return ``True`` once all frames are written."""
        if self.n_jobs == 1:
            extract_frames(self.video_file, self.sampling_rate, self.output_directory, start_frame=0)
            return True
        # Each worker steps by sampling_rate * n_jobs. The previous stride of
        # sampling_rate * (n_jobs + 1) left one residue class without a worker,
        # so every (n_jobs + 1)-th sampled frame was never extracted.
        stride = self.sampling_rate * self.n_jobs
        tasks = [(self.video_file, stride, self.output_directory, worker * self.sampling_rate)
                 for worker in range(self.n_jobs)]
        with Pool(processes=self.n_jobs) as pool:
            pool.starmap(extract_frames, tasks)
            pool.close()
            pool.join()
        return True

##### Make list of cut video files

In [None]:
# Collect the '*_cut*.mp4' videos from the video directory.
# os.listdir returns entries in arbitrary order, so sort for reproducible runs.
files = sorted(os.listdir('../Videos'))
cut_video_files = []
for filename in files:
    stem, extension = os.path.splitext(filename)
    # compare the real extension so names like 'x_cut.mp4.json' are not picked up
    if '_cut' in stem and extension.lower() == '.mp4':
        cut_video_files.append('../Videos/' + filename)
print(cut_video_files)

##### Run frame sampler for all files

In [None]:
# set paths and sampling rate accordingly
# note sampling_rate refers to every nth frame that should be extracted
# so if you have a 60fps video, sampling_rate=20 will extract 3 frames per second
# here sampling_rate is set to the frame rate, i.e. about one frame per second

for file in cut_video_files:
    print(file)
    cap = cv2.VideoCapture(file)
    try:
        # round instead of truncating: 29.97 fps should sample every 30th frame, not every 29th
        fps = int(round(cap.get(cv2.CAP_PROP_FPS)))
    finally:
        cap.release()
    print(fps)
    if fps < 1:
        # no usable frame rate was reported (e.g. the file could not be opened);
        # a sampling rate of 0 cannot be used
        print('skipping {}: could not determine the frame rate'.format(file))
        continue
    sampler = FrameSampler(video_file=file, sampling_rate=fps, output_directory='../Videos')
    sampler.run()