# Highlights detection

## Предобработка видео

In [1]:
import numpy as np
import cv2

### Сохраняем кадры

In [2]:
def resize_frame(img, size, interpolation=cv2.INTER_CUBIC):
    """Resize ``img`` so that its shorter side equals ``size``, keeping the aspect ratio.

    Args:
        img: image array whose first two axes are (height, width); an optional
            third axis holds the channels.
        size: target length of the shorter side, in pixels.
        interpolation: OpenCV interpolation flag passed to ``cv2.resize``.

    Returns:
        ``img`` itself when both sides are already smaller than ``size`` or
        when the shorter side already equals ``size``; otherwise a resized
        copy. A trailing single-channel axis of the input is preserved.
    """
    h, w = img.shape[:2]

    # Both sides are below the target: leave the image as it is.
    if w < size and h < size:
        return img

    # The shorter side already has the requested length.
    if min(w, h) == size:
        return img

    if w < h:
        new_w, new_h = size, int(size * h / w)
    else:
        new_w, new_h = int(size * w / h), size
    output = cv2.resize(img, dsize=(new_w, new_h), interpolation=interpolation)

    # A (h, w, 1) input needs its channel axis restored after the resize.
    # Checking ndim first keeps plain 2-D (grayscale) inputs from raising
    # IndexError on img.shape[2].
    if img.ndim == 3 and img.shape[2] == 1:
        return output[:, :, np.newaxis]
    return output

In [3]:
def save_frames_from_video(video_file, video_frames_dir, new_size=256, fps_factor=3):
    """Sample frames from a video and store them, with their timestamps, on disk.

    A frame is requested every ``1 / fps_factor`` seconds, converted from BGR
    to RGB and passed through ``resize_frame(..., new_size)``. The frames are
    written to ``<video_frames_dir>/frames.npz`` (one positional array per
    frame) and the requested timestamps, one per line, to
    ``<video_frames_dir>/frames_timing.txt``.

    Args:
        video_file: path of the video to read.
        video_frames_dir: output directory; created if it does not exist.
        new_size: target size handed to ``resize_frame``.
        fps_factor: number of frames sampled per second of video.

    Returns:
        1 on success; 0 when the video cannot be opened, reports an FPS of 0
        or 1000, or stops yielding frames more than one second before its
        reported end.
    """
    # Seek to the end of the stream so the current position is the duration.
    video_capture = cv2.VideoCapture(video_file)
    video_capture.set(cv2.CAP_PROP_POS_AVI_RATIO, 1)
    video_len = video_capture.get(cv2.CAP_PROP_POS_MSEC) / 1000
    video_capture.release()

    video_capture = cv2.VideoCapture(video_file)
    try:
        if not video_capture.isOpened():
            return 0

        fps = video_capture.get(cv2.CAP_PROP_FPS)
        # NOTE(review): 1000 is presumably a bogus FPS reported for broken files - confirm.
        if fps == 1000 or fps == 0:
            return 0

        frame_rate = 1 / fps_factor  # sampling interval in seconds
        sec = 0
        frames_list = []
        secs = []

        while video_capture.isOpened():
            video_capture.set(cv2.CAP_PROP_POS_MSEC, sec * 1000)
            ret, frame = video_capture.read()

            if not ret:
                # A read failure more than one second before the end means
                # the file could not be decoded properly.
                if video_len - sec > 1:
                    return 0
                # Otherwise stop. This also covers sec > video_len, which
                # previously left the loop spinning forever.
                break

            frames_list.append(resize_frame(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), new_size))
            secs.append(sec)

            sec = round(sec + frame_rate, 2)
            if sec > video_len:
                break
    finally:
        # Release the capture on every exit path, including exceptions.
        video_capture.release()

    # makedirs creates missing parents and tolerates an existing directory,
    # so re-running the extraction does not fail after all the decoding work.
    os.makedirs(video_frames_dir, exist_ok=True)

    frames_filename = os.path.join(video_frames_dir, 'frames')
    np.savez_compressed(frames_filename, *frames_list)

    frames_sec_filename = os.path.join(video_frames_dir, 'frames_timing.txt')
    with open(frames_sec_filename, 'w') as f:
        for sec in secs:
            f.write('{}\n'.format(sec))

    return 1

In [4]:
import os

# Source video and the root directory that holds the per-video frame dumps.
video_file = 'data/videos/helms_deep.mp4'
frames_dir = 'data/frames'

# Per-video output directory, named after the video file without its extension.
video_frames_dir = os.path.join(frames_dir, os.path.splitext(os.path.basename(video_file))[0])

In [5]:
# Extract and store the sampled frames; the call returns 1 on success and 0 on failure.
save_frames_from_video(video_file, video_frames_dir)

1

### Нарезаем на сцены

In [9]:
def get_video_len(video_file):
    """Return the duration of ``video_file`` in seconds as reported by OpenCV."""
    capture = cv2.VideoCapture(video_file)
    # Jump to the end of the stream (relative position 1) so that the
    # timestamp read next is the total duration in milliseconds.
    capture.set(cv2.CAP_PROP_POS_AVI_RATIO, 1)
    duration_ms = capture.get(cv2.CAP_PROP_POS_MSEC)
    capture.release()

    return duration_ms / 1000

In [7]:
def save_scenes_timing(video_file, frames_dir, time_step):
    """Cut a video into consecutive fixed-length scenes and write their timings.

    One line per scene, ``scene_<n>;<start_sec>;<end_sec>``, is written to
    ``<frames_dir>/scenes_<time_step>.txt``. Only scenes that end at or before
    the video length are written, so a shorter trailing remainder is dropped.

    Args:
        video_file: path of the video; only its duration is used.
        frames_dir: directory in which the scenes file is created.
        time_step: scene length in seconds; must be positive.

    Returns:
        The number of scenes written.

    Raises:
        ValueError: if ``time_step`` is not positive (the grid would never
            advance and the loop would not terminate).
    """
    if time_step <= 0:
        raise ValueError('time_step must be positive, got {}'.format(time_step))

    scenes_file = os.path.join(frames_dir, 'scenes_{}.txt'.format(time_step))

    video_len = get_video_len(video_file)

    scene_number = 0

    with open(scenes_file, 'w') as f:
        start_grid_sec = 0
        end_grid_sec = time_step
        while end_grid_sec <= video_len:
            f.write('scene_{};{};{}\n'.format(scene_number, float(start_grid_sec), float(end_grid_sec)))
            start_grid_sec += time_step
            end_grid_sec += time_step
            scene_number += 1

    return scene_number

In [10]:
# Scene length in seconds; it is also embedded in the scenes file name.
time_step = 6
# Writes scenes_<time_step>.txt into the video's frames directory and returns the scene count.
save_scenes_timing(video_file, video_frames_dir, time_step)

32

### Находим pos / neg сцены

In [11]:
def overlap(min1, max1, min2, max2):
    """Return the length of the intersection of [min1, max1] and [min2, max2], or 0 if they are disjoint."""
    left = max(min1, min2)
    right = min(max1, max2)
    return max(0, right - left)
            
# (start_sec, end_sec) intervals of the reference highlights used as ground truth.
video_gif_secs = [(70, 84), (180, 191)]

# A scene is positive when its overlap with some highlight, relative to the
# shorter of the two intervals, exceeds this threshold.
overlap_thresh = 0.66

pos_num = 0
neg_num = 0

scenes_file = os.path.join(video_frames_dir, 'scenes_{}.txt'.format(time_step))
positive_file = os.path.join(video_frames_dir, 'positive_{}.txt'.format(time_step))
negative_file = os.path.join(video_frames_dir, 'negative_{}.txt'.format(time_step))
frames_timing_file = os.path.join(video_frames_dir, 'frames_timing.txt')

# Timestamp in seconds of every stored frame; the list index is the frame index.
frames_timing = []
with open(frames_timing_file) as f:
    for line in f:
        frames_timing.append(float(line.rstrip('\n')))

with open(scenes_file) as f, open(positive_file, 'w') as p_f, open(negative_file, 'w') as n_f:
    for line in f:
        # Each line has the form "scene_<n>;<start_sec>;<end_sec>".
        scene_name, start_sec, end_sec = line.rstrip('\n').split(';')
        start_sec = float(start_sec)
        end_sec = float(end_sec)

        # Indices of the frames inside the scene. Both bounds are inclusive,
        # so a frame exactly on a boundary belongs to both adjacent scenes.
        scene_frames = []
        for i, sec in enumerate(frames_timing):
            if start_sec <= sec <= end_sec:
                scene_frames.append(str(i))

        # Overlap with every highlight, normalised by the shorter interval.
        overlaps = []
        for sec_pair in video_gif_secs:
            len_scene = end_sec - start_sec
            len_gif = sec_pair[1] - sec_pair[0]
            if len_scene < len_gif:
                len_smallest = len_scene
            else:
                len_smallest = len_gif

            # Skip zero-length intervals to avoid dividing by zero.
            if not len_smallest:
                continue

            ov = overlap(start_sec, end_sec, sec_pair[0], sec_pair[1]) / len_smallest
            overlaps.append(ov)

        # Output line format: "<scene_name>;<frame_idx>;<frame_idx>;...".
        overlaps = np.array(overlaps)
        if np.any(overlaps > overlap_thresh):
            p_f.write('{};{}\n'.format(scene_name, ';'.join(scene_frames)))
            pos_num += 1
        else:
            n_f.write('{};{}\n'.format(scene_name, ';'.join(scene_frames)))
            neg_num += 1

print('Positive scenes: {}, negative scenes: {}'.format(pos_num, neg_num))

Positive scenes: 4, negative scenes: 28


## Conv 3D

In [12]:
import torch.nn as nn
import torch

class C3D(nn.Module):
    """3-D convolutional network used here as a clip feature extractor.

    ``forward`` runs the five convolution/pooling stages and returns the
    output of ``fc6`` (4096 values per clip). ``fc7``, ``fc8``, ``dropout``
    and ``softmax`` are created but not used by ``forward``.
    NOTE(review): they are presumably kept so that the checkpoint's state dict
    keys match this module - confirm before removing any of them.
    """

    def __init__(self):
        super().__init__()

        cube = (3, 3, 3)
        same = (1, 1, 1)
        halve = (2, 2, 2)

        self.conv1 = nn.Conv3d(3, 64, kernel_size=cube, padding=same)
        # The first pooling keeps the temporal axis and halves only the spatial axes.
        self.pool1 = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))

        self.conv2 = nn.Conv3d(64, 128, kernel_size=cube, padding=same)
        self.pool2 = nn.MaxPool3d(kernel_size=halve, stride=halve)

        self.conv3a = nn.Conv3d(128, 256, kernel_size=cube, padding=same)
        self.conv3b = nn.Conv3d(256, 256, kernel_size=cube, padding=same)
        self.pool3 = nn.MaxPool3d(kernel_size=halve, stride=halve)

        self.conv4a = nn.Conv3d(256, 512, kernel_size=cube, padding=same)
        self.conv4b = nn.Conv3d(512, 512, kernel_size=cube, padding=same)
        self.pool4 = nn.MaxPool3d(kernel_size=halve, stride=halve)

        self.conv5a = nn.Conv3d(512, 512, kernel_size=cube, padding=same)
        self.conv5b = nn.Conv3d(512, 512, kernel_size=cube, padding=same)
        self.pool5 = nn.MaxPool3d(kernel_size=halve, stride=halve, padding=(0, 1, 1))

        self.fc6 = nn.Linear(8192, 4096)
        self.fc7 = nn.Linear(4096, 4096)
        self.fc8 = nn.Linear(4096, 487)

        self.dropout = nn.Dropout(p=0.5)

        self.relu = nn.ReLU()
        self.softmax = nn.Softmax()

    def forward(self, x):
        """Return the ``fc6`` activations for a batch of clips.

        The pooled features are flattened to rows of 8192 values, so the
        input size must make the last pooling output hold a multiple of 8192
        values (e.g. 3 x 16 x 112 x 112 clips give exactly 8192 per clip).
        """
        stages = (
            ((self.conv1,), self.pool1),
            ((self.conv2,), self.pool2),
            ((self.conv3a, self.conv3b), self.pool3),
            ((self.conv4a, self.conv4b), self.pool4),
            ((self.conv5a, self.conv5b), self.pool5),
        )

        feat = x
        for convs, pool in stages:
            for conv in convs:
                feat = self.relu(conv(feat))
            feat = pool(feat)

        flat = feat.view(-1, 8192)
        return self.fc6(flat)

In [13]:
def get_model(c3d_path):
    """Build a ``C3D`` network, load its weights and switch it to eval mode.

    Args:
        c3d_path: path of a file holding the C3D state dict.

    Returns:
        The ``C3D`` module in evaluation mode, on the CPU.
    """
    embed_net = C3D()

    # map_location lets a checkpoint saved from a GPU load on a CPU-only
    # machine, in line with get_rank_model.
    # NOTE: torch.load unpickles the file - only load checkpoints from a trusted source.
    state_dict = torch.load(c3d_path, map_location='cpu')
    embed_net.load_state_dict(state_dict)

    # The network stays on the CPU; call .cuda() on the result for GPU inference.
    embed_net.eval()

    return embed_net

In [14]:
# Load the C3D weights from 'c3d.pickle' in the working directory.
c3d_path = 'c3d.pickle'
embed_net = get_model(c3d_path)
# Inference only: switch autograd off globally from this point on.
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x11dbd6e48>

## Вычисление векторных представлений сцен

In [15]:
from opencv_transforms import opencv_transforms

### Обработка сцены

In [16]:
def process_scene(processing_frames, frames_per_clip=16, overlap_frames=8):
    """Split the frame indices of one scene into clips of ``frames_per_clip`` frames.

    * Fewer frames than ``frames_per_clip``: a single clip with all frames is
      returned, together with a random choice of positions to duplicate so
      that the clip can be padded to full length.
    * Exactly ``frames_per_clip`` frames: a single clip.
    * More frames: sliding windows whose starts are ``overlap_frames`` apart,
      plus one final window aligned to the end of the scene when the sliding
      windows do not already reach the last frame.

    Args:
        processing_frames: sequence of frame indices of the scene.
        frames_per_clip: number of frames in each clip.
        overlap_frames: distance between the starts of consecutive windows
            (the stride); must be positive when windows are generated.

    Returns:
        ``(clips_indexes, to_copy_clips_ind)``: the list of clips (each a
        slice of ``processing_frames``) and a ``defaultdict(int)`` mapping a
        position inside the scene to the number of extra copies needed
        (empty unless the scene is shorter than ``frames_per_clip``).

    Raises:
        ValueError: if the scene is longer than a clip and ``overlap_frames``
            is not positive; also raised by ``np.random.choice`` for an empty
            scene.
    """
    n_frames = len(processing_frames)
    clips_indexes = []
    to_copy_clips_ind = defaultdict(int)

    if n_frames < frames_per_clip:
        need_frames = frames_per_clip - n_frames
        # Duplicates can be drawn without replacement only while the scene
        # has at least as many frames as are missing.
        to_copy_clips = np.random.choice(n_frames, need_frames, replace=need_frames > n_frames)

        for i in to_copy_clips:
            to_copy_clips_ind[i] += 1

        clips_indexes.append(processing_frames)
    elif n_frames > frames_per_clip:
        if overlap_frames <= 0:
            # A non-positive stride would never advance the window.
            raise ValueError('overlap_frames must be positive, got {}'.format(overlap_frames))

        last_start = n_frames - frames_per_clip

        start_ind = 0
        while start_ind <= last_start:
            clips_indexes.append(processing_frames[start_ind:start_ind + frames_per_clip])
            start_ind += overlap_frames

        # Add the end-aligned window only when the sliding windows missed the
        # tail; otherwise it would duplicate the last window.
        if last_start % overlap_frames != 0:
            clips_indexes.append(processing_frames[last_start:])
    else:
        clips_indexes.append(processing_frames)

    return clips_indexes, to_copy_clips_ind


def get_scenes_clips(frames, frames_names, clips_indexes, to_copy_clips_ind, transform,
                     frames_per_clip=16, ch=3, width=112, height=112):
    """Build the clip tensor of one scene from frames stored by name.

    NOTE(review): a function with the same name but a different signature is
    defined later in this file (inference part); whichever cell runs last
    wins. Frames are used here in their stored channel order, while the later
    variant converts RGB to BGR first - confirm which order the network expects.

    Args:
        frames: mapping from frame name to image array (e.g. a loaded ``.npz``).
        frames_names: dict mapping an integer frame id to its name in ``frames``.
        clips_indexes: list of clips, each a list of positions into the
            sorted frame ids.
        to_copy_clips_ind: mapping from a position inside a clip to the
            number of extra copies of that frame to append right after it.
        transform: callable applied to every frame; its result is assumed to
            be a (rows, cols, channels) array - TODO confirm against the caller.
        frames_per_clip: number of frames per clip after padding.
        ch: number of channels.
        width, height: sizes of the two spatial axes of the output tensor.

    Returns:
        A float32 tensor of shape
        ``(len(clips_indexes), ch, frames_per_clip, width, height)``.
    """
    clips_tensor = torch.zeros([len(clips_indexes), ch, frames_per_clip, width, height])

    # The sorted frame ids are the same for every clip, so build them once
    # instead of once per clip.
    sorted_frame_ids = np.array(sorted(frames_names))

    for i, ind in enumerate(clips_indexes):
        clip = []
        for j, frame_id in enumerate(sorted_frame_ids[ind]):
            img = transform(frames[frames_names[frame_id]])

            clip.append(img)

            # Pad short scenes by repeating the selected frames in place.
            if j in to_copy_clips_ind:
                for _ in range(to_copy_clips_ind[j]):
                    clip.append(img)

        # (frame, rows, cols, channel) -> (channel, frame, rows, cols)
        clip = np.float32(np.array(clip).transpose((3, 0, 1, 2)))

        clips_tensor[i] = torch.from_numpy(clip)

    return clips_tensor


def get_embeddings(embed_net, clips, frames, frames_names, transform, frames_per_clip=16, overlap_frames=8,
                   ch=3, width=112, height=112):
    """Compute one embedding per scene by averaging the embeddings of its clips.

    Args:
        embed_net: callable mapping a clip batch tensor to one embedding per clip.
        clips: iterable of scenes, each a list of frame indices.
        frames, frames_names, transform: forwarded to ``get_scenes_clips``.
        frames_per_clip, overlap_frames: forwarded to ``process_scene``.
        ch, width, height: forwarded to ``get_scenes_clips``.

    Returns:
        A tensor stacking the per-scene mean embeddings, one row per scene.
    """
    scene_embeddings = []
    for scene_frame_ids in clips:
        windows, padding_counts = process_scene(scene_frame_ids, frames_per_clip, overlap_frames)
        batch = get_scenes_clips(frames, frames_names, windows, padding_counts,
                                 transform, frames_per_clip, ch, width, height)

        # Average over the clip axis to get a single vector for the scene.
        clip_embeddings = embed_net(batch)
        scene_embeddings.append(clip_embeddings.mean(0).cpu())

    return torch.stack(scene_embeddings)

### Вычисление и сохранение pos / neg представлений

In [17]:
from collections import defaultdict

# Clip geometry: frames per clip, stride between clip starts, channels and
# the two spatial sizes of the clip tensor.
frames_per_clip = 16
overlap_frames = 8
ch = 3
width = 112
height = 112

# Resize followed by a 112 center crop.
# NOTE(review): (112, 200) is presumably (height, width) - confirm against opencv_transforms.
data_transforms = opencv_transforms.Compose([
    opencv_transforms.Resize((112, 200)),
    opencv_transforms.CenterCrop(112),
])

positive_file = os.path.join(video_frames_dir, 'positive_{}.txt'.format(time_step))
negative_file = os.path.join(video_frames_dir, 'negative_{}.txt'.format(time_step))

# Positive scenes: each line is "<scene_name>;<frame_idx>;<frame_idx>;...".
pos = []
with open(positive_file) as p_f:
    for line in p_f:
        split_res = line.rstrip('\n').split(';')
        # Skip lines without a scene name or without any frame index.
        if split_res[0] and split_res[1]:
            frames = [int(x) for x in split_res[1:]]
            # Keep only scenes with more frames than one clip holds.
            if len(frames) > frames_per_clip:
                pos.append(frames)

# NOTE(review): an empty `pos` list is not handled here; get_embeddings then
# calls torch.stack on an empty list.

# Negative scenes, parsed the same way.
neg = []
with open(negative_file) as n_f:
    for line in n_f:
        split_res = line.rstrip('\n').split(';')
        if split_res[0] and split_res[1]:
            frames = [int(x) for x in split_res[1:]]
            # Keep only scenes with more frames than one clip holds.
            if len(frames) > frames_per_clip:
                neg.append(frames)

# NOTE(review): an empty `neg` list is not handled here either.

# Stored frames; note that `frames` is rebound from the parsing loops above.
video_frames_file = os.path.join(video_frames_dir,'frames.npz')
frames = np.load(video_frames_file)
# Map the integer suffix of each array name ("<prefix>_<n>") to the full name.
frames_names = {int(x.split('_')[1]): x for x in frames.files}

pos_embed_file = os.path.join(video_frames_dir, 'positive_{}_embeddings.pt'.format(time_step))
neg_embed_file = os.path.join(video_frames_dir, 'negative_{}_embeddings.pt'.format(time_step))

# One embedding per positive scene, saved as a single tensor.
pos_clips_batch = get_embeddings(embed_net, pos, frames, frames_names, data_transforms,
                                         frames_per_clip, overlap_frames, ch, width, height)

torch.save(pos_clips_batch, pos_embed_file)

# One embedding per negative scene, saved as a single tensor.
neg_clips_batch = get_embeddings(embed_net, neg, frames, frames_names, data_transforms,
                                 frames_per_clip, overlap_frames, ch, width, height)

torch.save(neg_clips_batch, neg_embed_file)

## Обучение Rank Net

In [19]:
class SceneRankNet(nn.Module):
    """Small MLP that maps a 4096-value scene embedding to a scalar score.

    Layout: dropout(0.8) -> fc1 (4096 -> 512) -> ELU -> dropout(0.25) ->
    fc2 (512 -> 128) -> ELU -> scoring (128 -> 1).
    """

    def __init__(self):
        super().__init__()

        self.dropout1 = nn.Dropout(p=0.8)
        self.dropout2 = nn.Dropout(p=0.25)

        self.fc1 = nn.Linear(4096, 512)
        self.fc2 = nn.Linear(512, 128)

        self.activ1 = nn.ELU()

        self.activ2 = nn.ELU()

        self.scoring = nn.Linear(128, 1)

    def forward(self, x):
        """Return ``(score, embed)``; ``embed`` is the ``fc2`` output before its activation."""
        hidden = self.activ1(self.fc1(self.dropout1(x)))
        embed = self.fc2(self.dropout2(hidden))
        score = self.scoring(self.activ2(embed))

        return score, embed

In [20]:
def get_rank_model(score_net_path):
    """Build a ``SceneRankNet``, load its weights onto the CPU and switch it to eval mode.

    Args:
        score_net_path: path of a file holding the ``SceneRankNet`` state dict.

    Returns:
        The ``SceneRankNet`` module in evaluation mode.
    """
    def to_cpu(storage, loc):
        # Remap every stored tensor to the CPU, wherever it was saved from.
        return storage.cpu()

    rank_net = SceneRankNet()
    weights = torch.load(score_net_path, map_location=to_cpu)
    rank_net.load_state_dict(weights)

    # The network stays on the CPU; call .cuda() on the result for GPU inference.
    rank_net.eval()

    return rank_net

# Load the scoring network weights from 'rank_net.pth' in the working directory.
rank_net = get_rank_model('rank_net.pth')

### Функция потерь

In [None]:
def smooth_l1_loss(positive, negative, delta=1, size_average=False):
    """Huber-smoothed pairwise ranking loss with margin 1.

    With ``u = 1 - positive + negative`` and ``v = max(u, 0)``, each element
    contributes ``0.5 * v**2`` when ``u <= delta`` and
    ``delta * v - 0.5 * delta**2`` otherwise.

    Args:
        positive: scores of the positive items (tensor).
        negative: scores of the negative items (tensor).
        delta: point at which the loss switches from quadratic to linear.
        size_average: return the mean instead of the sum when true.

    Returns:
        A scalar tensor: the summed (or averaged) loss.
    """
    margin = 1 - positive + negative
    hinge = torch.clamp(margin, min=0)

    quadratic = 0.5 * hinge ** 2
    linear = delta * hinge - 0.5 * delta ** 2
    per_pair = torch.where(margin <= delta, quadratic, linear)

    reduce = per_pair.mean if size_average else per_pair.sum
    return reduce()

rank_loss = smooth_l1_loss

In [None]:
# NOTE(review): `embeddings` and `args` are not defined anywhere in the visible
# notebook; this cell presumably sketches one training step - confirm their origin.
pos_embed, neg_embed = embeddings

# Stack the per-scene embeddings into one batch per class.
pos_batch = torch.stack([embed for embed in pos_embed])
neg_batch = torch.stack([embed for embed in neg_embed])

# rank_net returns a (score, embedding) pair for every batch.
pos_scores, pos_score_embed = rank_net(pos_batch)
neg_scores, neg_score_embed = rank_net(neg_batch)

# Ranking loss summed over the elements (size_average=False).
loss = rank_loss(pos_scores, neg_scores, delta=args.huber_delta, size_average=False)

## Inference

### Предобработка видео и вычисление векторных представлений

In [21]:
def get_frames_from_video(video_file, new_size=256, fps_factor=3):
    """Sample frames from a video at a fixed time step and return them in memory.

    A frame is requested every ``1 / fps_factor`` seconds, converted from BGR
    to RGB and passed through ``resize_frame(..., new_size)``.

    Args:
        video_file: path of the video to read.
        new_size: target size handed to ``resize_frame``.
        fps_factor: number of frames sampled per second of video.

    Returns:
        ``(frames_list, secs, video_len)`` on success, where ``secs[i]`` is
        the timestamp in seconds requested for ``frames_list[i]`` and
        ``video_len`` is the duration in seconds reported by OpenCV.
        ``0`` when the video cannot be opened, reports an FPS of 0 or 1000,
        or stops yielding frames more than one second before its reported end.
    """
    # Seek to the end of the stream so the current position is the duration.
    video_capture = cv2.VideoCapture(video_file)
    video_capture.set(cv2.CAP_PROP_POS_AVI_RATIO, 1)
    video_len = video_capture.get(cv2.CAP_PROP_POS_MSEC) / 1000
    video_capture.release()

    video_capture = cv2.VideoCapture(video_file)
    try:
        if not video_capture.isOpened():
            return 0

        fps = video_capture.get(cv2.CAP_PROP_FPS)
        # NOTE(review): 1000 is presumably a bogus FPS reported for broken files - confirm.
        if fps == 1000 or fps == 0:
            print('Bad fps with {}'.format(video_file))
            return 0

        frame_rate = 1 / fps_factor  # sampling interval in seconds
        sec = 0
        frames_list = []
        secs = []

        while video_capture.isOpened():
            video_capture.set(cv2.CAP_PROP_POS_MSEC, sec * 1000)
            ret, frame = video_capture.read()

            if not ret:
                # A read failure more than one second before the end means
                # the file could not be decoded properly.
                if video_len - sec > 1:
                    print('Problems with frames splitting: {} vs {}'.format(sec, video_len))
                    return 0
                # Otherwise stop. This also covers sec > video_len, which
                # previously left the loop spinning forever.
                break

            frames_list.append(resize_frame(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), new_size))
            secs.append(sec)

            sec = round(sec + frame_rate, 2)
            if sec > video_len:
                break
    finally:
        # Release the capture on every exit path, including exceptions.
        video_capture.release()

    return frames_list, secs, video_len


def get_scenes_frames(video_file, last_sec, grid_scene_len=6):
    """Return the (start_sec, end_sec) bounds of consecutive fixed-length scenes.

    Only scenes that end at or before ``last_sec`` are returned, so a shorter
    trailing remainder is dropped.

    Args:
        video_file: unused; kept for interface compatibility.
        last_sec: last second that a scene may reach (typically the video length).
        grid_scene_len: scene length in seconds; must be positive.

    Returns:
        A list of ``(float, float)`` tuples in chronological order.

    Raises:
        ValueError: if ``grid_scene_len`` is not positive (the grid would
            never advance and the loop would not terminate).
    """
    if grid_scene_len <= 0:
        raise ValueError('grid_scene_len must be positive, got {}'.format(grid_scene_len))

    scenes_timing = []
    start_sec = 0
    end_sec = grid_scene_len
    while end_sec <= last_sec:
        scenes_timing.append((float(start_sec), float(end_sec)))
        start_sec += grid_scene_len
        end_sec += grid_scene_len

    return scenes_timing


def get_scenes_clips(frames, clips_indexes, to_copy_clips_ind, transform,
                     frames_per_clip=16, ch=3, width=112, height=112):
    """Build the clip tensor of one scene from an in-memory list of frames.

    NOTE(review): this redefines the earlier function of the same name (which
    also takes ``frames_names``). Unlike that variant, frames are converted
    from RGB to BGR before the transform - confirm which channel order the
    network expects.

    Args:
        frames: sequence of image arrays, indexable by frame index.
        clips_indexes: list of clips, each a list of indices into ``frames``.
        to_copy_clips_ind: mapping from a position inside a clip to the
            number of extra copies of that frame to append right after it.
        transform: callable applied to every frame; its result is assumed to
            be a (rows, cols, channels) array - TODO confirm against the caller.
        frames_per_clip: number of frames per clip after padding.
        ch: number of channels.
        width, height: sizes of the two spatial axes of the output tensor.

    Returns:
        A float32 tensor of shape
        ``(len(clips_indexes), ch, frames_per_clip, width, height)``.
    """
    clips_tensor = torch.zeros([len(clips_indexes), ch, frames_per_clip, width, height])

    for i, ind in enumerate(clips_indexes):
        clip = []
        # Index the frames directly: converting the whole frame list to one
        # array for every clip copied every frame of the video each time.
        for j, frame_idx in enumerate(ind):
            img = cv2.cvtColor(frames[frame_idx], cv2.COLOR_RGB2BGR)
            img = transform(img)

            clip.append(img)

            # Pad short scenes by repeating the selected frames in place.
            if j in to_copy_clips_ind:
                for _ in range(to_copy_clips_ind[j]):
                    clip.append(img)

        # (frame, rows, cols, channel) -> (channel, frame, rows, cols)
        clip = np.float32(np.array(clip).transpose((3, 0, 1, 2)))

        clips_tensor[i] = torch.from_numpy(clip)

    return clips_tensor


def compute_embeddings(embed_net, clips, frames, transform, frames_per_clip=16, overlap_frames=8,
                   ch=3, width=112, height=112):
    """Compute one embedding per scene and time the two stages of the work.

    Args:
        embed_net: callable mapping a clip batch tensor to one embedding per clip.
        clips: sequence of scenes, each a list of frame indices.
        frames, transform: forwarded to ``get_scenes_clips``.
        frames_per_clip, overlap_frames: forwarded to ``process_scene``.
        ch, width, height: forwarded to ``get_scenes_clips``.

    Returns:
        ``(embeddings, scene_process_times, embed_net_times)``: the stacked
        per-scene mean embeddings and, per scene, the seconds spent building
        the clip tensor and running the network. ``(None, 0, 0)`` when
        ``clips`` is empty.
    """
    scene_embeddings = []
    prep_durations = []
    forward_durations = []

    for scene_frame_ids in tqdm(clips):
        started = time.time()
        windows, padding_counts = process_scene(scene_frame_ids, frames_per_clip, overlap_frames)
        batch = get_scenes_clips(frames, windows, padding_counts,
                                 transform, frames_per_clip, ch, width, height)
        prep_durations.append(time.time() - started)

        # Average over the clip axis to get a single vector for the scene.
        started = time.time()
        scene_embeddings.append(embed_net(batch).mean(0).cpu())
        forward_durations.append(time.time() - started)

    if not scene_embeddings:
        return None, 0, 0

    return torch.stack(scene_embeddings), prep_durations, forward_durations

def get_embeddings_for_video(video_file, embed_net, data_transforms, grid_scene_len=6):
    """Extract frames from a video, cut it into scenes and embed every scene.

    Seeds NumPy's global random generator with 3 and prints the video path.

    Args:
        video_file: path of the video.
        embed_net: network handed to ``compute_embeddings``.
        data_transforms: per-frame transform handed to ``compute_embeddings``.
        grid_scene_len: scene length in seconds.

    Returns:
        ``(scenes_embed_batch, significant_scenes_timing)``: one embedding per
        kept scene and the matching ``(start_sec, end_sec)`` tuples. ``0``
        when frame extraction fails or no scene has more than 16 frames.
    """
    np.random.seed(3)

    frames_per_clip = 16
    overlap_frames = 8
    ch = 3
    width = 112
    height = 112

    print('\nVideo file: {}'.format(video_file))

    extracted = get_frames_from_video(video_file, 256, 3)
    if extracted == 0:
        return 0

    frames_list, frames_timing, video_len = extracted

    scenes_timing = get_scenes_frames(video_file, video_len, grid_scene_len=grid_scene_len)

    significant_scenes_timing = []
    scenes_frames = []
    for start_sec, end_sec in scenes_timing:
        # Both bounds are inclusive, so a frame exactly on a boundary belongs
        # to both adjacent scenes.
        frame_ids = [i for i, sec in enumerate(frames_timing) if start_sec <= sec <= end_sec]

        # Keep only scenes with more frames than one clip holds.
        if len(frame_ids) > frames_per_clip:
            scenes_frames.append(frame_ids)
            significant_scenes_timing.append((start_sec, end_sec))

    scenes_embed_batch, _, _ = compute_embeddings(embed_net, scenes_frames, frames_list, data_transforms,
                                                  frames_per_clip, overlap_frames, ch, width, height)

    if scenes_embed_batch is None:
        return 0

    return scenes_embed_batch, significant_scenes_timing

In [22]:
# Load the C3D weights from 'c3d.pickle' in the working directory.
c3d_path = 'c3d.pickle'
embed_net = get_model(c3d_path)
# Inference only: switch autograd off globally from this point on.
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x11e46e668>

In [23]:
# Resize followed by a 112 center crop.
# NOTE(review): (112, 200) is presumably (height, width) - confirm against opencv_transforms.
data_transforms = opencv_transforms.Compose([
        opencv_transforms.Resize((112, 200)),
        opencv_transforms.CenterCrop(112),
    ])

In [24]:
# Folder with the input videos, the video to process, and the root directory
# for per-video metadata (scene timings, embeddings).
video_folder = 'data/videos' 
fname = 'bd.mp4'
metadata_dir = 'data/frames'

### Вычисляем векторные представления видео

In [25]:
import math
from tqdm import tqdm
import time

# Scene length in seconds; it is also embedded in the embeddings file name.
grid_scene_len = 6

video_name = os.path.splitext(fname)[0]

video_file = os.path.join(video_folder, fname)
video_data_dir = os.path.join(metadata_dir, video_name)

scenes_timing_file = os.path.join(video_data_dir, 'scenes_timing.txt')

# makedirs also creates missing parent directories and tolerates an existing one.
os.makedirs(video_data_dir, exist_ok=True)

result = get_embeddings_for_video(video_file, embed_net, data_transforms, grid_scene_len=grid_scene_len)

# get_embeddings_for_video signals failure with 0; stop with a clear message
# instead of failing on the tuple unpacking below.
if result == 0:
    raise RuntimeError('Could not compute embeddings for {}'.format(video_file))

clips_batch, scenes_timing = result

embed_file = os.path.join(video_data_dir, 'embeddings_{}.pt'.format(grid_scene_len))

torch.save(clips_batch, embed_file)

# One "<start_sec>;<end_sec>" line per embedded scene, in the same order as
# the rows of the saved embeddings tensor.
with open(scenes_timing_file, 'w') as f:
    for start_sec, end_sec in scenes_timing:
        f.write('{};{}\n'.format(start_sec, end_sec))


Video file: data/videos/bd.mp4


100%|██████████| 18/18 [01:22<00:00,  4.57s/it]


### Получаем скоры и тайминги хайлайтов

In [26]:
def get_video_highlights_timing_by_c3d_embeddings(video_metadata_dir, rank_net, grid_scene_len=6):
    """Score the stored scene embeddings of a video and return the scenes in chronological order.

    Reads ``scenes_timing.txt`` ("<start_sec>;<end_sec>" per line) and
    ``embeddings_<grid_scene_len>.pt`` from ``video_metadata_dir``.

    Args:
        video_metadata_dir: directory holding the two files above.
        rank_net: callable returning ``(scores, embeddings)`` for a batch of
            scene embeddings, with ``scores`` holding one column.
        grid_scene_len: scene length used in the embeddings file name.

    Returns:
        ``(start_secs, end_secs, scores, video_len)``: three NumPy arrays
        ordered by scene start time, and the end time of the last listed scene.
    """
    timing_path = os.path.join(video_metadata_dir, 'scenes_timing.txt')
    embed_path = os.path.join(video_metadata_dir, 'embeddings_{}.pt'.format(grid_scene_len))

    with open(timing_path) as f:
        rows = [line.rstrip('\n').split(';') for line in f]
    scenes_timing = [(float(row[0]), float(row[1])) for row in rows]

    video_len = scenes_timing[-1][1]

    scene_scores, _ = rank_net(torch.load(embed_path))

    # Scores arrive as one column; take it as a flat vector.
    scores = scene_scores.cpu().transpose(1, 0)[0]

    # Walk the scenes from the best score to the worst...
    ranked = [ind.item() for ind in torch.argsort(scores, descending=True)]
    starts = [scenes_timing[ind][0] for ind in ranked]
    ends = [scenes_timing[ind][1] for ind in ranked]
    ranked_scores = [scores[ind].item() for ind in ranked]

    # ...then put them back into chronological order.
    chrono = np.argsort(starts)

    return np.array(starts)[chrono], np.array(ends)[chrono], np.array(ranked_scores)[chrono], video_len

In [27]:
# Directory holding scenes_timing.txt and embeddings_6.pt for the video.
video_metadata_dir = 'data/frames/bd'

# Scene bounds and scores in chronological order, plus the end of the last scene.
highlight_start_sec, highlight_end_sec, highlight_scenes_scores, video_len = \
get_video_highlights_timing_by_c3d_embeddings(video_metadata_dir, rank_net, 6)

print(highlight_start_sec)
print(highlight_end_sec)
print(highlight_scenes_scores)

[   0.    6.   12.   18.   24.   30.   36.   42.   48.   54.   60.   66.
   72.   78.   84.   90.   96.  102.]
[   6.   12.   18.   24.   30.   36.   42.   48.   54.   60.   66.   72.
   78.   84.   90.   96.  102.  108.]
[ 0.22491862  0.00776201 -0.4874692  -0.4005754  -0.52681738 -0.2362529
 -0.22521007  0.72785306  0.46509102  0.14945415  0.24952649  0.28643593
  0.19895637  0.74111307  0.17376933 -0.0564931   0.18742171 -0.29617655]


In [28]:
# Scene indices ordered from the highest score to the lowest.
sort_ind = np.argsort(highlight_scenes_scores)[::-1]

In [29]:
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip

video_file = 'data/videos/bd.mp4'
highlights_dir = 'data/highlights'

# Number of top-scored scenes to cut out of the video.
h_number = 3

# The output directory must exist before the clips are written.
os.makedirs(highlights_dir, exist_ok=True)

# Slicing keeps the loop valid when fewer than h_number scenes were scored.
for i, scene_ind in enumerate(sort_ind[:h_number]):
    highlight_name = 'highlight_{}.mp4'.format(i)
    highlight_file = os.path.join(highlights_dir, highlight_name)
    ffmpeg_extract_subclip(video_file, highlight_start_sec[scene_ind], highlight_end_sec[scene_ind],
                           targetname=highlight_file)

Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
