## Installing Dependencies

#### 1. Install ffmpeg
    https://www.wikihow.com/Install-FFmpeg-on-Windows
#### 2. Install libraries

In [1]:
#! pip install imutils
#! pip install opencv-python
#! pip install tqdm

### Import Modules

In [2]:
import os,sys,subprocess,re,math
from subprocess import check_call,PIPE,Popen
import shlex
from tqdm import tqdm
import cv2
import pandas as pd
from imutils.object_detection import non_max_suppression
import numpy as np
import time
from sklearn.cluster import KMeans



### Loading the pre-trained cascade classifiers (Haar and LBP) using OpenCV

In [3]:
# Pre-trained OpenCV cascade classifiers, loaded from the local "models" folder
# (paths are relative to the notebook's working directory).
# NOTE(review): a wrong path may not raise here; consider checking each
# classifier with `.empty()` after loading — confirm against the OpenCV docs.

# Haar cascade: upper body.
haar_upper_body_cascade = cv2.CascadeClassifier("models/haarcascade_upperbody.xml")

# Haar cascade: full body.
haar_full_body_cascade = cv2.CascadeClassifier("models/haarcascade_fullbody.xml")

# Haar cascade: frontal face.
haar_face_cascade = cv2.CascadeClassifier("models/haarcascade_frontalface_alt.xml")

# LBP cascade: frontal face (the variable is spelled "lbm" but loads the
# lbpcascade file).
lbm_face_cascade = cv2.CascadeClassifier("models/lbpcascade_frontalface.xml")

# Haar cascade: hand.
haar_hand_cascade = cv2.CascadeClassifier("models/hand.xml")

# Haar cascade: lower body.
haar_lowerbody_cascade = cv2.CascadeClassifier("models/haarcascade_lowerbody.xml")

# Path only (nothing is loaded here) to the frozen EAST text-detection graph.
text_detection_model = "models/frozen_east_text_detection.pb"

In [4]:
# Folder layout used by the notebook (relative to the working directory).
DATA_FOLDER = "Dataset"        # root folder of the input videos
SEGMENT_OUTPUT = 'segments'    # folder that receives the split video segments
OUTPUT_FOLDER = 'results'      # presumably holds later-stage outputs — TODO confirm

### Function : get_video_frame_types
##### Purpose

This function returns the picture type of every frame in a video (for example, I-frame or P-frame), as reported by `ffprobe`.

In [5]:
def get_video_frame_types(filename):
    """Return the picture type of every frame ffprobe reports for a video.

    Runs ``ffprobe -show_entries frame=pict_type`` on ``filename`` and strips
    the ``pict_type=`` prefix from each line of its output.

    Parameters
    ----------
    filename : str
        Path of the video file to inspect.

    Returns
    -------
    iterator of (int, str)
        Lazy, single-pass iterator of ``(frame_index, pict_type)`` pairs
        (e.g. ``(0, 'I')``), numbered from 0 in ffprobe's output order.
        Wrap it in ``list(...)`` if it must be traversed more than once.

    Raises
    ------
    subprocess.CalledProcessError
        If ffprobe exits with a non-zero status.
    """
    probe_cmd = [
        'ffprobe', '-v', 'error',
        '-show_entries', 'frame=pict_type',
        '-of', 'default=noprint_wrappers=1',
        filename,
    ]
    raw_output = subprocess.check_output(probe_cmd).decode()
    # Each output line looks like "pict_type=I"; keep only the type letter.
    frame_types = raw_output.replace('pict_type=', '').split()
    return enumerate(frame_types)

### Function : get_video_length
##### Purpose

This function is used to get video length in seconds.

In [6]:
def get_video_length(video_fn):
    """Return the duration of a video in seconds, as reported by ffprobe.

    Parameters
    ----------
    video_fn : str
        Path of the video file to inspect.

    Returns
    -------
    float
        Container duration in seconds (ffprobe's ``format=duration``).

    Raises
    ------
    ValueError
        If ffprobe's output cannot be parsed as a number (missing file,
        unreadable container, unknown duration). The message includes
        ffprobe's own diagnostics.
    """
    res = subprocess.run(
        ["ffprobe", "-v", "error",
         "-show_entries", "format=duration",
         "-of", "default=noprint_wrappers=1:nokey=1",
         video_fn],
        stdout=subprocess.PIPE,
        # Keep diagnostics out of stdout: a warning mixed into the duration
        # text would make float() fail even though the duration was printed.
        stderr=subprocess.PIPE,
    )
    duration_text = res.stdout.decode(errors="replace").strip()
    try:
        return float(duration_text)
    except ValueError:
        diagnostics = (res.stderr or b"").decode(errors="replace").strip()
        details = diagnostics or duration_text or "no output"
        raise ValueError(
            "ffprobe could not determine the duration of {!r}: {}".format(video_fn, details)
        ) from None

In [7]:
pwd

'/Users/vipul/Documents/Training/27JunCapstone/Capstone Project 1'

In [8]:
# Sanity check: duration (in seconds) of one sample video from the dataset.
get_video_length('Dataset/nptel_ml/ML.mp4')

204.961667

### Function : get_video_metadata
##### Purpose

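This function reads the video's duration (in whole seconds) and frame rate (fps) from ffmpeg's output. The `split_video_in_segment` function defined below uses the duration to split the video into segments of a given number of seconds.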

In [9]:
# Matches the "Duration: HH:MM:SS.xx," line of ffmpeg's input report and the
# first "<number> fps" that follows it. Groups: 1-3 = hours/minutes/seconds,
# 4 = frame rate, 5 = fractional part of the frame rate (if any).
# Raw string so "\d" and "\." reach the regex engine without invalid-escape
# warnings; "[\s\S]*?" lets the fps figure sit on any later line, so files
# whose first listed stream is audio still parse.
re_metadata = re.compile(r'Duration: (\d{2}):(\d{2}):(\d{2})\.\d+,[\s\S]*? (\d+(\.\d+)?) fps')


def get_video_metadata(video_fn):
    """Return the duration and frame rate of a video, parsed from ffmpeg's report.

    Runs ``ffmpeg -hide_banner -i <video_fn>`` and parses the text ffmpeg
    writes to stderr.

    Parameters
    ----------
    video_fn : str
        Path of the video file to inspect.

    Returns
    -------
    (int, float)
        ``(video_length, video_fps)``: duration in whole seconds (the
        fractional part is dropped) and frames per second.

    Raises
    ------
    Exception
        If the duration / fps pattern is not found in ffmpeg's output.
    """
    p1 = Popen(["ffmpeg", "-hide_banner", "-i", video_fn], stderr=PIPE, universal_newlines=True)
    # communicate() returns (stdout, stderr); the stream report is on stderr.
    output = p1.communicate()[1]
    matches = re_metadata.search(output)
    if not matches:
        raise Exception("Can't parse required video metadata")
    hours, minutes, seconds = (int(matches.group(idx)) for idx in (1, 2, 3))
    video_length = hours * 3600 + minutes * 60 + seconds
    video_fps = float(matches.group(4))
    return video_length, video_fps

In [12]:
def split_video_in_segment(video_fn, num, out_path, by='size'):
    """Split a video into consecutive segments with ffmpeg's segment muxer.

    Segments are written as ``<out_path>/<parent folder>/<video name>-<i>.<ext>``
    where ``<parent folder>`` is the name of the directory that directly
    contains ``video_fn``. Streams are copied, not re-encoded.

    Parameters
    ----------
    video_fn : str
        Path of the video to split. Must have a file extension.
    num : int or float
        Segment length in seconds when ``by='size'``; number of segments when
        ``by='count'``. Must be positive.
    out_path : str
        Root folder for the segments; sub-folders are created as needed.
    by : {'size', 'count'}
        How ``num`` is interpreted.

    Returns
    -------
    list of str
        Expected segment paths, numbered from 0. The list is computed from the
        whole-second duration; because streams are copied, ffmpeg cuts at key
        frames, so the files actually written may differ slightly.

    Raises
    ------
    AssertionError
        If ``num`` is not positive or ``by`` is not a supported mode.
    ValueError
        If ``video_fn`` has no file extension.
    Exception
        If the requested segment size yields a single segment, or the video
        metadata cannot be parsed.
    subprocess.CalledProcessError
        If ffmpeg exits with a non-zero status.
    """
    assert num > 0
    assert by in ['size', 'count']
    split_video_size = num if by == 'size' else None
    split_video_count = num if by == 'count' else None

    # Only the duration is needed; the frame rate is not used for stream copy.
    video_length, _ = get_video_metadata(video_fn)

    if split_video_size:
        split_video_count = math.ceil(video_length / split_video_size)
        if split_video_count == 1:
            # One segment means the requested size already covers the whole video.
            raise Exception("Split size is not smaller than the video length! "
                            "Please decrease the target split size!!")
    else:  # split by count
        # Never hand ffmpeg a zero-second segment length.
        split_video_size = max(1, round(video_length / split_video_count))

    # os.path copes with both "/" and "\\" separators and with paths that
    # have fewer than two directory levels.
    src_dir, src_name = os.path.split(video_fn)
    stem, dot_ext = os.path.splitext(src_name)
    ext = dot_ext[1:]
    if not ext:
        raise ValueError("Video file name has no extension: {!r}".format(video_fn))
    folder_name = os.path.basename(src_dir)
    pth = os.path.join(out_path, folder_name, stem)

    # ffmpeg does not create missing output folders.
    segment_dir = os.path.dirname(pth)
    if segment_dir:
        os.makedirs(segment_dir, exist_ok=True)

    # Argument list instead of a re-parsed command string, so paths with
    # spaces or quotes are passed through untouched.
    cmd = [
        'ffmpeg', '-i', video_fn,
        '-c', 'copy', '-map', '0',
        '-f', 'segment',
        '-segment_time', str(split_video_size),
        '-reset_timestamps', '1',
        '-y', '{}-%d.{}'.format(pth, ext),
    ]
    check_call(cmd, universal_newlines=True)

    # returning the list of output (index start from 0)
    return ['{}-{}.{}'.format(pth, i, ext) for i in range(split_video_count)]

## Task 1 : Video Segmentation

### Iterate over each folder and the videos inside it, call the split function, and save the resulting segments in the "segments" folder.