In [20]:
import cv2
import time
import youtube_dl
from matplotlib import pyplot as plt
import numpy as np
import ffmpeg
import os

## 1.1

In [19]:
video_name = "videoplayback.mp4"
frames = "frames/"
ffmpeg.input(video_name, t=30).output('frames/frame-%3d.jpg', start_number=0).overwrite_output().run(quiet=True)

(b'',
 b"ffmpeg version 4.2.2 Copyright (c) 2000-2019 the FFmpeg developers\n  built with gcc 7.3.0 (crosstool-NG 1.23.0.449-a04d0)\n  configuration: --prefix=/tmp/build/80754af9/ffmpeg_1587154242452/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placeho --cc=/tmp/build/80754af9/ffmpeg_1587154242452/_build_env/bin/x86_64-conda_cos6-linux-gnu-cc --disable-doc --enable-avresample --enable-gmp --enable-hardcoded-tables --enable-libfreetype --enable-libvpx --enable-pthreads --enable-libopus --enable-postproc --enable-pic --enable-pthreads --enable-shared --enable-static --enable-version3 --enable-zlib --enable-libmp3lame --disable-nonfree --enable-gpl --enable-gnutls --disable-openssl --enable-libopenh264 --enable-libx264\n  libavutil      56. 31.100 / 56. 31.100\n  libavcodec     58. 54.100 / 58. 54.100\n  libavformat    58. 29.100

## 1.2

In [48]:
frames_dir = [frames + f for f in os.listdir(frames) if f.endswith(".jpg")]
frames_dir.sort()

In [49]:
def haar_detector(img,scale_fac = 1.1):
    grayscale_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    size = grayscale_image.shape
    face_cascade = cv2.CascadeClassifier('face.xml') 
    detected_faces = face_cascade.detectMultiScale(grayscale_image,scaleFactor=scale_fac,flags=cv2.CASCADE_SCALE_IMAGE)
    for (x,y,w,h) in detected_faces:
        cv2.rectangle(img,(x,y),(x+w,y+h),(255,0,0),2)
    return detected_faces, img

In [50]:
boxes = []
new_frames = []
for i in range(len(frames_dir)):
    img = cv2.imread(frames_dir[i])
    faces, frame = haar_detector(img)
    boxes.append(faces)
    new_frames.append(frame)


In [51]:
scale_fac = [1.01,1.1,2.5]
t = []
for sc_f in scale_fac:
    a = time.time()
    for i in range(len(frames_dir)):
        img = cv2.imread(frames_dir[i])
        haar_detector(img,sc_f)
    t.append((time.time() - a))

In [52]:
t

[288.0384511947632, 36.95407962799072, 14.027289628982544]

In [53]:
print("average time taken for each frame is: ",t[1]/720, "seconds")

average time taken for each frame is:  0.05132511059443156 seconds


## Observations
1. One of the key factors which changes the speed of the algorithm is the scale Factor reducing it increases the time significantly. This is because scale Factor decides how much scaling should happen between previous and next cycle. And when it is low more number of samples are formed so it increases the time of the algorithm

## 1.3

In [57]:
fourcc = cv2.VideoWriter_fourcc(*'MJPG')  # Codec for AVI format
out = cv2.VideoWriter('face_detect.mp4', fourcc, 24.0, (854, 480))

for frame in new_frames:
    out.write(frame)
out.release()

OpenCV: FFMPEG: tag 0x47504a4d/'MJPG' is not supported with codec id 7 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'


Link to video: https://drive.google.com/file/d/1VvHln7TNbIc6UIcEOmSMFcyvAAvgHgzO/view?usp=sharing

## Observations
1. If the eyes are closed then detector does not detect a face. This issue is because haar filter for eyes did not detect closed eyes and the Viola Jones Algorithm then did not have eyes as a feature and so it did not detect a face.
2. Ears are being detected as a face. It is because the xml file was for frontal face detection and but the person was standing sideways and so the algorithm incorrect part as face.
3. In some frames tree looks like a face and since haar features are binary masks they cannot differentiate between skin colors and hence since it was structurally looking as a face so the algorithm detected it as a face

## 1.4

In [67]:
def calculate_iou(bbox1, bbox2):
    
    x1, y1, width1, height1 = bbox1
    x2, y2, width2, height2 = bbox2

    intersect_left = max(x1, x2)
    intersect_top = max(y1, y2)
    intersect_right = min(x1 + width1, x2 + width2)
    intersect_bottom = min(y1 + height1, y2 + height2)

    if intersect_right < intersect_left or intersect_bottom < intersect_top:
        return 0.0

    intersection = (intersect_right - intersect_left) * (intersect_bottom - intersect_top)
    area1 = width1 * height1
    area2 = width2 * height2

    union = area1 + area2 - intersection
    return intersection / union


In [69]:
object_id = 1
previous_labels = []
processed_frames = []

for i in range(1, len(frames_dir)):
    current_frame = cv2.imread(frames_dir[i]).copy()
    current_labels = []
    for j in range(len(boxes[i])):
        current_label = None
        current_bbox = boxes[i][j]
        for prev_index in range(len(previous_labels)):
            previous_bbox = boxes[i-1][prev_index]
            if calculate_iou(current_bbox, previous_bbox) > 0.5:
                current_label = previous_labels[prev_index]
                break
        if current_label is None:
            current_label = object_id
            object_id += 1
        current_frame = cv2.rectangle(current_frame, (current_bbox[0], current_bbox[1]), (current_bbox[0] + current_bbox[2], current_bbox[1] + current_bbox[3]), (0,0,255), 5)
        current_frame = cv2.putText(current_frame, str(current_label), (current_bbox[0], current_bbox[1] + current_bbox[3]), vis_font, vis_font_scale, vis_color, vis_thickness, cv2.LINE_AA)
        current_labels.append(current_label)
    previous_labels = current_labels
    processed_frames.append(current_frame)

In [70]:
fourcc = cv2.VideoWriter_fourcc(*'MJPG')  # Codec for AVI format
out = cv2.VideoWriter('face_detect_label.mp4', fourcc, 24.0, (854, 480))

for frame in processed_frames:
    out.write(frame)
out.release()

OpenCV: FFMPEG: tag 0x47504a4d/'MJPG' is not supported with codec id 7 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'


Link to video: https://drive.google.com/file/d/1coXrJ-e6iMMVTkTpzscIEN2dXTHk7uzr/view?usp=sharing