In [85]:
import numpy as np
import subprocess
import os
import cv2

In [86]:
# # use ffmpeg to convert the video to a series of images
# def extract_frames(video_path, output_dir, duration=30):
#     # Create output directory if it doesn't exist
#     os.makedirs(output_dir, exist_ok=True)

#     # Run ffmpeg command to extract frames
#     command = [
#         'ffmpeg',
#         '-i', video_path,
#         # '-vf', 'fps=1/1',
#         '-t', str(duration),
#         os.path.join(output_dir, 'frame_%04d.png')
#     ]
#     subprocess.run(command)

# video_path = './ForrestGump.mp4'
# output_dir = 'frames'
# extract_frames(video_path, output_dir)

In [87]:
# use viola jones to detect faces in the images
# Per-image detections, appended by detect_faces() in call order: entry k holds
# whatever detectMultiScale returned for the k-th processed image.
face_rectangles = []


def detect_faces(image_path, face_cascade, faces_dir='faces'):
    """Detect faces in one image and save a copy with the detections outlined.

    The detections are also appended to the module-level ``face_rectangles``
    list, so that list ends up with one entry per processed image.

    Args:
        image_path: Path of the image to read.
        face_cascade: Detector object exposing ``detectMultiScale(gray)``
            (the caller passes a ``cv2.CascadeClassifier``).
        faces_dir: Directory the annotated image is written to, under the same
            base name as ``image_path``. It is not created here.

    Raises:
        OSError: If the image cannot be read or the annotated copy cannot be
            written.
    """
    # cv2.imread signals failure by returning None instead of raising, so
    # check here rather than crash later inside cvtColor.
    img = cv2.imread(image_path)
    if img is None:
        raise OSError("Could not read image: {!r}".format(image_path))

    # The cascade is run on the grayscale version of the image.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray)

    # Outline every detection on the colour image ((255, 0, 0) is blue in BGR).
    for (x, y, w, h) in faces:
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)

    # Record this image's detections, including the "no faces" case, so the
    # list index stays aligned with the order of processed images.
    face_rectangles.append(faces)

    # cv2.imwrite reports failure through its return value.
    faces_path = os.path.join(faces_dir, os.path.basename(image_path))
    if not cv2.imwrite(faces_path, img):
        raise OSError("Could not write image: {!r}".format(faces_path))

# Frontal-face Haar cascade shipped with the OpenCV Python package.
face_cascade = cv2.CascadeClassifier(
    cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
)

# Input frames are read from frames_dir; annotated copies go to faces_dir.
frames_dir = 'frames'
faces_dir = 'faces'
os.makedirs(faces_dir, exist_ok=True)

# Process the frames in sorted file-name order so that the position of each
# entry in face_rectangles matches the position of its frame in frame_files.
frame_files = sorted(os.listdir(frames_dir))

for frame in frame_files:
    frame_path = os.path.join(frames_dir, frame)
    detect_faces(frame_path, face_cascade, faces_dir)

## 1.2
The code takes a total of 36 seconds to detect faces in all 720 images. This means that it takes 0.05 seconds on average to detect faces in one image.
The factors that affect the speed of the face detection algorithm are:
1. Scale Factor: The scaleFactor parameter used in the detectMultiScale function determines how much the image size is reduced at each image scale. Smaller values lead to slower but more accurate detection, while larger values speed up detection but may miss smaller faces.

2. Minimum Neighbors: The minNeighbors parameter specifies how many neighbors each candidate rectangle should have to retain it. Higher values reduce false positives but may also discard true faces. Because it only filters the candidate detections after the windows have been evaluated, it affects the running time far less than the scale factor does.
3. The number of features and the number of weak classifiers used in each stage of the cascade.
4. The size of the image

In [88]:
# # use ffmpeg to convert the images back to a video
# def create_video(frames_dir, output_path, fps=24):
#     # Run ffmpeg command to create video
#     command = [
#         'ffmpeg',
#         '-framerate', str(fps),
#         '-i', os.path.join(frames_dir, 'frame_%04d.png'),
#         '-c:v', 'libx264',
#         '-pix_fmt', 'yuv420p',
#         output_path
#     ]
#     subprocess.run(command)

# output_path = 'output.mp4'
# create_video(faces_dir, output_path)

## 1.3
Here is the link to the video with detected faces:
[Link to the video](https://iiitaphyd-my.sharepoint.com/:v:/g/personal/mulukutla_p_research_iiit_ac_in/EbxlMeSlbIVMm458a1FtfnEB_AyQYyxWw5OPynz6ggAYkg?nav=eyJyZWZlcnJhbEluZm8iOnsicmVmZXJyYWxBcHAiOiJPbmVEcml2ZUZvckJ1c2luZXNzIiwicmVmZXJyYWxBcHBQbGF0Zm9ybSI6IldlYiIsInJlZmVycmFsTW9kZSI6InZpZXciLCJyZWZlcnJhbFZpZXciOiJNeUZpbGVzTGlua0NvcHkifX0&e=8b52RS)

Observations:
1. The face detection algorithm is able to detect faces very accurately if they are facing the camera directly. However, it struggles to detect faces that are not facing the camera directly or are partially occluded. Though side faces are detected sometimes, it is not reliable.
2. The algorithm detects a lot of non-faces as faces in the video. This is because the algorithm is not able to differentiate between faces and objects that look similar to faces. This mostly happens when the object is slightly textured similar to a face. That is, the Haar cascades only look at the sum of pixel intensities in the region and not the texture of the region, which leads to false positives. Some examples are some leaf patterns in the background or Forrest's shirt patterns being detected as faces.
3. The algorithm is not able to detect faces that are a little far away from the camera, because the eyes and other features that are used by the Haar cascades to detect faces are not clearly visible in the image.
4. It is also not able to detect faces that are visible but upside down or tilted. This is because the Haar cascade is trained on upright faces and its features are computed over axis-aligned rectangles, so the detector is not rotation-invariant.
5. A good feature is that it is able to detect faces even when they are slightly blurred, but the features are still somewhat visible. This is again because the Haar cascades only look at the sum of pixel intensities in the region and not the texture of the region.

In [89]:
# generate face tracks using IoU
def iou(boxA, boxB):
    """Return the intersection-over-union of two boxes.

    Args:
        boxA: First box as an indexable ``(x, y, w, h)``, where ``(x, y)`` is
            one corner and ``w``/``h`` are the extents along x and y.
        boxB: Second box in the same format.

    Returns:
        Intersection area divided by union area. Returns ``0.0`` when the
        union area is not positive (for example two zero-sized boxes) instead
        of dividing by zero.
    """
    # Corners of the intersection rectangle.
    left = max(boxA[0], boxB[0])
    top = max(boxA[1], boxB[1])
    right = min(boxA[0] + boxA[2], boxB[0] + boxB[2])
    bottom = min(boxA[1] + boxA[3], boxB[1] + boxB[3])

    # Clamp each side at 0 so disjoint boxes give an empty intersection
    # rather than a negative (or spuriously positive) area.
    inter_area = max(0, right - left) * max(0, bottom - top)

    # Union = sum of both areas minus the part counted twice.
    union_area = boxA[2] * boxA[3] + boxB[2] * boxB[3] - inter_area
    if union_area <= 0:
        return 0.0

    return inter_area / float(union_area)


In [90]:
# # continue a face track only if IoU > 0.5, otherwise start a new face track. Multiple face tracks can be active at the same time.
# # face_rectangles contains the face rectangles for each frame

# class FaceTracker:
#     def __init__(self, start_frame):
#         self.start_frame = start_frame
#         self.end_frame = -1
#         self.face_rectangles = []

#     def add_face(self, face):
#         self.face_rectangles.append(face)

#     def update_end_frame(self, end_frame):
#         self.end_frame = end_frame

# face_tracks = []
# active_tracks = []
# face_tracks_start_end = []

# for i, frame_faces in enumerate(face_rectangles):
#     for track in active_tracks:
#         faceFound = False
#         for j,face in enumerate(frame_faces):
#             print(j, track.face_rectangles[-1], face)
#             if iou(face, track.face_rectangles[-1]) > 0.5:
#                 # track.append(face)
#                 track.add_face(face)
#                 # remove the face from the frame_faces
#                 # frame_faces = np.delete(frame_faces, j)
#                 faceFound = True
#                 break
#         if not faceFound:
#             active_tracks.remove(track)
#             track.update_end_frame(i)
#             face_tracks.append(track)
#     # start new tracks for the remaining faces
#     for face in frame_faces:
#         # create a new track
#         newTrack = FaceTracker(i)
#         newTrack.add_face(face)
#         active_tracks.append(newTrack)
    
            

# for i, frame_faces in enumerate(face_rectangles):
#     for face in frame_faces:
#         face_found = False
#         for j, track in enumerate(face_tracks):
#             print(track[-1])
#             if iou(face, track[-1]) > 0.5:
#                 track.append(face)
#                 face_tracks_start_end[j] = (face_tracks_start_end[j][0], i)
#                 face_found = True
#                 break
#         if not face_found:
#             face_tracks.append([face])
#             face_tracks_start_end.append((i, i))

In [93]:
class FaceTracker:
    """A single face track: the rectangles of one face over a span of frames.

    Attributes are plain and public:
        start_frame: Frame number of the first rectangle.
        end_frame: Frame number of the most recently accepted rectangle.
        face_rect: The most recently accepted rectangle.
        track: All accepted rectangles, in the order they were accepted.
        iou_threshold: Overlap a new rectangle must exceed to be accepted.
    """

    def __init__(self, start_frame, face_rect, iou_threshold=0.5):
        """Start a track from one detection.

        Args:
            start_frame: Frame number in which the face was first seen.
            face_rect: Rectangle of that first detection, indexable as
                ``(x, y, w, h)``.
            iou_threshold: Minimum IoU (exclusive) between the track's latest
                rectangle and a new one for the new one to extend the track.
                Defaults to 0.5, the value that was previously hard-coded.
        """
        self.start_frame = start_frame
        self.end_frame = start_frame
        self.face_rect = face_rect
        self.track = [face_rect]
        self.iou_threshold = iou_threshold

    def update(self, frame_number, new_face_rect):
        """Try to extend the track with a detection from ``frame_number``.

        The detection is accepted only if its IoU with the track's latest
        rectangle is strictly greater than ``iou_threshold``.

        Returns:
            True if the detection was appended (and ``end_frame`` /
            ``face_rect`` were advanced), False if it was rejected and the
            track left unchanged.
        """
        overlap = calculate_iou(self.face_rect, new_face_rect)
        # Written as "accept when greater" so that a NaN overlap is rejected.
        if overlap > self.iou_threshold:
            self.end_frame = frame_number
            self.face_rect = new_face_rect
            self.track.append(new_face_rect)
            return True
        return False

def calculate_iou(rect1, rect2):
    """Return the intersection-over-union of two rectangles.

    Args:
        rect1: First rectangle as an indexable ``(x, y, w, h)``, where
            ``(x, y)`` is one corner and ``w``/``h`` are the extents along x
            and y.
        rect2: Second rectangle in the same format.

    Returns:
        Intersection area divided by union area. Returns ``0.0`` when the
        union area is not positive (for example two zero-sized rectangles)
        instead of dividing by zero.
    """
    # Corners of the overlap region.
    x_left = max(rect1[0], rect2[0])
    y_top = max(rect1[1], rect2[1])
    x_right = min(rect1[0] + rect1[2], rect2[0] + rect2[2])
    y_bottom = min(rect1[1] + rect1[3], rect2[1] + rect2[3])

    # Clamp each side at 0 so that disjoint rectangles overlap by exactly 0.
    overlap_area = max(0, x_right - x_left) * max(0, y_bottom - y_top)

    # Union = both areas minus the overlap that would otherwise count twice.
    union_area = rect1[2] * rect1[3] + rect2[2] * rect2[3] - overlap_area
    if union_area <= 0:
        return 0.0

    return overlap_area / float(union_area)

# Create face trackers based on face detections in consecutive frames
def create_face_tracks(face_rectangles):
    """Group per-frame face detections into tracks of consecutive frames.

    Each detection is offered to the tracks that were extended in the
    previous frame, in the order those tracks were created; the first track
    whose ``update`` accepts it takes it (first match, not best match). A
    detection that no track accepts starts a new ``FaceTracker``.

    A track receives at most one detection per frame and is closed as soon as
    a frame passes without a match, so every returned track holds exactly one
    rectangle for each frame from ``start_frame`` to ``end_frame``.

    Args:
        face_rectangles: Sequence with one entry per frame, each entry being
            an iterable of ``(x, y, w, h)`` detections (possibly empty).

    Returns:
        List of all tracks: closed tracks in the order they were closed,
        followed by the tracks still open after the last frame.
    """
    finished_tracks = []
    active_tracks = []

    for frame_idx, faces in enumerate(face_rectangles):
        for face in faces:
            for track in active_tracks:
                # end_frame == frame_idx means the track was created in, or
                # already extended by, this frame. Skipping it keeps one
                # rectangle per frame in track.track.
                if track.end_frame == frame_idx:
                    continue
                if track.update(frame_idx, face):
                    break
            else:
                # No open track accepted the detection: it starts a new one.
                active_tracks.append(FaceTracker(frame_idx, face))

        # Close every track that got no detection in this frame. A new list
        # is built instead of removing from the list being iterated, which
        # would silently skip the element after each removal.
        still_active = []
        for track in active_tracks:
            if track.end_frame == frame_idx:
                still_active.append(track)
            else:
                finished_tracks.append(track)
        active_tracks = still_active

    # Tracks that were still open when the frames ran out.
    finished_tracks.extend(active_tracks)

    return finished_tracks

# Build the tracks from the per-frame detections gathered in face_rectangles
# (one entry of detections per frame, in frame order).
face_tracks = create_face_tracks(face_rectangles)

# Report each track's first frame, last frame and latest rectangle, with a
# blank line after every track, then the total number of tracks.
for track in face_tracks:
    summary = (
        ("Start frame:", track.start_frame),
        ("End frame:", track.end_frame),
        ("Face rectangle:", track.face_rect),
    )
    for label, value in summary:
        print(label, value)
    print()

print(len(face_tracks))

Start frame: 4
End frame: 4
Face rectangle: [503 157  74  74]

Start frame: 6
End frame: 8
Face rectangle: [501 166  58  58]

Start frame: 12
End frame: 13
Face rectangle: [537 173  59  59]

Start frame: 15
End frame: 16
Face rectangle: [542 148  68  68]

Start frame: 17
End frame: 17
Face rectangle: [387 182  26  26]

Start frame: 21
End frame: 21
Face rectangle: [404  89 144 144]

Start frame: 23
End frame: 24
Face rectangle: [502 148  65  65]

Start frame: 25
End frame: 25
Face rectangle: [257 310  76  76]

Start frame: 25
End frame: 26
Face rectangle: [384 185  28  28]

Start frame: 27
End frame: 27
Face rectangle: [271 322  53  53]

Start frame: 30
End frame: 30
Face rectangle: [384 185  30  30]

Start frame: 31
End frame: 31
Face rectangle: [739 145  28  28]

Start frame: 32
End frame: 32
Face rectangle: [472 149  62  62]

Start frame: 33
End frame: 33
Face rectangle: [386 187  28  28]

Start frame: 40
End frame: 40
Face rectangle: [257 230  43  43]

Start frame: 43
End frame: 43

In [96]:
# indicate the face tracks in the video using integer labels and save the video
def indicate_face_tracks(image_path, face_tracks, frame_num, tracks_dir):
    """Draw the tracks present in one frame, labelled by track index, and save it.

    Args:
        image_path: Path of the frame image to read.
        face_tracks: Sequence of track objects exposing ``start_frame``,
            ``end_frame`` and ``track`` (a list of ``(x, y, w, h)`` boxes).
            A track's position in this sequence is used as its integer label.
        frame_num: Index of this frame, on the same scale as the tracks'
            ``start_frame`` / ``end_frame``.
        tracks_dir: Directory the annotated image is written to, under the
            same base name as ``image_path``. It is not created here.

    Raises:
        OSError: If the image cannot be read or the annotated copy cannot be
            written.
    """
    # cv2.imread signals failure by returning None instead of raising.
    img = cv2.imread(image_path)
    if img is None:
        raise OSError("Could not read image: {!r}".format(image_path))

    for label, track in enumerate(face_tracks):
        # Only tracks whose frame span covers this frame are drawn.
        if not track.start_frame <= frame_num <= track.end_frame:
            continue

        # The rectangle for this frame is looked up by its offset from the
        # track's first frame; skip the track if it holds fewer rectangles
        # than that offset requires.
        offset = frame_num - track.start_frame
        if offset >= len(track.track):
            continue

        # Plain ints keep the drawing calls independent of the box's dtype.
        x, y, w, h = (int(value) for value in track.track[offset])
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)
        cv2.putText(img, str(label), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

    # cv2.imwrite reports failure through its return value.
    tracks_path = os.path.join(tracks_dir, os.path.basename(image_path))
    if not cv2.imwrite(tracks_path, img):
        raise OSError("Could not write image: {!r}".format(tracks_path))

# Annotated frames go to their own directory, created if it is missing.
tracks_dir = 'tracks'
os.makedirs(tracks_dir, exist_ok=True)

# The position of a frame in frame_files is passed on as its frame number.
for i, frame in enumerate(frame_files):
    indicate_face_tracks(os.path.join(frames_dir, frame), face_tracks, i, tracks_dir)

0
1
2
3
0
4
5
0
6
1
7
2
8
9
10
11
0
12
1
13
14
0
15
1
16
0
17
18
19
20
0
21
22
0
23
1
24
0
0
25
1
26
0
27
28
29
0
30
0
31
0
32
0
33
34
35
36
37
38
39
0
40
41
42
0
43
0
44
45
46
0
47
48
0
1
0
1
2
3
0
0
1
2
0
0
1
2
0
3
1
0
4
2
1
5
2
6
3
7
4
8


IndexError: list index out of range