# Data Exploration

This notebook demonstrates how to read a video file, extract frames, and display them using OpenCV and Matplotlib. It also shows how to detect faces in a frame using the Viola-Jones algorithm and how to detect objects in a frame using YOLOv3.

In [None]:
import cv2

# NOTE: absolute path to a local copy of the recording; adjust it to where the
# dataset lives on your machine.
input_video_path = '/Volumes/Patrick/VitalCamSet/Proband05/101_natural_lighting/Logitech HD Pro Webcam C920.avi'
video = cv2.VideoCapture(input_video_path)

# VideoCapture does not raise when the file is missing or cannot be decoded,
# so fail fast here instead of hitting confusing errors in the cells below.
if not video.isOpened():
    raise IOError(f'Could not open video file: {input_video_path}')

## Video properties

In [None]:
# Get the video properties
fps = video.get(cv2.CAP_PROP_FPS)
frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))

# video.get() yields 0 for properties the backend cannot provide; guard the
# division so an unknown FPS shows up as NaN instead of a ZeroDivisionError.
duration = frame_count / fps if fps > 0 else float('nan')

print(f'FPS: {fps}')
print(f'Frame count: {frame_count}')
print(f'Frame: {frame_width}x{frame_height}')
print(f'Duration: {duration:.2f} seconds')

## Display random frames

In [None]:
import random

# Seed the random number generator for reproducibility
random.seed(42)

# random.randint is inclusive on both ends, so the upper bound has to be the
# last valid frame index (frame_count - 1); index == frame_count is past the end.
frame_inx1 = random.randint(0, frame_count - 1)
frame_inx2 = random.randint(0, frame_count - 1)


def read_frame_at(capture, frame_index):
    """Seek `capture` to `frame_index` and return the frame read at that position.

    Raises:
        RuntimeError: if the capture reports that no frame could be read.
    """
    capture.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
    ok, frame = capture.read()
    if not ok:
        # Without this check `frame` would be None and only fail later,
        # in the colour conversion of the display cell.
        raise RuntimeError(f'Could not read frame {frame_index} from the video')
    return frame


frame1 = read_frame_at(video, frame_inx1)
frame2 = read_frame_at(video, frame_inx2)

In [None]:
import matplotlib.pyplot as plt

fig, axes = plt.subplots(1, 2, figsize=(10, 5))

# Convert from BGR to RGB so Matplotlib shows the expected colours.
# NOTE: frame1/frame2 are overwritten with their RGB versions and the cells
# below work on these RGB frames. Re-running only this cell swaps the channels
# back again, so re-run the frame-reading cell first.
frame1 = cv2.cvtColor(frame1, cv2.COLOR_BGR2RGB)
frame2 = cv2.cvtColor(frame2, cv2.COLOR_BGR2RGB)

for ax, frame, frame_inx in zip(axes, (frame1, frame2), (frame_inx1, frame_inx2)):
    ax.imshow(frame)
    ax.set_title(f'Frame {frame_inx}')

# Explicit show() keeps the cell output free of the trailing Text(...) repr.
plt.show()

## Frame difference

In [None]:
# Per-element absolute difference between the two sampled frames.
frame_diff = cv2.absdiff(frame1, frame2)

fig_diff, ax_diff = plt.subplots(figsize=(5, 5))
ax_diff.imshow(frame_diff)
ax_diff.set_title('Difference between frames')
plt.show()

## Face detection

In [None]:
# Detect the face in the first frame with the Viola-Jones algorithm (Haar cascade)
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

# frame1 was already converted to RGB in the display cell, so the grayscale
# conversion must use RGB2GRAY; BGR2GRAY would swap the red and blue weights.
gray_frame1 = cv2.cvtColor(frame1, cv2.COLOR_RGB2GRAY)
faces = face_cascade.detectMultiScale(gray_frame1, scaleFactor=1.3, minNeighbors=5)

# Draw on a copy so frame1 stays untouched for the following cells
face_detection = frame1.copy()

for (x, y, w, h) in faces:
    # (255, 0, 0) is red because the image is in RGB channel order
    cv2.rectangle(face_detection, (x, y), (x + w, y + h), (255, 0, 0), 2)

plt.figure(figsize=(5, 5))
plt.imshow(face_detection)
plt.title('Face detection')
plt.show()

## YOLO Algorithm

In [None]:
import os
import numpy as np

# Directory holding the YOLOv3 class list, weights and network config
yolo_dir = os.path.join(os.getcwd(), '..', 'data', 'yolo')

# Load the class labels (one label per line)
with open(os.path.join(yolo_dir, 'yolov3.txt'), 'r') as f:
    classes = [line.strip() for line in f]

# Generate one random colour per class. A dedicated, seeded generator keeps the
# colours identical between runs (the stdlib `random.seed` does not affect NumPy).
rng = np.random.default_rng(42)
COLORS = rng.uniform(0, 255, size=(len(classes), 3))

# ~1/255: scales 8-bit pixel values to roughly the [0, 1] range
scale = 0.00392

# Make a three channel image from the gray frame so it matches the network's
# three-channel input
gray_xxx = cv2.merge((gray_frame1, gray_frame1, gray_frame1))
blob = cv2.dnn.blobFromImage(
    gray_xxx,
    scalefactor=scale,
    size=(416, 416),
    mean=(0, 0, 0),
    swapRB=True,  # no visible effect here: all three channels are identical
    crop=False,
)

net = cv2.dnn.readNet(os.path.join(yolo_dir, 'yolov3.weights'), os.path.join(yolo_dir, 'yolov3.cfg'))
net.setInput(blob)

In [None]:
# Inspect the shape of the network input blob
# (presumably (1, 3, 416, 416), i.e. batch x channels x height x width — confirm against the output)
blob.shape

In [None]:
# Compare the single-channel gray frame with its three-channel merged copy
gray_frame1.shape, gray_xxx.shape

In [None]:
# Resolve the names of the network's output layers.
layer_names = net.getLayerNames()
unconnected_layers = net.getUnconnectedOutLayers()

# Depending on the OpenCV version the ids come back as an N x 1 or as a flat
# array; flattening makes the lookup work for both. The ids are 1-based,
# hence the `- 1` when indexing into layer_names.
output_layers = [layer_names[int(inx) - 1] for inx in np.asarray(unconnected_layers).flatten()]
output_layers

In [None]:
# Run a forward pass of the blob set via net.setInput() through the network
# and gather the predictions of the requested output layers.
# `outs` is iterated per output layer in the next cell.
outs = net.forward(output_layers)

In [None]:
# initialization
class_ids = []
confidences = []
boxes = []

width, height = frame1.shape[1], frame1.shape[0]

# for each detection from each output layer
# get the confidence, class id, bounding box params
# and ignore weak detections (confidence < 0.5)
for out in outs:
    print('out.shape', out.shape)
    for detection in out:
        # The first 4 elements are the bounding box (center x, center y, width,
        # height), expressed relative to the image size
        box = detection[:4]

        # Element 4 is skipped here (presumably the objectness score of the
        # YOLOv3 output layout — confirm); the rest are the class probabilities
        scores = detection[5:]
        # Plain int so the id can index Python lists and NumPy arrays alike
        class_id = int(np.argmax(scores))
        confidence = scores[class_id]

        if confidence >= 0.5:
            # Scale the relative box back to pixel coordinates of the frame
            center_x = int(box[0] * width)
            center_y = int(box[1] * height)
            w = int(box[2] * width)
            h = int(box[3] * height)
            # Convert from box center to top-left corner
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)

            class_ids.append(class_id)
            confidences.append(float(confidence))
            boxes.append([x, y, w, h])

In [None]:
# A minimum confidence score below which detections are discarded entirely.
score_threshold = 0.5
# A non-maxima suppression threshold to filter overlapping and low-confidence boxes.
nms_threshold = 0.4

# Apply non-maximum suppression
indices = cv2.dnn.NMSBoxes(boxes, confidences, score_threshold, nms_threshold)

# Draw on a copy so frame1 stays untouched
obj_image = frame1.copy()

# Depending on the OpenCV version the kept indices come back as an N x 1
# array, a flat array, or an empty tuple; flatten to plain ints for all cases.
for inx in np.asarray(indices, dtype=int).flatten():
    inx = int(inx)
    x, y, w, h = boxes[inx]

    # Draw the bounding box on the frame
    # (.tolist() turns the NumPy row into plain Python floats for OpenCV)
    color = COLORS[class_ids[inx]].tolist()
    cv2.rectangle(obj_image, (x, y), (x + w, y + h), color, 2)

    # Draw the class label just above the box
    cv2.putText(obj_image, classes[class_ids[inx]], (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

plt.figure(figsize=(5, 5))
plt.imshow(obj_image)
plt.title('YOLO Face detection')
plt.show()

In [None]:
import respiratory_extraction.utils.roi as roi

# Run the project's YOLO wrapper on the first frame, asking for the 'person' class.
yolo = roi.YOLO()
objects = yolo.detect_classes(frame1, 'person')

# Draw on a copy so frame1 itself is left unchanged.
obj_image_2 = frame1.copy()

# Every returned box is drawn with the colour of the first class entry.
color = COLORS[0]
for x, y, w, h in objects:
    top_left = (x, y)
    bottom_right = (x + w, y + h)
    cv2.rectangle(obj_image_2, top_left, bottom_right, color, 2)

fig_obj, ax_obj = plt.subplots(figsize=(5, 5))
ax_obj.imshow(obj_image_2)
ax_obj.set_title('YOLO Face detection 2')
plt.show()