In [1]:
import csv
import glob
import os
import re
from math import ceil
from scipy import signal
from scipy import sparse
from unsupervised_methods.methods import POS_WANG
from unsupervised_methods import utils
import math
from multiprocessing import Pool, Process, Value, Array, Manager

import cv2
import numpy as np
import pandas as pd
from torch.utils.data import Dataset
from tqdm import tqdm


In [2]:
def face_detection(frame, use_larger_box=True, larger_box_coef=1.0):
    """Detect a face in a single frame with a Haar cascade and return its bounding box.

    Args:
        frame(np.array): a single frame, indexed as (rows, columns, ...).
        use_larger_box(bool): whether to enlarge the bounding box.
        larger_box_coef(float): scale factor applied to the box width and height
            when use_larger_box is True.
    Returns:
        face_box_coor(np.array[int]): bounding box as [x, y, width, height].
            When no face is detected the box covers the whole frame; when several
            are detected the one with the largest width is used.
    """
    detector = cv2.CascadeClassifier(
       '/Users/qpingwin/labs/KlarityAII/rPPG-Toolbox/dataset/haarcascade_frontalface_default.xml')
    face_zone = detector.detectMultiScale(frame)
    if len(face_zone) < 1:
        print("ERROR: No Face Detected")
        # Boxes are [x, y, width, height]: width is the column count (shape[1])
        # and height is the row count (shape[0]).
        x, y, w, h = 0, 0, frame.shape[1], frame.shape[0]
    elif len(face_zone) >= 2:
        # Column 2 holds the widths; keep the detection with the largest one.
        widest = np.argmax(face_zone, axis=0)[2]
        x, y, w, h = face_zone[widest]
        print("Warning: More than one faces are detected(Only cropping the biggest one.)")
    else:
        x, y, w, h = face_zone[0]
    if use_larger_box:
        # Move the origin back by half of the added size (clamped at 0) so the
        # enlarged box stays centred on the detection; the origin uses the
        # un-scaled width/height, so it is updated before them.
        x = max(0, x - (larger_box_coef - 1.0) / 2 * w)
        y = max(0, y - (larger_box_coef - 1.0) / 2 * h)
        w = larger_box_coef * w
        h = larger_box_coef * h
    # Always hand back integer coordinates in the same container type.
    return np.array([int(x), int(y), int(w), int(h)], dtype=int)

def crop_face_resize(frames, use_face_detection, use_larger_box=True, larger_box_coef=1.0, use_dynamic_detection=True,
                     detection_freq=60, use_median_box=False, width=128, height=128):
    """Crop every frame to a face box (optionally) and resize it to (height, width).

    Args:
        frames(np.array): Video frames; the first axis indexes the frames.
        use_face_detection(bool): Whether to crop the frames to a detected face box.
        use_larger_box(bool): Forwarded to face_detection; whether to enlarge the detected box.
        larger_box_coef(float): Forwarded to face_detection; enlargement coefficient.
        use_dynamic_detection(bool): If True, a box is detected every "detection_freq" frames
                                     and each frame uses the box of its own group.
                                     If False, only the first frame's box is used.
        detection_freq(int): Number of frames sharing one detection.
        use_median_box(bool): If True, every frame is cropped with the median of all boxes.
        width(int): Target width for resizing.
        height(int): Target height for resizing.
    Returns:
        resized_frames(np.array(float)): Array of shape (num_frames, height, width, 3).
    """
    n_frames = frames.shape[0]
    n_boxes = ceil(n_frames / detection_freq) if use_dynamic_detection else 1

    # One box per detection slot.
    if use_face_detection:
        boxes = [face_detection(frames[detection_freq * slot], use_larger_box, larger_box_coef)
                 for slot in range(n_boxes)]
    else:
        boxes = [[0, 0, frames.shape[1], frames.shape[2]] for _ in range(n_boxes)]
    boxes = np.asarray(boxes, dtype='int')
    median_box = np.median(boxes, axis=0).astype('int') if use_median_box else None

    # Crop (when requested) and resize frame by frame into a preallocated array.
    output = np.zeros((n_frames, height, width, 3))
    for idx in range(n_frames):
        image = frames[idx]
        if use_face_detection:
            if use_median_box:
                box = median_box
            else:
                box = boxes[idx // detection_freq if use_dynamic_detection else 0]
            left, top, box_w, box_h = box[0], box[1], box[2], box[3]
            # Clamp the box to the frame borders before slicing.
            rows = slice(max(top, 0), min(top + box_h, image.shape[0]))
            cols = slice(max(left, 0), min(left + box_w, image.shape[1]))
            image = image[rows, cols]
        output[idx] = cv2.resize(image, (width, height), interpolation=cv2.INTER_AREA)
    return output

In [3]:
# Preview cell: show each original frame next to its face-cropped version.
video_path = '/Users/qpingwin/labs/KlarityAII/My_dataset/subject1/vid.avi'
cap = cv2.VideoCapture(video_path)

try:
    if not cap.isOpened():
        # Report a bad path explicitly instead of only a failed frame grab.
        print(f"Error opening video file: {video_path}")

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # Also reached when the end of the video is hit.
            print("Failed to grab frame")
            break

        # Stack frames to form a batch for demonstration (here batch size = 1)
        batch_frames = np.stack([frame])

        # Apply crop and resize on the batch of frames
        resized_frames = crop_face_resize(batch_frames, use_face_detection=True)

        # Since we're working with a batch of size 1, take the first frame
        resized_frame = resized_frames[0].astype(np.uint8)

        # Resize to the original dimensions for visualization
        resized_frame = cv2.resize(resized_frame, (frame.shape[1], frame.shape[0]))

        # Display the original frame and the cropped frame side by side
        cv2.imshow('Original', frame)
        cv2.imshow('Cropped', resized_frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release resources even if cropping or display raised part-way through.
    cap.release()
    cv2.destroyAllWindows()


Failed to grab frame


: 

In [2]:
def face_detection(frame, use_larger_box=True, larger_box_coef=1.5):
    """Detect a face in a single frame with a Haar cascade and return its bounding box.

    Args:
        frame(np.array): a single frame, indexed as (rows, columns, ...).
        use_larger_box(bool): whether to enlarge the bounding box.
        larger_box_coef(float): scale factor applied to the box width and height
            when use_larger_box is True.
    Returns:
        face_box_coor(np.array[int]): bounding box as [x, y, width, height].
            When no face is detected the box covers the whole frame; when several
            are detected the one with the largest width is used.
    """
    detector = cv2.CascadeClassifier(
       '/Users/qpingwin/labs/KlarityAII/rPPG-Toolbox/dataset/haarcascade_frontalface_default.xml')
    face_zone = detector.detectMultiScale(frame)
    if len(face_zone) < 1:
        print("ERROR: No Face Detected")
        # Boxes are [x, y, width, height]: width is the column count (shape[1])
        # and height is the row count (shape[0]).
        x, y, w, h = 0, 0, frame.shape[1], frame.shape[0]
    elif len(face_zone) >= 2:
        # Column 2 holds the widths; keep the detection with the largest one.
        widest = np.argmax(face_zone, axis=0)[2]
        x, y, w, h = face_zone[widest]
        print("Warning: More than one faces are detected(Only cropping the biggest one.)")
    else:
        x, y, w, h = face_zone[0]
    if use_larger_box:
        # Move the origin back by half of the added size (clamped at 0) so the
        # enlarged box stays centred on the detection; the origin uses the
        # un-scaled width/height, so it is updated before them.
        x = max(0, x - (larger_box_coef - 1.0) / 2 * w)
        y = max(0, y - (larger_box_coef - 1.0) / 2 * h)
        w = larger_box_coef * w
        h = larger_box_coef * h
    # Always hand back integer coordinates in the same container type.
    return np.array([int(x), int(y), int(w), int(h)], dtype=int)

def crop_face_resize(frames, use_face_detection, use_larger_box=False, larger_box_coef=1.2, use_dynamic_detection=True,
                     detection_freq=10, use_median_box=False, width=72, height=72):
    """Crop frames to a smoothly moving face box and resize them.

    Face boxes are detected on key frames and, between two consecutive key
    frames, linearly interpolated so the crop window does not jump.

    Args:
        frames(np.array): Video frames, indexed as (frame, row, column, channel).
        use_face_detection(bool): Whether to crop to a detected face box. If False
            the frames are only resized.
        use_larger_box(bool): Forwarded to face_detection.
        larger_box_coef(float): Forwarded to face_detection.
        use_dynamic_detection(bool): If True, detect on every "detection_freq"-th
            frame; if False, detect on the first frame only and reuse that box.
        detection_freq(int): Distance in frames between two key-frame detections.
        use_median_box(bool): If True, crop every frame with the median of all
            key-frame boxes instead of interpolating.
        width(int): Target width for resizing.
        height(int): Target height for resizing.
    Returns:
        np.array: The resized frames stacked along the first axis.
    """
    num_frames = frames.shape[0]
    frame_h, frame_w = frames.shape[1], frames.shape[2]

    # Face detection on the key frames only.
    key_boxes = []
    if use_face_detection and num_frames > 0:
        key_indices = range(0, num_frames, detection_freq) if use_dynamic_detection else [0]
        for key in key_indices:
            box = face_detection(frames[key], use_larger_box, larger_box_coef)
            key_boxes.append([int(box[j]) for j in range(4)])

    median_box = None
    if use_median_box and key_boxes:
        median_box = np.median(np.asarray(key_boxes), axis=0).astype('int')

    resized_frames = []
    for i in range(num_frames):
        cropped_frame = frames[i]
        if use_face_detection:
            if median_box is not None:
                box = median_box
            else:
                if use_dynamic_detection:
                    key = i // detection_freq
                    alpha = float(i % detection_freq) / detection_freq
                else:
                    key, alpha = 0, 0.0
                current = key_boxes[key]
                # After the last key frame there is nothing to blend towards.
                following = key_boxes[key + 1] if key + 1 < len(key_boxes) else current
                box = [int(current[j] + alpha * (following[j] - current[j])) for j in range(4)]

            # Clamp the [x, y, w, h] box to the frame borders before slicing.
            top, bottom = max(box[1], 0), min(box[1] + box[3], frame_h)
            left, right = max(box[0], 0), min(box[0] + box[2], frame_w)
            candidate = frames[i][top:bottom, left:right]
            # An empty crop cannot be resized; fall back to the full frame.
            if candidate.size > 0:
                cropped_frame = candidate

        # Resize the (cropped) frame to the target dimensions
        resized_frames.append(cv2.resize(cropped_frame, (width, height), interpolation=cv2.INTER_AREA))

    return np.array(resized_frames)


In [1]:
import cv2
import sys

# Letterbox every subject's video to a fixed frame size and write it back
# next to the original as "<name>_resized.avi".

# Output geometry and encoding settings shared by all subjects.
target_width = 640
target_height = 480
target_fps = 30
pad_color = (0, 0, 0)
fourcc = cv2.VideoWriter_fourcc(*'XVID')

for i in range(1, 13):
    print(i)
    video_file = f"/Users/qpingwin/labs/KlarityAII/My_dataset/subject{i}/vid.avi"

    cap = cv2.VideoCapture(video_file)
    out = None
    try:
        if not cap.isOpened():
            print("Error opening video file")
            sys.exit()

        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Largest uniform scale at which the frame still fits inside the target.
        scale_factor = min(target_width / width, target_height / height)

        # Round instead of truncating so float error cannot shave a pixel off
        # the side that should fill the target exactly.
        new_width = min(target_width, max(1, round(width * scale_factor)))
        new_height = min(target_height, max(1, round(height * scale_factor)))

        # Split the leftover space so the picture is centred.
        pad_left = (target_width - new_width) // 2
        pad_right = target_width - new_width - pad_left
        pad_top = (target_height - new_height) // 2
        pad_bottom = target_height - new_height - pad_top

        out = cv2.VideoWriter(video_file[:-4] + '_resized.avi', fourcc, target_fps, (target_width, target_height))

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame = cv2.resize(frame, (new_width, new_height))
            frame = cv2.copyMakeBorder(frame, pad_top, pad_bottom, pad_left, pad_right, cv2.BORDER_CONSTANT, value=pad_color)
            out.write(frame)
    finally:
        # Release the handles even when processing stops early.
        cap.release()
        if out is not None:
            out.release()

    print("Video processing completed")

1
Video processing completed
2
Video processing completed
3
Video processing completed
4
Video processing completed
5
Video processing completed
6
Video processing completed
7
Video processing completed
8
Video processing completed
9
Video processing completed
10
Video processing completed
11
Video processing completed
12
Video processing completed


In [3]:
import cv2
import numpy as np

def stabilize_frames(frames):
    """Stabilise a sequence of frames by aligning each one to the first frame.

    A Euclidean (rotation + translation) transform is estimated per frame with
    ECC. All frames share the first frame as reference, so the output does not
    simply follow the camera motion with a one-frame delay.

    Args:
        frames: Sequence of colour frames of identical size. The grey
            conversion assumes BGR channel order, as produced by
            cv2.VideoCapture.
    Returns:
        np.array: The first frame unchanged, followed by the warped frames.
            An empty input is returned as an empty array.
    """
    if len(frames) == 0:
        return np.array(frames)

    reference_gray = cv2.cvtColor(frames[0], cv2.COLOR_BGR2GRAY)
    criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 5000, 1e-5)

    # Start from the identity and warm-start every frame with the previous
    # frame's transform.
    warp_matrix = np.eye(2, 3, dtype=np.float32)
    stabilized_frames = [frames[0]]  # Keep the first frame as is
    for i in range(1, len(frames)):
        cur_gray = cv2.cvtColor(frames[i], cv2.COLOR_BGR2GRAY)

        try:
            # Pass a copy so a failed run cannot leave a half-updated matrix behind.
            _, warp_matrix = cv2.findTransformECC(
                reference_gray, cur_gray, warp_matrix.copy(), cv2.MOTION_EUCLIDEAN, criteria)
        except cv2.error:
            # A failed estimate must not abort the whole video.
            print(f"Warning: ECC alignment failed for frame {i}; reusing the previous transform")

        # The estimated transform maps reference coordinates to the current frame,
        # so it is applied inverted to bring the frame onto the reference.
        h, w = frames[i].shape[:2]
        stabilized_frame = cv2.warpAffine(
            frames[i], warp_matrix, (w, h), flags=cv2.INTER_LINEAR | cv2.WARP_INVERSE_MAP)
        stabilized_frames.append(stabilized_frame)

    return np.array(stabilized_frames)

# Stabilise every subject's resized video and write the result next to it
# as "<name>_stabilised.avi".
for i in range(1, 13):
    print(f"Processing video for subject {i}")
    video_file = f"/Users/qpingwin/labs/KlarityAII/My_dataset/subject{i}/vid_resized.avi"

    # Read the whole video into memory.
    cap = cv2.VideoCapture(video_file)
    frames = []
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(frame)
    finally:
        cap.release()

    if len(frames) == 0:
        # Missing or unreadable file: say so instead of skipping silently.
        print(f"No frames could be read for subject {i}; skipping")
        continue

    stabilized_frames = stabilize_frames(np.array(frames))

    # Define the codec and create VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'XVID')

    # Add '_stabilised' to the original file name to create a new file
    output_video_file = video_file[:-4] + '_stabilised.avi'

    # VideoWriter expects the frame size as (width, height).
    out = cv2.VideoWriter(output_video_file, fourcc, 30.0, (stabilized_frames.shape[2], stabilized_frames.shape[1]))
    try:
        for frame in stabilized_frames:
            out.write(frame)
    finally:
        # Release the VideoWriter even if writing fails part-way through
        out.release()
    print(f"Stabilisation for subject {i} complete")



Processing video for subject 1
Not enough matches are found - 0/4
Not enough matches are found - 0/4
Not enough matches are found - 0/4
Not enough matches are found - 0/4
Not enough matches are found - 0/4
Not enough matches are found - 0/4
Not enough matches are found - 0/4
Not enough matches are found - 0/4
Not enough matches are found - 0/4
Not enough matches are found - 0/4
Not enough matches are found - 0/4
Not enough matches are found - 0/4
Not enough matches are found - 0/4
Not enough matches are found - 0/4
Not enough matches are found - 0/4
Not enough matches are found - 0/4
Not enough matches are found - 0/4
Not enough matches are found - 1/4
Not enough matches are found - 1/4
Not enough matches are found - 2/4
Not enough matches are found - 2/4
Not enough matches are found - 0/4
Not enough matches are found - 0/4
Not enough matches are found - 0/4
Not enough matches are found - 0/4
Not enough matches are found - 0/4
Not enough matches are found - 0/4
Not enough matches are f

error: OpenCV(4.8.0) /Users/xperience/GHA-OpenCV-Python/_work/opencv-python/opencv-python/opencv/modules/imgproc/src/imgwarp.cpp:3304: error: (-215:Assertion failed) (M0.type() == CV_32F || M0.type() == CV_64F) && M0.rows == 3 && M0.cols == 3 in function 'warpPerspective'
