<a href="https://colab.research.google.com/github/trappertwo/rPPG_experiments/blob/main/video_data_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Set Up

In [3]:
!python --version
# Imports
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt

!pip install -q mediapy
import mediapy as media


Python 3.11.11
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m56.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [4]:
### UBFC-rPPG dataset in drive folder
### (TODO) Figure out how to import data set directly into colab
from google.colab import drive

drive.mount('/content/drive')
#filepath = '/content/drive/MyDrive/research_project/'
filepath = '/content/drive/MyDrive/research_project/data/UBFC-rPPG/DATASET_2/subject1/vid.avi' # @param {type: "string"}
!ls -l $filepath

Mounted at /content/drive
-r-------- 1 root root 1425830568 Nov 16  2017 /content/drive/MyDrive/research_project/data/UBFC-rPPG/DATASET_2/subject1/vid.avi


In [5]:
video = media.read_video(filepath)

In [6]:
# Compress the video with default settings (H.264 codec)
COMPRESSION_BIT_RATES = {500_000, 200_000, 100_000}

compressed_data = {}
i = 0
for bps in COMPRESSION_BIT_RATES:
    compressed_data[bps] = media.compress_video(video, bps=bps, fps=30)
    print(len(compressed_data[bps]))

3236631
1298765
653856


In [7]:
# Save the compressed video to a file
with open('/tmp/subject1_500_000.mp4', 'wb') as f:
    f.write(compressed_data[500_000])
with open('/tmp/subject1_200_000.mp4', 'wb') as f:
    f.write(compressed_data[200_000])
with open('/tmp/subject1_100_000.mp4', 'wb') as f:
    f.write(compressed_data[100_000])

In [8]:
# Show original and compressed video side-by-side
compressed_video = media.read_video('/tmp/subject1_500_000.mp4')
compressed_video_2 = media.read_video('/tmp/subject1_200_000.mp4')
compressed_video_3 = media.read_video('/tmp/subject1_100_000.mp4')
videos = {'Original': video[:10], 'Compressed_200_000': compressed_video_2[:10], 'Compressed_100_000': compressed_video_3[:10], 'Compressed_500_000': compressed_video[:10] }
media.show_videos(videos, fps=30)

0,1,2,3
Original  This browser does not support the video tag.,Compressed_200_000  This browser does not support the video tag.,Compressed_100_000  This browser does not support the video tag.,Compressed_500_000  This browser does not support the video tag.


In [24]:
## Preprocessing - face detection and cropping
import cv2
import datetime
import math
from cv2 import dnn_superres

def read_video(video_file):
    """Reads a video file, returns frames(T, H, W, 3) """
    VidObj = cv2.VideoCapture(video_file)
    VidObj.set(cv2.CAP_PROP_POS_MSEC, 0)
    success, frame = VidObj.read()
    frames = list()
    while success:
      frame = cv2.cvtColor(np.array(frame), cv2.COLOR_BGR2RGB)
      frame = np.asarray(frame)
      frames.append(frame)
      success, frame = VidObj.read()
    return np.asarray(frames)

def face_detection(frame, use_larger_box=False, larger_box_coef=1.0):
  """Face detection on a single frame.

        Args:
            frame(np.array): a single frame.
            use_larger_box(bool): whether to use a larger bounding box on face detection.
            larger_box_coef(float): Coef. of larger box.
        Returns:
            face_box_coor(List[int]): coordinates of face bounding box.
  """
  # Use OpenCV's Haar Cascade algorithm implementation for face detection
  # This should only utilize the CPU
  detector = cv2.CascadeClassifier('/content/drive/MyDrive/research_project/DATASETS/haarcascade_frontalface_default.xml')

  # Computed face_zone(s) are in the form [x_coord, y_coord, width, height]
  # (x,y) corresponds to the top-left corner of the zone to define using
  # the computed width and height.
  face_zone = detector.detectMultiScale(frame)

  if len(face_zone) < 1:
    print("ERROR: No Face Detected")
    face_box_coor = [0, 0, frame.shape[0], frame.shape[1]]
  elif len(face_zone) >= 2:
    # Find the index of the largest face zone
    # The face zones are boxes, so the width and height are the same
    max_width_index = np.argmax(face_zone[:, 2])  # Index of maximum width
    face_box_coor = face_zone[max_width_index]
    print("Warning: More than one faces are detected. Only cropping the biggest one.")
  else:
    face_box_coor = face_zone[0]
  if use_larger_box:
    face_box_coor[0] = max(0, face_box_coor[0] - (larger_box_coef - 1.0) / 2 * face_box_coor[2])
    face_box_coor[1] = max(0, face_box_coor[1] - (larger_box_coef - 1.0) / 2 * face_box_coor[3])
    face_box_coor[2] = larger_box_coef * face_box_coor[2]
    face_box_coor[3] = larger_box_coef * face_box_coor[3]
  return face_box_coor

def superres(frame, model_path, model_name, scale):
  """Super resolution using DNN.

     Args:
       frames(np.array): Video frames.
       model_path(str): Path to the pre-trained model.
       model_name(str): Name of the model.
       scale(int): Scale factor.
     Returns:
        upscaled_frames(list[np.array(float)]): Upscaled frames
  """
  # Create a Super Resolution object
  sr = dnn_superres.DnnSuperResImpl_create()

  # Read the pre-trained model
  sr.readModel(model_path)

  # Set the model and scale
  sr.setModel(model_name, scale)

  # Upscale the image
  upscaled_frame = sr.upsample(frame)
  return upscaled_frame

def crop_face_resize(frames, use_face_detection,  use_larger_box, larger_box_coef, use_dynamic_detection,
                         detection_freq, use_median_box, width, height,
                     res_model_path, model_name, scale, denoise=False, upscale=False):
  """Crop face and resize frames.

     Args:
       frames(np.array): Video frames.
       use_face_detection(bool):  Whether crop the face.
       use_dynamic_detection(bool): If False, all the frames use the first frame's bouding box to crop the faces
                                         and resizing.
                                         If True, it performs face detection every "detection_freq" frames.
       detection_freq(int): The frequency of dynamic face detection e.g., every detection_freq frames.
       width(int): Target width for resizing.
       height(int): Target height for resizing.
       use_larger_box(bool): Whether enlarge the detected bouding box from face detection.
       larger_box_coef(float): the coefficient of the larger region(height and weight)
                             the middle point of the detected region will stay still during the process of enlarging.
     Returns:
        resized_frames(list[np.array(float)]): Resized and cropped frames
  """
  # Face Cropping
  if use_dynamic_detection:
    num_dynamic_det = math.ceil(frames.shape[0] / detection_freq)
  else:
    num_dynamic_det = 1
  face_region_all = []
  # Perform face detection by num_dynamic_det" times.
  for idx in range(num_dynamic_det):
    if use_face_detection:
       face_region_all.append(face_detection(frames[detection_freq * idx], use_larger_box, larger_box_coef))
    else:
       face_region_all.append([0, 0, frames.shape[1], frames.shape[2]])
  if use_median_box:
     # Generate a median bounding box based on all detected face regions
     face_region_median = np.median(np.asarray(face_region_all, dtype='int'), axis=0).astype('int')
  print(np.asarray(face_region_all, dtype='int'))

  # Frame Resizing
  #resized_frames = np.zeros((frames.shape[0], height, width, 3))
  resized_frames = []
  for i in range(0, frames.shape[0]):
    frame = frames[i]
    restrored_frame = frames[i]
    if use_dynamic_detection:  # use the (i // detection_freq)-th facial region.
      reference_index = i // detection_freq
    else:  # use the first region obtrained from the first frame.
      reference_index = 0
    if use_face_detection:
      if use_median_box:
         face_region = face_region_median
      else:
         face_region = np.asarray(face_region_all, dtype='int')[reference_index]
      frame = frame[max(face_region[1], 0):min(face_region[1] + face_region[3], frame.shape[0]),
         max(face_region[0], 0):min(face_region[0] + face_region[2], frame.shape[1])]
      # To remove compression noise from an image using OpenCV
      # "fastNlMeansDenoising" or "fastNlMeansDenoisingMulti"
      # function may be more suitable for removing compression artifacts.
      # "fastNlMeansDenoising" implements the Non-Local Means (NLM) denoising
      # algorithm; Considers similar pixel neighborhoods across the
      # image to reduce noise while preserving edges and details.
      # "fastNlMeansDenoisingMulti" considers similar pixels across
      # multiple frames to denoise.
      # Other methods (less effective for compression noise):
      #    Median filtering: Can be used for basic smoothing but might blur edges.
      #    Gaussian blur: May help with some noise but can also blur details.
      #    Bilateral filtering: Preserves edges better than Gaussian blur but
      #    might not be as effective for compression noise.
      if denoise:
        #restored_frame = cv2.fastNlMeansDenoisingColored(frame,None,10,10,7,21)
        restored_frame = cv2.fastNlMeansDenoisingColored(frame,None,5,5,7,21)
        #restored_frame = cv2.bilateralFilter(frame, 9, 75, 75)
      if upscale:
        # Apply super resolution
        restored_frame = superres(frame, res_model_path, model_name, scale)
        print(datetime.datetime.now().strftime("%H:%M:%S"))
        print("processed frame: " + str(i))

    # Resize the frame
    if denoise:
      resized_frame = cv2.resize(restored_frame, (width, height), interpolation=cv2.INTER_AREA)
      resized_frames.append(resized_frame)
    elif upscale:
      resized_frame = cv2.resize(restored_frame, (width, height), interpolation=cv2.INTER_AREA)
      resized_frames.append(resized_frame)
    else:
      resized_frame = cv2.resize(frame, (width, height), interpolation=cv2.INTER_AREA)
      resized_frames.append(resized_frame)
    #print("Image after cropping and resizing:")
    #media.show_image(resized_frame1)
    #resized_frame2 = cv2.resize(upscaled_frame, (width, height), interpolation=cv2.INTER_AREA)
    #print("Image after upscaling:")
    #media.show_image(resized_frame2)
    #resized_frames[i] = cv2.resize(frame, (width, height), interpolation=cv2.INTER_AREA)
  #return resized_frames
  return np.array(resized_frames)

video_file = '/tmp/subject1_200_000.mp4' # @param{type: "string"}
cropped_and_resized_file = '/tmp/subject1_200_000_cropped_and_resized.mp4' # @param{type: "string"}
frames = read_video(video_file)
print(len(frames))
#media.show_video(frames[:10], fps=30)
face_box_coor = face_detection(frames[1], use_larger_box=True, larger_box_coef=1.5)
new_frame = cv2.rectangle(frames[0], (face_box_coor[0], face_box_coor[1]), (face_box_coor[0] + face_box_coor[2], face_box_coor[1] + face_box_coor[3]), (255, 0, 0), 2)
media.show_image(new_frame)

1547


In [23]:
# Specify the path to the pre-trained model
MODEL_PATH = "/content/drive/MyDrive/research_project/vsr_model/EDSR_x4.pb"  # @param {type:"string"}
MODEL_NAME = "edsr" # @param {type: "string"}
SCALE = 4 # @param {type: "integer"}
HEIGHT = 288 # @param {type: "integer"}
WIDTH = 288 # @param {type: "integer"}
NUM_FRAMES = 150 # @param {type: "integer"}
resized_frames1 = crop_face_resize(frames, use_face_detection=True,  use_larger_box=True, larger_box_coef=1.5, use_dynamic_detection=False, detection_freq=32, use_median_box=False, width=WIDTH, height=HEIGHT, res_model_path=MODEL_PATH, model_name=MODEL_NAME, scale=SCALE, denoise=False, upscale=False)

import cv2

for frame in resized_frames1[:1]:
  #  Reduce blockiness
  # Apply Gaussian Blur
  blurred = cv2.GaussianBlur(frame, (5, 5), 0)
  #blurred = cv2.GaussianBlur(frame, (7, 7), 2)
  #media.show_image(blurred)
  # Apply Bilateral Filter (preserves edges)
  bilateral = cv2.bilateralFilter(frame, 9, 75, 75)
  #bilateral = cv2.bilateralFilter(frame, 9, 18, 18)
  # Reduce noisy edges
  #denoised = cv2.fastNlMeansDenoisingColored(frame, None, 5, 5, 7, 21)
  denoised = cv2.fastNlMeansDenoisingColored(frame, None, 5, 5, 7, 21)
  # Sharpen
  sharpened = cv2.addWeighted(frame, 2.5, blurred, -1.5, 0)
  # denoised multi
  # Parameters for denoising
  temporalWindowSize = 3  # Number of adjacent frames to use (must be odd)
  h = 5  # Filter strength for luminance
  hColor = 5  # Filter strength for color
  templateWindowSize = 7 # Size of the template patch used for searching similar pixels
  searchWindowSize = 21 # Size of the search window to find similar pixels
  # Apply fastNlMeansDenoisingColoredMulti
  denoised_multi = cv2.fastNlMeansDenoisingColoredMulti(
    resized_frames1[:11], 5, temporalWindowSize, h, hColor, templateWindowSize, searchWindowSize)

images = {
    'Original': frame,
    'Denoised Multi': denoised_multi,
    'Denoised': denoised,
    'Bilateral': bilateral,
    'Gaussian blur': blurred,
    'Sharpened': sharpened}
media.show_images(images)


[[248 145 222 222]]


0,1,2,3,4,5
Original,Denoised Multi,Denoised,Bilateral,Gaussian blur,Sharpened


In [26]:
resized_frames2 = crop_face_resize(frames[:NUM_FRAMES], use_face_detection=True,  use_larger_box=True, larger_box_coef=1.5, use_dynamic_detection=False, detection_freq=32, use_median_box=False, width=WIDTH, height=HEIGHT, res_model_path=MODEL_PATH, model_name=MODEL_NAME, scale=SCALE, denoise=True, upscale=False)
resized_frames3 = crop_face_resize(frames[:20], use_face_detection=True,  use_larger_box=True, larger_box_coef=1.5, use_dynamic_detection=False, detection_freq=32, use_median_box=False, width=WIDTH, height=HEIGHT, res_model_path=MODEL_PATH, model_name=MODEL_NAME, scale=SCALE, denoise=False, upscale=True)

#videos = {'Original resized': resized_frames1, 'Denoised': resized_frames2}
videos = {'Original resized': resized_frames1[:20], 'Denoised': resized_frames2[:20], 'Upscaled': resized_frames3}
media.show_videos(videos, fps=30)

[[248 145 222 222]]
[[248 145 222 222]]
22:16:01
processed frame: 0
22:16:12
processed frame: 1
22:16:23
processed frame: 2
22:16:35
processed frame: 3
22:16:47
processed frame: 4
22:16:59
processed frame: 5
22:17:10
processed frame: 6
22:17:22
processed frame: 7
22:17:33
processed frame: 8
22:17:45
processed frame: 9
22:17:56
processed frame: 10
22:18:08
processed frame: 11
22:18:20
processed frame: 12
22:18:31
processed frame: 13
22:18:43
processed frame: 14
22:18:54
processed frame: 15
22:19:06
processed frame: 16
22:19:18
processed frame: 17
22:19:28
processed frame: 18
22:19:39
processed frame: 19


0,1,2
Original resized  This browser does not support the video tag.,Denoised  This browser does not support the video tag.,Upscaled  This browser does not support the video tag.


In [33]:
import cv2
import os

def frames_to_video(frames, output_path, fps=30):
    """Converts a sequence of frames to an MPEG4 video.

    Args:
        frames: A list of frames (numpy arrays) to be converted to a video.
        output_path: Name of the output video file (e.g., 'output.mp4').
        fps: Frames per second for the output video.
    """

    height, width, _ = frames[0].shape
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for MPEG-4
    video_writer = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    for frame in frames:
      frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
      video_writer.write(frame_rgb)

    video_writer.release()

OUTPUT_PATH = '/content/drive/MyDrive/research_project/data/preprocessed' # @param{type: "string"}
FILE_NAME = 'subject1_500_000_cropped_and_resized.mp4' # @param{type: "string"}
if not os.path.exists(OUTPUT_PATH):
  os.mkdir(OUTPUT_PATH)

frames_to_video(frames=resized_frames1, output_path=os.path.join(OUTPUT_PATH, FILE_NAME), fps=30)

In [34]:
video2 = media.read_video(os.path.join(OUTPUT_PATH, FILE_NAME))
media.show_video(video2, fps=30)

0
This browser does not support the video tag.


In [None]:
# Clone the MdVRNet (video restoration model) repository from github if not already setup
import os

dir_path = '/content/MdVRNet'
if os.path.isdir(dir_path):
  print(f"The directory '{dir_path}' exists.")
else:
  !git clone https://github.com/trappertwo/MdVRNet.git
%cd '{dir_path}'
!ls '{dir_path}'

Cloning into 'MdVRNet'...
remote: Enumerating objects: 130, done.[K
remote: Counting objects: 100% (23/23), done.[K
remote: Compressing objects: 100% (23/23), done.[K
remote: Total 130 (delta 13), reused 0 (delta 0), pack-reused 107 (from 1)[K
Receiving objects: 100% (130/130), 12.35 MiB | 15.08 MiB/s, done.
Resolving deltas: 100% (60/60), done.
/content/MdVRNet
dataloaders.py		  images	     README.md	       train_common.py
dataset.py		  mdvrnet.py	     requirements.txt  train_dpen.py
estimate_params.py	  models.py	     test_dpen.py      train_mdvrnet.py
generate_png_from_mp4.py  pretrained_models  test_mdvrnet.py   utils.py


In [None]:
# Explicitly install PyQt5 to use interactive plotting and avoid non-interactive backends
# See this relevant issue for more details: https://github.com/astral-sh/uv/issues/6893
# This requires Python 3.9+
!which python
!python --version
!pip install  --no-build-isolation PyQt5

/usr/local/bin/python
Python 3.11.11
Collecting PyQt5
  Downloading PyQt5-5.15.11-cp38-abi3-manylinux_2_17_x86_64.whl.metadata (2.1 kB)
Collecting PyQt5-sip<13,>=12.15 (from PyQt5)
  Downloading PyQt5_sip-12.17.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.whl.metadata (472 bytes)
Collecting PyQt5-Qt5<5.16.0,>=5.15.2 (from PyQt5)
  Downloading PyQt5_Qt5-5.15.16-1-py3-none-manylinux2014_x86_64.whl.metadata (536 bytes)
Downloading PyQt5-5.15.11-cp38-abi3-manylinux_2_17_x86_64.whl (8.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.2/8.2 MB[0m [31m100.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading PyQt5_Qt5-5.15.16-1-py3-none-manylinux2014_x86_64.whl (61.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 MB[0m [31m15.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading PyQt5_sip-12.17.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.whl (276 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m276.4/276.4 kB[0m 

In [None]:
### Setup using uv
### This gives an error when building the mamba package.
### Hence we need to directly install causal-conv1d and mamba-ssm.
### See README file under tools/mamba.
#!bash setup.sh uv
%%bash
rm -rf .venv
uv venv --python 3.8
source .venv/bin/activate
uv pip install --no-build-isolation setuptools wheel
uv pip install --no-build-isolation torch==2.1.2+cu121 torchvision==0.16.2+cu121 --index-url https://download.pytorch.org/whl/cu121
uv pip install --no-build-isolation scikit-Image
uv pip install --no-build-isolation pytest
uv pip install --no-build-isolation pycodestyle
uv pip install --no-build-isolation opencv-python
uv pip install --no-build-isolation future
uv pip install --no-build-isolation tensorboardx



Using CPython 3.8.20
Creating virtual environment at: .venv
Activate with: source .venv/bin/activate
Using Python 3.11.11 environment at: /usr
Audited 2 packages in 242ms
Using Python 3.11.11 environment at: /usr
Resolved 18 packages in 2.52s
Downloading torchvision (6.5MiB)
Downloading triton (85.1MiB)
Downloading torch (2.0GiB)
 Downloaded torchvision
 Downloaded triton
 Downloaded torch
Prepared 3 packages in 37.26s
Uninstalled 3 packages in 653ms
Installed 3 packages in 194ms
 - torch==2.5.1+cu124 (from https://download.pytorch.org/whl/cu124/torch-2.5.1%2Bcu124-cp311-cp311-linux_x86_64.whl)
 + torch==2.1.2+cu121
 - torchvision==0.20.1+cu124 (from https://download.pytorch.org/whl/cu124/torchvision-0.20.1%2Bcu124-cp311-cp311-linux_x86_64.whl)
 + torchvision==0.16.2+cu121
 - triton==3.1.0
 + triton==2.1.0
Using Python 3.11.11 environment at: /usr
Audited 1 package in 255ms
Using Python 3.11.11 environment at: /usr
Audited 1 package in 307ms
Using Python 3.11.11 environment at: /usr
Re

In [None]:
### List packages in the virtual environment
!source .venv/bin/activate
!uv pip list

[2mUsing Python 3.11.11 environment at: /usr[0m
Package                            Version
---------------------------------- -------------------
absl-py                            1.4.0
accelerate                         1.3.0
aiohappyeyeballs                   2.4.6
aiohttp                            3.11.12
aiosignal                          1.3.2
alabaster                          1.0.0
albucore                           0.0.23
albumentations                     2.0.4
ale-py                             0.10.2
altair                             5.5.0
annotated-types                    0.7.0
anyio                              3.7.1
argon2-cffi                        23.1.0
argon2-cffi-bindings               21.2.0
array-record                       0.6.0
arviz                              0.20.0
astropy                            7.0.1
astropy-iers-data                  0.2025.2.17.0.34.13
astunparse                         1.6.3
atpublic                           4.1.0
attrs      