In [1]:
# Use the %pip magic explicitly: it installs into the environment of the running
# kernel and keeps working when IPython's automagic is switched off.
%pip install -q pytorchvideo transformers evaluate

Note: you may need to restart the kernel to use updated packages.


In [3]:
from huggingface_hub import hf_hub_download
import tqdm


# Fetch the UCF101 subset archive from its dataset repository on the Hugging Face Hub.
# `file_path` keeps whatever hf_hub_download returns for the requested file.
hf_dataset_identifier = "sayakpaul/ucf101-subset"
filename = "UCF101_subset.tar.gz"
file_path = hf_hub_download(
    filename=filename,
    repo_id=hf_dataset_identifier,
    repo_type="dataset",
)

In [6]:
# Use the %pip magic explicitly so the install targets the running kernel's environment;
# -q matches the first install cell and keeps the resolver/download log out of the notebook.
# NOTE: facenet-pytorch constrains the torch, torchvision and Pillow versions, so this may
# replace builds that are already installed -- restart the kernel afterwards.
%pip install -q facenet-pytorch

Collecting facenet-pytorch
  Downloading facenet_pytorch-2.6.0-py3-none-any.whl.metadata (12 kB)
Collecting Pillow<10.3.0,>=10.2.0 (from facenet-pytorch)
  Downloading pillow-10.2.0-cp310-cp310-macosx_11_0_arm64.whl.metadata (9.7 kB)
Collecting torch<2.3.0,>=2.2.0 (from facenet-pytorch)
  Downloading torch-2.2.2-cp310-none-macosx_11_0_arm64.whl.metadata (25 kB)
Collecting torchvision<0.18.0,>=0.17.0 (from facenet-pytorch)
  Downloading torchvision-0.17.2-cp310-cp310-macosx_11_0_arm64.whl.metadata (6.6 kB)
Downloading facenet_pytorch-2.6.0-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pillow-10.2.0-cp310-cp310-macosx_11_0_arm64.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading torch-2.2.2-cp310-none-macosx_11_0_arm64.whl (59.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━

In [10]:
import cv2
import time
import numpy as np
import torch
from facenet_pytorch import MTCNN

def _padded_box(box, padding_factor, frame_width, frame_height):
    """Grow a detection box by ``padding_factor`` on every side and clip it to the frame.

    Args:
        box: Four numbers ``(x1, y1, x2, y2)`` describing the detected face.
        padding_factor (float): Fraction of the box width/height added on EACH side.
        frame_width (int): Width of the frame in pixels.
        frame_height (int): Height of the frame in pixels.

    Returns:
        tuple | None: ``(left, top, right, bottom)`` with exclusive right/bottom bounds,
        or ``None`` when nothing of the padded box lies inside the frame.
    """
    x1, y1, x2, y2 = (int(coord) for coord in box)
    x_pad = int((x2 - x1) * padding_factor)
    y_pad = int((y2 - y1) * padding_factor)

    # Clip both corners: the detector may report coordinates outside the image.
    left = max(0, x1 - x_pad)
    top = max(0, y1 - y_pad)
    right = min(frame_width, x2 + x_pad)
    bottom = min(frame_height, y2 + y_pad)

    if right <= left or bottom <= top:
        return None
    return left, top, right, bottom


def _face_edges_bgr(rgb_face):
    """Return a 3-channel BGR image holding the Canny edges of an RGB face crop."""
    gray_face = cv2.cvtColor(rgb_face, cv2.COLOR_RGB2GRAY)
    # Blur first so sensor noise does not produce spurious edges.
    blurred_face = cv2.GaussianBlur(gray_face, (5, 5), 1.4)
    edges = cv2.Canny(blurred_face, 100, 200)
    # All three channels are identical, so a single GRAY->BGR conversion is enough.
    return cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR)


def run_face_detection_with_edges(desired_fps=10, padding_factor=0.2, use_camera_index=0):
    """
    Detects faces from webcam using MTCNN (PyTorch) and applies Canny edge detection to each face region.

    Every detected face (plus symmetric padding) is replaced in the displayed frame by its
    edge map and outlined in green. The loop ends when the camera stops delivering frames
    or when "q" is pressed in the preview window.

    Args:
        desired_fps (int): Target frames per second; must be positive.
        padding_factor (float): Padding around detected faces, as a fraction of the
            face width/height added on each side.
        use_camera_index (int): Index of the camera to use (default is 0).

    Raises:
        ValueError: If ``desired_fps`` is not positive.
        RuntimeError: If the camera cannot be opened.
    """
    if desired_fps <= 0:
        raise ValueError("desired_fps must be a positive number")

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    mtcnn = MTCNN(keep_all=True, device=device)
    frame_interval = 1.0 / desired_fps

    video_capture = cv2.VideoCapture(use_camera_index)
    try:
        if not video_capture.isOpened():
            raise RuntimeError(f"Could not open camera index {use_camera_index}")

        while True:
            iteration_start = time.perf_counter()

            ret, frame = video_capture.read()
            if not ret:
                break

            frame_height, frame_width = frame.shape[:2]
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            boxes, _ = mtcnn.detect(rgb_frame)

            # `boxes` is None when no face was found in this frame.
            for box in (boxes if boxes is not None else ()):
                region = _padded_box(box, padding_factor, frame_width, frame_height)
                if region is None:
                    continue
                left, top, right, bottom = region

                # Read from the untouched RGB copy so overlapping faces do not
                # feed already-rendered edges back into the edge detector.
                frame[top:bottom, left:right] = _face_edges_bgr(rgb_frame[top:bottom, left:right])
                cv2.rectangle(frame, (left, top), (right - 1, bottom - 1), (0, 255, 0), 2)

            cv2.imshow('Face Detection with Canny Edges (PyTorch + MTCNN)', frame)

            # Pace the loop with waitKey instead of time.sleep: it keeps the window's
            # event loop responsive and only waits for what is left of the frame budget.
            remaining = frame_interval - (time.perf_counter() - iteration_start)
            wait_ms = max(1, int(remaining * 1000))
            if cv2.waitKey(wait_ms) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the window, even if detection raised.
        video_capture.release()
        cv2.destroyAllWindows()


# Start the live webcam demo only when this code runs as the entry point.
if __name__ == "__main__":
    run_face_detection_with_edges()


2025-04-12 00:44:57.998 python[52056:61217734] +[IMKClient subclass]: chose IMKClient_Modern
2025-04-12 00:44:57.998 python[52056:61217734] +[IMKInputSession subclass]: chose IMKInputSession_Modern
