In [6]:
import os
import shutil
import numpy as np
import cv2 as cv
import torch
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from facenet_pytorch import InceptionResnetV1
from torchvision import transforms
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from pathlib import Path
from face_extraction import process_image

# Define paths
MOVIE_FOLDER = 'C:\\Users\\ningw\\Desktop\\KTA3\\movies\\'
FRAME_FOLDER = 'C:\\Users\\ningw\\Desktop\\KTA3\\output_images\\'
FACES_FOLDER_TRAINING = 'C:\\Users\\ningw\\Desktop\\KTA3\\face_folder\\'
OUTPUT_FOLDER_RESULTS = 'C:\\Users\\ningw\\Desktop\\KTA3\\results\\'
RESULTS_NAME = 'first_result.csv'
OUTPUT_BASE_FOLDER = 'C:\\Users\\ningw\\Desktop\\KTA3\\clusters\\'

# Frame extraction function
def extract_frames(video_file_path, video_name, sample_rate):
    """
    Extract frames from a video at a specific sampling rate.

    Args:
        video_file_path (str): Path to the folder containing the video file.
        video_name (str): Name of the video file.
        sample_rate (int): Frequency of frames to capture.

    Returns:
        list: A list of frames as mediapipe Image objects.
    """
    print("Start extracting frames...")
    video_path = os.path.join(video_file_path, video_name)
    cap = cv.VideoCapture(video_path)
    frame_list = []

    if not cap.isOpened():
        print("Error: Cannot open video file.")
        return frame_list

    frame_idx, captured = 0, 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        if frame_idx % sample_rate == 0:
            frame_rgb = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame_rgb)
            frame_list.append(mp_image)
            captured += 1
        frame_idx += 1

    cap.release()
    print(f"Successfully extracted {captured} frames.")
    return frame_list

# Save faces function
def save_face_list(face_list, output_folder, movie_name, file_extension=".png"):
    """
    Save detected faces to files.

    Args:
        face_list (list): List of cropped face images.
        output_folder (str): Folder to save the images.
        movie_name (str): Name of the movie being processed.
        file_extension (str): File extension for the saved images.

    Returns:
        None
    """
    print("Saving faces...")
    for idx, face in enumerate(face_list):
        file_path = os.path.join(output_folder, f"{movie_name}_{idx}{file_extension}")
        cv.imwrite(file_path, face)
    print(f"Saved {len(face_list)} faces to {output_folder}.")

# Extract faces function
def extract_faces(image, detection_result, padding_x, padding_y):
    """
    Extract faces from an image based on face detection results.

    Args:
        image (np.ndarray): Input image.
        detection_result (mediapipe.tasks.python.vision.Detection): Face detection results.
        padding_x (int): Horizontal padding.
        padding_y (int): Vertical padding.

    Returns:
        list: List of cropped face images.
    """
    cropped_faces = []
    for detection in detection_result.detections:
        bbox = detection.bounding_box
        x_start = max(0, bbox.origin_x - padding_x)
        y_start = max(0, bbox.origin_y - padding_y)
        x_end = min(image.shape[1], bbox.origin_x + bbox.width + padding_x)
        y_end = min(image.shape[0], bbox.origin_y + bbox.height + padding_y)
        cropped_face = image[y_start:y_end, x_start:x_end]
        if cropped_face.size > 0 and cropped_face.shape[0] > 10 and cropped_face.shape[1] > 10:
            cropped_faces.append(cv.cvtColor(cropped_face, cv.COLOR_RGB2BGR))
    return cropped_faces

# Embed face function
def embed_face_net(image):
    """
    Generate a facial embedding from an image using a pretrained FaceNet model.

    Args:
        image (np.ndarray): Cropped face image.

    Returns:
        torch.Tensor: Facial embedding vector.
    """
    model = InceptionResnetV1(pretrained='vggface2').eval()
    img_rgb = cv.cvtColor(image, cv.COLOR_BGR2RGB)
    transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((160, 160)),
        transforms.ToTensor()
    ])
    img_tensor = transform(img_rgb).unsqueeze(0)
    with torch.no_grad():
        return model(img_tensor)

# Main processing
MOVIE_TRAINING_LIST = ["New Kids ABC", "New Kids Fussballspiel", "New Kids Turbo_Tankstation"]
RESULTS_CSV = os.path.join(OUTPUT_FOLDER_RESULTS, RESULTS_NAME)
output_extension = ".png"
input_extension = ".mp4"
samples_per_second = 25
padding_x = 10
padding_y = 10
min_confidence = 0.6

# Ensure directories exist
Path(FACES_FOLDER_TRAINING).mkdir(parents=True, exist_ok=True)

for movie in MOVIE_TRAINING_LIST:
    print(f"Processing {movie}...")
    frames = extract_frames(MOVIE_FOLDER, movie + input_extension, samples_per_second)
    faces = []
    for frame in frames:
        detection_result = process_image(frame, padding_x, padding_y, min_confidence)
        faces.extend(extract_faces(frame, detection_result, padding_x, padding_y))
    save_face_list(faces, FACES_FOLDER_TRAINING, movie, output_extension)

print("Processing complete!")



Processing New Kids ABC...
Start extracting frames...
Error: Cannot open video file.
Saving faces...
Saved 0 faces to C:\Users\ningw\Desktop\KTA3\face_folder\.
Processing New Kids Fussballspiel...
Start extracting frames...
Error: Cannot open video file.
Saving faces...
Saved 0 faces to C:\Users\ningw\Desktop\KTA3\face_folder\.
Processing New Kids Turbo_Tankstation...
Start extracting frames...
Error: Cannot open video file.
Saving faces...
Saved 0 faces to C:\Users\ningw\Desktop\KTA3\face_folder\.
Processing complete!
