In [48]:
# Source 
# https://dev.to/edgaras/face-recognition-with-facenet-ha8 

In [None]:
import torch
from facenet_pytorch import MTCNN, InceptionResnetV1
from PIL import Image, ImageDraw
import numpy as np
import mmcv, cv2
from torchvision import transforms
import os

# Initialize MTCNN and InceptionResnetV1
mtcnn = MTCNN(image_size=160, keep_all=True, device='cpu')
resnet = InceptionResnetV1(pretrained='vggface2').eval().to('cpu')

# Detect faces and draw bounding boxes
def detect_faces(frames):
    total_faces_detected = 0
    frames_tracked = []
    face_list = []  # List of cropped faces for embedding extraction

    for i, frame in enumerate(frames):
        print(f'\rTracking frame: {i + 1}', end='')

        # Detect faces
        frame_np = np.array(frame)

        try:
            boxes, probs = mtcnn.detect(frame_np)
        except Exception as e:
            print(f"\nError on frame {i}: {e}")
            boxes = None

        # Track how many bounding boxes
        count = 0  # Reset count for each frame

        # Draw boxes
        frame_draw = frame.copy()  # Make a copy of the frame to draw on
        draw = ImageDraw.Draw(frame_draw)

        if boxes is not None and len(boxes) > 0:
            count = len(boxes)  # Each box corresponds to detected face
            total_faces_detected += count  # Add the count of detected faces to the total count

            # Draw bounding boxes on the frame (ensure this is a PIL image)
            for box in boxes:
                draw.rectangle(box.tolist(), outline=(255, 0, 0), width=6)

            # Add to frame list (resize for display or other purposes)
            frames_tracked.append(frame_draw.resize((640, 360), Image.BILINEAR))

            # Crop faces based on boxes > store in face_list
            faces = mtcnn(frame)  # MTCNN returns cropped faces as a list of PIL images
            face_list.extend(faces)

        else:
            print("No boxes created")

    print(f"\nTotal number of faces detected across all frames: {total_faces_detected}")
    return frames_tracked, face_list

def save_frames_as_images(frames, output_folder):

    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    
    for i, frame in enumerate(frames):
        # Save each frame as a PNG file
        frame_filename = os.path.join(output_folder, f"Frame_{i+1}.png")
        frame.save(frame_filename)
        print(f"Saved: {frame_filename}")

# Example video frames (replace with actual frames from your video)
video = mmcv.VideoReader('video/premierleaguevideo.mp4')
frames = [Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) for frame in video]  # Convert frames to PIL images

# Step 1: Detect faces and get cropped faces for embedding extraction
frames_tracked, faces = detect_faces(frames)

# Step 2: Save the frames to images
if frames_tracked:
    output_folder = 'processed_frames'  # Folder to save frames as images
    save_frames_as_images(frames_tracked, output_folder)
else:
    print("No frames processed.")

print('\nDone')

  from .autonotebook import tqdm as notebook_tqdm


Tracking frame: 59No boxes created
Tracking frame: 60No boxes created
Tracking frame: 61No boxes created
Tracking frame: 62No boxes created
Tracking frame: 63No boxes created
Tracking frame: 64No boxes created
Tracking frame: 65No boxes created
Tracking frame: 66No boxes created
Tracking frame: 67No boxes created
Tracking frame: 68No boxes created
Tracking frame: 69No boxes created
Tracking frame: 70No boxes created
Tracking frame: 71No boxes created
Tracking frame: 72No boxes created
Tracking frame: 73No boxes created
Tracking frame: 74No boxes created
Tracking frame: 75No boxes created
Tracking frame: 76No boxes created
Tracking frame: 78No boxes created
Tracking frame: 79No boxes created
Tracking frame: 80No boxes created
Tracking frame: 81No boxes created
Tracking frame: 82No boxes created
Tracking frame: 84No boxes created
Tracking frame: 85No boxes created
Tracking frame: 86No boxes created
Tracking frame: 87No boxes created
Tracking frame: 88No boxes created
Tracking frame: 89No

In [None]:
# Load frames from processed frames folder
def load_processed_frames(folder_path):
    
    frames = []
    for filename in sorted(os.listdir(folder_path)):
        if filename.endswith(".png"):
            image_path = os.path.join(folder_path, filename)
            frames.append(Image.open(image_path)) # Load as PIL Image
    print("Frames:", frames)
    return frames

frames_loaded = load_processed_frames('processed_frames')

Frames: [<PIL.PngImagePlugin.PngImageFile image mode=RGB size=640x360 at 0x1396670C5C0>, <PIL.PngImagePlugin.PngImageFile image mode=RGB size=640x360 at 0x13775FEC3B0>, <PIL.PngImagePlugin.PngImageFile image mode=RGB size=640x360 at 0x1396670CB90>, <PIL.PngImagePlugin.PngImageFile image mode=RGB size=640x360 at 0x138F89E6870>, <PIL.PngImagePlugin.PngImageFile image mode=RGB size=640x360 at 0x139910B43E0>, <PIL.PngImagePlugin.PngImageFile image mode=RGB size=640x360 at 0x139910B62D0>, <PIL.PngImagePlugin.PngImageFile image mode=RGB size=640x360 at 0x139910B7200>, <PIL.PngImagePlugin.PngImageFile image mode=RGB size=640x360 at 0x139910B6E40>, <PIL.PngImagePlugin.PngImageFile image mode=RGB size=640x360 at 0x139910B4560>, <PIL.PngImagePlugin.PngImageFile image mode=RGB size=640x360 at 0x139910B6A20>, <PIL.PngImagePlugin.PngImageFile image mode=RGB size=640x360 at 0x139910B4F20>, <PIL.PngImagePlugin.PngImageFile image mode=RGB size=640x360 at 0x139910B7680>, <PIL.PngImagePlugin.PngImageFil

In [None]:
# Extract embeddings from processed frame images
def extract_embeddings_from_frames(frames):

    embeddings_list = [] # Store embeddings for all faces detected

    # Loop through each frame
    for frame in frames:
        # Detect faces in the current frame
        faces = mtcnn(frame) # MTCNN returns cropped faces




In [None]:
# # Extract Facial Embeddings using detected faces
# def extract_embeddings(faces):
#     # Convert faces to tensors
#     faces_tensor = torch.stack([transforms.ToTensor()(face).unsqueeze(0) for face in faces])  # Convert PIL images to tensor

#     # Extract embeddings using InceptionResnetV1
#     embeddings = resnet(faces_tensor).detach().cpu().numpy()  # Extract embeddings from faces
#     print("Embeddings extracted for {} faces".format(len(faces)))  # Print number of faces processed
#     return embeddings


# # Main Process
# # Sample video frames (replace with actual frames from your video)
# frames = [Image.open('frame_1.jpg'), Image.open('frame_2.jpg')]  # Replace with actual frames

# # Step 1: Detect faces and get cropped faces for embedding extraction
# frames_tracked, faces = detect_faces(frames)

# # Step 2: Extract embeddings from cropped faces
# embeddings = extract_embeddings(faces)  # Pass the cropped faces to extract_embeddings

# Sample video
# video = mmcv.VideoReader('video/premierleaguevideo.mp4')
# frames = [Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) for frame in video]
# display.Video('video/premierleaguevideo.mp4', width=640)

# # Global counter to track total number of faces detected
# total_faces_detected = 0

# # Run video through MTCNN
# frames_tracked = []
# for i, frame in enumerate(frames):
#     # print('\rTracking frame: {}'.format(i + 1), end='')

#     # Detect faces
#     frame_np = np.array(frame)

#     try:
#         boxes, probs = mtcnn.detect(frame_np)
#     except Exception as e:
#         print(f"\nError on frame {i}: {e}")
#         boxes = None
    
#     # Track how many bounding boxes
#     count = 0 # Reset count for each frame

#     # Draw boxes
#     frame_draw = frame.copy()
#     draw = ImageDraw.Draw(frame_draw)

#     if boxes is not None and len(boxes) > 0:

#         # Count the number of boxes (faces) detected in frame
#         count = len(boxes) # Each box corresponds to detected face
#         total_faces_detected += count # Add the count of detected faces to the total count

#         # Draw bounding boxes on the frame
#         for box in boxes:
#             draw.rectangle(box.tolist(), outline=(255, 0, 0), width=6)

#         # Add to frame list 
#         frames_tracked.append(frame_draw.resize((640, 360), Image.BILINEAR))
#     else:
#         print("No boxes created")

# # After processing all frames, print the total number of faces detected
# print(f"\nTotal number of faces detected across all frames: {total_faces_detected}")
# print('\nDone')

# Save video
# dim = frames_tracked[0].size
# fourcc = cv2.VideoWriter_fourcc(*'FMP4')    
# video_tracked = cv2.VideoWriter('video_tracked.mp4', fourcc, 25.0, dim)
# for frame in frames_tracked:
#     video_tracked.write(cv2.cvtColor(np.array(frame), cv2.COLOR_RGB2BGR))
#     print("Video saved")
# else:
#     print("No video saved")
# video_tracked.release()

# Dictionary to store player templates (embedding vectors)
# player_templates = {}

# def create_player_template(player_id, face_embeddings):
#     # Create or update a player's template with the average of embeddings
#     if player_id not in player_templates:
#         # Initialise player template with the first embedding
#         player_templates[player_id] = np.array(face_embeddings)
#     else:
#         # Update the player template by averaging the new embedding with the existing one
#         player_templates[player_id] = (player_templates[player_id] + np.array(face_embeddings)) / 2
#     print(f"Updated template for {player_id}")

# def process_video_for_templates(video_path):
#     # Process video and create player templates from the detected faces

#     # Open video
#     cap = cv2.VideoCapture(video_path)
#     # Check if video opened successfully
#     if not cap.isOpened():
#         print("Error: Could not open video.")
#         return
    
#     # Read frames one by one
#     while cap.isOpened():
#         ret, frame = cap.read()

#         if not ret:
#             break # End the video

#         # Convert frame from BGR (OpenCV) to RGB (PIL)
#         frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
#         pil_image = Image.fromarray(frame_rgb)

#         # Detect faces in the frame
#         boxes, probs = mtcnn.detect(pil_image)

#         if boxes is not None:
#             # Extract embeddings for each detected face
#             faces = mtcnn(pil_image)
#             faces = faces.to(device) # Move faces to the same device as the model
#             embeddings = resnet(faces).detach().cpu().numpy() # Convert to numpy array

#             for emb in embeddings:
#                 player_id = 'player_1'
#                 create_player_template(player_id, emb)

#     cap.release()
#     print("Done processing video.")

# # Example usage: Process video and create templates
# video_path = 'video/premierleaguevideo.mp4' 
# process_video_for_templates(video_path)

# # Print player templates created
# print("Player templates:", player_templates)