## 1. Imports

In [50]:
import cv2
import mediapipe as mp
import os

## 2. Face Tracking

In [51]:
# Short aliases for the MediaPipe solution modules used by the detection loop:
# face_detection provides the FaceDetection model, drawing_utils draws its results.
mp_face_detection = mp.solutions.face_detection
mp_drawing = mp.solutions.drawing_utils

In [52]:
# Open the input video. cv2.VideoCapture does not raise when the file is
# missing or cannot be decoded, so fail loudly here instead of letting the
# later cells run against an unopened capture.
video_path = 'test.mov'
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise OSError(f"Could not open video file: {video_path}")

In [53]:
# Get video properties
# Frame size and frame rate are read from the capture so that the VideoWriter
# can be created with the same values. Width/height are cast to int; fps is
# kept exactly as returned by cap.get().
width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps    = cap.get(cv2.CAP_PROP_FPS)

In [54]:
# Display the properties read from the capture as this cell's output.
width, height, fps

(2238, 1294, 59.918809201623816)

In [55]:
# Writer for the annotated video: 'mp4v' codec, same frame rate and frame
# size as the values read from the input capture.
fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
frame_size = (width, height)
out = cv2.VideoWriter('output_video.mp4', fourcc, fps, frame_size)


In [56]:
# Face Tracking Loop
# Runs MediaPipe face detection on every frame of `cap`, draws the detections
# onto the frame and appends it to `out`.
# The capture and the writer are released in `finally` so the output file is
# closed properly (and the input handle freed) even if processing raises or is
# interrupted; without release() the written video may be left unfinalized.
try:
    with mp_face_detection.FaceDetection(model_selection=1, min_detection_confidence=0.5) as face_detection:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame returned: end of stream (or a read failure)
                break

            # Convert BGR to RGB for the detector; drawing still happens on
            # the original BGR frame that is written out.
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Detect faces
            results = face_detection.process(image)

            # Draw bounding boxes (skipped when nothing was detected)
            if results.detections:
                for detection in results.detections:
                    mp_drawing.draw_detection(frame, detection)

            # Write the output frame
            out.write(frame)
finally:
    cap.release()
    out.release()


I0000 00:00:1748605745.326279 5252182 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M3
W0000 00:00:1748605745.373845 5450026 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


## 3. Data Preprocessing

In [57]:
from PIL import Image


In [58]:
# Input and output directories
# input_root is walked as <actor>/<movie>/<image> by the resize loop;
# output_root receives the resized copies under the same actor/movie layout.
input_root = 'IMFDB_final'
output_root = 'IMFDB_resized'
# Size handed to Image.resize for every image.
target_size = (112, 112)

In [61]:
# Resize images
# Walks input_root/<actor>/<movie>/ and writes an RGB JPEG copy of every .jpg
# file, resized to target_size, to the same relative location under
# output_root. Files that cannot be processed are reported and skipped.
failed_count = 0
for actor in os.listdir(input_root):
    actor_path = os.path.join(input_root, actor)
    if not os.path.isdir(actor_path):
        continue

    for movie in os.listdir(actor_path):
        movie_path = os.path.join(actor_path, movie)
        if not os.path.isdir(movie_path):
            continue

        output_movie_path = os.path.join(output_root, actor, movie)
        os.makedirs(output_movie_path, exist_ok=True)

        for img_file in os.listdir(movie_path):
            # Case-insensitive extension match (.jpg / .JPG)
            if not img_file.lower().endswith('.jpg'):
                continue

            input_img_path = os.path.join(movie_path, img_file)
            output_img_path = os.path.join(output_movie_path, img_file)

            try:
                # The context manager closes the source file deterministically;
                # convert()/resize() return new in-memory images that remain
                # valid after the source is closed.
                with Image.open(input_img_path) as source_img:
                    img = source_img.convert('RGB').resize(target_size, Image.LANCZOS)
                img.save(output_img_path, format="JPEG", quality=95, optimize=True)
            except Exception as e:
                # Best effort: one unreadable file must not abort the whole run
                failed_count += 1
                print(f"Error processing {input_img_path}: {e}")

# Report the directory actually used instead of a hard-coded copy of its name
print(f"Resizing complete. Images saved to '{output_root}'")
if failed_count:
    print(f"{failed_count} image(s) could not be processed.")


Resizing complete. Images saved to 'IMFDB_resized'


## 4. Separating data into training and testing sets


In [62]:
import shutil
import random
from pathlib import Path

In [66]:
# Source and target directories
# SOURCE_DIR holds one sub-directory per actor; TARGET_DIR receives the
# train/ and test/ trees created by the split cells.
SOURCE_DIR = 'IMFDB_resized'
TARGET_DIR = 'IMFDB_split'
# Fraction of each actor's images that goes to the training split;
# the remainder goes to the test split.
TRAIN_SPLIT = 0.8

In [63]:
# Set random seed for reproducibility
# Seeds Python's global `random` generator, which the split cell uses via
# random.shuffle. NOTE(review): re-running the split cell without re-running
# this one continues from the current generator state.
random.seed(42)

In [67]:
# Create target train/test directories
# Ensures TARGET_DIR/train and TARGET_DIR/test exist; safe to re-run.
target_root = Path(TARGET_DIR)
for split_name in ('train', 'test'):
    (target_root / split_name).mkdir(parents=True, exist_ok=True)

In [68]:
# Split every actor's images into TARGET_DIR/train/<actor> and
# TARGET_DIR/test/<actor> according to TRAIN_SPLIT.
def copy_files(image_list, split, actor_dir):
    """Copy images into TARGET_DIR/<split>/<actor name>/.

    Args:
        image_list: paths of the images to copy (all located under actor_dir).
        split: name of the split sub-directory, e.g. 'train' or 'test'.
        actor_dir: the actor's source directory; its name becomes the class
            folder and is used to derive each image's relative path.
    """
    split_actor_dir = Path(TARGET_DIR) / split / actor_dir.name
    split_actor_dir.mkdir(parents=True, exist_ok=True)
    for img_path in image_list:
        # Images are collected recursively from sub-folders but copied into a
        # single flat folder. Folding the relative path into the file name
        # keeps same-named files from different sub-folders from overwriting
        # each other.
        flat_name = "_".join(img_path.relative_to(actor_dir).parts)
        shutil.copy(img_path, split_actor_dir / flat_name)


# Directory iteration order is arbitrary, so sort both the actor folders and
# the image lists: a seeded shuffle is only reproducible when it starts from
# the same ordering every time.
for actor_dir in sorted(Path(SOURCE_DIR).iterdir()):
    if not actor_dir.is_dir():
        continue

    all_images = sorted(actor_dir.rglob('*.jpg'))  # You can add .png if needed
    random.shuffle(all_images)

    split_index = int(len(all_images) * TRAIN_SPLIT)
    train_images = all_images[:split_index]
    test_images = all_images[split_index:]

    copy_files(train_images, 'train', actor_dir)
    copy_files(test_images, 'test', actor_dir)

print("Dataset split complete.")


Dataset split complete.


## 5. Creating and training a CNN based model

In [78]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import os


In [79]:
# Paths
# Root folders handed to ImageFolder for the training and test datasets.
train_dir = 'IMFDB_split/train'
test_dir = 'IMFDB_split/test'


In [80]:
# Hyperparameters
batch_size = 32
num_epochs = 10
# Learning rate passed to the Adam optimizer.
lr = 0.001
# Side length used by the Resize transform. The FaceCNN classifier head is
# sized for 14x14 feature maps after three 2x poolings, i.e. 112x112 inputs.
image_size = 112
# Size of the model's output layer; must equal the number of class folders.
num_classes = 6

In [81]:
# Device
# Use the CUDA GPU when PyTorch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cpu')

In [82]:
# Data transforms
# Resize to the model's input size, convert to a float tensor in [0, 1], then
# normalize each channel with mean 0.5 / std 0.5 (maps values to [-1, 1]).
transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
    # The model takes 3-channel RGB input (its first layer is Conv2d(3, ...)),
    # so give one mean/std per channel explicitly rather than relying on a
    # single value being broadcast across channels.
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
])

In [83]:
# Datasets and loaders
# Both splits are read with ImageFolder and share the same transform.
train_dataset = datasets.ImageFolder(root=train_dir, transform=transform)
test_dataset = datasets.ImageFolder(root=test_dir, transform=transform)

# Only the training loader shuffles; the test loader keeps dataset order.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)


In [84]:
# Label mapping
# class_to_idx maps each class folder name to the integer label used in training.
label_map = train_dataset.class_to_idx
print("Label Mapping:", label_map)

# num_classes is set by hand in the hyperparameter cell; check it against the
# classes actually found so a mismatch is reported here rather than surfacing
# later during training.
assert len(label_map) == num_classes, (
    f"num_classes={num_classes} but the training set contains {len(label_map)} classes"
)

Label Mapping: {'AmitabhBachchan': 0, 'HrithikRoshan': 1, 'JayaBhaduri': 2, 'Kajol': 3, 'KareenaKapoor': 4, 'SharukhKhan': 5}


In [85]:
# CNN Model
class FaceCNN(nn.Module):
    """Small convolutional classifier for 3-channel 112x112 face crops.

    Three Conv(3x3, padding=1) -> ReLU -> MaxPool(2) stages halve the spatial
    size each time (112 -> 56 -> 28 -> 14) while widening the channels
    (3 -> 32 -> 64 -> 128). The flattened 128*14*14 features feed a
    256-unit hidden layer with dropout and a final `num_classes`-way layer.
    """

    def __init__(self, num_classes):
        super().__init__()

        # Channel widths at the input of the first stage and the output of each stage
        widths = (3, 32, 64, 128)
        stages = []
        for in_ch, out_ch in zip(widths[:-1], widths[1:]):
            stages.append(nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1))
            stages.append(nn.ReLU())
            stages.append(nn.MaxPool2d(2, 2))
        self.conv = nn.Sequential(*stages)

        # 14x14 is the feature-map size left after three 2x poolings of a 112x112 input
        flat_features = widths[-1] * 14 * 14
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(flat_features, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        """Return raw class scores (logits) for a batch of images."""
        features = self.conv(x)
        return self.fc(features)

In [86]:
# Instantiate model
# Build the network and move its parameters to the selected device.
model = FaceCNN(num_classes)
model = model.to(device)

# Loss and optimizer
# Cross-entropy on the raw class scores; Adam updates all model parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=lr)

In [87]:
# Training loop
# One optimizer step per mini-batch. The per-epoch mean of the batch losses is
# printed and collected in train_losses.
train_losses = []
for epoch in range(num_epochs):
    model.train()
    batch_losses = []

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step before backprop
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

    avg_loss = sum(batch_losses) / len(train_loader)
    train_losses.append(avg_loss)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

Epoch [1/10], Loss: 1.5993
Epoch [2/10], Loss: 1.1409
Epoch [3/10], Loss: 0.8671
Epoch [4/10], Loss: 0.7194
Epoch [5/10], Loss: 0.5430
Epoch [6/10], Loss: 0.4670
Epoch [7/10], Loss: 0.3951
Epoch [8/10], Loss: 0.2842
Epoch [9/10], Loss: 0.2192
Epoch [10/10], Loss: 0.2080


In [90]:
# Evaluation
# Top-1 accuracy over the test loader, with the model in eval mode and
# gradient tracking disabled.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        # Predicted class = index of the highest score in each row
        predicted = outputs.argmax(dim=1)

        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

print(f"Test Accuracy: {100 * correct / total:.2f}%")

Test Accuracy: 74.77%


In [91]:
# Save the model
# The model is compiled with TorchScript and written to disk. The path is held
# in one variable so the confirmation message always names the file that was
# actually written (the message previously said ".pth" while the file is ".pt").
model_path = "face_recognition_model.pt"
torch.jit.script(model).save(model_path)
print(f"Model saved as {model_path}")


Model saved as face_recognition_model.pth
