In [2]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Training-time preprocessing: resize to 224x224, apply light augmentation
# (random horizontal flip, random rotation of up to 10 degrees), convert to a
# tensor and normalise each of the three channels with mean 0.5 / std 0.5.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(degrees=10),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
    ]
)

# Images are read from data/Training and served in shuffled batches of 32,
# loaded by 4 worker processes.
dataset = datasets.ImageFolder(transform=transform, root='data/Training')
train_loader = DataLoader(
    dataset=dataset,
    batch_size=32,
    num_workers=4,
    shuffle=True,
)

In [3]:
import torch
import torch.nn as nn
from torchvision import models


# Tunable settings for the classifier head and the optimiser.
NUM_CLASSES = 2
LEARNING_RATE = 0.0001

# Start from an ImageNet-pretrained ResNet-50. The explicit `weights=` enum
# replaces the deprecated `pretrained=True` flag and selects the same
# IMAGENET1K_V1 checkpoint.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

# Swap the final fully connected layer for a fresh NUM_CLASSES-way head.
model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Every parameter (backbone and new head) is handed to Adam, so the whole
# network is fine-tuned.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\hp/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [01:41<00:00, 1.01MB/s]


In [4]:
num_epochs = 10
for epoch in range(num_epochs):
    # Per-epoch accumulators: summed batch losses and prediction counts.
    running_loss, num_correct, num_seen = 0.0, 0, 0
    model.train()

    for batch_images, batch_labels in train_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        # One optimisation step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(batch_images)
        loss = criterion(logits, batch_labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # The predicted class is the index of the largest output per sample.
        predictions = logits.argmax(dim=1)
        num_seen += batch_labels.size(0)
        num_correct += predictions.eq(batch_labels).sum().item()

    # Accuracy here is measured on the training batches themselves.
    accuracy = 100 * num_correct / num_seen
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Accuracy: {accuracy:.2f}%')

# Persist only the learned parameters (the state dict) to disk.
torch.save(model.state_dict(), 'gender_classification_resnet50.pth')


Epoch [1/10], Loss: 0.1196, Accuracy: 95.76%
Epoch [2/10], Loss: 0.0890, Accuracy: 96.90%
Epoch [3/10], Loss: 0.0796, Accuracy: 97.17%
Epoch [4/10], Loss: 0.0741, Accuracy: 97.33%
Epoch [5/10], Loss: 0.0676, Accuracy: 97.55%
Epoch [6/10], Loss: 0.0636, Accuracy: 97.72%
Epoch [7/10], Loss: 0.0568, Accuracy: 97.96%
Epoch [8/10], Loss: 0.0538, Accuracy: 98.06%
Epoch [9/10], Loss: 0.0499, Accuracy: 98.22%
Epoch [10/10], Loss: 0.0446, Accuracy: 98.29%


In [7]:
import cv2
from PIL import Image
import numpy as np
from facenet_pytorch import MTCNN

# Face detector; keep_all=True makes it return every face found in a frame.
mtcnn = MTCNN(keep_all=True, device=device)

# Restore the fine-tuned weights.
# - map_location=device lets a checkpoint saved on a GPU load on a CPU-only
#   machine (and vice versa).
# - weights_only=True restricts unpickling to tensors/plain containers, which
#   is all a state dict needs, and avoids executing arbitrary pickled code.
state_dict = torch.load(
    'gender_classification_resnet50.pth',
    map_location=device,
    weights_only=True,
)
model.load_state_dict(state_dict)

# Inference mode: freezes batch-norm statistics and disables dropout.
model.eval()

def detect_and_classify():
    """Run live gender classification on the default webcam.

    Frames are read from camera 0, faces are located with the module-level
    `mtcnn` detector, each face crop is classified by the module-level
    `model`, and the annotated frame is shown in an OpenCV window. The loop
    ends when a frame cannot be read or when 'q' is pressed in the window.

    The camera and the window are always released, even if the loop is
    interrupted (e.g. by KeyboardInterrupt from the notebook's stop button).
    """
    # Deterministic preprocessing for inference: same resize and
    # normalisation as the training pipeline, but without the random
    # flip/rotation, so a given crop always yields the same prediction.
    inference_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ])

    # Guard against the training cell having been re-run since the model was
    # last put in eval mode; calling eval() again is harmless.
    model.eval()

    cap = cv2.VideoCapture(0)
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # OpenCV delivers frames in BGR channel order, while the detector
            # and the classifier (trained on images loaded through PIL) work
            # on RGB, so convert once per frame.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_height, frame_width = frame_rgb.shape[:2]

            boxes, _ = mtcnn.detect(frame_rgb)

            if boxes is not None:
                for box in boxes:
                    x1, y1, x2, y2 = map(int, box)

                    # Detector boxes can extend past the image border. Clamp
                    # them so slicing never wraps around (negative index) and
                    # skip boxes that end up empty.
                    x1, y1 = max(x1, 0), max(y1, 0)
                    x2, y2 = min(x2, frame_width), min(y2, frame_height)
                    if x2 <= x1 or y2 <= y1:
                        continue

                    # Extract the face region from the RGB frame.
                    face_pil = Image.fromarray(frame_rgb[y1:y2, x1:x2])
                    face_tensor = inference_transform(face_pil).unsqueeze(0).to(device)

                    with torch.no_grad():
                        outputs = model(face_tensor)
                        _, predicted = torch.max(outputs, 1)
                        # NOTE(review): assumes class index 0 is the female
                        # class - confirm against dataset.class_to_idx.
                        gender = 'Female' if predicted.item() == 0 else 'Male'

                    # Annotate the original (BGR) frame that is displayed.
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
                    cv2.putText(frame, gender, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)

            cv2.imshow('Gender Classification', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the preview window.
        cap.release()
        cv2.destroyAllWindows()


# Start the live webcam demo; press 'q' in the preview window to stop it.
detect_and_classify()


  model.load_state_dict(torch.load('gender_classification_resnet50.pth'))


KeyboardInterrupt: 

: 