In [2]:
import cv2
from facenet_pytorch import MTCNN
from PIL import Image
import numpy as np
import torch
from torchvision import transforms
import torch.nn as nn
from torchvision import models

# Define an emotion classification model
class EmotionClassifier(nn.Module):
    """DenseNet-121 backbone whose classifier head is replaced for emotion classification.

    Args:
        num_classes: Size of the final linear layer's output (one logit per
            emotion class). Defaults to 7.
        pretrained: Forwarded to ``models.densenet121``. Defaults to ``True``
            to preserve the original behaviour. Pass ``False`` when a full
            checkpoint is loaded with ``load_state_dict`` right after
            construction, so the pre-trained weights are not fetched only to
            be overwritten.
    """

    def __init__(self, num_classes=7, pretrained=True):
        super().__init__()
        # Backbone; its stock classifier head is swapped out below.
        self.densenet = models.densenet121(pretrained=pretrained)
        # Keep the backbone's feature width, change only the number of outputs.
        num_ftrs = self.densenet.classifier.in_features
        self.densenet.classifier = nn.Linear(num_ftrs, num_classes)

    def forward(self, x):
        """Return the DenseNet output (raw logits, no softmax) for batch ``x``."""
        return self.densenet(x)

# Load the model
model = EmotionClassifier().to('cuda')  # Move model to CUDA (GPU)
# NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
# map_location='cpu' deserializes the tensors into host memory, so loading does
# not depend on the device the checkpoint was saved from (e.g. 'cuda:1' on a
# single-GPU machine). load_state_dict then copies the values into the model's
# existing parameters, which stay on the GPU.
state_dict = torch.load('best_model.pt', map_location='cpu')
model.load_state_dict(state_dict)  # Load the best saved model
model.eval()  # Set the model to evaluation mode

# Preprocessing pipeline applied to each face image before inference
_preprocessing_steps = [
    # An int size rescales the shorter edge to 256 px and keeps the aspect
    # ratio; the result is not necessarily 256x256.
    transforms.Resize(256),
    # Keep only the central 224x224 region.
    transforms.CenterCrop(224),
    # Convert the image to a PyTorch tensor.
    transforms.ToTensor(),
    # Per-channel normalization with ImageNet's mean and std.
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(_preprocessing_steps)

# Function to predict emotion of a face
def predict_emotion(model, face_img):
    """Return softmax class probabilities for a single face image.

    Args:
        model: Module called on the preprocessed batch; its output is treated
            as logits with the class axis at dim 1.
        face_img: Image accepted by the module-level ``transform`` pipeline
            (the caller in this file passes a PIL image).

    Returns:
        NumPy array of probabilities with a leading batch dimension of 1.
    """
    # Run on whatever device the model lives on instead of assuming CUDA, so
    # the input tensor and the weights can never end up on different devices.
    # A parameterless model falls back to the original 'cuda' behaviour.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else 'cuda'

    # Preprocess, then add the batch dimension the model expects.
    image_tensor = transform(face_img).unsqueeze(0).to(device)
    with torch.no_grad():  # No gradient calculation for inference
        outputs = model(image_tensor)
        probabilities = torch.nn.functional.softmax(outputs, dim=1)
    return probabilities.cpu().numpy()  # Return probabilities as a numpy array

# Detect and crop face from an image
def detect_and_crop_face(image_path):
    """Detect faces in an image file and crop the largest one.

    Args:
        image_path: Path of the image to read with ``cv2.imread``.

    Returns:
        A one-element list holding the RGB crop (NumPy array) of the face with
        the largest bounding-box area, or ``None`` when no face is detected or
        the selected box has no area inside the image.

    Raises:
        OSError: If the image cannot be read or decoded.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None; fail with a clear
        # message instead of an opaque cvtColor assertion error.
        raise OSError(f"Could not read or decode image: {image_path!r}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert from BGR to RGB

    mtcnn = MTCNN(keep_all=True, device='cuda')  # Initialize MTCNN for face detection
    boxes, _ = mtcnn.detect(img)  # Detect faces
    if boxes is None or len(boxes) == 0:
        return None

    # Pick the box with the largest area; each row is (x1, y1, x2, y2).
    boxes = np.asarray(boxes)
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    x1, y1, x2, y2 = boxes[int(np.argmax(areas))].astype(int)

    # Clamp to the image bounds: a negative coordinate would otherwise be
    # interpreted by NumPy as an index from the end, giving a wrong or empty crop.
    height, width = img.shape[:2]
    x1, x2 = max(x1, 0), min(x2, width)
    y1, y2 = max(y1, 0), min(y2, height)
    if x2 <= x1 or y2 <= y1:
        return None

    cropped_face = img[y1:y2, x1:x2]
    return [cropped_face]  # Return the cropped face as a list for consistency in later processing


# End-to-end run: detect the largest face in one image and classify its emotion
image_path = '5.png'  # Replace with your image path
# Class names, indexed by the position of the model's output
emotion_labels = ['anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'surprise']
cropped_faces = detect_and_crop_face(image_path)

if cropped_faces is None:
    print("No faces were detected.")
else:
    for i, face in enumerate(cropped_faces):
        # The classifier's preprocessing expects a PIL image, not a raw array
        face_img = Image.fromarray(face)
        probabilities = predict_emotion(model, face_img)
        # Index of the highest probability selects the reported label
        predicted_emotion = emotion_labels[probabilities.argmax()]
        print(f"Face {i+1}: {predicted_emotion} with probability {probabilities.max()}")
        # Optionally display or save the cropped face and its predicted emotion
        # face_img.show()
        # face_img.save(f'cropped_face_{i}_{predicted_emotion}.png')


Face 1: neutral with probability 0.8946856260299683
