In [1]:
import torch
import yolov5  # Assuming yolov5 package is installed

# Detector configuration: checkpoint to load and the post-processing thresholds.
# NOTE(review): 'yolov5s.pt' is the name of the stock small YOLOv5 checkpoint,
# which is presumably a general object detector rather than a face-specific
# one -- confirm that this file really contains face-detection weights.
YOLO_WEIGHTS = 'yolov5s.pt'
YOLO_CONF_THRESHOLD = 0.5   # detections scoring below this are discarded
YOLO_IOU_THRESHOLD = 0.45   # overlap threshold used by non-maximum suppression

yolo_model = yolov5.load(YOLO_WEIGHTS)
yolo_model.conf = YOLO_CONF_THRESHOLD
yolo_model.iou = YOLO_IOU_THRESHOLD


YOLOv5  2024-7-18 Python-3.9.19 torch-2.3.1+cpu CPU

  from .autonotebook import tqdm as notebook_tqdm
Fusing layers... 
YOLOv5s summary: 270 layers, 7235389 parameters, 0 gradients, 16.6 GFLOPs
Adding AutoShape... 


In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms

# Define the ShallowCNN model class
class ShallowCNN(nn.Module):
    """Small convolutional classifier producing 7 logits per input image.

    Four unpadded 3x3 convolutions (1 -> 32 -> 64 -> 128 -> 256 channels) are
    followed by two fully connected layers. 2x2 max pooling is applied after
    the second, third and fourth convolution. The flatten size of
    ``256 * 4 * 4`` matches single-channel 48x48 inputs
    (48 -> 46 -> 44 -> 22 -> 20 -> 10 -> 8 -> 4).
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: every convolution is 3x3, stride 1, no padding.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=0)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=0)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=0)
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=0)
        # One pooling module is shared by all three down-sampling steps.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        # Classifier head.
        self.fc1 = nn.Linear(256 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, 7)
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        """Return raw class scores of shape ``(N, 7)`` for a batch ``x``.

        No softmax is applied; dropout is only active in training mode.
        """
        features = torch.relu(self.conv1(x))
        features = self.pool(torch.relu(self.conv2(features)))
        features = self.pool(torch.relu(self.conv3(features)))
        features = self.pool(torch.relu(self.conv4(features)))
        # Collapse channel and spatial dimensions into one feature vector per sample.
        flat = features.view(-1, 256 * 4 * 4)
        hidden = self.dropout(torch.relu(self.fc1(flat)))
        return self.fc2(hidden)

# Restore the trained Shallow CNN weights from disk.
model_save_path = "shallow_cnn_emotion_model_120epochs.pth"
shallow_cnn_model = ShallowCNN()
# map_location="cpu": the model is built on the CPU, so remap any tensors that
# were saved from a CUDA device; without this the load fails on a CPU-only
# machine when the checkpoint was written on a GPU.
# weights_only=True: torch.load is pickle-based; restrict unpickling to tensor
# data so a tampered checkpoint cannot execute arbitrary code.
state_dict = torch.load(model_save_path, map_location="cpu", weights_only=True)
shallow_cnn_model.load_state_dict(state_dict)
shallow_cnn_model.eval()  # Evaluation mode (disables dropout); also displays the model summary


ShallowCNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (conv4): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=4096, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=7, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)

In [4]:
import cv2
import numpy as np

# Human-readable class names; the position in the list is the class index.
emotions = ["angry", "disgust", "fear", "happy", "neutral", "sad", "surprise"]

# Preprocessing applied to each cropped region before classification:
# array -> PIL image -> single-channel grayscale -> 48x48 -> float tensor,
# then shifted by 0.5 and divided by 0.5.
_preprocessing_steps = [
    transforms.ToPILImage(),
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((48, 48)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
]
transform = transforms.Compose(_preprocessing_steps)

# Live demo: read webcam frames, detect regions with YOLOv5, classify each
# region's expression with the Shallow CNN, and display the annotated frame.
# Press 'q' in the window (or interrupt the kernel) to stop.
# NOTE(review): every detection returned by yolo_model is treated as a face,
# regardless of its class id -- confirm the detector weights are face-specific.

# Open a connection to the default webcam (device index 0)
cap = cv2.VideoCapture(0)

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # OpenCV delivers BGR; the detector and the PIL-based transform expect RGB.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_h, frame_w = frame.shape[:2]

        # Run the detector on the RGB frame
        results = yolo_model(rgb_frame)

        for det in results.xyxy[0]:  # One row per detection
            # Each row is (xmin, ymin, xmax, ymax, confidence, class);
            # only the box corners are needed here.
            xmin, ymin, xmax, ymax = (int(v) for v in det[:4].tolist())

            # Clamp to the frame and skip degenerate boxes: an empty crop
            # would make the resize step in the transform raise.
            xmin, ymin = max(xmin, 0), max(ymin, 0)
            xmax, ymax = min(xmax, frame_w), min(ymax, frame_h)
            if xmax <= xmin or ymax <= ymin:
                continue

            # Crop from the RGB frame, not the BGR one: ToPILImage/Grayscale
            # interpret the channels as RGB, so a BGR crop would swap the red
            # and blue weights in the grayscale conversion.
            face = rgb_frame[ymin:ymax, xmin:xmax]

            # Preprocess and add the batch dimension
            face_tensor = transform(face).unsqueeze(0)

            # Classify the expression; gradients are not needed for inference
            with torch.no_grad():
                outputs = shallow_cnn_model(face_tensor)
            _, predicted = torch.max(outputs, 1)
            emotion = emotions[predicted.item()]

            # Draw bounding box and emotion label on the (BGR) display frame
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
            # Keep the label inside the frame when the box touches the top edge
            label_y = ymin - 10 if ymin - 10 >= 20 else ymin + 25
            cv2.putText(frame, emotion, (xmin, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

        # Display the frame
        cv2.imshow('Emotion Detection', frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the webcam and close the windows, including when the
    # cell is interrupted (KeyboardInterrupt) or an exception is raised.
    cap.release()
    cv2.destroyAllWindows()


KeyboardInterrupt: 