# Model Implementation


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from tqdm import tqdm

# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Preprocessing applied to every face crop before it reaches the network:
# resize to 224x224, reduce to a single grayscale channel, convert to a tensor.
trans = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
])
#Define Model
class EmotionDetection(nn.Module):
    """AlexNet-like CNN mapping a single-channel image to ``num_classes`` logits.

    Structure: five convolutions (the 1st, 2nd and 5th each followed by a
    max-pool and a batch-norm), an adaptive average pool to 6x6, and a
    two-layer fully connected head.

    Args:
        num_classes: size of the output of the final linear layer.

    Notes:
        * The only Dropout layer is the one inside ``self.fc``; no dropout is
          applied between the convolutional stages.
        * Attribute names (``conv1`` ... ``fc``) and the positions inside
          ``self.fc`` determine the state-dict keys, so they must not be
          renamed or reordered if existing checkpoints are to keep loading.
    """

    def __init__(self, num_classes):
        super().__init__()

        # Stage 1 -- input is grayscale, hence a single input channel.
        self.conv1 = nn.Conv2d(1, 64, kernel_size=11, stride=4)
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.norm1 = nn.BatchNorm2d(64)

        # Stage 2
        self.conv2 = nn.Conv2d(64, 192, kernel_size=5, padding=2)
        self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.norm2 = nn.BatchNorm2d(192)

        # Stage 3 -- three stacked 3x3 convolutions, then pool + norm.
        self.conv3 = nn.Conv2d(192, 384, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(384, 256, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.pool3 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.norm3 = nn.BatchNorm2d(256)

        # Fixes the spatial size fed to the head at 6x6 regardless of the
        # feature-map size produced by the stages above.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        # Classifier head.
        self.fc = nn.Sequential(
            nn.Dropout(0.25),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes)
        )

    def forward(self, x):
        """Compute class logits.

        Args:
            x: tensor of shape (N, 1, H, W).

        Returns:
            Tensor of shape (N, num_classes) holding raw (un-normalised) scores.
        """
        # Each of the first two stages is conv -> ReLU -> max-pool -> batch-norm.
        x = self.norm1(self.pool1(F.relu(self.conv1(x))))
        x = self.norm2(self.pool2(F.relu(self.conv2(x))))

        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = F.relu(self.conv5(x))
        x = self.norm3(self.pool3(x))

        x = self.avgpool(x)
        x = torch.flatten(x, 1)  # keep the batch dimension, flatten the rest
        return self.fc(x)

# Build the 7-class network and restore the trained weights.
net = EmotionDetection(7)
# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was saved from a GPU session still loads on a CPU-only runtime.
net.load_state_dict(
    torch.load("/content/drive/MyDrive/Emotion Detector/Emotion_Detection.pt",
               map_location=device)
)
net = net.to(device)
# This notebook only runs inference: eval mode makes BatchNorm use its stored
# running statistics and turns Dropout off, so predictions are deterministic.
net.eval()
net

EmotionDetection(
  (conv1): Conv2d(1, 64, kernel_size=(11, 11), stride=(4, 4))
  (pool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (norm2): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (norm3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (fc): Sequential(
 

# Complete Function to Predict new image

In [None]:
import os
import cv2
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import math
# Work from the project folder on the mounted drive.
os.chdir('/content/drive/MyDrive/Emotion Detector')

# Frontal-face Haar cascade that ships with OpenCV.
cascade_xml = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(cascade_xml)
face_cascade

< cv2.CascadeClassifier 0x7bff0177bc30>

In [None]:
def predict(image, face_cascade):
    """Detect faces, box them, and print the predicted emotion for one face.

    Every detection gets a green rectangle on the displayed image, but only
    the last detection returned by the cascade is classified.

    Args:
        image: PIL image or array-like picture in RGB channel order.
        face_cascade: object exposing ``detectMultiScale`` (a
            ``cv2.CascadeClassifier`` in this notebook).

    Relies on the notebook-level ``trans``, ``net`` and ``device``.
    Shows the annotated image with matplotlib and returns ``None``.
    """
    if isinstance(image, Image.Image):
        # Palette / grayscale / RGBA files would not match the 3-channel RGB
        # layout assumed by Image.fromarray(..., mode='RGB') below.
        image = image.convert('RGB')
    image = np.array(image)
    print("Processing Image .......")
    faces = face_cascade.detectMultiScale(image)

    if len(faces) == 0:
        cv2.putText(image,'No Faces',(30,80),cv2.FONT_HERSHEY_SIMPLEX,1,(0,255,0),2)
        # Show the annotated image so the message is actually visible.
        plt.imshow(image)
        plt.show()
        return

    # Crop (as a copy) BEFORE drawing: a plain slice is a view of `image`, so
    # the thick rectangle borders drawn next would otherwise end up inside the
    # pixels handed to the classifier.
    x, y, w, h = (int(v) for v in faces[-1])
    roi_color = image[y:y+h, x:x+w].copy()

    # Draw Bounding Box around every detected face
    for fx, fy, fw, fh in faces:
        cv2.rectangle(image, (fx, fy), (fx+fw, fy+fh), (0, 255, 0), 5)

    plt.imshow(image)

    # Same preprocessing as the rest of the notebook, plus a batch dimension.
    trans_image = trans(Image.fromarray(roi_color, mode='RGB'))
    trans_image = trans_image.unsqueeze(0)

    # Predict image
    with torch.no_grad():
        trans_image = trans_image.to(device)
        output = net(trans_image)
        _, prediction = torch.max(output, 1)

    print(output)
    classes = ['angry', 'disgusted', 'fearful', 'happy', 'neutral', 'sad', 'surprised']
    print(classes[prediction.item()])
    plt.show()  # Show the final image

- Try adding logic so that only rectangles whose width is proportional to the input image are used; ignore smaller rectangles.
- Draw the label text on a white background.


In [None]:
def predict_with_labels(image, face_cascade, min_face_size=20):
    """Detect faces, classify each one's emotion, and draw labelled boxes.

    Args:
        image: PIL image or array-like picture in RGB channel order.
        face_cascade: object exposing ``detectMultiScale`` (a
            ``cv2.CascadeClassifier`` in this notebook).
        min_face_size: a detection is skipped only when both its width and
            its height are at or below this many pixels.

    Relies on the notebook-level ``trans``, ``net`` and ``device``.
    Prints each detection and its label, shows the annotated image with
    matplotlib, and returns ``None``.
    """
    if isinstance(image, Image.Image):
        # Palette / grayscale / RGBA files would not match the 3-channel RGB
        # layout assumed by the colour conversion and Image.fromarray below.
        image = image.convert('RGB')
    image = np.array(image)
    print("Processing")
    # The array comes from PIL and is therefore RGB-ordered, not BGR.
    gray_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    faces = face_cascade.detectMultiScale(gray_image)

    if len(faces) == 0:
      cv2.putText(image, "No Faces Detected", (50,50), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255,0,0), 2, cv2.LINE_AA)
    else:
      classes = ['angry', 'disgusted', 'fearful', 'happy', 'neutral', 'sad', 'surprised']
      FONT_SCALE = 2e-2  # Adjust for larger font size in all images
      THICKNESS_SCALE = 1e-2  # Adjust for larger thickness in all images
      font_face = cv2.FONT_HERSHEY_SIMPLEX

      # Crops are taken from an untouched copy so that boxes and labels drawn
      # on `image` (for this or an earlier face) never reach the classifier.
      clean_image = image.copy()

      # Process each detected face
      print(f"number of faces{len(faces)}")
      for face in faces:
          x, y, w, h = (int(v) for v in face)
          print(x,y,w,h)
          if w <= min_face_size and h <= min_face_size:
              continue

          # Extract the region of interest (ROI) for this face
          roi_color = clean_image[y:y+h, x:x+w]

          # Draw a rectangle around the face
          cv2.rectangle(image, (x, y), (x+w, y+h), (0, 255, 0), 5)

          # Transform the crop and add a batch dimension before passing to model
          trans_image = trans(Image.fromarray(roi_color, mode='RGB'))
          trans_image = trans_image.unsqueeze(0)

          # Predict image for this face
          with torch.no_grad():
              trans_image = trans_image.to(device)
              output = net(trans_image)
              _, prediction = torch.max(output, 1)

          label = classes[prediction.item()]

          # Text size scales with the face box.
          font_scale = min(w, h) * FONT_SCALE
          thickness = math.ceil(max(w, h) * THICKNESS_SCALE)
          (text_w, text_h), baseline = cv2.getTextSize(label, font_face, font_scale, thickness)

          # Anchor the text 10px above the box, but never so high that it
          # would be cut off by the top edge of the image.
          text_y = max(y - 10, text_h)

          # White filled rectangle behind the text, then the label itself.
          cv2.rectangle(image, (x, text_y - text_h), (x + text_w, text_y + baseline),
                        (255, 255, 255), cv2.FILLED)
          cv2.putText(image, label, (x, text_y), font_face, font_scale, (255,0,0), thickness, cv2.LINE_AA)

          print(label)
    plt.imshow(image)
    plt.show()


In [None]:
# Run the labelled detector on a sample group photo.
sample_path = "/content/family3.jpg"
img = Image.open(sample_path)
predict_with_labels(img, face_cascade)

Processing
number of faces10
735 155 28 28
neutral
699 133 38 38
sad
353 139 35 35
happy
442 129 36 36
happy
536 156 33 33
happy
611 159 34 34
happy
800 138 41 41
happy
860 127 46 46
happy
194 159 47 47
fearful
193 314 61 61
neutral
