In [1]:
import torch
import cv2
from torchvision import transforms
from PIL import Image
import os

In [2]:
# Pick the compute device: use CUDA when PyTorch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [3]:
# Emotion labels. The position of each name is the class index it stands for:
# the predicted index is looked up in this list to get the text to display.
classes = [
    'angry',
    'disgust',
    'fear',
    'happy',
    'neutral',
    'sad',
    'surprise',
]

In [4]:
from torch import nn
class CNN(nn.Module):
    """Small convolutional classifier for single-channel 48x48 images.

    Two convolutional stages (conv -> ReLU -> conv -> ReLU -> 2x2 max-pool ->
    dropout) feed a two-layer fully connected head. All convolutions are
    unpadded 3x3, so a 48x48 input is reduced to 128 feature maps of 9x9,
    which is exactly what the first linear layer expects; other input sizes
    will fail inside the classifier.

    The layer order inside each ``nn.Sequential`` determines the parameter
    names in the ``state_dict`` (``conv1.0``, ``conv1.2``, ...). Inserting or
    removing modules shifts those indices and breaks loading of previously
    saved weights.

    Args:
        num_classes: Number of output logits. When ``None`` (the default) the
            length of the module-level ``classes`` list is used, which is the
            original behaviour.
    """

    def __init__(self, num_classes=None):
        super().__init__()
        if num_classes is None:
            # Backward-compatible default: size the head from the global labels.
            num_classes = len(classes)

        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3),     # -> 32 x 46 x 46
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3),    # -> 64 x 44 x 44
            nn.ReLU(),
            nn.MaxPool2d(2),                     # -> 64 x 22 x 22
            nn.Dropout(0.3),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3),   # -> 128 x 20 x 20
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3),  # -> 128 x 18 x 18
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),         # -> 128 x 9 x 9
            nn.Dropout(0.3),
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 9 * 9, 1024),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, num_classes),
        )

    def forward(self, x: torch.Tensor):
        """Return raw class logits for a batch of images.

        Args:
            x: Tensor of shape ``(batch, 1, 48, 48)``.

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding unnormalised
            scores (no softmax is applied here).
        """
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.classifier(x)
        return x

In [5]:
# Build a fresh CNN and load the trained weights from ./model.pth.
torch.manual_seed(42)
model = CNN()

# map_location remaps the stored tensors onto the selected device, so a
# checkpoint saved on a GPU still loads when only the CPU is available
# (matching the CPU fallback used when `device` was chosen).
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source.
model.load_state_dict(
    torch.load(os.path.join(os.getcwd(), "model.pth"), map_location=device)
)

# Move the model to the device and switch to evaluation mode: the network
# contains Dropout layers, which would otherwise stay active and make the
# predictions random.
model = model.to(device).eval()

In [6]:
# Preprocessing pipeline for each face crop: resize to 48x48, then convert
# the image to a tensor.
transform = transforms.Compose(
    [transforms.Resize(size=(48, 48)), transforms.ToTensor()]
)

In [17]:
# Live emotion recognition on the default webcam. Press 'q' in the video
# window to stop.

# Build the face detector once; constructing it inside the loop re-reads the
# cascade XML on every frame.
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
if face_cascade.empty():
    # Fail early with a clear message instead of an OpenCV assertion later.
    raise IOError("Could not load 'haarcascade_frontalface_default.xml'")

# The network contains Dropout layers; evaluation mode disables them so the
# same face yields the same prediction.
model.eval()

# define video capture object
cap = cv2.VideoCapture(0)
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame delivered (camera missing or stream ended).
            break

        # capture faces on the grayscale version of the frame
        grayscale = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(grayscale, scaleFactor=1.3, minNeighbors=5)

        # iterate through each face captured
        for (x, y, w, h) in faces:
            # move rectangle up by 10 px and extend it down by 10 px
            cv2.rectangle(frame, (x, y-10), (x+w, y+h+10), (0, 255, 0), 1)
            region = grayscale[y:y + h, x:x + w]

            # convert to PIL, resize / tensorise, add batch dim, move to device
            pil_img = Image.fromarray(region)
            cropped_img = transform(pil_img).unsqueeze(0).to(device)

            # make prediction; no autograd graph is needed for inference
            with torch.no_grad():
                y_pred_logits = model(cropped_img)
            # argmax over logits picks the same class as argmax over softmax;
            # .item() turns the one-element tensor into a plain int index
            y_pred_label = torch.argmax(y_pred_logits, dim=1).item()

            # display the predicted label above the face
            cv2.putText(frame, classes[y_pred_label], (x, y-20), cv2.FONT_HERSHEY_DUPLEX, 1, (0, 255, 0), 1, cv2.LINE_AA)

        cv2.imshow('Video Capture', cv2.resize(frame, (1280, 720), interpolation=cv2.INTER_CUBIC))
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even when the loop is
    # interrupted or an error is raised.
    cap.release()
    cv2.destroyAllWindows()