# Run this notebook to launch my model in real time with your web-camera.
  - Before running, set `model_path` in the code to the location on your PC where you saved the state dictionary of my tuned model;
  - The name of the dictionary: **extra-version3_resnet50_fc_layer4_layer3_tuned_kritika_data_5classes_1channel-images.pth**

In [None]:
pip install torch torchvision opencv-python matplotlib


Collecting torchNote: you may need to restart the kernel to use updated packages.

  Downloading torch-2.3.0-cp311-cp311-win_amd64.whl.metadata (26 kB)
Collecting torchvision
  Downloading torchvision-0.18.0-cp311-cp311-win_amd64.whl.metadata (6.6 kB)
Collecting opencv-python
  Downloading opencv_python-4.9.0.80-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting mkl<=2021.4.0,>=2021.1.1 (from torch)
  Downloading mkl-2021.4.0-py2.py3-none-win_amd64.whl.metadata (1.4 kB)
Collecting intel-openmp==2021.* (from mkl<=2021.4.0,>=2021.1.1->torch)
  Downloading intel_openmp-2021.4.0-py2.py3-none-win_amd64.whl.metadata (1.2 kB)
Collecting tbb==2021.* (from mkl<=2021.4.0,>=2021.1.1->torch)
  Downloading tbb-2021.12.0-py3-none-win_amd64.whl.metadata (1.1 kB)
Downloading torch-2.3.0-cp311-cp311-win_amd64.whl (159.8 MB)
   ---------------------------------------- 0.0/159.8 MB ? eta -:--:--
   ---------------------------------------- 0.6/159.8 MB 11.8 MB/s eta 0:00:14
   ---------------------------

In [None]:
import os

import torch
from torchvision import models
import torch.nn as nn

# Inference runs on the CPU; the checkpoint is mapped onto this device when loaded.
device = torch.device('cpu')

# Define the model architecture: a ResNet-50 without pretrained weights, since
# every parameter is overwritten by the fine-tuned state dictionary below.
model = models.resnet50(weights=None)
# Replace the stem so the first conv layer accepts 1-channel (grayscale) input.
model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
num_ftrs = model.fc.in_features
num_classes = 5  # Update this with your number of classes
model.fc = nn.Linear(num_ftrs, num_classes)

# Location of the fine-tuned state dictionary. Set the EMOTION_MODEL_PATH
# environment variable to override it, or edit the default below (kept as a
# raw string so the Windows backslashes are not treated as escapes).
model_path = os.environ.get(
    "EMOTION_MODEL_PATH",
    r"C:\Users\kirillnartov\Downloads\extra-version3_resnet50_fc_layer4_layer3_tuned_kritika_data_5classes_1channel-images.pth",
)
if not os.path.isfile(model_path):
    raise FileNotFoundError(
        f"Model state dictionary not found at {model_path!r}. "
        "Set model_path (or the EMOTION_MODEL_PATH environment variable) "
        "to the location of the downloaded .pth file."
    )

# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot execute arbitrary code while being loaded.
state_dict = torch.load(model_path, map_location=device, weights_only=True)
model.load_state_dict(state_dict)

# Switch to inference mode (fixes BatchNorm statistics, disables dropout).
# Assigning the result keeps the cell from printing the full module repr.
model = model.to(device).eval()



ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [None]:
import cv2
from torchvision import transforms
from PIL import Image

# Preprocessing applied to every cropped face before it is passed to the model.
# NOTE(review): the 270x270 size and the mean/std values presumably mirror the
# preprocessing used during training -- confirm before changing them.
transform = transforms.Compose([
    transforms.Resize((270, 270)),
    # The model's first conv layer takes a single input channel.
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485], std=[0.229])
])

# Load the cascade for face detection
cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(cascade_path)

# CascadeClassifier does not raise when the XML file cannot be read; it returns
# an empty classifier that only fails later inside detectMultiScale. Fail here
# with a clear message instead.
if face_cascade.empty():
    raise RuntimeError(f"Could not load the Haar cascade from {cascade_path!r}")


In [None]:
# Emotion labels for predictions; the label at position i is the one reported
# when the model's highest-scoring output index is i.
class_names = [
    'anger',
    'fear',
    'happy',
    'neutral',
    'sad',
]

In [None]:
import cv2
from PIL import Image
import torch
import numpy as np


def predict_emotion(face_region):
    """Classify one cropped face and return its emotion label.

    Args:
        face_region: crop of the grayscale frame containing a single face.

    Returns:
        The entry of ``class_names`` at the model's highest-scoring output index.
    """
    # Convert face region to a PIL Image, as expected by the torchvision transforms
    pil_image = Image.fromarray(face_region)

    # Apply transformations and add the batch dimension
    face_tensor = transform(pil_image).unsqueeze(0).to(device)

    # Model prediction (no gradients are needed for inference)
    with torch.no_grad():
        output = model(face_tensor)
        _, predicted = torch.max(output, 1)
    return class_names[predicted.item()]


def run_webcam_demo(camera_index=0):
    """Show the webcam stream with detected faces boxed and labelled by emotion.

    Runs until 'q' is pressed in the video window, the stream stops delivering
    frames, or the kernel is interrupted. The camera and the window are
    released in every case.

    Args:
        camera_index: index of the capture device passed to ``cv2.VideoCapture``.
    """
    # Camera capture setup
    cap = cv2.VideoCapture(camera_index)

    try:
        if not cap.isOpened():
            print(f"Cannot open camera {camera_index}. Exiting ...")
            return

        while True:
            ret, frame = cap.read()
            if not ret:
                print("Can't receive frame (stream end?). Exiting ...")
                break

            # Convert to grayscale for face detection
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Detect faces in the image
            faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

            for (x, y, w, h) in faces:
                # Classify the face region extracted from the grayscale frame
                predicted_emotion = predict_emotion(gray[y:y+h, x:x+w])

                # Draw rectangle around the face and annotate predicted emotion
                cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)
                # Keep the text baseline inside the frame when the face touches
                # the top edge; otherwise the label would be drawn off-screen.
                label_y = max(y - 10, 25)
                cv2.putText(frame, predicted_emotion, (x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36,255,12), 2)

            # Display the resulting frame
            cv2.imshow('Video', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    except KeyboardInterrupt:
        # Interrupting the kernel is a normal way to stop the demo, so do not
        # leave a traceback in the notebook output.
        print("Interrupted by user. Exiting ...")

    finally:
        cap.release()
        cv2.destroyAllWindows()


run_webcam_demo()


KeyboardInterrupt: 