# Real-Time Emotion Detection using OpenCV

##  Initialization

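Activate the conda environment:

```bash
conda activate face_emotion_detection
```

If the webcam is accessed through WSL, attach it from a Windows terminal (replace `2-2` with the BUSID of your webcam):

```bash
usbipd attach --wsl --busid 2-2
```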

In [1]:
# Upgrade pip itself; the %pip magic installs into the environment of the running kernel.
%pip install --upgrade pip

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Install (or upgrade) every dependency listed in requirements.txt.
# NOTE(review): if requirements.txt does not pin versions, --upgrade pulls the
# latest release on every run — consider pinning for reproducibility.
%pip install -r requirements.txt --upgrade

Collecting opencv-contrib-python (from -r requirements.txt (line 1))
  Downloading opencv_contrib_python-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting pandas (from -r requirements.txt (line 2))
  Downloading pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)
Collecting huggingface_hub (from -r requirements.txt (line 3))
  Downloading huggingface_hub-0.25.1-py3-none-any.whl.metadata (13 kB)
Collecting numpy (from -r requirements.txt (line 5))
  Using cached numpy-2.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
Collecting torch (from -r requirements.txt (line 6))
  Downloading torch-2.4.1-cp312-cp312-manylinux1_x86_64.whl.metadata (26 kB)
Collecting torchvision (from -r requirements.txt (line 7))
  Downloading torchvision-0.19.1-cp312-cp312-manylinux1_x86_64.whl.metadata (6.0 kB)
Collecting python-dotenv (from -r requirements.txt (line 8))
  Downloading python_dotenv-1

In [1]:
# Load key/value pairs from a local .env file into the process environment.
# NOTE(review): no cell in view reads these variables — confirm which setting is required.
from dotenv import load_dotenv

load_dotenv()

True

## (Optional) Accessing webcam via WSL

Follow the instructions (up to section 2) on [this GitHub page](https://github.com/PINTO0309/wsl2_linux_kernel_usbcam_enable_conf?tab=readme-ov-file).

Then, run the following commands in a Windows terminal with administrator privileges:

Install usbipd-win
```bash
winget install usbipd
```

Open your list of usb devices
```bash
usbipd list
```

Example output:
```bash
BUSID  VID:PID    DEVICE                                                        STATE
2-1    1395:0098  EPOS GSX 300, USB Input Device                                Not shared
2-2    046d:085c  C922 Pro Stream Webcam                                        Not shared
2-4    0d8c:0050  Thronmax MDrill One Pro, USB Input Device                     Not shared
2-5    361d:0100  USB Input Device                                              Not shared
```

Find the webcam you want to use and set it to shared:
```bash
usbipd bind --busid=<BUSID>
```

Then, you may attach the device to WSL:
```bash
usbipd attach --wsl --busid=<BUSID>
```
This last step has to be performed every time WSL is launched.

### Troubleshooting

On the following line, the remote address might be wrong, which prevents Linux from retrieving the correct config:
```bash
$ sudo wget -O .config https://github.com/PINTO0309/wsl2_linux_kernel_usbcam_enable_conf/raw/main/${TAGVER}/config-${WSL_DISTRO_NAME} \
  && sudo chmod 777 .config \
  && sudo make clean
```

The issue is caused by `WSL_DISTRO_NAME` not including the version number of the Ubuntu distro. In that case, run the following command:

```bash
lsb_release -a
```

Example output:
```bash
No LSB modules are available.
Distributor ID: Ubuntu
Description:    Ubuntu 22.04.5 LTS
Release:        22.04
Codename:       jammy
```

Then simply run the original line again, replacing `${WSL_DISTRO_NAME}` with `Ubuntu-<Release>`, where `<Release>` is the value of the "Release" row in the output (e.g. `Ubuntu-22.04`).

## Downloading models from the OpenCV Model Zoo

In [5]:
#Cloning the opencv zoo repo
!git clone https://github.com/opencv/opencv_zoo

# installing git-lfs
!sudo apt-get install git-lfs
!git lfs install

# Loading large file from opencv_zoo
!cd opencv_zoo
# # If only want to pull required models
# !git config lfs.fetchinclude "models/face_detection_yunet, models/facial_expression_recognition"
!git lfs pull

Cloning into 'opencv_zoo'...
remote: Enumerating objects: 1933, done.[K
remote: Counting objects: 100% (891/891), done.[K
remote: Compressing objects: 100% (390/390), done.[K
remote: Total 1933 (delta 664), reused 584 (delta 499), pack-reused 1042 (from 1)[K
Receiving objects: 100% (1933/1933), 1.21 MiB | 1.18 MiB/s, done.
Resolving deltas: 100% (1207/1207), done.
Updating files: 100% (277/277), done.
git: 'lfs' is not a git command. See 'git --help'.

The most similar command is
	log
git: 'lfs' is not a git command. See 'git --help'.

The most similar command is
	log


## Face and emotion recognition

### Imports

In [1]:
import cv2 as cv
import numpy as np
from opencv_zoo.models.face_detection_yunet.yunet import YuNet
from opencv_zoo.models.facial_expression_recognition.facial_fer_model import FacialExpressionRecog

### Config

In [8]:
# Scale factor applied to the camera's reported frame width/height before
# detection (1.0 keeps the native resolution).
scaling_ratio = 1.0 

# Paths to the ONNX models inside the cloned opencv_zoo folder.
# Add "_int8" to run a quantized version of the model e.g. "...yunet_2023mar_int8"
face_detector_model_path = "opencv_zoo/models/face_detection_yunet/face_detection_yunet_2023mar.onnx" 
# NOTE(review): the variable name is misspelled ("experssion"); it is referenced
# under this name when the model is initialised, so rename both places together.
experssion_detector_model_path = "opencv_zoo/models/facial_expression_recognition/facial_expression_recognition_mobilefacenet_2022july.onnx"

# Verbosity: passed to process_faces() and visualize() by the main loop.
verbose = True

### Get emotions from recognized faces

In [14]:
def process_faces(frame, faces, model: "FacialExpressionRecog", verbose = True):
    """Classify the facial expression of every detected face in ``frame``.

    Args:
        frame: Image that the detections in ``faces`` refer to; passed
            unchanged to ``model.infer``.
        faces: Iterable of detection rows, or ``None`` when nothing was
            detected. The last element of each row is dropped and the rest
            is cast to int32 before being handed to the model.
        model: Expression recogniser exposing ``infer(frame, face_coords)``
            and ``getDesc(result)``.
        verbose: When true, print the list of "Face <idx>: <emotion>" strings.

    Returns:
        list: One emotion description per face, in detection order; empty
        when there are no faces.
    """
    # Treat "no detections" reported as None the same as an empty result, so
    # this function accepts the same inputs as visualize().
    if faces is None:
        faces = ()

    faces_emotion = []
    for face in faces:
        # Everything except the trailing score, as integer pixel coordinates.
        face_coords = face[:-1].astype(np.int32)
        # model.infer outputs a single element list
        inference_result = model.infer(frame, face_coords)[0]
        faces_emotion.append(model.getDesc(inference_result))

    if verbose:
        faces_emotion_results = ["Face {}: {}".format(idx, emotion) for idx, emotion in enumerate(faces_emotion)]
        print(faces_emotion_results)

    return faces_emotion

### Visualization of Model outputs

In [10]:
def visualize(input, faces, emotions, fps, thickness=2, show_facial_feature_points = False, verbose = True):
    """Draw face boxes, emotion labels and the FPS counter onto ``input`` in place.

    Args:
        input: Image to annotate; it is modified in place. (The name shadows
            the ``input`` builtin but is kept for existing keyword callers.)
        faces: Iterable of detection rows, or ``None``. Each row is read as
            ``[x, y, width, height, <five landmark x/y pairs>, score]``.
        emotions: Sequence of labels, indexed in parallel with ``faces``.
        fps: Frames-per-second value rendered in the top-left corner.
        thickness: Line thickness for boxes and landmark circles.
        show_facial_feature_points: When true, also draw the five landmarks.
        verbose: When true, print a one-line summary for each face.

    Returns:
        None. All drawing happens on ``input``.
    """
    if faces is not None:
        for idx, face in enumerate(faces):
            # Honour the verbose flag; it was previously accepted but ignored.
            if verbose:
                print('Face {}, top-left coordinates: ({:.0f}, {:.0f}), box width: {:.0f}, box height {:.0f}, score: {:.2f}'.format(idx, face[0], face[1], face[2], face[3], face[-1]))

            coords = face[:-1].astype(np.int32)
            # Plain Python ints for the drawing calls.
            x, y, w, h = (int(v) for v in coords[:4])
            cv.rectangle(input, (x, y), (x + w, y + h), (0, 200, 200), thickness)
            cv.putText(input, emotions[idx], (x, y), cv.FONT_HERSHEY_PLAIN, 1.5, (255, 0, 0))

            if show_facial_feature_points:
                # coords[4:14] holds five (x, y) landmark pairs.
                for lx, ly in coords[4:14].reshape(-1, 2):
                    cv.circle(input, (int(lx), int(ly)), 1, (128, 0, 128), thickness)

    cv.putText(input, 'FPS: {:.2f}'.format(fps), (1, 16), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

### Initialization of model instances

In [11]:
# Initializing models
# Timer that the main loop starts/stops around inference to report FPS.
tm = cv.TickMeter()

# Face detector. The inputSize given here is only an initial value: the main
# loop calls setInputSize() with the camera frame size before detecting.
face_detector = YuNet(
    modelPath=face_detector_model_path,
    inputSize=[512, 512],
    confThreshold=0.75,
    nmsThreshold=0.3,
)

# Expression classifier applied to each detected face.
expression_detector = FacialExpressionRecog(modelPath=experssion_detector_model_path)


### Main Inference Loop

In [19]:
# Index of the capture device to open.
deviceId = 0
cap = cv.VideoCapture(deviceId)

# try/finally guarantees that the camera handle and the preview window are
# released even when the loop is stopped by a kernel interrupt or an inference
# error; otherwise the capture device can stay held by the kernel process.
try:
    if not cap.isOpened():
        print('Cannot open camera device {} ...'.format(deviceId))
    else:
        # Size every frame is resized to, and the size the detector is told to expect.
        frameWidth = int(cap.get(cv.CAP_PROP_FRAME_WIDTH) * scaling_ratio)
        frameHeight = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT) * scaling_ratio)
        face_detector.setInputSize([frameWidth, frameHeight])

        # Start timing from zero so the FPS readout reflects this run only,
        # not measurements accumulated by earlier executions of this cell.
        tm.reset()

        while True:
            # Break if cannot capture image
            hasFrame, frame = cap.read()
            if not hasFrame:
                print('No input detected ...')
                break

            frame = cv.resize(frame, (frameWidth, frameHeight))

            # Inference (only detection + expression recognition is timed)
            tm.start()
            faces = face_detector.infer(frame)
            emotions = process_faces(frame, faces, expression_detector, verbose)
            tm.stop()

            # Draw results on the input image
            visualize(frame, faces, emotions, tm.getFPS(), verbose = verbose)

            # Visualize results
            cv.imshow('Live', frame)

            # Poll the keyboard after imshow so the window exists and is
            # refreshed before checking for the quit key.
            if (cv.waitKey(1) & 0xFF) == ord("q"):
                break
finally:
    cap.release()
    cv.destroyAllWindows()

['Face 0: happy']
Face 0, top-left coordinates: (243, 5), box width: 279, box height 250, score: 0.94
['Face 0: happy']
Face 0, top-left coordinates: (233, 9), box width: 275, box height 255, score: 0.94
['Face 0: happy']
Face 0, top-left coordinates: (236, 8), box width: 268, box height 245, score: 0.94
['Face 0: happy', 'Face 1: happy']
Face 0, top-left coordinates: (254, -2), box width: 264, box height 215, score: 0.92
Face 1, top-left coordinates: (232, 410), box width: 55, box height 67, score: 0.86
['Face 0: neutral']
Face 0, top-left coordinates: (309, 1), box width: 237, box height 140, score: 0.83
['Face 0: happy']
Face 0, top-left coordinates: (218, 275), box width: 39, box height 51, score: 0.82
['Face 0: happy']
Face 0, top-left coordinates: (214, 188), box width: 39, box height 53, score: 0.86
['Face 0: happy']
Face 0, top-left coordinates: (235, 135), box width: 40, box height 51, score: 0.88
['Face 0: happy']
Face 0, top-left coordinates: (210, 114), box width: 46, box h