# Real-time Emotion and Age Detection from Webcam Feed

This notebook demonstrates how to use pre-trained Hugging Face image-classification models, together with OpenCV for webcam capture and face detection and fastai's image utilities, to perform real-time emotion and age detection on a webcam feed.

## Importing Libraries

First, we need to import the necessary libraries.


In [71]:
import cv2
import torch
import numpy as np
import logging
import os
from collections import deque, Counter
from fastai.vision.all import *
from transformers import AutoModelForImageClassification, AutoImageProcessor

# Configure the root logger: INFO level, "<timestamp> - <level> - <message>" lines
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)


## Setting Up the Device

Ensure PyTorch is using the GPU if available.


In [72]:
# Pick the CUDA device when PyTorch can see a GPU, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
logging.info(f"Using device: {device}")

2024-07-24 03:04:26,239 - INFO - Using device: cuda


## Loading Pre-trained Models and Processors

Load the pre-trained emotion and age detection models along with their respective processors from Hugging Face.


In [73]:
def _load_classifier(model_name):
    """Load a Hugging Face image classifier (moved to `device`) and its image processor."""
    model = AutoModelForImageClassification.from_pretrained(model_name).to(device)
    processor = AutoImageProcessor.from_pretrained(model_name, use_fast=True)
    return model, processor


# Facial-expression classifier and its matching image processor
emotion_model_name = "trpakov/vit-face-expression"
logging.info(f"Loading emotion model: {emotion_model_name}")
emotion_model, emotion_processor = _load_classifier(emotion_model_name)

# Age-range classifier and its matching image processor
age_model_name = "nateraw/vit-age-classifier"
logging.info(f"Loading age model: {age_model_name}")
age_model, age_processor = _load_classifier(age_model_name)


2024-07-24 03:04:26,243 - INFO - Loading emotion model: trpakov/vit-face-expression
2024-07-24 03:04:27,044 - INFO - Loading age model: nateraw/vit-age-classifier


## Setting Up Rolling Windows for Emotions and Ages

Define a rolling window size and initialize two deques of that size: one to store recently detected emotions and one to store recently detected age ranges.


In [74]:
# How many of the most recent accepted predictions each window remembers
ROLLING_WINDOW_SIZE = 15

# One bounded history per attribute; a full deque drops its oldest entry on append
emotion_window, age_window = (deque(maxlen=ROLLING_WINDOW_SIZE) for _ in range(2))


## Loading Haar Cascade for Face Detection

Load OpenCV's pre-trained Haar Cascade for face detection.


In [75]:
# Load OpenCV's pre-trained Haar Cascade for frontal-face detection
face_cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(face_cascade_path)

# CascadeClassifier does not raise when the XML file is missing or unreadable; it is
# simply left empty and only fails later inside detectMultiScale. Fail fast instead.
if face_cascade.empty():
    raise RuntimeError(f"Failed to load Haar cascade from: {face_cascade_path}")


## Defining Helper Functions

Define helper functions for detecting predominantly blue frames, saving and loading the most recently used camera index, and finding an active camera.


In [76]:
def is_image_blue(frame, blue_threshold=50):
    """Report whether blue pixels cover more than `blue_threshold` percent of `frame`.

    The frame is converted from BGR to HSV, and a pixel counts as blue when its
    HSV value lies between [100, 150, 0] and [140, 255, 255].

    Args:
        frame: Image array in BGR channel order.
        blue_threshold: Percentage (0-100) of blue pixels that must be exceeded.

    Returns:
        True when the blue percentage is strictly greater than `blue_threshold`.
    """
    blue_lo = np.array([100, 150, 0])
    blue_hi = np.array([140, 255, 255])
    blue_mask = cv2.inRange(cv2.cvtColor(frame, cv2.COLOR_BGR2HSV), blue_lo, blue_hi)

    rows, cols = blue_mask.shape[:2]
    blue_pixels = np.sum(blue_mask > 0)
    return (blue_pixels / (rows * cols)) * 100 > blue_threshold

# Text file (relative to the working directory) that holds the saved camera index
CAMERA_INDEX_FILE = "camera_index.txt"


def save_camera_index(index):
    """Write `index` as text to CAMERA_INDEX_FILE, replacing any previous content."""
    with open(CAMERA_INDEX_FILE, mode="w") as index_file:
        index_file.write(str(index))

def load_camera_index(path=None):
    """Read the saved camera index, tolerating a missing or corrupt file.

    Args:
        path: File to read. Defaults to CAMERA_INDEX_FILE when omitted.

    Returns:
        The stored index as an int, or None when the file does not exist,
        cannot be read, or does not contain a valid integer.
    """
    if path is None:
        path = CAMERA_INDEX_FILE

    # Open directly instead of checking existence first, so a file that
    # disappears between the check and the open cannot raise.
    try:
        with open(path, "r") as f:
            raw = f.read()
    except FileNotFoundError:
        return None
    except OSError as exc:
        logging.warning(f"Could not read camera index file {path}: {exc}")
        return None

    # An empty or garbled file must not crash start-up; treat it as "nothing saved".
    try:
        return int(raw)
    except ValueError:
        logging.warning(f"Ignoring invalid camera index in {path}: {raw!r}")
        return None

def _camera_delivers_usable_frame(camera_index, blue_threshold):
    """Return True if the camera opens and its first frame is not predominantly blue."""
    cap = cv2.VideoCapture(camera_index)
    try:
        if not cap.isOpened():
            return False
        ret, frame = cap.read()
    finally:
        # Release on every path, including when the device failed to open.
        cap.release()
    if not ret or frame is None:
        return False
    return not is_image_blue(frame, blue_threshold)


def get_active_camera(max_cameras=10, blue_threshold=50):
    """Find the index of a camera that opens and delivers a non-blue frame.

    The most recently saved index is tried first. It is only returned after the
    camera has been probed, so a stale saved index (camera unplugged or
    renumbered) falls back to a fresh scan instead of being trusted blindly.

    Args:
        max_cameras: Number of indices (0 .. max_cameras - 1) to scan.
        blue_threshold: Percentage passed to is_image_blue; frames with a higher
            share of blue pixels are rejected.

    Returns:
        The index of the first usable camera, or None if none was found.
    """
    logging.info(f"Checking up to {max_cameras} cameras for activity...")

    recent_camera_index = load_camera_index()
    if recent_camera_index is not None:
        if _camera_delivers_usable_frame(recent_camera_index, blue_threshold):
            logging.info(f"Active camera found at recent index: {recent_camera_index}")
            return recent_camera_index
        logging.warning(f"Saved camera index {recent_camera_index} is no longer usable; rescanning.")

    for camera_index in range(max_cameras):
        if camera_index == recent_camera_index:
            # Already probed above and rejected.
            continue
        logging.info(f"Checking camera index: {camera_index}")
        if _camera_delivers_usable_frame(camera_index, blue_threshold):
            logging.info(f"Active camera found at index: {camera_index}")
            save_camera_index(camera_index)
            return camera_index

    logging.warning("No active camera found that is not predominantly blue.")
    return None


## Processing Faces

Define a function to process each face in the frame for emotion and age detection.


In [77]:
# Minimum top-class probability for a prediction to enter its rolling window
EMOTION_SCORE_THRESHOLD = 0.8
AGE_SCORE_THRESHOLD = 0.5


def _classify_face(face_image, model, processor):
    """Run one classifier on `face_image` and return (label, probability) of its top class."""
    # Send the inputs to the device of the model that was passed in, rather than
    # relying on a module-level device that may not match it.
    inputs = processor(images=[face_image], return_tensors="pt").to(model.device)

    with torch.no_grad():
        logits = model(**inputs).logits
        probs = torch.nn.functional.softmax(logits, dim=-1)

    class_id = torch.argmax(probs, dim=-1).item()
    return model.config.id2label[class_id], probs[0, class_id].item()


def _vote_if_confident(window, label, score, threshold):
    """Append `label` to `window` when `score` >= `threshold` and return the window's
    most common label; return None (leaving the window untouched) otherwise."""
    if score >= threshold:
        window.append(label)
        return Counter(window).most_common(1)[0][0]
    return None


def process_face(face_img, emotion_model, emotion_processor, age_model, age_processor):
    """Classify emotion and age for one face crop and smooth the labels over time.

    Args:
        face_img: Face crop in BGR channel order (converted to RGB internally).
        emotion_model: Classifier used for the emotion prediction.
        emotion_processor: Image processor matching `emotion_model`.
        age_model: Classifier used for the age prediction.
        age_processor: Image processor matching `age_model`.

    Returns:
        A tuple (most_common_emotion, emotion_score, most_common_age, age_score).
        Each score is the top-class probability for the current crop. Each label is
        the most common entry of its rolling window after adding the current
        prediction, or None when the current score is below its threshold
        (EMOTION_SCORE_THRESHOLD for emotion, AGE_SCORE_THRESHOLD for age).

    Note:
        `emotion_window` and `age_window` are module-level and shared by every call,
        so predictions for different faces are pooled in the same windows.
    """
    face_img_rgb = PILImage.create(cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB))

    emotion, emotion_score = _classify_face(face_img_rgb, emotion_model, emotion_processor)
    age, age_score = _classify_face(face_img_rgb, age_model, age_processor)

    # Only predictions that clear their own threshold are recorded and reported
    most_common_emotion = _vote_if_confident(emotion_window, emotion, emotion_score, EMOTION_SCORE_THRESHOLD)
    most_common_age = _vote_if_confident(age_window, age, age_score, AGE_SCORE_THRESHOLD)

    return most_common_emotion, emotion_score, most_common_age, age_score

## Processing Frames

Define a function to process each frame from the webcam feed.


In [78]:
# Font scale shared by every text overlay drawn on the frame
FONT_SCALE = 0.5


def process_frame(frame, emotion_model, emotion_processor, age_model, age_processor):
    """Detect faces in `frame`, classify each one, and draw the results on the frame.

    Args:
        frame: Image in BGR channel order; it is annotated in place.
        emotion_model: Classifier forwarded to process_face.
        emotion_processor: Image processor forwarded to process_face.
        age_model: Classifier forwarded to process_face.
        age_processor: Image processor forwarded to process_face.

    Returns:
        The same `frame` object with a box per detected face, emotion/age labels
        (only when both labels are available), and the face count drawn on it.
    """
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
    face_count = len(faces)

    # Copy every face crop BEFORE drawing anything. A slice of `frame` is a view, so
    # boxes or labels drawn first (for this face or a neighbouring one) would end up
    # in the pixels handed to the classifiers.
    face_crops = [frame[y:y + h, x:x + w].copy() for (x, y, w, h) in faces]

    for (x, y, w, h), face_img in zip(faces, face_crops):
        most_common_emotion, emotion_score, most_common_age, age_score = process_face(
            face_img, emotion_model, emotion_processor, age_model, age_processor
        )

        cv2.rectangle(frame, (x, y), (x + w, y + h), (120, 120, 120), 2)

        if most_common_emotion and most_common_age:
            cv2.putText(frame, f'Emotion: {most_common_emotion} ({emotion_score:.2f})', (x, y - 30), cv2.FONT_HERSHEY_DUPLEX, FONT_SCALE, (255, 0, 0), 2)
            cv2.putText(frame, f'Age: {most_common_age} ({age_score:.2f})', (x, y - 10), cv2.FONT_HERSHEY_DUPLEX, FONT_SCALE, (0, 0, 255), 2)

    cv2.putText(frame, f'Whole Faces: {face_count}', (10, 30), cv2.FONT_HERSHEY_DUPLEX, FONT_SCALE, (255, 255, 255), 2)
    return frame


## Main Function

Define the main function to capture and process frames from the active camera.


In [79]:
def main():
    """Show the annotated webcam feed in a window until 'q' is pressed or a read fails.

    The camera and the display window are released on every exit path, including
    when the loop is interrupted by an exception such as a kernel interrupt.
    """
    active_camera = get_active_camera()
    if active_camera is None:
        logging.error("No active camera found that is not predominantly blue.")
        return

    cap = cv2.VideoCapture(active_camera)
    try:
        if not cap.isOpened():
            logging.error(f"Could not open camera at index {active_camera}.")
            return

        cv2.namedWindow('Webcam', cv2.WINDOW_NORMAL)
        cv2.setWindowProperty('Webcam', cv2.WND_PROP_TOPMOST, 1)

        while True:
            ret, frame = cap.read()
            if not ret:
                logging.warning("Failed to read a frame from the camera; stopping.")
                break
            processed_frame = process_frame(frame, emotion_model, emotion_processor, age_model, age_processor)
            cv2.imshow('Webcam', processed_frame)
            # Mask to the low byte so the key comparison works across platforms
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Without this, an exception inside the loop would leave the camera locked
        # and the window open for the rest of the kernel session.
        cap.release()
        cv2.destroyAllWindows()


## Run the Application

Execute the main function to start the webcam feed and perform real-time emotion and age detection.


In [80]:
# Start the webcam loop only when this code runs as the top-level module
if __name__ == "__main__":
    main()


2024-07-24 03:04:28,736 - INFO - Checking up to 10 cameras for activity...
2024-07-24 03:04:28,737 - INFO - Active camera found at recent index: 4
