SINGLE FACE DETECTION (MEDIAPIPE + FER)

In [2]:
import cv2
import mediapipe as mp
import time
import tensorflow as tf

# Single-face emotion recognition: MediaPipe finds the face in each webcam
# frame, the face crop is turned into a 48x48 grayscale input, and the FER
# model predicts one of the labels in `labels_dict`. Press 'q' to quit.

# Load the trained model once, before the capture loop starts.
model = tf.keras.models.load_model('FER_64.5acc_0.99loss.h5')
# Maps the index of the model's highest-scoring output to a display label.
labels_dict = {0: 'Angry', 1: 'Disgust', 2: 'Fear', 3: 'Happy', 4: 'Neutral', 5: 'Sad', 6: 'Surprise'}

cap = cv2.VideoCapture(0)

mpFaceDetection = mp.solutions.face_detection
mpDraw = mp.solutions.drawing_utils
faceDetection = mpFaceDetection.FaceDetection(0.75)

try:
    while True:
        success, img = cap.read()
        if not success or img is None:
            # Camera unplugged / busy / stream ended: stop instead of
            # passing None to cvtColor.
            print("Error: Failed to read frame from camera")
            break

        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = faceDetection.process(imgRGB)

        if results.detections:
            # Only the first detection is used; iterate over
            # results.detections instead to handle multiple faces.
            detection = results.detections[0]
            bboxC = detection.location_data.relative_bounding_box
            ih, iw = img.shape[:2]
            bbox = (int(bboxC.xmin * iw), int(bboxC.ymin * ih),
                    int(bboxC.width * iw), int(bboxC.height * ih))

            # The relative box may extend past the frame. Clamp it so that
            # negative coordinates are not interpreted as wrap-around slice
            # indices, which would crop the wrong region.
            x1 = max(bbox[0], 0)
            y1 = max(bbox[1], 0)
            x2 = min(bbox[0] + bbox[2], iw)
            y2 = min(bbox[1] + bbox[3], ih)

            if x2 > x1 and y2 > y1:
                face_img = img[y1:y2, x1:x2]  # Extract the face region

                # Resize the face image to match the input size of the trained model
                face_img = cv2.resize(face_img, (48, 48))

                # Convert the face image to grayscale
                face_gray = cv2.cvtColor(face_img, cv2.COLOR_BGR2GRAY)

                # Normalize the pixel values to be in the range [0, 1]
                face_gray = face_gray / 255.0

                # Reshape the face image to match the input shape of the trained model
                face_gray = face_gray.reshape(1, 48, 48, 1)

                # Predict the emotion using the trained model. This runs only
                # when a valid crop exists, so a stale or undefined input is
                # never fed to the model.
                emotion_probs = model.predict(face_gray)[0]
                emotion_id = tf.argmax(emotion_probs)
                emotion_label = labels_dict[emotion_id.numpy()]

                cv2.putText(img, emotion_label, (bbox[0], bbox[1] - 50), cv2.FONT_HERSHEY_PLAIN, 2, (255, 0, 255), 2)
            else:
                print("Error: Failed to extract face region")

            # The box and detection score are drawn even when the crop failed.
            cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[0] + bbox[2], bbox[1] + bbox[3]), (255, 0, 255), 2)
            cv2.putText(img, f'{int(detection.score[0] * 100)}%', (bbox[0], bbox[1] - 20), cv2.FONT_HERSHEY_PLAIN, 2, (255, 0, 255), 2)

        cv2.imshow("Image", img)
        k = cv2.waitKey(1)
        if k == ord('q'):
            break
finally:
    # Always free the camera, the detector and the window, even if the loop
    # exits through an exception.
    faceDetection.close()
    cap.release()
    cv2.destroyAllWindows()




MULTI-FACE DETECTION (HAAR CASCADE + FER)

In [None]:
import cv2
import numpy as np
from keras.models import load_model

# Multi-face emotion recognition: Haar cascades find faces in each webcam
# frame (frontal first, profile only when no frontal face is found), and the
# FER model labels every detected face. Press 'q' to quit.

# Alternative checkpoints tried with this cell: 'model_file_30epochs.h5',
# 'model_mobnet.h5'.
model=load_model('FER_64.5acc_0.99loss.h5')

video=cv2.VideoCapture(0)

frontalfaceDetect=cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
profilefaceDetect=cv2.CascadeClassifier('haarcascade_profileface.xml')
# Maps the index of the model's highest-scoring output to a display label.
labels_dict={0:'Angry',1:'Disgust', 2:'Fear', 3:'Happy',4:'Neutral',5:'Sad',6:'Surprise'}


def _annotate_faces(frame, gray, faces, detector_tag):
    """Predict and draw an emotion label for every face box in *faces*.

    Args:
        frame: BGR frame that is drawn on in place.
        gray: Grayscale version of *frame* used for the model input crops.
        faces: Iterable of (x, y, w, h) boxes from ``detectMultiScale``.
        detector_tag: Text shown in the top-left corner naming the detector
            that produced the boxes.
    """
    for x, y, w, h in faces:
        sub_face_img = gray[y:y+h, x:x+w]
        resized = cv2.resize(sub_face_img, (48, 48))
        normalize = resized / 255.0
        reshaped = np.reshape(normalize, (1, 48, 48, 1))
        result = model.predict(reshaped)
        label = np.argmax(result, axis=1)[0]
        print(label)
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 0, 255), 2)
        cv2.putText(frame, labels_dict[label], (x, y-5), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
    # The tag is identical for every face, so it is drawn once per frame.
    cv2.putText(frame, detector_tag, (7, 25), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)


try:
    # A cascade whose XML failed to load is "empty"; fail here with a clear
    # message instead of an opaque cv2.error inside detectMultiScale.
    if frontalfaceDetect.empty():
        raise RuntimeError("Failed to load 'haarcascade_frontalface_default.xml'")
    if profilefaceDetect.empty():
        raise RuntimeError("Failed to load 'haarcascade_profileface.xml'")

    while True:
        ret, frame = video.read()
        if not ret or frame is None:
            # Camera unplugged / busy / stream ended: stop instead of
            # passing None to cvtColor.
            print("Error: Failed to read frame from camera")
            break

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Detect frontal faces
        frontal_faces = frontalfaceDetect.detectMultiScale(gray, 1.3, 5)
        if len(frontal_faces) > 0:
            _annotate_faces(frame, gray, frontal_faces, "frontal face")
        else:
            # Detect profile faces only when no frontal face was found
            profile_faces = profilefaceDetect.detectMultiScale(gray, 1.3, 5)
            if len(profile_faces) > 0:
                _annotate_faces(frame, gray, profile_faces, "profile face")

        cv2.imshow("Frame", frame)
        k = cv2.waitKey(1)
        if k == ord('q'):
            break
finally:
    # Always free the camera and the window, even if the loop exits through
    # an exception.
    video.release()
    cv2.destroyAllWindows()
