## Import libraries

In [1]:
import cv2
import matplotlib.pyplot as plt
import pandas as pd
import mediapipe as mp 
import numpy as np
import os
import itertools
import random
import warnings
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler
warnings.filterwarnings('ignore')



## Mediapipe Facemesh

Set up MediaPipe Face Mesh for facial-landmark detection and define the landmark index sets for specific facial regions: eyes, eyebrows, lips, and face contours. We also create a `FaceMesh` detector configured for static images, at most one face per image, refined landmarks, and a minimum detection confidence of 0.5.

In [2]:
def _landmark_ids(connections):
    """Flatten a MediaPipe connection set into a list of distinct landmark indices.

    Each element of `connections` is unpacked into its individual indices;
    duplicates are removed by passing the flattened values through a set.
    """
    return list(set(itertools.chain.from_iterable(connections)))


# Short aliases for the MediaPipe solution modules used in this notebook.
mp_face_mesh = mp.solutions.face_mesh
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Landmark indices grouped by facial region.
LEFT_EYE = _landmark_ids(mp_face_mesh.FACEMESH_LEFT_EYE)
RIGHT_EYE = _landmark_ids(mp_face_mesh.FACEMESH_RIGHT_EYE)
LEFT_EYEBROW = _landmark_ids(mp_face_mesh.FACEMESH_LEFT_EYEBROW)
RIGHT_EYEBROW = _landmark_ids(mp_face_mesh.FACEMESH_RIGHT_EYEBROW)
LIPS = _landmark_ids(mp_face_mesh.FACEMESH_LIPS)
CONTOURS = _landmark_ids(mp_face_mesh.FACEMESH_CONTOURS)
# Presumably the nose landmark: index 1 is the reference point named `nose`
# in the feature-extraction cell.
OTHER = [1]

# Detector for still images: one face per image, refined landmarks, and a
# minimum detection confidence of 0.5.
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=True,
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.5,
)

## Extract face features

Compute the Euclidean distance, in both 2D and 3D, from the nose landmark (index 1) to each selected facial landmark (eyes, eyebrows, and lips) detected by MediaPipe Face Mesh, and store these distances in a DataFrame together with the emotion label for use in emotion detection. To keep the processing time short, we randomly sample 100 images for each emotion.

In [5]:
def euc2d(a, b):
    """Return the Euclidean distance between `a` and `b` in the x-y plane.

    Only the first two components (index 0 and 1) of each point are used;
    any further components are ignored.
    """
    dx = a[0] - b[0]
    dy = a[1] - b[1]
    return np.sqrt(dx * dx + dy * dy)

def euc3d(a, b):
    """Return the Euclidean distance between `a` and `b` in three dimensions.

    The first three components (index 0, 1 and 2) of each point are used.
    """
    dx = a[0] - b[0]
    dy = a[1] - b[1]
    dz = a[2] - b[2]
    return np.sqrt(dx * dx + dy * dy + dz * dz)

TRAIN_DIR = 'final_data/1/train'
SAMPLES_PER_EMOTION = 100  # cap per class to keep landmark extraction fast
SAMPLING_SEED = 42         # fixed so a fresh "Run All" selects the same images

# Landmarks whose distance to the nose makes up the feature vector.
FEATURE_LANDMARKS = LEFT_EYE + RIGHT_EYE + LEFT_EYEBROW + RIGHT_EYEBROW + LIPS

# One sub-directory per emotion; the position in this list is the class label.
# NOTE(review): os.listdir returns entries in arbitrary order, so the
# label <-> emotion mapping can differ between machines. Consider sorting here
# and everywhere else labels are derived from a directory listing.
emotions = os.listdir(TRAIN_DIR)

# One column per 2D distance, then one per 3D distance, then the label.
# Derived from the landmark list rather than hard-coded so the frame always
# matches the rows produced below.
feature_columns = [f"{i}" for i in range(len(FEATURE_LANDMARKS) * 2)] + ["y"]

sampler = random.Random(SAMPLING_SEED)
rows = []
for label, emotion in enumerate(emotions):
    emotion_dir = os.path.join(TRAIN_DIR, emotion)
    # Sort before sampling so the seeded sample does not depend on listing order.
    images = sorted(os.listdir(emotion_dir))
    # A class may contain fewer images than the cap; random.sample would raise
    # ValueError if asked for more items than are available.
    selected_images = sampler.sample(images, min(SAMPLES_PER_EMOTION, len(images)))
    for image in selected_images:
        img = cv2.imread(os.path.join(emotion_dir, image))
        if img is None:
            # cv2.imread returns None for unreadable / non-image files.
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # NOTE(review): the third positional argument of cv2.GaussianBlur is
        # sigmaX, so cv2.BORDER_DEFAULT is used here as the blur sigma rather
        # than as a border mode. Left unchanged so the features stay consistent
        # with any other cell that applies the same preprocessing; confirm intent.
        img = cv2.GaussianBlur(img, (3, 3), cv2.BORDER_DEFAULT)
        results = face_mesh.process(img)
        if not results.multi_face_landmarks:
            # No face detected: skip the image instead of emitting a row.
            continue
        shape = np.array(
            [(lmk.x, lmk.y, lmk.z) for lmk in results.multi_face_landmarks[0].landmark]
        )
        nose = shape[1]  # reference point for all distances
        shape = shape[FEATURE_LANDMARKS]
        distances2d = [round(euc2d(nose, x), 6) for x in shape]
        distances3d = [round(euc3d(nose, x), 6) for x in shape]
        rows.append(distances2d + distances3d + [label])

# Build the frame once from the collected rows; appending with
# `.loc[len(df)]` inside the loop re-allocates the frame on every row.
face_features = pd.DataFrame(rows, columns=feature_columns)

## Preprocess Dataset

Shuffles the training data, standardizes the features using StandardScaler, converts the labels to one-hot (categorical) format, and reshapes the feature array to add an extra trailing dimension before it is passed to the MLP model.

In [6]:
SHUFFLE_SEED = 42  # fixed so the row order is reproducible on a fresh run

# Randomize the row order so samples of the same emotion are not contiguous.
face_features = shuffle(face_features, random_state=SHUFFLE_SEED)

# Every column except the last is a distance feature; the last one is the label.
# Explicit dtypes guard against an object-dtype frame reaching the scaler.
X = face_features.iloc[:, :-1].to_numpy(dtype=np.float64)
y = face_features.iloc[:, -1].to_numpy(dtype=np.int64)

# Standardize each feature column to zero mean and unit variance.
scaler = StandardScaler()
X_train = scaler.fit_transform(X)

# Pin the one-hot width to the number of emotion classes. Without num_classes
# the width is inferred from max(y) + 1, which comes out too small when the
# highest-index emotion produced no detected faces and then no longer matches
# the model's output layer.
y_train = to_categorical(y, num_classes=len(emotions))

# Append a trailing axis of size 1: (n_samples, n_features) -> (n_samples, n_features, 1).
# NOTE(review): a Dense-only model normally takes 2-D input; confirm that this
# extra axis is still required by the cells that consume X_train.
X_train = X_train[..., np.newaxis]

## Model

Define a Sequential model with two hidden Dense layers of 128 and 64 neurons using ReLU activation for feature extraction, followed by an output layer with softmax activation for multi-class classification. Note that the code below does not contain any dropout layers; add `Dropout` layers between the Dense layers if regularization against overfitting is needed. The model is compiled with the Adam optimizer, the categorical cross-entropy loss function, and the accuracy metric, and is trained for 20 epochs.

In [7]:
TRAIN_SEED = 42  # fixed so weight initialization and batch order are reproducible
EPOCHS = 20

# Seeds Python's `random`, NumPy and TensorFlow in one call.
tf.keras.utils.set_random_seed(TRAIN_SEED)

# Fully connected classifier: two ReLU hidden layers and a softmax output with
# one unit per emotion class.
# The input is declared with an explicit Input object rather than an
# `input_shape=` argument on the first Dense layer, which is the form
# recommended for Sequential models.
# NOTE(review): the preprocessing cell appends a trailing size-1 axis to
# X_train, so the data is 3-D while the declared input is (n_features,);
# confirm that this mismatch is intended.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(len(emotions), activation='softmax'),
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Only training metrics are reported; no validation data is passed to fit().
history = model.fit(X_train, y_train, epochs=EPOCHS)

Epoch 1/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.2062 - loss: 2.1206
Epoch 2/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.3057 - loss: 1.8294
Epoch 3/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.3387 - loss: 1.7345
Epoch 4/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.3706 - loss: 1.6479
Epoch 5/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4492 - loss: 1.5743
Epoch 6/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 979us/step - accuracy: 0.4414 - loss: 1.5398
Epoch 7/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.4600 - loss: 1.5127
Epoch 8/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4763 - loss: 1.4835
Epoch 9/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m