## 1. Common MediaPipe functions

In [1]:
import os
import cv2
import numpy as np
import mediapipe as mp


# Shortcuts to the MediaPipe solution modules used by the cells below.
mp_face_mesh = mp.solutions.face_mesh
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# A single FaceMesh detector, created once and reused by DetectLandmark.
# NOTE(review): static_image_mode=False asks MediaPipe to treat its input as a
# video stream, yet this same instance is also fed the unrelated dataset images
# in PrepareTrainset - confirm that this is intended.
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=False,
    max_num_faces=1,
    refine_landmarks=False,
    min_detection_confidence=0.5,
)

def DetectLandmark(frame):
    """Run the shared FaceMesh detector on one OpenCV frame.

    Args:
        frame: Image in OpenCV's BGR channel order, or ``None`` (for example
            the result of a ``cv2.imread`` call that could not read the file).

    Returns:
        The MediaPipe result object when at least one face was found,
        otherwise ``None``.
    """
    # cv2.imread reports an unreadable file by returning None; handing that to
    # cvtColor would raise, so it is reported as "no face" instead.
    if frame is None:
        return None

    # The detector is fed RGB data, while OpenCV frames are BGR.
    image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    result = face_mesh.process(image_rgb)
    return result if result.multi_face_landmarks else None



def flatten_face_landmark(frame, landmarks, target_size=256):
    """Convert detected face landmarks into a flat, box-normalised vector.

    For every detected face the landmark bounding box is computed in pixel
    coordinates, grown into a square along its shorter side (keeping it
    centred), and each landmark is then expressed relative to that square,
    scaled so that the square measures ``target_size`` units per side.

    Args:
        frame: Image array whose ``shape`` starts with ``(height, width)``,
            or ``None``.
        landmarks: Result returned by ``DetectLandmark`` (an object exposing
            ``multi_face_landmarks``), or ``None``.
        target_size: Side length of the normalised square. Defaults to 256.

    Returns:
        A 1-D integer numpy array ``[x0, y0, x1, y1, ...]`` covering every
        usable face, or ``None`` when there is nothing to flatten.

    Raises:
        ValueError: If ``target_size`` is not positive.
    """
    if target_size <= 0:
        raise ValueError("target_size must be positive")
    if frame is None or landmarks is None or not landmarks.multi_face_landmarks:
        return None

    # Only height and width are needed, so any channel layout is accepted.
    frame_height, frame_width = frame.shape[:2]
    results = []

    for face_landmarks in landmarks.multi_face_landmarks:
        # Landmark coordinates are fractions of the frame; convert to pixels.
        points = [
            (point.x * frame_width, point.y * frame_height)
            for point in face_landmarks.landmark
        ]
        if not points:
            continue

        # The bounding box is taken over the truncated pixel positions and is
        # seeded with the frame extents (left/top start at the far edge,
        # right/bottom at 0).
        pixel_xs = [int(px) for px, _ in points]
        pixel_ys = [int(py) for _, py in points]
        left = min(frame_width, *pixel_xs)
        top = min(frame_height, *pixel_ys)
        right = max(0, *pixel_xs)
        bottom = max(0, *pixel_ys)

        width = right - left
        height = bottom - top

        # Grow the shorter side so the box becomes a centred square; the
        # half-difference is deliberately kept fractional.
        if width > height:
            top -= (width - height) / 2
            side = width
        else:
            left -= (height - width) / 2
            side = height

        # A degenerate box (all landmarks on one pixel) cannot be scaled.
        if side <= 0:
            continue
        ratio = side / target_size

        for px, py in points:
            results.append([int((px - left) / ratio), int((py - top) / ratio)])

    if not results:
        return None
    return np.array(results).flatten()
    
# Landmark-dot styles. Colour tuples follow OpenCV's BGR order, which is why
# (0, 0, 255) is the red one and (0, 255, 0) the green one.
RED_DOT, GREEN_DOT = (
    mp_drawing.DrawingSpec(color=bgr, thickness=1, circle_radius=1)
    for bgr in ((0, 0, 255), (0, 255, 0))
)

# Matching styles for the lines that connect the landmarks.
RED_LINE, GREEN_LINE = (
    mp_drawing.DrawingSpec(color=bgr, thickness=1)
    for bgr in ((0, 0, 255), (0, 255, 0))
)

    
def DrawLandmark(frame, landmarks, dot_style, line_style):
    """Overlay the face-mesh tessellation of one face onto ``frame``.

    Args:
        frame: Image handed to MediaPipe as the drawing target.
        landmarks: Landmark list of a single face.
        dot_style: Drawing spec used for the landmark points.
        line_style: Drawing spec used for the connecting lines.
    """
    draw_options = {
        "image": frame,
        "landmark_list": landmarks,
        "connections": mp_face_mesh.FACEMESH_TESSELATION,
        "landmark_drawing_spec": dot_style,
        "connection_drawing_spec": line_style,
    }
    mp_drawing.draw_landmarks(**draw_options)

## 2. Common SVM functions

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import pickle

def PrepareTrainset(dataset_dir):
    """Build the feature matrix and label vector from a folder-per-class dataset.

    ``dataset_dir`` is expected to contain sub-folders named
    ``<class_index>.<class_name>`` (for example ``0.straight``), each holding
    the images of that class. Entries that do not fit this layout, files that
    OpenCV cannot read and images without a detectable face are reported on
    stdout and skipped.

    Args:
        dataset_dir: Root directory of the dataset.

    Returns:
        Tuple ``(features, labels)`` of numpy arrays: one flattened landmark
        vector per usable image and the matching integer class index.
    """
    features = []
    labels = []

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order reproducible.
    for folder_name in sorted(os.listdir(dataset_dir)):
        folder_path = os.path.join(dataset_dir, folder_name)
        if not os.path.isdir(folder_path):
            continue

        # Only the numeric prefix in front of the first dot is used as label.
        index_text, separator, _ = folder_name.partition('.')
        try:
            if not separator:
                raise ValueError(folder_name)
            class_index = int(index_text)
        except ValueError:
            print(folder_name + " is not a '<index>.<name>' class folder, skipped")
            continue

        for img_name in sorted(os.listdir(folder_path)):
            img_path = os.path.join(folder_path, img_name)
            frame = cv2.imread(img_path)
            if frame is None:
                # imread yields None for anything it cannot decode.
                print(img_name + " could not be read")
                continue

            flattened = flatten_face_landmark(frame, DetectLandmark(frame))
            if flattened is None:
                print(img_name + " is None")
                continue

            features.append(flattened)
            labels.append(class_index)

    return np.array(features), np.array(labels)

# Train the SVM model
def train_svm(X_train, y_train):
    """Fit a linear-kernel support vector classifier.

    Args:
        X_train: Training feature vectors, one row per sample.
        y_train: Class label for each training sample.

    Returns:
        The fitted ``SVC`` instance.
    """
    # Swap the kernel here to experiment with non-linear decision boundaries.
    classifier = SVC(kernel='linear')
    classifier.fit(X_train, y_train)
    return classifier

# Save the model
def save_model(svm_model, filename):
    """Serialise ``svm_model`` with pickle into the file ``filename``.

    Args:
        svm_model: Object to persist (the trained classifier).
        filename: Destination path; an existing file is overwritten.
    """
    with open(filename, mode='wb') as model_file:
        pickle.dump(svm_model, model_file)

# Load the model
def load_model(filename):
    """Restore a pickled model from ``filename`` and announce it on stdout.

    Args:
        filename: Path of a file previously written by ``save_model``.

    Returns:
        The unpickled object.
    """
    # NOTE: unpickling executes code embedded in the file, so only load model
    # files that come from a trusted source.
    with open(filename, mode='rb') as model_file:
        restored = pickle.load(model_file)
    print(f"Model loaded from {filename}")
    return restored


## 3. Training the SVM model

In [5]:
from util import GetCurrentDir


# Label names, looked up by the integer class index the model predicts.
class_names = ['straight', 'up', 'down', 'left', 'right']  # Add your classes
# NOTE: despite the .xml suffix the file holds a pickle (see save_model).
model_filename = 'svm_face_direction.xml'  # Specify your desired filename
dataset_dir = os.path.join(GetCurrentDir(), "Dataset")

# Turn every dataset image into a flattened landmark vector plus class index.
images, labels = PrepareTrainset(dataset_dir)

# Hold back 20% of the samples for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    images,
    labels,
    test_size=0.2,
)

# Fit the classifier on the training portion.
svm_model = train_svm(X_train, y_train)

# Round-trip the model through disk so the persisted copy is the one evaluated.
save_model(svm_model, model_filename)
loaded_svm_model = load_model(model_filename)

# Score the reloaded model on the held-out samples.
y_pred = loaded_svm_model.predict(X_test)
print(f"Accuracy with loaded model: {accuracy_score(y_test, y_pred) * 100:.2f}%")

2022-10-02_17-33-15_G5Eu4oG9_V.jpg is None
2024-09-24_23-01-19.jpg is None
2024-09-21_15-59-07_XlKlPECMno.jpg is None
Model loaded from svm_face_direction.xml
Accuracy with loaded model: 93.33%


### Predict a single image

In [98]:
# Classify the face direction in one image file.
image_path = r"C:\Users\vohungvi\Downloads\2023-02-02_06-43-45_ynIZpOogSa.jpg"
frame = cv2.imread(image_path)

if frame is None:
    # cv2.imread returns None when the file is missing or cannot be decoded.
    print(f"Could not read image: {image_path}")
else:
    # flatten_face_landmark needs the detection result as its second argument.
    landmarks = DetectLandmark(frame)
    flattedLandmarks = flatten_face_landmark(frame, landmarks)

    if flattedLandmarks is None:
        print("No face detected")
    else:
        # The classifier expects a 2-D array: one row per sample.
        flattedLandmarks = flattedLandmarks.reshape(1, -1)
        prediction = loaded_svm_model.predict(flattedLandmarks)
        print(prediction)
        predicted_class = class_names[prediction[0]]
        print(predicted_class)

[0]
straight


## Webcam

In [68]:
# 4. Test with Camera and Predict using SVM Model
import datetime, time

def test_with_camera(svm_model, class_names, enableDrawMesh):
    """Classify the face direction on live webcam frames and show the result.

    Frames are read from camera 0 until reading fails or ``q`` is pressed.
    Keyboard controls while the window has focus:

    * ``q``     - quit.
    * ``Enter`` - save the current unannotated frame as a timestamped JPEG in
      the working directory and print its name.
    * ``Space`` - toggle the mesh overlay.

    Args:
        svm_model: Fitted classifier exposing ``predict``.
        class_names: Sequence mapping a predicted class index to its label.
        enableDrawMesh: Whether the mesh overlay is initially drawn.
    """
    cap = cv2.VideoCapture(0)
    # Saved frames are named using a fixed UTC+7 wall-clock time.
    local_tz = datetime.timezone(datetime.timedelta(hours=7))

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Annotations go on a copy so the raw frame can still be saved.
            frameDraw = frame.copy()

            landmarks = DetectLandmark(frame)
            flattedLandmarks = flatten_face_landmark(frame, landmarks)

            if flattedLandmarks is not None:
                # The classifier expects a 2-D array: one row per sample.
                prediction = svm_model.predict(flattedLandmarks.reshape(1, -1))
                predicted_class = class_names[prediction[0]]

                cv2.putText(frameDraw, f"Prediction: {predicted_class}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)

                if enableDrawMesh and landmarks is not None:
                    # Green mesh for "straight", red for every other class.
                    # No dot style is passed: only the connecting lines are styled.
                    line_style = GREEN_LINE if predicted_class == "straight" else RED_LINE
                    DrawLandmark(frameDraw, landmarks.multi_face_landmarks[0], None, line_style)

            cv2.imshow('Camera Feed', frameDraw)

            # Keep only the low byte: some backends set extra high bits.
            key = cv2.waitKey(1) & 0xFF

            if key == ord('q'):
                break

            if key == 13:  # Enter
                imageName = datetime.datetime.now(local_tz).strftime("%Y-%m-%d_%H-%M-%S") + ".jpg"
                cv2.imwrite(imageName, frame)
                print(imageName)
            elif key == 32:  # Space
                enableDrawMesh = not enableDrawMesh
    finally:
        # Free the camera and close the window even if a frame raised.
        cap.release()
        cv2.destroyAllWindows()

# Start the live demo with the mesh overlay switched on (Space toggles it).
enableDrawMesh = True
test_with_camera(
    svm_model=loaded_svm_model,
    class_names=class_names,
    enableDrawMesh=enableDrawMesh,
)