In [1]:
import cv2
import mediapipe as mp
import joblib
import numpy as np

mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands


def data_clean(landmark):
    """Convert the first hand in ``landmark`` into one row of float features.

    The first element of ``landmark`` is rendered with ``str()`` and split
    into lines. Lines that exactly match the known wrapper/unused entries
    (``landmark {``, ``}``, zero ``visibility``/``presence``) are discarded;
    every remaining line is stripped, loses its first two characters (the
    ``x:`` / ``y:`` / ``z:`` label) and is parsed with ``float()``.

    Parameters
    ----------
    landmark : sequence
        Indexable container whose first element prints as a series of
        ``landmark { x: .. y: .. z: .. }`` blocks, e.g.
        ``results.multi_hand_landmarks``.

    Returns
    -------
    list[list[float]] or numpy.ndarray
        ``[[x0, y0, z0, x1, ...]]`` (a single row) when parsing succeeds.
        When anything goes wrong while parsing, a 1-D integer zero array of
        length 63 is returned instead.
        NOTE(review): the two results differ in type and shape (nested list
        vs. flat array), so callers must not rely on ``if result:`` and
        should check the type/shape explicitly.

    Raises
    ------
    IndexError, TypeError
        If ``landmark`` is empty or not indexable; the lookup happens
        outside the ``try`` block on purpose so misuse is not masked.
    """
    data = landmark[0]

    try:
        lines = str(data).strip().split('\n')

        garbage = ('landmark {', '  visibility: 0.0', '  presence: 0.0', '}')

        # [2:] drops the two-character axis label; float() tolerates the
        # single leading space that is left behind.
        features = [
            float(line.strip()[2:])
            for line in lines
            if line not in garbage
        ]

        return [features]

    except Exception:
        # Deliberately best-effort, but no longer a bare ``except:`` -- that
        # also swallowed KeyboardInterrupt/SystemExit, turning a kernel
        # interrupt into a bogus all-zero sample.
        return np.zeros([1, 63], dtype=int)[0]

In [2]:
def calculate_distance(point1, point2):
    """Return the Euclidean distance between two points.

    Parameters
    ----------
    point1, point2 : array_like
        Coordinates of the two points. Lists and tuples are accepted as
        well as NumPy arrays; the shapes must be compatible for
        subtraction.

    Returns
    -------
    numpy.floating
        ``np.linalg.norm`` of the element-wise difference.
    """
    # Coerce first: subtracting two plain Python lists raises TypeError,
    # and data_clean() in this notebook produces lists, not arrays.
    return np.linalg.norm(np.asarray(point1) - np.asarray(point2))

def check_hand_stability(landmarks_history, threshold=10.0):
    """Report whether the hand stayed still across the recorded frames.

    Parameters
    ----------
    landmarks_history : sequence
        Per-frame landmark features, oldest first. Each entry is flattened
        before comparison, so nested rows such as ``[[x0, y0, z0, ...]]``
        are accepted.
    threshold : float, optional
        Largest Euclidean distance allowed between two consecutive frames.
        Defaults to 10.0, the value that used to be hard-coded.
        NOTE(review): if the features are MediaPipe's normalized
        coordinates, this default is probably too loose to ever trigger --
        TODO tune against real recordings.

    Returns
    -------
    bool
        ``False`` as soon as one frame-to-frame step exceeds ``threshold``;
        ``True`` otherwise, including when fewer than two frames are given.
    """
    # Compare frame i with frame i + 1 over the whole history. The previous
    # loop iterated over landmarks_history[0] -- the items of the oldest
    # frame only -- which for a single-row frame has length 1, so the loop
    # body never ran and every history was reported as stable.
    frames = [np.asarray(frame, dtype=float).ravel()
              for frame in landmarks_history]

    for previous, current in zip(frames, frames[1:]):
        if np.linalg.norm(current - previous) > threshold:
            return False
    return True



In [21]:
# Real-time sign recognition from the default webcam.
#
# Every frame is mirrored and run through MediaPipe Hands. For each hand
# labelled 'Right', the landmarks are drawn, converted to features with
# data_clean() and -- once enough frames are buffered and
# check_hand_stability() agrees -- classified by the loaded model. The
# recognised text is drawn on the frame. Press Esc to quit.

# Raw string: '\C', '\s' and '\m' are not valid escape sequences, so the
# non-raw literal only produced the right path by accident and triggers a
# warning on newer Python versions. The path value itself is unchanged.
MODEL_PATH = r'E:\CourseHKII_Grade3\seminar\model_svm.pkl'
WINDOW_NAME = 'Sign Language Recognition'
HISTORY_SIZE = 10    # frames kept for the stability check
VISIBLE_CHARS = 10   # only the tail of the recognised text is drawn
ESC_KEY = 27

# For webcam input:
hands = mp_hands.Hands(min_detection_confidence=0.7,
                       min_tracking_confidence=0.5)
# SECURITY: joblib.load unpickles the file, which can execute arbitrary
# code. Only load model files that come from a trusted source.
clf = joblib.load(MODEL_PATH)
# Open webcam
cap = cv2.VideoCapture(0)

# State carried from frame to frame
previous_prediction = None
predicted_string = ''
landmarks_history = []

try:
    while cap.isOpened():
        success, image = cap.read()

        # Check the read result before touching the frame: on failure
        # `image` is None and cv2.flip would raise instead of ending cleanly.
        if not success:
            break

        # Flip the image horizontally for a selfie-view display and convert
        # the BGR image to RGB for MediaPipe.
        image = cv2.flip(image, 1)
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Process the image with MediaPipe Hands
        results = hands.process(image_rgb)

        if results.multi_hand_landmarks and results.multi_handedness:
            for idx, hand_landmarks in enumerate(results.multi_hand_landmarks):
                handedness = results.multi_handedness[idx].classification[0].label
                if handedness != 'Right':
                    continue

                mp_drawing.draw_landmarks(
                    image, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                # Clean the landmarks of *this* hand. data_clean reads
                # element 0 of its argument, so passing the whole
                # multi_hand_landmarks list always parsed the first hand,
                # even when the right hand was at another index.
                cleaned_landmark = data_clean([hand_landmarks])

                # data_clean returns a nested list on success and a NumPy
                # zero vector on failure; `if array:` raises ValueError, so
                # test the type explicitly and skip unusable frames.
                if not (isinstance(cleaned_landmark, list) and cleaned_landmark):
                    continue

                # Predict only once the buffer has overflowed, i.e. there
                # are HISTORY_SIZE earlier frames to judge stability from.
                landmarks_history.append(cleaned_landmark)
                if len(landmarks_history) > HISTORY_SIZE:
                    landmarks_history.pop(0)
                    if check_hand_stability(landmarks_history):
                        # Make predictions using the trained model
                        y_pred = clf.predict(cleaned_landmark)
                        label = y_pred[0]
                        # Append only when the prediction changes, so a
                        # held sign is not repeated on every frame.
                        if label != previous_prediction:
                            previous_prediction = label
                            if label == 'del':
                                predicted_string = ''
                                previous_prediction = None
                            elif label == 'space':
                                predicted_string += ' '
                            else:
                                predicted_string += label

                # Overlay the tail of the recognised text inside a box
                # sized to fit it.
                visible_text = predicted_string[-VISIBLE_CHARS:]
                (text_width, text_height), _ = cv2.getTextSize(
                    visible_text, cv2.FONT_HERSHEY_SIMPLEX, 1, 2)

                x1 = 50
                y1 = 50
                x2 = x1 + text_width + 20
                y2 = y1 + text_height + 20

                if predicted_string:
                    cv2.rectangle(image, (x1, y1), (x2, y2),
                                  (255, 255, 255), 2)

                cv2.putText(
                    image, visible_text, (x1 + 10, y1 + text_height + 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA
                )

        # Show exactly one frame per iteration, annotated or not.
        cv2.imshow(WINDOW_NAME, image)

        # Break the loop when 'Esc' key is pressed
        if cv2.waitKey(1) == ESC_KEY:
            break
finally:
    # Always release the webcam and close the window, even if the loop
    # raised or the kernel was interrupted; otherwise the camera stays
    # locked until the kernel is restarted.
    cap.release()
    hands.close()
    cv2.destroyAllWindows()

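Note: the model above is loaded with `joblib.load`, which unpickles the file; only load model files from a trusted source, and expect compatibility problems across scikit-learn versions. See https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations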


In [18]:
# Manual cleanup: release the webcam handle `cap` and close all OpenCV
# windows. This repeats the cleanup at the end of the capture cell;
# presumably kept so it can be run by hand when that cell stops early --
# NOTE(review): requires `cap` to exist already, so run the capture cell first.
cap.release()
cv2.destroyAllWindows()