In [3]:
import cv2
import numpy as np
import tensorflow as tf
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2

In [4]:
# Settings for the handedness text label of draw_landmarks_on_image.
# MARGIN is the vertical gap between the topmost landmark and the text.
MARGIN = 10  # pixels
FONT_SIZE = FONT_THICKNESS = 1
HANDEDNESS_TEXT_COLOR = (88, 205, 54)  # vibrant green


def draw_landmarks_on_image(rgb_image, detection_result, *, draw_handedness=False):
    """Return a copy of ``rgb_image`` with every detected hand drawn on it.

    Args:
        rgb_image: Image array. It is copied; the input is never modified.
        detection_result: Object exposing ``hand_landmarks``, a sequence with
            one entry per hand, each entry being a sequence of landmarks with
            ``x``, ``y`` and ``z`` attributes. When ``draw_handedness`` is
            true it must also expose a parallel ``handedness`` sequence whose
            entries hold categories with a ``category_name``.
        draw_handedness: When true, write the first handedness category name
            of each hand just above its topmost landmark. Defaults to False,
            which draws only the landmarks and connections.

    Returns:
        The annotated copy of ``rgb_image``.
    """
    annotated_image = np.copy(rgb_image)

    for idx, hand_landmarks in enumerate(detection_result.hand_landmarks):
        # The drawing utility takes a NormalizedLandmarkList proto, so the
        # landmarks are repacked into one first.
        hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
        hand_landmarks_proto.landmark.extend(
            landmark_pb2.NormalizedLandmark(x=lm.x, y=lm.y, z=lm.z)
            for lm in hand_landmarks
        )
        solutions.drawing_utils.draw_landmarks(
            annotated_image,
            hand_landmarks_proto,
            solutions.hands.HAND_CONNECTIONS,
            solutions.drawing_styles.get_default_hand_landmarks_style(),
            solutions.drawing_styles.get_default_hand_connections_style())

        if not draw_handedness:
            continue

        # Anchor the text at the top-left corner of the hand's bounding box;
        # landmark coordinates are scaled by the image size to get pixels.
        height, width, _ = annotated_image.shape
        text_x = int(min(lm.x for lm in hand_landmarks) * width)
        text_y = int(min(lm.y for lm in hand_landmarks) * height) - MARGIN

        handedness = detection_result.handedness[idx]
        cv2.putText(annotated_image, f"{handedness[0].category_name}",
                    (text_x, text_y), cv2.FONT_HERSHEY_DUPLEX,
                    FONT_SIZE, HANDEDNESS_TEXT_COLOR, FONT_THICKNESS, cv2.LINE_AA)

    return annotated_image

In [5]:
def landmark_array(landmarks):
    """Collect the ``x``/``y`` coordinates of ``landmarks`` into an array.

    Args:
        landmarks: Iterable of objects with numeric ``x`` and ``y`` attributes.

    Returns:
        A float64 array of shape ``(len(landmarks), 2)`` with column 0 holding
        ``x`` and column 1 holding ``y``. An empty input gives shape ``(0, 2)``.
    """
    # Build the array from the landmarks actually present rather than filling
    # a pre-allocated np.empty((21, 2)): a short input would otherwise leave
    # uninitialised rows behind and a long one would raise IndexError.
    coords = [(landmark.x, landmark.y) for landmark in landmarks]

    # reshape keeps the result two-dimensional even when coords is empty.
    return np.array(coords, dtype=np.float64).reshape(-1, 2)

In [6]:
# Class names looked up by the argmax index of a model prediction:
# the 26 letters A-Z followed by the 'space' and 'del' signs.
labels = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ') + ['space', 'del']

In [7]:
# Load the two saved Keras classifiers: one is used for hands reported as
# 'Left', the other for every other handedness value.
model_left, model_right = (
    tf.keras.models.load_model(model_path)
    for model_path in ('model-left.h5', 'model-right.h5')
)

In [8]:
# Short aliases for the MediaPipe Tasks classes used below.
VisionRunningMode = mp.tasks.vision.RunningMode
HandLandmarkerOptions = mp.tasks.vision.HandLandmarkerOptions
HandLandmarker = mp.tasks.vision.HandLandmarker
BaseOptions = mp.tasks.BaseOptions

# Configure the hand landmarker for single still images (IMAGE running mode),
# backed by the 'hand_landmarker.task' model asset, then instantiate it.
options = HandLandmarkerOptions(
    running_mode=VisionRunningMode.IMAGE,
    base_options=BaseOptions(model_asset_path='hand_landmarker.task'),
)
landmarker = HandLandmarker.create_from_options(options)

In [27]:
def _show_prediction(rgb_image, text):
    """Resize ``rgb_image`` to 480x480, overlay ``text`` and display it.

    Blocks until a key is pressed, then closes all OpenCV windows.
    """
    canvas = cv2.resize(rgb_image, (480, 480))

    # White label near the top-left corner of the resized image.
    cv2.putText(canvas, text, (10, 50), cv2.FONT_HERSHEY_SIMPLEX,
                1.0, (255, 255, 255), 2)

    # The image is converted from RGB to BGR before being handed to imshow.
    cv2.imshow('Prediction', cv2.cvtColor(canvas, cv2.COLOR_RGB2BGR))
    cv2.waitKey(0)  # Wait until a key is pressed
    cv2.destroyAllWindows()  # Close the window


def predict_image(filePath):
    """Classify the hand sign in an image file and show the labelled result.

    The image is run through the module-level ``landmarker``. If no hand is
    found, the image is shown with the text 'nothing'. Otherwise the first
    hand's world landmarks are fed to ``model_left`` or ``model_right``
    (chosen by the detected handedness) and the image is shown with the
    landmarks drawn and the predicted label written on it.

    Args:
        filePath: Path of the image file to load.

    Returns:
        None. The result is only displayed in an OpenCV window, which blocks
        until a key is pressed.
    """
    # Load the input image and detect hand landmarks in it.
    image = mp.Image.create_from_file(filePath)
    detection_result = landmarker.detect(image)

    # No hand detected: show the unannotated image with a placeholder label.
    if not detection_result.handedness:
        _show_prediction(np.copy(image.numpy_view()), 'nothing')
        return

    # Only the first detected hand is classified.
    landmarks = detection_result.hand_world_landmarks[0]
    handedness = detection_result.handedness[0][0].category_name

    # Add a leading batch axis so the array matches the model input shape.
    batch = np.expand_dims(landmark_array(landmarks), axis=0)

    # Any handedness other than 'Left' is routed to the right-hand model.
    model = model_left if handedness == 'Left' else model_right
    prediction = model.predict(batch)

    # Map the highest-scoring class index to its label.
    predicted_class = labels[np.argmax(prediction, axis=1)[0]]

    annotated_image = draw_landmarks_on_image(
        image.numpy_view(), detection_result)
    _show_prediction(annotated_image, predicted_class)

In [12]:
# Classify a single image and display the labelled result.
predict_image(filePath='image.jpg')



In [29]:
import os

# Run the predictor on every file in the left-hand test folder.
test_dir = 'asl_alphabet_test_left'

# os.listdir returns entries in arbitrary order; sorting makes the viewing
# order reproducible between runs.
for entry in sorted(os.listdir(test_dir)):
    entry_path = os.path.join(test_dir, entry)

    # Skip sub-directories and other non-file entries, which cannot be
    # loaded as images.
    if os.path.isfile(entry_path):
        predict_image(entry_path)

