In [1]:
import os
import pickle
import mediapipe as mp
import cv2
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import LabelEncoder
import pandas as pd

In [12]:
# Short aliases for the MediaPipe sub-modules used by the cells below.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Shared hand-landmark detector: up to two hands, detections below 0.7
# confidence are discarded.
# NOTE(review): static_image_mode=True is also used for the webcam stream
# further down — confirm that per-frame detection (rather than tracking) is
# intended.
hands = mp_hands.Hands(
    static_image_mode=True,
    min_detection_confidence=0.7,
    max_num_hands=2,
)

I0000 00:00:1749614729.351744 41475700 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M4 Pro


In [None]:
def mediapipe_detection(image, model):
    """Run a MediaPipe model on a BGR frame.

    Args:
        image: Frame in OpenCV's BGR channel order.
        model: Object exposing ``process(rgb_image)`` (e.g. the ``hands``
            detector created earlier in this notebook).

    Returns:
        A ``(image, results)`` tuple: the frame converted to RGB and back to
        BGR, and whatever ``model.process`` returned for the RGB frame.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The frame is read-only while the model looks at it and writable again
    # afterwards (presumably so MediaPipe can avoid copying it — verify).
    rgb_frame.flags.writeable = False
    detections = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), detections

In [None]:
import cv2
import numpy as np
import time
from keras.models import load_model
import arabic_reshaper
from bidi.algorithm import get_display
from PIL import Image, ImageDraw, ImageFont

# Trained Keras classifier used for per-frame letter prediction
modelLSTM = load_model(
    '/Users/ahmedyouness/Sign Language Model/AsL_detection.h5'
)

# Default webcam (device index 0)
cap = cv2.VideoCapture(0)

# Class index -> Arabic letter.
# The romanized names follow the alphabetical order of the class folder names
# listed elsewhere in this notebook — presumably the label-encoder order used
# at training time; verify before changing.
label_map = {
    0: 'ع',   # Ain
    1: 'ا',   # Alef
    2: 'ب',   # Beh
    3: 'ض',   # Dad
    4: 'د',   # Dal
    5: 'ف',   # Feh
    6: 'غ',   # Ghain
    7: 'ح',   # Hah
    8: 'ه',   # Heh
    9: 'ج',   # Jeem
    10: 'ك',  # Kaf
    11: 'خ',  # Khah
    12: 'ل',  # Lam
    13: 'م',  # Meem
    14: 'ن',  # Noon
    15: 'ق',  # Qaf
    16: 'ر',  # Reh
    17: 'ص',  # Sad
    18: 'س',  # Seen
    19: 'ش',  # Sheen
    20: 'ط',  # Tah
    21: 'ت',  # Teh
    22: 'ذ',  # Thal
    23: 'ث',  # Theh
    24: 'و',  # Waw
    25: 'ي',  # Yeh
    26: 'ظ',  # Zah
    27: 'ز',  # Zain
}

# Mutable recognition state shared with the capture loop
letter_buffer, word_buffer = [], []   # letters of the word in progress / finished words
last_letter = None                    # most recently accepted letter (debounce)
current_word = current_sentence = ""  # last formed word / sentence shown on screen
last_detected_time = time.time()      # when a letter was last accepted
detection_count = {}                  # letter -> number of confident detections

# Accuracy and timing settings
CONFIDENCE_THRESHOLD = 0.85  # predictions at or below this confidence are ignored
DETECTION_REPEAT = 3         # detections required before a letter is accepted
LETTER_DELAY = 0.5           # minimum seconds between two accepted letters

# Font used to draw Arabic text; change this to a valid Arabic-capable font file
ARABIC_FONT_PATH = "/System/Library/Fonts/Supplemental/Arial Unicode.ttf"

def render_arabic_text(text):
    """Prepare Arabic text for left-to-right drawing routines.

    The text is first passed through ``arabic_reshaper.reshape`` and the
    result through ``bidi``'s ``get_display``; the final string is returned.
    """
    return get_display(arabic_reshaper.reshape(text))

def draw_text_with_pil(image, text, position, font_path, font_size=32, color=(255, 255, 255)):
    """Render ``text`` onto an OpenCV frame via Pillow and return a new frame.

    Args:
        image: Frame in OpenCV's BGR channel order.
        text: String to draw (already shaped/reordered by the caller).
        position: ``(x, y)`` passed straight to ``ImageDraw.text``.
        font_path: Path to a TrueType font file.
        font_size: Font size passed to ``ImageFont.truetype``.
        color: Fill colour; applied to the RGB Pillow image.

    Returns:
        The annotated frame, converted back to BGR.
    """
    # Pillow works in RGB, OpenCV in BGR: convert on the way in and out.
    rgb_pixels = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    canvas = Image.fromarray(rgb_pixels)

    typeface = ImageFont.truetype(font_path, font_size)
    ImageDraw.Draw(canvas).text(position, text, font=typeface, fill=color)

    annotated_rgb = np.array(canvas)
    return cv2.cvtColor(annotated_rgb, cv2.COLOR_RGB2BGR)

# Real-time recognition loop.
#
# Reads webcam frames, detects hands, classifies the sign, and assembles the
# accepted letters into words and a sentence that are drawn on the frame.
# Press 'q' in the video window to exit.
#
# Relies on names defined in other cells: cap, hands, modelLSTM, label_map,
# the buffers/settings, mediapipe_detection, draw_styled_landmarks,
# extract_keypoints, render_arabic_text and draw_text_with_pil.
# NOTE(review): extract_keypoints is not defined in any cell visible in this
# notebook — make sure it is defined before running this cell.

WORD_PAUSE_SECONDS = 3     # Idle seconds (no new letter / backspace) that end the current word
BACKSPACE_COOLDOWN = 1.0   # Minimum seconds between two backspace deletions
BANNER_HEIGHT = 120        # Height in pixels of the black text banner
last_backspace_time = 0.0  # Timestamp of the most recent backspace deletion

if not cap.isOpened():
    print("Error: Could not open webcam")

# try/finally guarantees the camera and the window are released even when the
# loop body raises (e.g. a helper from another cell has not been defined yet).
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("Error: Could not read frame")
            break

        # Perform hand detection. The returned image copy is not needed
        # because all drawing happens on the original BGR frame.
        _, results = mediapipe_detection(frame, hands)

        predicted_character = ""    # Default empty prediction
        backspace_detected = False  # Default backspace flag

        if results.multi_hand_landmarks:
            draw_styled_landmarks(frame, results)  # Draw hand landmarks

            # Keep only the first 42 values (per the original author: the
            # first hand's keypoints) as a single-row batch.
            keypoints = extract_keypoints(results)
            keypoints = np.array(keypoints[:42]).reshape(1, -1)

            # A second detected hand is the backspace gesture
            backspace_detected = len(results.multi_hand_landmarks) > 1

            # verbose=0 stops Keras from printing a progress bar every frame
            prediction = modelLSTM.predict(keypoints, verbose=0)
            predicted_class = int(np.argmax(prediction))
            confidence = prediction[0, predicted_class]

            # Only confident predictions take part in letter detection
            if confidence > CONFIDENCE_THRESHOLD:
                predicted_character = label_map[predicted_class]

                # Count consecutive confident detections of the same letter:
                # a different letter breaks the streak, so scattered
                # misclassifications can no longer add up to an accepted letter.
                if predicted_character not in detection_count:
                    detection_count.clear()
                detection_count[predicted_character] = detection_count.get(predicted_character, 0) + 1

                # Accept the letter once it has been seen often enough, it
                # differs from the previously accepted letter, and the
                # minimum delay between letters has passed.
                if detection_count[predicted_character] >= DETECTION_REPEAT:
                    now = time.time()
                    if predicted_character != last_letter and (now - last_detected_time) > LETTER_DELAY:
                        letter_buffer.append(predicted_character)
                        last_detected_time = now           # Reset timer
                        last_letter = predicted_character  # Update last letter
                        detection_count.clear()            # Reset count to avoid duplicates

        # Handle backspace gesture (second hand raised). The cooldown makes a
        # raised hand delete one item at a time instead of one item per frame.
        if backspace_detected:
            now = time.time()
            if now - last_backspace_time > BACKSPACE_COOLDOWN:
                if letter_buffer:
                    print("Backspace Detected! Removing last letter.")
                    letter_buffer.pop()  # Remove last letter
                elif word_buffer:
                    print("Backspace Detected! Removing last word.")
                    word_buffer.pop()  # Remove last word
                last_backspace_time = now
            last_detected_time = now  # Keep the word-pause timer from firing mid-gesture

        # A long enough pause ends the current word
        current_time = time.time()
        if current_time - last_detected_time > WORD_PAUSE_SECONDS:
            if letter_buffer:
                current_word = "".join(letter_buffer)  # Form word
                word_buffer.append(current_word)       # Add word to sentence
                letter_buffer.clear()                  # Clear letter buffer
                print(f"Word Detected: {current_word}")

        # Form full sentence
        current_sentence = " ".join(word_buffer)

        # Shape and reorder the Arabic strings for display
        display_word = render_arabic_text(current_word)
        display_sentence = render_arabic_text(current_sentence)
        display_letter = render_arabic_text(predicted_character)

        # Black banner behind the text, spanning the full frame width
        cv2.rectangle(frame, (0, 0), (frame.shape[1], BANNER_HEIGHT), (0, 0, 0), -1)

        # Use PIL to draw Arabic text correctly
        frame = draw_text_with_pil(frame, f"Sentence: {display_sentence}", (50, 10), ARABIC_FONT_PATH, 32, (255, 255, 255))
        frame = draw_text_with_pil(frame, f"Word: {display_word}", (50, 50), ARABIC_FONT_PATH, 32, (255, 255, 255))
        frame = draw_text_with_pil(frame, f"Letter: {display_letter}", (50, 90), ARABIC_FONT_PATH, 32, (0, 255, 0))

        # Show video output
        cv2.imshow('Real-Time Sign Language Recognition', frame)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break  # Press 'q' to exit
finally:
    # Release resources
    cap.release()
    cv2.destroyAllWindows()




NameError: name 'mediapipe_detection' is not defined

In [14]:
def draw_styled_landmarks(image, results):
    """Draw every detected hand's landmarks and connections onto ``image``.

    Args:
        image: Frame to draw on; it is passed to ``mp_drawing.draw_landmarks``.
        results: Detection result exposing ``multi_hand_landmarks``; nothing
            is drawn when that attribute is empty or ``None``.
    """
    for detected_hand in results.multi_hand_landmarks or ():
        landmark_style = mp_drawing_styles.get_default_hand_landmarks_style()
        connection_style = mp_drawing_styles.get_default_hand_connections_style()
        mp_drawing.draw_landmarks(
            image,
            detected_hand,
            mp_hands.HAND_CONNECTIONS,
            landmark_style,
            connection_style,
        )

W0000 00:00:1749614729.357886 41479212 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


In [16]:
# Folder that holds one sub-directory per sign class
DATA_DIR = r"/Users/ahmedyouness/Sign Language Model/char_data"

# Class names are the directory entries, in the order os.listdir returns them
actions = list(os.listdir(DATA_DIR))
actions

['Reh',
 'Qaf',
 'Jeem',
 'Theh',
 'Waw',
 'Sad',
 'Lam',
 'Ghain',
 'Ain',
 'Kaf',
 'Alef',
 'Teh',
 'Seen',
 'Feh',
 'Khah',
 'Zain',
 'Noon',
 'Beh',
 'Heh',
 'Dad',
 'Sheen',
 'Hah',
 'Dal',
 'Meem',
 'Thal',
 'Yeh',
 'Zah',
 'Tah']

In [17]:
import keras
import tensorflow as tf
# Record the library versions this notebook was run with (Keras, then TensorFlow)
print(keras.__version__, tf.__version__, sep="\n")


3.10.0
2.19.0


In [18]:
from keras.models import load_model
# Load the trained classifier.
# NOTE(review): this path (under AndroidStudioProjects/Signtotext) differs from
# the path used by the other cells of this notebook — confirm which copy of the
# model file is the intended one.
modelLSTM = load_model(
    "/Users/ahmedyouness/AndroidStudioProjects/Signtotext/Sign Language Model/AsL_detection.h5"
)




: 

In [1]:
import cv2
import numpy as np
import time
from keras.models import load_model
import arabic_reshaper
from bidi.algorithm import get_display
from PIL import Image, ImageDraw, ImageFont

# Trained Keras classifier used for per-frame letter prediction
modelLSTM = load_model(
    '/Users/ahmedyouness/Sign Language Model/AsL_detection.h5'
)

# Default webcam (device index 0)
cap = cv2.VideoCapture(0)

# Class index -> Arabic letter.
# The romanized names follow the alphabetical order of the class folder names
# listed elsewhere in this notebook — presumably the label-encoder order used
# at training time; verify before changing.
label_map = {
    0: 'ع',   # Ain
    1: 'ا',   # Alef
    2: 'ب',   # Beh
    3: 'ض',   # Dad
    4: 'د',   # Dal
    5: 'ف',   # Feh
    6: 'غ',   # Ghain
    7: 'ح',   # Hah
    8: 'ه',   # Heh
    9: 'ج',   # Jeem
    10: 'ك',  # Kaf
    11: 'خ',  # Khah
    12: 'ل',  # Lam
    13: 'م',  # Meem
    14: 'ن',  # Noon
    15: 'ق',  # Qaf
    16: 'ر',  # Reh
    17: 'ص',  # Sad
    18: 'س',  # Seen
    19: 'ش',  # Sheen
    20: 'ط',  # Tah
    21: 'ت',  # Teh
    22: 'ذ',  # Thal
    23: 'ث',  # Theh
    24: 'و',  # Waw
    25: 'ي',  # Yeh
    26: 'ظ',  # Zah
    27: 'ز',  # Zain
}

# Mutable recognition state shared with the capture loop
letter_buffer, word_buffer = [], []   # letters of the word in progress / finished words
last_letter = None                    # most recently accepted letter (debounce)
current_word = current_sentence = ""  # last formed word / sentence shown on screen
last_detected_time = time.time()      # when a letter was last accepted
detection_count = {}                  # letter -> number of confident detections

# Accuracy and timing settings
CONFIDENCE_THRESHOLD = 0.85  # predictions at or below this confidence are ignored
DETECTION_REPEAT = 3         # detections required before a letter is accepted
LETTER_DELAY = 0.5           # minimum seconds between two accepted letters

# Font used to draw Arabic text; change this to a valid Arabic-capable font file
ARABIC_FONT_PATH = "/System/Library/Fonts/Supplemental/Arial Unicode.ttf"

def render_arabic_text(text):
    """Prepare Arabic text for left-to-right drawing routines.

    The text is first passed through ``arabic_reshaper.reshape`` and the
    result through ``bidi``'s ``get_display``; the final string is returned.
    """
    return get_display(arabic_reshaper.reshape(text))

def draw_text_with_pil(image, text, position, font_path, font_size=32, color=(255, 255, 255)):
    """Render ``text`` onto an OpenCV frame via Pillow and return a new frame.

    Args:
        image: Frame in OpenCV's BGR channel order.
        text: String to draw (already shaped/reordered by the caller).
        position: ``(x, y)`` passed straight to ``ImageDraw.text``.
        font_path: Path to a TrueType font file.
        font_size: Font size passed to ``ImageFont.truetype``.
        color: Fill colour; applied to the RGB Pillow image.

    Returns:
        The annotated frame, converted back to BGR.
    """
    # Pillow works in RGB, OpenCV in BGR: convert on the way in and out.
    rgb_pixels = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    canvas = Image.fromarray(rgb_pixels)

    typeface = ImageFont.truetype(font_path, font_size)
    ImageDraw.Draw(canvas).text(position, text, font=typeface, fill=color)

    annotated_rgb = np.array(canvas)
    return cv2.cvtColor(annotated_rgb, cv2.COLOR_RGB2BGR)

# Real-time recognition loop.
#
# Reads webcam frames, detects hands, classifies the sign, and assembles the
# accepted letters into words and a sentence that are drawn on the frame.
# Press 'q' in the video window to exit.
#
# Relies on names defined in other cells: cap, hands, modelLSTM, label_map,
# the buffers/settings, mediapipe_detection, draw_styled_landmarks,
# extract_keypoints, render_arabic_text and draw_text_with_pil.
# NOTE(review): extract_keypoints is not defined in any cell visible in this
# notebook — make sure it is defined before running this cell.

WORD_PAUSE_SECONDS = 3     # Idle seconds (no new letter / backspace) that end the current word
BACKSPACE_COOLDOWN = 1.0   # Minimum seconds between two backspace deletions
BANNER_HEIGHT = 120        # Height in pixels of the black text banner
last_backspace_time = 0.0  # Timestamp of the most recent backspace deletion

if not cap.isOpened():
    print("Error: Could not open webcam")

# try/finally guarantees the camera and the window are released even when the
# loop body raises (e.g. a helper from another cell has not been defined yet).
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("Error: Could not read frame")
            break

        # Perform hand detection. The returned image copy is not needed
        # because all drawing happens on the original BGR frame.
        _, results = mediapipe_detection(frame, hands)

        predicted_character = ""    # Default empty prediction
        backspace_detected = False  # Default backspace flag

        if results.multi_hand_landmarks:
            draw_styled_landmarks(frame, results)  # Draw hand landmarks

            # Keep only the first 42 values (per the original author: the
            # first hand's keypoints) as a single-row batch.
            keypoints = extract_keypoints(results)
            keypoints = np.array(keypoints[:42]).reshape(1, -1)

            # A second detected hand is the backspace gesture
            backspace_detected = len(results.multi_hand_landmarks) > 1

            # verbose=0 stops Keras from printing a progress bar every frame
            prediction = modelLSTM.predict(keypoints, verbose=0)
            predicted_class = int(np.argmax(prediction))
            confidence = prediction[0, predicted_class]

            # Only confident predictions take part in letter detection
            if confidence > CONFIDENCE_THRESHOLD:
                predicted_character = label_map[predicted_class]

                # Count consecutive confident detections of the same letter:
                # a different letter breaks the streak, so scattered
                # misclassifications can no longer add up to an accepted letter.
                if predicted_character not in detection_count:
                    detection_count.clear()
                detection_count[predicted_character] = detection_count.get(predicted_character, 0) + 1

                # Accept the letter once it has been seen often enough, it
                # differs from the previously accepted letter, and the
                # minimum delay between letters has passed.
                if detection_count[predicted_character] >= DETECTION_REPEAT:
                    now = time.time()
                    if predicted_character != last_letter and (now - last_detected_time) > LETTER_DELAY:
                        letter_buffer.append(predicted_character)
                        last_detected_time = now           # Reset timer
                        last_letter = predicted_character  # Update last letter
                        detection_count.clear()            # Reset count to avoid duplicates

        # Handle backspace gesture (second hand raised). The cooldown makes a
        # raised hand delete one item at a time instead of one item per frame.
        if backspace_detected:
            now = time.time()
            if now - last_backspace_time > BACKSPACE_COOLDOWN:
                if letter_buffer:
                    print("Backspace Detected! Removing last letter.")
                    letter_buffer.pop()  # Remove last letter
                elif word_buffer:
                    print("Backspace Detected! Removing last word.")
                    word_buffer.pop()  # Remove last word
                last_backspace_time = now
            last_detected_time = now  # Keep the word-pause timer from firing mid-gesture

        # A long enough pause ends the current word
        current_time = time.time()
        if current_time - last_detected_time > WORD_PAUSE_SECONDS:
            if letter_buffer:
                current_word = "".join(letter_buffer)  # Form word
                word_buffer.append(current_word)       # Add word to sentence
                letter_buffer.clear()                  # Clear letter buffer
                print(f"Word Detected: {current_word}")

        # Form full sentence
        current_sentence = " ".join(word_buffer)

        # Shape and reorder the Arabic strings for display
        display_word = render_arabic_text(current_word)
        display_sentence = render_arabic_text(current_sentence)
        display_letter = render_arabic_text(predicted_character)

        # Black banner behind the text, spanning the full frame width
        cv2.rectangle(frame, (0, 0), (frame.shape[1], BANNER_HEIGHT), (0, 0, 0), -1)

        # Use PIL to draw Arabic text correctly
        frame = draw_text_with_pil(frame, f"Sentence: {display_sentence}", (50, 10), ARABIC_FONT_PATH, 32, (255, 255, 255))
        frame = draw_text_with_pil(frame, f"Word: {display_word}", (50, 50), ARABIC_FONT_PATH, 32, (255, 255, 255))
        frame = draw_text_with_pil(frame, f"Letter: {display_letter}", (50, 90), ARABIC_FONT_PATH, 32, (0, 255, 0))

        # Show video output
        cv2.imshow('Real-Time Sign Language Recognition', frame)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break  # Press 'q' to exit
finally:
    # Release resources
    cap.release()
    cv2.destroyAllWindows()




NameError: name 'mediapipe_detection' is not defined

In [None]:
import tensorflow as tf

# Source Keras checkpoint and destination of the converted TFLite model
H5_MODEL_PATH = "/Users/ahmedyouness/Sign Language Model/AsL_detection.h5"
TFLITE_MODEL_PATH = "/Users/ahmedyouness/Sign Language Model/AsL_detection.tflite"

# Restore the trained Keras model from its .h5 file
model = tf.keras.models.load_model(H5_MODEL_PATH)

# Build a converter for the in-memory model and run the conversion
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Write the converted model to disk in binary mode
with open(TFLITE_MODEL_PATH, "wb") as tflite_file:
    tflite_file.write(tflite_model)

print("Conversion successful!")



INFO:tensorflow:Assets written to: /var/folders/cj/d5mr3czd1zj6_718j3t120gw0000gn/T/tmpg7i9h8i2/assets


INFO:tensorflow:Assets written to: /var/folders/cj/d5mr3czd1zj6_718j3t120gw0000gn/T/tmpg7i9h8i2/assets


Saved artifact at '/var/folders/cj/d5mr3czd1zj6_718j3t120gw0000gn/T/tmpg7i9h8i2'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 42), dtype=tf.float32, name='input_layer')
Output Type:
  TensorSpec(shape=(None, 28), dtype=tf.float32, name=None)
Captures:
  14233429264: TensorSpec(shape=(), dtype=tf.resource, name=None)
  14233432144: TensorSpec(shape=(), dtype=tf.resource, name=None)
  14233429072: TensorSpec(shape=(), dtype=tf.resource, name=None)
  14233433680: TensorSpec(shape=(), dtype=tf.resource, name=None)
  14233435984: TensorSpec(shape=(), dtype=tf.resource, name=None)
  14233435216: TensorSpec(shape=(), dtype=tf.resource, name=None)
  14233435024: TensorSpec(shape=(), dtype=tf.resource, name=None)
  14233430608: TensorSpec(shape=(), dtype=tf.resource, name=None)
  14233430800: TensorSpec(shape=(), dtype=tf.resource, name=None)
  14233434064: TensorSpec(shape=(), dtype=tf.resource, name=None)
  14233424080: Tensor

W0000 00:00:1749591867.803865 40880787 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1749591867.804020 40880787 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.


Conversion successful!


I0000 00:00:1749591867.807789 40880787 mlir_graph_optimization_pass.cc:425] MLIR V1 optimization pass is not enabled


: 