**preprocessing**

In [1]:
import os
import pickle
import mediapipe as mp
import cv2
import matplotlib.pyplot as plt

In [3]:
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

In [5]:
DATA_DIR = "./usl_common_actions"

In [7]:
data = []
labels = []

In [9]:
for dir_ in os.listdir(DATA_DIR):
    for img_path in os.listdir(os.path.join(DATA_DIR, dir_)):
        data_aux = []

        x_ = []
        y_ = []
                
        img = cv2.imread(os.path.join(DATA_DIR, dir_, img_path))
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        results = hands.process(img_rgb)
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                for i in range(len(hand_landmarks.landmark)):
                    x = hand_landmarks.landmark[i].x
                    y = hand_landmarks.landmark[i].y

                    x_.append(x)
                    y_.append(y)

                for i in range(len(hand_landmarks.landmark)):
                    x = hand_landmarks.landmark[i].x
                    y = hand_landmarks.landmark[i].y
                    data_aux.append(x - min(x_))
                    data_aux.append(y - min(y_))

            data.append(data_aux)
            labels.append(dir_)



In [11]:
f = open('common_actions.pickle', 'wb')
pickle.dump({'data': data, 'labels': labels}, f)
f.close()

**model training**

In [13]:
import numpy as np
import pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [15]:
data_dict = pickle.load(open('./common_actions.pickle', 'rb'))

In [17]:
import numpy as np

# Assuming data_dict['data'] contains sequences of varying lengths
data_list = data_dict['data']

# Find the maximum length among all sequences
max_length = max(len(item) for item in data_list)

# Pad sequences with zeros (or another value) to make them uniform
padded_data = np.array([np.pad(item, (0, max_length - len(item)), mode='constant') for item in data_list])

# Resulting uniform NumPy array
print("Merged Data Shape:", padded_data.shape)

labels = np.asarray(data_dict['labels'])

Merged Data Shape: (700, 42)


In [19]:
mask = labels != 'Help'
filtered_data = padded_data[mask]
filtered_labels = labels[mask]

In [21]:
x_train, x_test, y_train, y_test = train_test_split(filtered_data, filtered_labels, test_size=0.2, shuffle=True)

In [23]:
import numpy as np

unique, counts = np.unique(labels, return_counts=True)
label_distribution = dict(zip(unique, counts))
print(label_distribution)

{'help': 100, 'i love you': 100, 'no': 100, 'please': 100, 'thank you': 100, 'yes': 100, 'you': 100}


In [47]:
from sklearn.metrics import classification_report

model = RandomForestClassifier(n_estimators=20, max_depth=2, random_state=42)
model.fit(x_train, y_train)
y_predict = model.predict(x_test)

print(classification_report(y_test, y_predict))

              precision    recall  f1-score   support

        help       1.00      1.00      1.00        21
  i love you       1.00      1.00      1.00        21
          no       1.00      1.00      1.00        15
      please       1.00      1.00      1.00        19
   thank you       1.00      1.00      1.00        26
         yes       1.00      1.00      1.00        20
         you       1.00      1.00      1.00        18

    accuracy                           1.00       140
   macro avg       1.00      1.00      1.00       140
weighted avg       1.00      1.00      1.00       140



In [31]:
model = RandomForestClassifier()

model.fit(x_train, y_train)
y_predict = model.predict(x_test)

score = accuracy_score(y_predict, y_test)
print('Accuracy: {}%'.format(score * 100))

Accuracy: 100.0%


In [27]:
f = open('usl_common_actions.p', 'wb')
pickle.dump({'model': model}, f)
f.close()

**Detection check**

In [1]:
%%writefile test.py
# Import necessary libraries
import streamlit as st
import numpy as np
import mediapipe as mp
import pickle
import cv2
from PIL import Image, ImageFont, ImageDraw

with open('usl_common_actions.p', 'rb') as f:
    model = pickle.load(f)['model']

# Urdu translations for common phrases
labels_dict = {
    'help': 'ŸÖ€åÿ±€å ŸÖÿØÿØ ⁄©ÿ±Ÿà',
    'i love you': 'ŸÖ€å⁄∫ ÿ¢Ÿæ ÿ≥€í Ÿæ€åÿßÿ± ⁄©ÿ±ÿ™ÿß/⁄©ÿ±ÿ™€å €ÅŸà⁄∫',
    'no': 'ŸÜ€Å€å⁄∫',
    'please': 'ÿ®ÿ±ÿß€Å ŸÖ€Åÿ±ÿ®ÿßŸÜ€å',
    'thank you': 'ÿ¢Ÿæ ⁄©ÿß ÿ¥⁄©ÿ±€å€Å',
    'yes': '€Åÿß⁄∫',
    'you': 'ÿ¢Ÿæ'
}
# Set up Streamlit page configuration
st.set_page_config(page_title="Urdu Sign Language", page_icon="üëã")
st.title("Urdu Sign Language Recognition")
st.write("Press the button below to start recognizing Sign Language")

# Load Mediapipe Hands model
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

# Streamlit video capture function
def process_video():
    cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
    stframe = st.empty()  # Placeholder for video frames
    result_box = st.empty()  # Placeholder for predicted results

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            st.warning("Could not access the webcam.")
            break

        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(frame_rgb)

        data_aux = []
        x_ = []
        y_ = []

        if results.multi_hand_landmarks:
            
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                for lm in hand_landmarks.landmark:
                    x_.append(lm.x)
                    y_.append(lm.y)

                for lm in hand_landmarks.landmark:
                    data_aux.append(lm.x - min(x_))
                    data_aux.append(lm.y - min(y_))

                    # Ensure only 42 features (21 landmarks * 2)
                    data_aux = data_aux[:42]

                x1 = int(min(x_) * frame.shape[1]) - 10
                y1 = int(min(y_) * frame.shape[0]) - 10

            
                # Predicting the character
                prediction = model.predict([np.asarray(data_aux)])
                predicted_character = prediction[0]
                predicted_urdu_character = labels_dict.get(predicted_character, "Unknown")

                # Update the result box with the predicted Urdu character
                result_box.text(f"Predicted Urdu Character: {predicted_urdu_character}")

                # Draw bounding box and label
                x1 = int(min(x_) * frame.shape[1]) - 10
                y1 = int(min(y_) * frame.shape[0]) - 10
                x2 = int(max(x_) * frame.shape[1]) + 10
                y2 = int(max(y_) * frame.shape[0]) + 10

                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 0), 4)

                # Create a PIL image from the OpenCV frame
                pil_image = Image.fromarray(frame)
                draw = ImageDraw.Draw(pil_image)

                # Load your Urdu font (make sure to provide the correct path to your font file)
                font_path = './Jameel Noori Nastaleeq Kasheeda.ttf'  # Update this path if necessary
                font = ImageFont.truetype(font_path, 50)

                # Draw the predicted Urdu character
                #draw.text((x1, y1 - 50), predicted_urdu_character, font=font, fill=(0, 0, 0))

                # Convert PIL image back to OpenCV format
                frame = np.array(pil_image)

        # Display the processed video frame
        stframe.image(frame, channels="BGR")

    cap.release()

# Start the video processing function
if st.button("Start"):
    process_video()


Writing test.py


In [3]:
!streamlit run test.py

^C
