**Preprocessing**

In [3]:
import os
import pickle
import mediapipe as mp
import cv2
import matplotlib.pyplot as plt

In [5]:
# Short aliases for the MediaPipe sub-modules referenced in this notebook.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Hand-landmark detector used for the dataset images: every image is handled
# as an independent still, with a 0.3 detection-confidence threshold.
hands = mp_hands.Hands(
    static_image_mode=True,
    min_detection_confidence=0.3,
)

In [7]:
# Root folder of the image dataset; each sub-folder name is used as the class label.
DATA_DIR = './Dataset'

In [9]:
# Accumulators filled by the extraction loop: one feature vector and one
# class label per image in which a hand was detected.
data, labels = [], []

In [11]:
# Build one feature vector per image. For every detected hand the (x, y)
# coordinates of each landmark are appended, shifted by the smallest x / y
# collected so far for that image. The vector length therefore grows with the
# number of hands detected in the image.
for dir_ in os.listdir(DATA_DIR):
    class_dir = os.path.join(DATA_DIR, dir_)
    # Stray files sitting next to the class folders would make the inner
    # os.listdir() raise NotADirectoryError, so only descend into directories.
    if not os.path.isdir(class_dir):
        continue

    for img_path in os.listdir(class_dir):
        full_path = os.path.join(class_dir, img_path)
        img = cv2.imread(full_path)
        # cv2.imread signals failure by returning None instead of raising;
        # passing None on to cvtColor would abort the whole extraction run.
        if img is None:
            print("Skipping unreadable image:", full_path)
            continue
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        results = hands.process(img_rgb)
        if not results.multi_hand_landmarks:
            # No hand found: the image contributes neither features nor a label.
            continue

        data_aux = []
        x_ = []
        y_ = []
        for hand_landmarks in results.multi_hand_landmarks:
            for landmark in hand_landmarks.landmark:
                x_.append(landmark.x)
                y_.append(landmark.y)

            # The minima are constant for the normalisation pass below, so
            # compute them once instead of once per landmark.
            min_x = min(x_)
            min_y = min(y_)
            for landmark in hand_landmarks.landmark:
                data_aux.append(landmark.x - min_x)
                data_aux.append(landmark.y - min_y)

        data.append(data_aux)
        labels.append(dir_)



In [13]:
# Persist the extracted features and labels. The context manager closes the
# file even if pickle.dump raises part-way through.
with open('digit.pickle', 'wb') as f:
    pickle.dump({'data': data, 'labels': labels}, f)

**Model training**

In [16]:
import numpy as np
import pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [18]:
# Load the feature/label dictionary written by the preprocessing step.
# Opening the file in a `with` block guarantees the handle is closed; the
# original bare open() left it to the garbage collector.
with open('./digit.pickle', 'rb') as f:
    data_dict = pickle.load(f)

In [20]:
import numpy as np

# The feature vectors stored under data_dict['data'] can differ in length.
data_list = data_dict['data']

# Class labels, one per feature vector, as a NumPy array.
labels = np.asarray(data_dict['labels'])

# Width of the longest vector; shorter ones are right-padded up to it.
max_length = max(map(len, data_list))

# Zero-pad every vector on the right and stack the rows into one 2-D array.
padded_data = np.array([
    np.pad(row, (0, max_length - len(row)), mode='constant')
    for row in data_list
])

# Report the shape of the resulting uniform matrix.
print("Merged Data Shape:", padded_data.shape)

Merged Data Shape: (1823, 84)


In [22]:
# Boolean mask that keeps every sample whose label is not the string 'Zero'.
# NOTE(review): the label counts printed further down in this notebook only
# list '0'..'9', so this filter presumably keeps every row — confirm whether
# it is still needed.
mask = labels != 'Zero'
filtered_data, filtered_labels = padded_data[mask], labels[mask]

In [24]:
# Hold out 20% of the samples for evaluation. The split is seeded so that a
# "Restart & Run All" reproduces the same train/test partition (and therefore
# the same metrics); the seed matches the one used for the classifiers.
x_train, x_test, y_train, y_test = train_test_split(
    filtered_data,
    filtered_labels,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

In [26]:
import numpy as np

# Count the samples per class. This uses the unfiltered `labels` array,
# not `filtered_labels`.
unique, counts = np.unique(labels, return_counts=True)
label_distribution = {label: count for label, count in zip(unique, counts)}
print(label_distribution)

{'0': 168, '1': 163, '2': 153, '3': 200, '4': 194, '5': 207, '6': 175, '7': 175, '8': 185, '9': 203}


In [32]:
from sklearn.metrics import classification_report

# Baseline: a very small random forest (5 trees, depth 2). fit() returns the
# estimator itself, so construction and training are chained.
model = RandomForestClassifier(
    n_estimators=5,
    max_depth=2,
    random_state=42,
).fit(x_train, y_train)

# Predict on the held-out split and print per-class precision/recall/F1.
y_predict = model.predict(x_test)
print(classification_report(y_test, y_predict))

              precision    recall  f1-score   support

           0       0.97      0.97      0.97        40
           1       1.00      0.97      0.99        34
           2       0.00      0.00      0.00        31
           3       0.54      0.95      0.69        38
           4       0.84      0.97      0.90        37
           5       1.00      1.00      1.00        43
           6       0.80      0.86      0.83        37
           7       1.00      0.86      0.92        35
           8       1.00      1.00      1.00        37
           9       1.00      0.97      0.98        33

    accuracy                           0.87       365
   macro avg       0.81      0.86      0.83       365
weighted avg       0.83      0.87      0.84       365



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Larger forest (10 trees, depth 5). The original cell only constructed the
# estimator: on a fresh run the later report would still describe the previous
# model's predictions, and the pickle written at the end of this section would
# contain an untrained classifier that cannot predict. Fit it and refresh
# y_predict here so the following cells describe and save this model.
model = RandomForestClassifier(n_estimators=10, max_depth=5, random_state=42)
model.fit(x_train, y_train)
y_predict = model.predict(x_test)

In [None]:
from sklearn.metrics import classification_report
# Per-class metrics for whatever `y_predict` currently holds, compared
# against the held-out labels.
print(classification_report(y_true=y_test, y_pred=y_predict))

In [49]:
# Save the classifier for the detection step. The context manager closes the
# file even if pickling fails.
with open('usl_model_digit.p', 'wb') as f:
    pickle.dump({'model': model}, f)

**Detection check**

In [55]:
import cv2
import mediapipe as mp
import pickle
import numpy as np
from PIL import ImageFont, ImageDraw, Image

# Restore the classifier saved by the training section.
# NOTE: pickle.load executes arbitrary code from the file — only load model
# files you produced yourself.
with open('usl_model_digit.p', 'rb') as model_file:
    model = pickle.load(model_file)['model']

# Map each class label predicted by the model ('0'..'9') to the Urdu digit
# glyph that is drawn on the video frame.
labels_dict = {
    str(value): glyph
    for value, glyph in enumerate(
        ('۰', '۱', '۲', '۳', '۴', '۵', '۶', '۷', '۸', '۹')
    )
}

# MediaPipe hand tracker for the live feed: video mode (not static images),
# at most one hand per frame, 0.5 detection-confidence threshold.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    min_detection_confidence=0.5,
)

# Open the camera at device index 0.
cap = cv2.VideoCapture(0)

# Most recent digit shown, and the previous one used to detect a change.
predicted_digit, last_predicted = "", None

# Live detection loop: grab a frame, extract hand landmarks, classify them and
# overlay the predicted Urdu digit. Press 'q' in the window to stop.
#
# The whole loop runs inside try/finally so the camera and the preview window
# are released even when the cell is interrupted or an unexpected error
# escapes; otherwise the webcam stays locked until the kernel is restarted.

# The TrueType font is loaded on the first successful prediction and reused
# afterwards; the original re-read the font file from disk on every frame.
font = None

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("Webcam not accessible.")
            break

        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(frame_rgb)

        data_aux = []
        x_ = []
        y_ = []

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                for lm in hand_landmarks.landmark:
                    x_.append(lm.x)
                    y_.append(lm.y)

                # Same normalisation as in preprocessing: shift every landmark
                # by the smallest x / y. The minima are computed once rather
                # than once per landmark.
                min_x = min(x_)
                min_y = min(y_)
                for lm in hand_landmarks.landmark:
                    data_aux.append(lm.x - min_x)
                    data_aux.append(lm.y - min_y)

                # Pad to 84 features if necessary (the width of the training
                # matrix printed in the model-training section).
                data_aux.extend([0.0] * (84 - len(data_aux)))

                # Top-left corner of the hand in pixels, with a 10 px margin.
                x1 = int(min_x * frame.shape[1]) - 10
                y1 = int(min_y * frame.shape[0]) - 10

                try:
                    prediction = model.predict([np.asarray(data_aux)])
                    predicted_char = prediction[0]
                    urdu_digit = labels_dict.get(predicted_char, "Unknown")

                    if urdu_digit != last_predicted:
                        predicted_digit = urdu_digit
                        last_predicted = urdu_digit

                    # Display using PIL for Urdu font
                    img_pil = Image.fromarray(frame)
                    draw = ImageDraw.Draw(img_pil)
                    if font is None:
                        font = ImageFont.truetype("Jameel Noori Nastaleeq Kasheeda.ttf", 48)
                    draw.text((x1, y1 - 60), urdu_digit, font=font, fill=(255, 255, 255), stroke_width=2, stroke_fill=(0, 0, 0))
                    frame = np.array(img_pil)

                except Exception as e:
                    # Best effort: report the problem and still show the frame.
                    print("Prediction Error:", e)

        cv2.imshow("Urdu Digit Detection", frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()
