In [1]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input

img_size = (224, 224)  
data_dir = r"D:\arabic_datases_cnn\mediapipe - Copy (2)"

# datagen = ImageDataGenerator(
#     preprocessing_function=preprocess_input,
#     validation_split=0.2,
#     zoom_range=0.05,
#     rotation_range=10,
#     width_shift_range=0.05,
#     height_shift_range=0.05,
#     shear_range=0.05
# )
val_split = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=0.2,
)

train_data = val_split.flow_from_directory(
    data_dir,
    target_size=img_size,
    batch_size=16,
    class_mode='categorical',
    subset='training',
    shuffle =True
)

val_data = val_split.flow_from_directory(
    data_dir,
    target_size=img_size,
    batch_size=16,
    class_mode='categorical',
    subset='validation',
    shuffle =False
)

Found 12461 images belonging to 31 classes.
Found 3106 images belonging to 31 classes.


In [4]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.optimizers import Adam

base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
# for layer in base_model.layers:
#     layer.trainable = False  

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.3)(x)

predictions = Dense(train_data.num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

In [5]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.optimizers import Adam

early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6)

checkpoint = ModelCheckpoint(
    'best_model.h5',
    monitor='val_accuracy',
    save_best_only=True,
    mode='max'
)

model.compile(optimizer=Adam(learning_rate=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])

model.fit(train_data, validation_data=val_data, epochs=20, callbacks=[early_stop, reduce_lr, checkpoint])

Epoch 1/20
[1m779/779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.3143 - loss: 2.4486



[1m779/779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1556s[0m 2s/step - accuracy: 0.3146 - loss: 2.4474 - val_accuracy: 0.9121 - val_loss: 0.2908 - learning_rate: 1.0000e-04
Epoch 2/20
[1m779/779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.9080 - loss: 0.3294



[1m779/779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1691s[0m 2s/step - accuracy: 0.9080 - loss: 0.3294 - val_accuracy: 0.9652 - val_loss: 0.1174 - learning_rate: 1.0000e-04
Epoch 3/20
[1m779/779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1811s[0m 2s/step - accuracy: 0.9603 - loss: 0.1469 - val_accuracy: 0.9598 - val_loss: 0.1525 - learning_rate: 1.0000e-04
Epoch 4/20
[1m779/779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.9706 - loss: 0.1011



[1m779/779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1737s[0m 2s/step - accuracy: 0.9706 - loss: 0.1011 - val_accuracy: 0.9778 - val_loss: 0.0978 - learning_rate: 1.0000e-04
Epoch 5/20
[1m779/779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.9763 - loss: 0.0891



[1m779/779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1738s[0m 2s/step - accuracy: 0.9763 - loss: 0.0891 - val_accuracy: 0.9842 - val_loss: 0.0611 - learning_rate: 1.0000e-04
Epoch 6/20
[1m779/779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1739s[0m 2s/step - accuracy: 0.9825 - loss: 0.0707 - val_accuracy: 0.9775 - val_loss: 0.0851 - learning_rate: 1.0000e-04
Epoch 7/20
[1m779/779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1728s[0m 2s/step - accuracy: 0.9867 - loss: 0.0482 - val_accuracy: 0.9730 - val_loss: 0.1073 - learning_rate: 1.0000e-04
Epoch 8/20
[1m779/779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1755s[0m 2s/step - accuracy: 0.9855 - loss: 0.0591 - val_accuracy: 0.9746 - val_loss: 0.1180 - learning_rate: 1.0000e-04
Epoch 9/20
[1m779/779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.9875 - loss: 0.0467



[1m779/779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1772s[0m 2s/step - accuracy: 0.9875 - loss: 0.0467 - val_accuracy: 0.9845 - val_loss: 0.0719 - learning_rate: 1.0000e-04
Epoch 10/20
[1m779/779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.9902 - loss: 0.0414



[1m779/779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1754s[0m 2s/step - accuracy: 0.9902 - loss: 0.0414 - val_accuracy: 0.9868 - val_loss: 0.0677 - learning_rate: 1.0000e-04
Epoch 11/20
[1m779/779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.9970 - loss: 0.0124



[1m779/779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1749s[0m 2s/step - accuracy: 0.9970 - loss: 0.0124 - val_accuracy: 0.9913 - val_loss: 0.0510 - learning_rate: 2.0000e-05
Epoch 12/20
[1m779/779[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1816s[0m 2s/step - accuracy: 0.9986 - loss: 0.0064 - val_accuracy: 0.9913 - val_loss: 0.0418 - learning_rate: 2.0000e-05
Epoch 13/20
[1m  3/779[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m33:48[0m 3s/step - accuracy: 0.9826 - loss: 0.0171  

KeyboardInterrupt: 

In [6]:
from sklearn.metrics import confusion_matrix, classification_report
# import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# عدد الصور في مجموعة التحقق
val_steps = val_data.samples // val_data.batch_size + 1

# توقعات الموديل على مجموعة التحقق
predictions = model.predict(val_data, steps=val_steps)
y_pred = np.argmax(predictions, axis=1)
y_true = val_data.classes

# أسماء الأصناف
class_names = list(val_data.class_indices.keys())

# حساب مصفوفة الالتباس
cm = confusion_matrix(y_true, y_pred)

# طباعة التقرير الكامل
print("Classification Report:\n")
print(classification_report(y_true, y_pred, target_names=class_names))

# رسم مصفوفة الالتباس
# plt.figure(figsize=(16, 12))
# sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
# plt.xlabel("Predicted")
# plt.ylabel("True")
# plt.title("Confusion Matrix")
# plt.xticks(rotation=90)
# plt.yticks(rotation=0)
# plt.tight_layout()
# plt.show()


[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 386ms/step
Classification Report:

              precision    recall  f1-score   support

         Ain       0.99      1.00      1.00       100
          Al       1.00      1.00      1.00       100
        Alef       1.00      1.00      1.00       102
         Beh       0.98      0.99      0.99       101
         Dad       1.00      1.00      1.00       100
         Dal       0.98      0.99      0.99       100
         Feh       0.98      0.99      0.99       100
       Ghain       1.00      1.00      1.00       100
         Hah       0.98      1.00      0.99       100
         Heh       0.99      0.94      0.96       100
        Jeem       1.00      0.97      0.98       100
         Kaf       0.99      1.00      1.00       100
        Khah       1.00      1.00      1.00       100
         Laa       0.99      1.00      1.00       100
         Lam       1.00      1.00      1.00       100
        Meem       0.99      1.00

In [7]:
model.save('pre_99.h5')



In [2]:
import cv2
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import img_to_array
import os

model = load_model("pre_99.h5")
labels = sorted(os.listdir(r"D:\arabic_datases_cnn\mediapipe - Copy (2)"))  # تأكد أن ترتيب المجلدات هو نفس ترتيب التدريب
IMAGE_SIZE = 224

cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret:
        break

    frame = cv2.flip(frame, 1)  # Flip the frame horizontally
    roi = cv2.resize(frame, (IMAGE_SIZE, IMAGE_SIZE))
    roi = img_to_array(roi)
    roi = roi.astype("float32") / 255.0
    roi = np.expand_dims(roi, axis=0)

    pred = model.predict(roi)[0]
    index = np.argmax(pred)
    label = labels[index]
    confidence = pred[index]

    cv2.putText(frame, f'{label} ({confidence:.2f})', (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0,255,0), 2)
    cv2.imshow("Real-Time Sign Language Recognition", frame)

    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

cap.release()
cv2.destroyAllWindows()



In [2]:
import os
labels = sorted(os.listdir(r"D:\arabic_dataset\ARSL-No-BackGround-V2"))  # تأكد أن ترتيب المجلدات هو نفس ترتيب التدريب
labels

['Ain',
 'Al',
 'Alef',
 'Beh',
 'Dad',
 'Dal',
 'Feh',
 'Ghain',
 'Hah',
 'Heh',
 'Jeem',
 'Kaf',
 'Khah',
 'Laa',
 'Lam',
 'Meem',
 'Noon',
 'Qaf',
 'Reh',
 'Sad',
 'Seen',
 'Sheen',
 'Tah',
 'Teh',
 'Teh_Marbuta',
 'Thal',
 'Theh',
 'Waw',
 'Yeh',
 'Zah',
 'Zain']

In [None]:
import cv2
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import img_to_array
import os
import mediapipe as mp

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5)

model = load_model("final_model.h5")
labels = sorted(os.listdir(r"D:\arabic_dataset\ARSL-No-BackGround-V2")) 
# labels_dict = {
#     'Ain':'ع','Al':'ال','Alef':'أ','Beh':'ب','Dad':'ض','Dal':'د','Feh':'ف','Ghain':'غ','Hah':'ح',
#     'Heh':'ه','Jeem':'ج','Kaf':'ك','Khah':'خ','Laa':'لا','Lam':'ل','Meem':'م','Noon':'ن','Qaf':'ق',
#     'Reh':'ر','Sad':'ص','Seen':'س','Sheen':'ش','Tah':'ط','Teh':'ت','Teh_Marbuta':'ة','Thal':'ذ','Theh':'ث','Waw':'و','Yeh':'ي','Zah':'ظ','Zain':'ز'
# }
IMAGE_SIZE = 224

cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret:
        break

    frame = cv2.flip(frame, 1)  # Flip the frame horizontally
    H, W, _ = frame.shape
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = hands.process(frame_rgb)

    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            mp_drawing.draw_landmarks(
                frame, hand_landmarks,
                mp_hands.HAND_CONNECTIONS,
                mp_drawing_styles.get_default_hand_landmarks_style(),
                mp_drawing_styles.get_default_hand_connections_style()
            )

            x_ = []
            y_ = []
            data_aux = []

            for lm in hand_landmarks.landmark:
                x_.append(lm.x)
                y_.append(lm.y)

            for lm in hand_landmarks.landmark:
                data_aux.append(lm.x - min(x_))
                data_aux.append(lm.y - min(y_))

        x1 = int(min(x_) * W) - 20
        y1 = int(min(y_) * H) - 20
        x2 = int(max(x_) * W) + 20
        y2 = int(max(y_) * H) + 20

        roi = cv2.resize(frame, (IMAGE_SIZE, IMAGE_SIZE))
        roi = img_to_array(roi)
        roi = roi.astype("float32") / 255.0
        roi = np.expand_dims(roi, axis=0)

        pred = model.predict(roi)[0]
        index = np.argmax(pred)
        label = labels[index]
        confidence = pred[index]

        cv2.putText(frame, f'{label} ({confidence:.2f})', (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0,255,0), 2)
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.imshow("Real-Time Sign Language Recognition", frame)

    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

cap.release()
cv2.destroyAllWindows()



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 409ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 163ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 106ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 228ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 398ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 111ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 186ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 175ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 205ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 113ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m