In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        input_path = os.path.join(dirname, filename)
        print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, LSTM, Dense, TimeDistributed, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import classification_report, confusion_matrix

# Dataset location on Kaggle (make sure this path is correct).
# NOTE(review): the recorded run reports "Found 0 images belonging to 0 classes"
# for this directory -- verify that it really points at an image dataset.
data_dir = '/kaggle/input/machine-learning-toolkits-awesome-ml-resources'

# Stop immediately when the dataset directory is missing.
dataset_missing = not os.path.exists(data_dir)
if dataset_missing:
    raise FileNotFoundError(f"Directory {data_dir} does not exist. Please check the path.")

# Hyperparameters
img_width = 64
img_height = 64
batch_size = 32   # number of sequences per batch
epochs = 20
num_classes = 29  # adjust to the number of sign-language classes
timesteps = 10    # number of frames in each sequence

# Custom Data Generator for Sequences
def sequence_generator(directory, datagen, batch_size, timesteps, img_size, subset):
    """Build an endless generator of fixed-length image sequences.

    Images are pulled from ``datagen.flow_from_directory`` in chunks of
    ``batch_size * timesteps``; every ``timesteps`` consecutive images of a
    chunk are stacked into one sequence, and the sequence is labelled with
    the class of its first image.

    Args:
        directory: Image root directory handed to ``flow_from_directory``.
        datagen: Object exposing ``flow_from_directory`` (an
            ``ImageDataGenerator`` in this notebook).
        batch_size: Number of sequences requested per batch.
        timesteps: Number of frames per sequence.
        img_size: Two-element tuple used as ``target_size`` and as the two
            spatial dimensions of the reshaped batch.
        subset: Subset name forwarded to ``flow_from_directory``
            (``'training'`` or ``'validation'`` in this notebook).

    Returns:
        A generator yielding ``(x, y)`` where ``x`` has shape
        ``(n_sequences, timesteps, img_size[0], img_size[1], 3)`` and ``y`` is
        the one-hot label matrix with the module-level ``num_classes`` columns.

    Raises:
        ValueError: If the subset holds fewer than ``timesteps`` images, i.e.
            not even one sequence can be formed. Without this check the
            generator would yield empty batches forever.
    """
    generator = datagen.flow_from_directory(
        directory,
        target_size=img_size,
        batch_size=batch_size * timesteps,  # load enough images to build the sequences
        class_mode='sparse',  # integer labels; converted to one-hot below
        subset=subset
    )

    # Fail fast instead of feeding zero-length batches into fit()/evaluate().
    if generator.samples < timesteps:
        raise ValueError(
            f"Found {generator.samples} images in {directory!r} (subset={subset!r}); "
            f"at least {timesteps} are required to build one sequence."
        )

    # NOTE(review): flow_from_directory is called with its default ordering,
    # which presumably shuffles images -- if so, the frames of one sequence
    # come from unrelated images/classes and only the first frame's label is
    # kept. Confirm this is intended.
    def _sequence_batches():
        while True:
            x, y = next(generator)

            # Number of complete sequences that fit in this chunk.
            n_sequences = x.shape[0] // timesteps
            if n_sequences == 0:
                # Trailing chunk too short for a single sequence: skip it.
                continue

            # Drop the leftover images so the count is divisible by timesteps.
            usable = n_sequences * timesteps
            x = x[:usable].reshape(
                (n_sequences, timesteps, img_size[0], img_size[1], 3)
            )

            # One label per sequence: the label of its first frame.
            y = y[:usable:timesteps]

            # Convert the integer labels to one-hot encoding.
            yield x, to_categorical(y, num_classes=num_classes)

    return _sequence_batches()

# Data preprocessing: scale pixel values by 1/255 and reserve 20% of the
# images as the validation subset.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Throw-away iterators used only to count the images in each subset.
# They get their own names so that `train_generator` / `validation_generator`
# always refer to the sequence generators built further down.
train_counter = datagen.flow_from_directory(
    data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size * timesteps,
    class_mode='sparse',  # integer labels
    subset='training'
)

val_counter = datagen.flow_from_directory(
    data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size * timesteps,
    class_mode='sparse',  # integer labels
    subset='validation'
)

# Total number of training and validation samples
total_train_samples = train_counter.samples
total_val_samples = val_counter.samples

# Steps per epoch: one step consumes batch_size * timesteps images.
steps_per_epoch = total_train_samples // (batch_size * timesteps)
validation_steps = total_val_samples // (batch_size * timesteps)

# A step count of 0 makes fit() run an unbounded epoch on empty batches
# (the recorded output shows "16077/Unknown" with loss 0), so stop here.
if steps_per_epoch == 0 or validation_steps == 0:
    raise ValueError(
        f"Not enough images under {data_dir}: found {total_train_samples} training and "
        f"{total_val_samples} validation images, but each step needs "
        f"{batch_size * timesteps}. Check data_dir or lower batch_size/timesteps."
    )

# Labels >= num_classes cannot be one-hot encoded for the model's output layer.
found_classes = len(train_counter.class_indices)
if found_classes > num_classes:
    raise ValueError(
        f"Found {found_classes} classes in {data_dir} but num_classes is {num_classes}."
    )

# Sequence generators actually used for training and validation
train_generator = sequence_generator(
    data_dir,
    datagen,
    batch_size,
    timesteps,
    (img_width, img_height),
    subset='training'
)

validation_generator = sequence_generator(
    data_dir,
    datagen,
    batch_size,
    timesteps,
    (img_width, img_height),
    subset='validation'
)

# CNN-LSTM model: the same small CNN is applied to every frame through
# TimeDistributed, an LSTM then reads the per-frame feature vectors, and a
# dense head produces the class probabilities.
sequence_input_shape = (timesteps, img_width, img_height, 3)

model = Sequential([
    # Per-frame convolutional feature extractor
    TimeDistributed(Conv2D(32, (3, 3), activation='relu'), input_shape=sequence_input_shape),
    TimeDistributed(MaxPooling2D(pool_size=(2, 2))),
    TimeDistributed(Conv2D(64, (3, 3), activation='relu')),
    TimeDistributed(MaxPooling2D(pool_size=(2, 2))),
    TimeDistributed(Conv2D(128, (3, 3), activation='relu')),
    TimeDistributed(MaxPooling2D(pool_size=(2, 2))),
    TimeDistributed(Flatten()),

    # Temporal part: only the final LSTM state is kept
    LSTM(128, return_sequences=False),
    Dropout(0.5),

    # Classifier head
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])

# One-hot targets, hence categorical cross-entropy
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer summary
model.summary()

# Train the model
print("Training the model...")
history = model.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    validation_data=validation_generator,
    validation_steps=validation_steps,
    epochs=epochs
)

# Evaluate the model. The "test data" here is the validation generator;
# no separate test set is loaded in this cell.
print("Evaluating the model on the test data...")
test_loss, test_accuracy = model.evaluate(validation_generator, steps=validation_steps)
print(f'Test Loss: {test_loss:.4f}')
print(f'Test Accuracy: {test_accuracy:.4f}')

# Predict and collect the true labels from the SAME batches.
# Calling predict() on the generator and then drawing labels from it in a
# second pass pairs predictions with labels of different batches, which makes
# the report and confusion matrix meaningless.
print("Predicting on the test data...")
y_true = []
batch_predictions = []
for _ in range(validation_steps):
    x_batch, y_batch = next(validation_generator)
    batch_predictions.append(model.predict_on_batch(x_batch))
    y_true.extend(np.argmax(y_batch, axis=1))

if batch_predictions:
    y_pred = np.concatenate(batch_predictions, axis=0)
else:
    # No validation steps: keep y_pred a well-formed (0, num_classes) array.
    y_pred = np.empty((0, num_classes))
y_pred_classes = np.argmax(y_pred, axis=1)

# Classification Report
print("Classification Report:")
print(classification_report(y_true, y_pred_classes))

# Confusion Matrix
print("Confusion Matrix:")
print(confusion_matrix(y_true, y_pred_classes))

# Save the model
model.save('sign_language_cnn_lstm_model.h5')
print("Model saved successfully.")

Found 0 images belonging to 0 classes.
Found 0 images belonging to 0 classes.


Training the model...
Found 0 images belonging to 0 classes.
Epoch 1/20
  16077/Unknown 503s 31ms/step - accuracy: 0.0000e+00 - loss: 0.0000e+00