*mounting to google drive*

In [41]:
# Mount Google Drive at /content/drive; the project paths used later in this
# notebook all live underneath that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


*install library prerequisites*

In [None]:
!pip install numpy scikit-learn pillow tqdm



*imports*

In [None]:
import os
import pickle
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, LSTM, Dense, TimeDistributed, Flatten, Dropout, Input, concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator
from tqdm import tqdm

*script for finding the folders in drive*

In [None]:
# Sanity check: show what is inside the project folder and its data folder.
project_dir = '/content/drive/MyDrive/ASL_to_Text_Project'
data_dir = os.path.join(project_dir, 'data')

for directory in (project_dir, data_dir):
    print(f"\nContents of {directory}:")
    print(os.listdir(directory))

*initialize/configure*

In [None]:
# --- Hyperparameters ---
IMG_SIZE = 224          # images are resized to IMG_SIZE x IMG_SIZE when loaded
SEQUENCE_LENGTH = 30    # passed to the gesture loader as sequence_length
BATCH_SIZE = 16
EPOCHS = 50

# --- Locations on the mounted Google Drive ---
DATA_DIR = r"/content/drive/My Drive/ASL_to_Text_Project/data"
MODEL_DIR = r"/content/drive/My Drive/ASL_to_Text_Project/models"
LABELS_DIR = r"/content/drive/My Drive/ASL_to_Text_Project/data/"
IMAGES_DIR = os.path.join(DATA_DIR, 'images')
SEQUENCES_DIR = os.path.join(DATA_DIR, 'gesture_sequences')

# Make sure every output/input folder used below exists (no-op when it already does).
for required_dir in (MODEL_DIR, LABELS_DIR, SEQUENCES_DIR):
    os.makedirs(required_dir, exist_ok=True)

*directory tweaks*

In [None]:
# Repeats the directory creation from the configuration cell; exist_ok=True
# makes each call a no-op when the folder is already present.
for output_dir in (MODEL_DIR, LABELS_DIR, SEQUENCES_DIR):
    os.makedirs(output_dir, exist_ok=True)

*enable GPU memory growth (let TensorFlow allocate GPU memory as needed)*

In [None]:
# Turn on memory growth for every GPU TensorFlow can see, then report how many
# physical and logical GPUs are available. Nothing happens on CPU-only runtimes.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # The same setting is applied to each visible GPU.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # NOTE(review): presumably raised when the setting is changed after the GPUs
        # were already initialized — confirm. The error is only printed, so the
        # notebook keeps running with the default allocation behaviour.
        print(e)

# Load gesture sequences function

In [None]:
#  Data Loading cnn&rnn
def load_gesture_data(data_dir, sequence_length, batch_size, gestures=None):
    """Endlessly yield batches of frame sequences with one-hot labels.

    Expected layout: ``data_dir/<gesture>/<sequence_folder>/*.jpg``. Every run of
    ``sequence_length`` consecutive frames inside a sequence folder (sliding by
    one frame) is one sample labelled with its gesture name. Samples are
    collected across folders and gestures until ``batch_size`` of them are
    available, so a folder holding exactly ``sequence_length`` frames
    contributes one sample. Samples left over at the end of a pass are carried
    into the next pass, which keeps every batch the same size.

    Args:
        data_dir: Root folder containing one sub-folder per gesture.
        sequence_length: Number of frames per sample.
        batch_size: Number of samples per yielded batch.
        gestures: Optional list of gesture folder names to use. When omitted,
            every sub-directory of ``data_dir`` is used (in sorted order).

    Yields:
        ``(sequences, labels)`` where ``sequences`` is an array built from
        ``batch_size`` lists of ``sequence_length`` images scaled to [0, 1], and
        ``labels`` is the one-hot encoding produced by the module-level ``le``
        and ``num_classes``.

    Raises:
        ValueError: If a complete pass over the data finds no sample at all
            (previously this case looped forever without yielding).

    Note:
        Reads the module-level ``IMG_SIZE``, ``le`` and ``num_classes`` when the
        generator is iterated, so they must be defined before the first batch
        is requested.
    """
    if gestures is not None:
        gesture_names = list(gestures)
    else:
        # Ignore stray files (e.g. .DS_Store) sitting next to the gesture folders.
        gesture_names = sorted(
            name for name in os.listdir(data_dir)
            if os.path.isdir(os.path.join(data_dir, name))
        )

    def frame_number(file_name):
        # Frame index is the third '_'-separated token, before any space or extension.
        return int(file_name.split('_')[2].split(' ')[0].split('.')[0])

    batch_sequences = []
    batch_labels = []

    while True:
        samples_this_pass = 0

        for gesture_name in gesture_names:
            gesture_dir = os.path.join(data_dir, gesture_name)

            for sequence_folder in sorted(os.listdir(gesture_dir)):
                sequence_path = os.path.join(gesture_dir, sequence_folder)
                if not os.path.isdir(sequence_path):
                    continue

                image_files = sorted(
                    (f for f in os.listdir(sequence_path) if f.endswith('.jpg')),
                    key=frame_number,
                )

                # An empty range when the folder has fewer than sequence_length frames.
                for start in range(len(image_files) - sequence_length + 1):
                    frames = []
                    for file_name in image_files[start:start + sequence_length]:
                        frame_path = os.path.join(sequence_path, file_name)
                        # Resize image during loading
                        img = load_img(frame_path, target_size=(IMG_SIZE, IMG_SIZE))
                        frames.append(img_to_array(img) / 255.0)

                    batch_sequences.append(frames)
                    batch_labels.append(gesture_name)
                    samples_this_pass += 1

                    if len(batch_sequences) == batch_size:
                        yield (
                            np.array(batch_sequences),
                            to_categorical(le.transform(batch_labels), num_classes=num_classes),
                        )
                        batch_sequences = []
                        batch_labels = []

        if samples_this_pass == 0:
            # Without this guard the generator would spin forever on empty data.
            raise ValueError(
                f"No sequence of {sequence_length} frames found under {data_dir!r} "
                f"for gestures {gesture_names!r}"
            )

# load cnn image function

In [None]:
def load_cnn_data(data_dir, batch_size, gestures=None):
    """Endlessly yield batches of single images with one-hot labels.

    Expected layout: ``data_dir/<class_name>/*.jpg``. Each batch holds
    ``batch_size`` images taken from one class folder, in the order of the
    number embedded in the file name; images that do not fill a complete batch
    are dropped.

    Args:
        data_dir: Root folder containing one sub-folder per class.
        batch_size: Number of images per yielded batch.
        gestures: Optional list of class folder names to use. When omitted,
            every sub-directory of ``data_dir`` is used (in sorted order).

    Yields:
        ``(images, labels)`` where ``images`` is an array of ``batch_size``
        images scaled to [0, 1] and ``labels`` is the one-hot encoding produced
        by the module-level ``le`` and ``num_classes``.

    Raises:
        ValueError: If a complete pass produces no batch at all (previously
            this case looped forever without yielding).

    Note:
        Reads the module-level ``IMG_SIZE``, ``le`` and ``num_classes`` when the
        generator is iterated.
    """
    if gestures is not None:
        image_names = list(gestures)
    else:
        # Ignore stray files (e.g. .DS_Store) sitting next to the class folders.
        image_names = sorted(
            name for name in os.listdir(data_dir)
            if os.path.isdir(os.path.join(data_dir, name))
        )

    def image_number(file_name):
        # Image index is the second '_'-separated token, before the extension.
        return int(file_name.split('_')[1].split('.')[0])

    while True:
        batches_this_pass = 0

        for class_name in image_names:
            image_dir = os.path.join(data_dir, class_name)
            image_files = sorted(
                (f for f in os.listdir(image_dir) if f.endswith('.jpg')),
                key=image_number,
            )

            full_batches = len(image_files) // batch_size
            for batch_idx in range(full_batches):
                chunk = image_files[batch_idx * batch_size:(batch_idx + 1) * batch_size]

                batch_images = []
                for file_name in chunk:
                    image_path = os.path.join(image_dir, file_name)
                    # Resize image during loading
                    img = load_img(image_path, target_size=(IMG_SIZE, IMG_SIZE))
                    batch_images.append(img_to_array(img) / 255.0)
                batch_labels = [class_name] * len(chunk)

                yield (
                    np.array(batch_images),
                    to_categorical(le.transform(batch_labels), num_classes=num_classes),
                )
                batches_this_pass += 1

        if batches_this_pass == 0:
            # Without this guard the generator would spin forever on empty data.
            raise ValueError(
                f"No class folder under {data_dir!r} holds at least {batch_size} "
                f".jpg images (classes: {image_names!r})"
            )

# Data Augmentation

In [None]:
# Random geometric augmentations shared by augment_data below.
datagen = ImageDataGenerator(
    horizontal_flip=True,
    rotation_range=20,
    shear_range=0.2,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    fill_mode='nearest',
)


def augment_data(images, labels, batch_size):
    """Yield augmented (images, labels) batches produced by the shared ``datagen``.

    The batches come straight from ``datagen.flow``; iteration lasts for as long
    as that iterator keeps producing batches.
    """
    augmented_stream = datagen.flow(images, labels, batch_size=batch_size)
    for augmented_images, augmented_labels in augmented_stream:
        yield augmented_images, augmented_labels

*Data Handling*

In [None]:
def create_dataset(images, labels, batch_size, shuffle=True):
    """Build a batched, prefetching ``tf.data.Dataset`` from in-memory arrays.

    When ``shuffle`` is true the samples are shuffled with a buffer as large as
    the whole input before batching.
    """
    pipeline = tf.data.Dataset.from_tensor_slices((images, labels))
    if shuffle:
        pipeline = pipeline.shuffle(buffer_size=len(images))
    return pipeline.batch(batch_size).prefetch(tf.data.AUTOTUNE)

*Mixed Precision*

In [None]:
# Make 'mixed_float16' the global Keras dtype policy.
# NOTE(review): presumably this has to run before the model is built so that its
# layers pick up the policy — confirm against the Keras mixed-precision docs.
from tensorflow.keras import mixed_precision
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_global_policy(policy)

*load and split data for cnn and rnn*

In [None]:
# Get a list of all gesture names. os.listdir returns entries in arbitrary
# order, so sort them to make the split below reproducible between runs.
all_gestures = sorted(os.listdir(SEQUENCES_DIR))

# First 80% of the gesture names go to training, the rest to validation.
# NOTE(review): this splits by gesture *name*, so the validation gestures are
# classes that never appear in training — confirm this is intended.
split_index = int(len(all_gestures) * 0.8)
train_gestures = all_gestures[:split_index]
val_gestures = all_gestures[split_index:]

# Create separate training and validation data generators
train_generator_rnn = load_gesture_data(SEQUENCES_DIR, SEQUENCE_LENGTH, BATCH_SIZE, gestures=train_gestures)
train_generator_cnn = load_cnn_data(IMAGES_DIR, BATCH_SIZE, gestures=train_gestures)

val_generator_rnn = load_gesture_data(SEQUENCES_DIR, SEQUENCE_LENGTH, BATCH_SIZE, gestures=val_gestures)
val_generator_cnn = load_cnn_data(IMAGES_DIR, BATCH_SIZE, gestures=val_gestures)

# Model definition: combined RNN + CNN model
*hybrid of rnn and cnn: Convolutional Neural Network - Long Short-Term Memory (CNN-LSTM)*

In [None]:
def _conv_feature_layers():
    """Return a fresh list of the conv/pool layers used by each model branch."""
    def l2():
        # A new regularizer instance per layer, as in the original definition.
        return tf.keras.regularizers.l2(0.01)

    return [
        Conv2D(96, kernel_size=(11, 11), strides=(4, 4), activation='relu',
               kernel_regularizer=l2()),
        MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
        Conv2D(256, kernel_size=(5, 5), activation='relu', padding='same',
               kernel_regularizer=l2()),
        MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
        Conv2D(384, kernel_size=(3, 3), activation='relu', padding='same',
               kernel_regularizer=l2()),
        Conv2D(384, kernel_size=(3, 3), activation='relu', padding='same',
               kernel_regularizer=l2()),
        Conv2D(256, kernel_size=(3, 3), activation='relu', padding='same',
               kernel_regularizer=l2()),
        MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
    ]


def create_cnn_lstm_model(num_classes):
    """Build the two-input gesture classifier.

    Inputs:
        ``cnn_input``: one image of shape ``(IMG_SIZE, IMG_SIZE, 3)``.
        ``rnn_input``: ``SEQUENCE_LENGTH`` images of that same shape.

    The single image goes through a conv/pool stack and is flattened. Each frame
    of the sequence goes through its own (separately weighted) copy of that
    stack, and the per-frame features feed two stacked LSTMs. Both feature
    vectors are concatenated and classified by a softmax layer.

    Args:
        num_classes: Size of the softmax output.

    Returns:
        An uncompiled ``Model`` with inputs ``[cnn_input, rnn_input]``.
    """
    # --- CNN Branch ---
    cnn_input = Input(shape=(IMG_SIZE, IMG_SIZE, 3), name="cnn_input")
    x = cnn_input
    for layer in _conv_feature_layers():
        x = layer(x)
    cnn_output = Flatten()(x)

    # --- RNN Branch ---
    rnn_input = Input(shape=(SEQUENCE_LENGTH, IMG_SIZE, IMG_SIZE, 3), name="rnn_input")
    y = rnn_input
    for layer in _conv_feature_layers():
        # TimeDistributed applies the layer to every frame of the sequence.
        y = TimeDistributed(layer)(y)
    y = TimeDistributed(Flatten())(y)
    y = LSTM(256, return_sequences=True, kernel_regularizer=tf.keras.regularizers.l2(0.01))(y)
    y = Dropout(0.5)(y)
    rnn_output = LSTM(256, kernel_regularizer=tf.keras.regularizers.l2(0.01))(y)

    # --- Combine Outputs ---
    merged_output = concatenate([cnn_output, rnn_output])
    # Keep the softmax in float32: this notebook enables the 'mixed_float16'
    # policy, and a float16 softmax output can be numerically unstable.
    final_output = Dense(num_classes, activation='softmax', dtype='float32')(merged_output)

    # --- Create the Model ---
    return Model(inputs=[cnn_input, rnn_input], outputs=final_output)

# load and split data

In [None]:
# load and split data
# os.listdir returns entries in arbitrary order; sorting keeps the 80/20 split
# reproducible between runs.
all_gestures = sorted(os.listdir(SEQUENCES_DIR))
split_index = int(len(all_gestures) * 0.8)

# NOTE(review): the split is by gesture *name*, so validation gestures are
# classes that never appear in training — confirm this is intended.
train_gestures = all_gestures[:split_index]
val_gestures = all_gestures[split_index:]

# Encode Labels

In [None]:
# Encode Labels
# One label per training sequence folder (kept for reference/inspection).
all_train_labels = [
    gesture
    for gesture in train_gestures
    for _sequence_folder in os.listdir(os.path.join(SEQUENCES_DIR, gesture))
]

# Fit the LabelEncoder on every gesture the generators can yield. Fitting on the
# training gestures alone makes le.transform raise ValueError ("previously
# unseen labels") as soon as a validation batch is encoded.
le = LabelEncoder()
le.fit(list(train_gestures) + list(val_gestures))
num_classes = len(le.classes_)

# The generators read le / num_classes lazily, when their first batch is requested.
train_generator_rnn = load_gesture_data(SEQUENCES_DIR, SEQUENCE_LENGTH, BATCH_SIZE, gestures=train_gestures)
train_generator_cnn = load_cnn_data(IMAGES_DIR, BATCH_SIZE, gestures=train_gestures)

val_generator_rnn = load_gesture_data(SEQUENCES_DIR, SEQUENCE_LENGTH, BATCH_SIZE, gestures=val_gestures)
val_generator_cnn = load_cnn_data(IMAGES_DIR, BATCH_SIZE, gestures=val_gestures)

# Model compilation and creation

In [None]:
# Build the two-input network and configure it for one-hot multi-class training.
model = create_cnn_lstm_model(num_classes)
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Calculate Steps per Epoch

In [None]:
# Calculate Steps per Epoch
def _count_sequence_folders(gestures):
    """Total number of entries found inside the given gestures' folders."""
    return sum(
        len(os.listdir(os.path.join(SEQUENCES_DIR, gesture)))
        for gesture in gestures
    )


# Each entry of a gesture folder is counted as one sample.
total_training_samples = _count_sequence_folders(train_gestures)
total_validation_samples = _count_sequence_folders(val_gestures)

steps_per_epoch = total_training_samples // BATCH_SIZE
validation_steps = total_validation_samples // BATCH_SIZE

*mixed precision*

In [None]:
#  Mixed Precision
# Sets the 'mixed_float16' global policy again; the same three statements already
# ran in the earlier "Mixed Precision" cell.
# NOTE(review): the model has already been created by this point, so this call
# presumably does not change it — confirm and consider removing this cell.
from tensorflow.keras import mixed_precision
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_global_policy(policy)

# Training the Model

In [None]:
# Train the model
def _paired_batches(cnn_generator, rnn_generator):
    """Merge the two batch generators into one ``(inputs, targets)`` stream.

    Keras accepts a single generator yielding ``(inputs, targets)``; it cannot
    consume a dict of generators, nor a generator passed as ``y``. The targets
    are the labels of the sequence (RNN) batch.
    """
    for (cnn_images, _cnn_labels), (rnn_sequences, rnn_labels) in zip(cnn_generator, rnn_generator):
        # The two generators may produce different batch lengths; trim both to
        # the shorter one so every model input has the same number of samples.
        n = min(len(cnn_images), len(rnn_sequences))
        # NOTE(review): the image batch and the sequence batch come from
        # independent generators, so sample i of one is not guaranteed to show
        # the same gesture as sample i of the other — confirm the pairing.
        yield (
            {"cnn_input": cnn_images[:n], "rnn_input": rnn_sequences[:n]},
            rnn_labels[:n],
        )


history = model.fit(
    _paired_batches(train_generator_cnn, train_generator_rnn),
    steps_per_epoch=steps_per_epoch,
    epochs=EPOCHS,
    validation_data=_paired_batches(val_generator_cnn, val_generator_rnn),
    validation_steps=validation_steps,
    callbacks=[
        # Keep only the checkpoints that improve validation accuracy.
        tf.keras.callbacks.ModelCheckpoint(
            filepath=os.path.join(MODEL_DIR, "gesture_model_{epoch:02d}_{val_accuracy:.2f}.h5"),
            monitor="val_accuracy",
            save_best_only=True,
            mode="max",
        ),
        # Stop after 10 epochs without val_loss improvement and roll back to the best weights.
        tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    ]
)

# Evaluation on the validation set


In [None]:
#Test Set Evaluation & Saving
def evaluate_generator(generator, steps, cnn_generator=None):
    """Compute the accuracy of the global ``model`` over ``steps`` batches.

    Args:
        generator: Yields ``(sequence_batch, one_hot_labels)`` for ``rnn_input``;
            its labels are used as ground truth.
        steps: Number of batches to evaluate.
        cnn_generator: Yields ``(image_batch, labels)`` for ``cnn_input``.
            Defaults to the module-level ``val_generator_cnn``. Previously the
            image batch was drawn from ``generator`` itself, which fed sequence
            data into the single-image input.

    Returns:
        The fraction of correctly classified samples (``nan`` when no sample
        was evaluated). The value is also printed as a percentage.
    """
    if cnn_generator is None:
        cnn_generator = val_generator_cnn

    all_preds = []
    all_true = []
    for _ in tqdm(range(steps), desc="Evaluating"):
        X_batch_rnn, y_batch = next(generator)
        X_batch_cnn, _cnn_labels = next(cnn_generator)

        # Both inputs must hold the same number of samples.
        n = min(len(X_batch_rnn), len(X_batch_cnn))

        # Predict and convert to class labels (verbose=0: tqdm already shows progress)
        y_pred = model.predict(
            {"cnn_input": X_batch_cnn[:n], "rnn_input": X_batch_rnn[:n]},
            verbose=0,
        )
        all_preds.extend(np.argmax(y_pred, axis=1))
        all_true.extend(np.argmax(y_batch[:n], axis=1))

    # Calculate accuracy
    accuracy = float(np.mean(np.equal(all_true, all_preds))) if all_true else float('nan')
    print(f"Test Accuracy: {accuracy * 100:.2f}%")
    return accuracy

# Evaluate the model on the validation data
validation_accuracy = evaluate_generator(val_generator_rnn, validation_steps,
                                         cnn_generator=val_generator_cnn)

# Saving the model and label encoder

In [None]:
# Persist the trained network and the fitted label encoder to Drive.
final_model_path = os.path.join(MODEL_DIR, "gesture_model.h5")
label_encoder_path = os.path.join(LABELS_DIR, 'gesture_label_encoder.pkl')

model.save(final_model_path)
with open(label_encoder_path, 'wb') as encoder_file:
    pickle.dump(le, encoder_file)

# Calculate total images trained

In [None]:
# Report the image count as (number of training samples) x (frames per sequence).
total_images_trained = SEQUENCE_LENGTH * total_training_samples
print(f"Total Images Trained On: {total_images_trained}")

# Plot Training and Validation curves

In [None]:
# Side-by-side training/validation curves: loss on the left, accuracy on the right.
fig, (loss_ax, accuracy_ax) = plt.subplots(1, 2, figsize=(12, 4))

curve_panels = [
    (loss_ax, 'loss', 'val_loss',
     'Training Loss', 'Validation Loss', 'Training and Validation Loss', 'Loss'),
    (accuracy_ax, 'accuracy', 'val_accuracy',
     'Training Accuracy', 'Validation Accuracy', 'Training and Validation Accuracy', 'Accuracy'),
]

for ax, train_key, val_key, train_label, val_label, title, y_label in curve_panels:
    ax.plot(history.history[train_key], label=train_label)
    ax.plot(history.history[val_key], label=val_label)
    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.legend()

fig.tight_layout()
plt.show()