*mounting to google drive*

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


*install library prerequisites*

In [2]:
!pip install numpy scikit-learn pillow tqdm



*imports*

In [3]:
import os
import pickle
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, LSTM, Dense, TimeDistributed, Flatten, Dropout, Input, concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tqdm import tqdm
from tensorflow.keras.layers import Reshape

*script for finding the folders in drive*

In [4]:
# Print the contents of the ASL_to_Text_Project directory
project_dir = '/content/drive/MyDrive/ASL_to_Text_Project'
print(f"\nContents of {project_dir}:")
print(os.listdir(project_dir))

# Print the contents of the data directory
data_dir = os.path.join(project_dir, 'data')
print(f"\nContents of {data_dir}:")
print(os.listdir(data_dir))


Contents of /content/drive/MyDrive/ASL_to_Text_Project:
['data', 'models']

Contents of /content/drive/MyDrive/ASL_to_Text_Project/data:
['labels', 'images', 'gesture_sequences']


*initialize/configure*

In [5]:
#  Configuration
IMG_SIZE = 224  # Images will be resized to this size
SEQUENCE_LENGTH = 30
BATCH_SIZE = 16
EPOCHS = 50
DATA_DIR = r"/content/drive/My Drive/ASL_to_Text_Project/data"
IMAGES_DIR = os.path.join(DATA_DIR, 'images')
SEQUENCES_DIR = os.path.join(DATA_DIR, 'gesture_sequences')
MODEL_DIR = r"/content/drive/My Drive/ASL_to_Text_Project/models"
LABELS_DIR = r"/content/drive/My Drive/ASL_to_Text_Project/data/"

*directory tweaks*

In [6]:
# Create directories if they don't exist
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(LABELS_DIR, exist_ok=True)
os.makedirs(SEQUENCES_DIR, exist_ok=True)

*limit gpu memory growth*

In [7]:
# # Limit GPU memory growth
# gpus = tf.config.list_physical_devices('GPU')
# if gpus:
#     try:
#         for gpu in gpus:
#             tf.config.experimental.set_memory_growth(gpu, True)
#     except RuntimeError as e:
#         print(e)

# # Force TensorFlow to use the GPU
# tf.config.set_visible_devices(gpus[0], 'GPU')
# tf.config.experimental.set_visible_devices(gpus[0], 'GPU')

# # Verify TensorFlow is using GPU
# print("TensorFlow is using GPU:", tf.test.is_built_with_cuda())
# print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
# print("Devices:", tf.config.list_physical_devices())

# Data Loading & Preprocessing

In [8]:
# --- Data Loading & Preprocessing ---

#Collect labels for CNN data (from IMAGES_DIR)
cnn_labels = os.listdir(IMAGES_DIR)
#Collect labels for gesture sequences (from SEQUENCES_DIR)
sequence_labels = os.listdir(SEQUENCES_DIR)
#Fit separate LabelEncoders
le_cnn = LabelEncoder()
le_cnn.fit(cnn_labels)
num_classes_cnn = len(le_cnn.classes_)

le_sequence = LabelEncoder()
le_sequence.fit(sequence_labels)
num_classes_sequence = len(le_sequence.classes_)

# load and preprocess iamge

In [9]:
def load_and_preprocess_image(image_path):
    img = load_img(image_path, target_size=(IMG_SIZE, IMG_SIZE))
    img_array = img_to_array(img) / 255.0
    return img_array

# load cnn image function

In [10]:
def load_cnn_data(data_dir, batch_size, gestures=None):
    image_names = gestures if gestures is not None else os.listdir(data_dir)
    while True:
        np.random.shuffle(image_names)
        for image_name in image_names:
            image_dir = os.path.join(data_dir, image_name)
            image_files = [f for f in os.listdir(image_dir) if f.endswith('.jpg')]
            np.random.shuffle(image_files)
            for i in range(0, len(image_files), batch_size):
                batch_files = image_files[i:i+batch_size]
                batch_images = []
                batch_labels = []
                for file_name in batch_files:
                    image_path = os.path.join(image_dir, file_name)
                    img_array = load_and_preprocess_image(image_path)
                    batch_images.append(img_array)
                    batch_labels.append(image_name)
                yield np.array(batch_images), to_categorical(le_cnn.transform(batch_labels), num_classes=num_classes_cnn)

# Load gesture sequences function

In [11]:
def load_gesture_data(data_dir, sequence_length, batch_size, gestures=None):
    gesture_names = gestures if gestures is not None else os.listdir(data_dir)
    batch_images = []
    batch_labels = []

    while True:
        np.random.shuffle(gesture_names)
        for gesture_name in gesture_names:
            gesture_dir = os.path.join(data_dir, gesture_name)
            sequence_folders = os.listdir(gesture_dir)
            np.random.shuffle(sequence_folders)

            for sequence_folder in sequence_folders:
                sequence_path = os.path.join(gesture_dir, sequence_folder)
                image_files = [
                    f for f in os.listdir(sequence_path) if f.endswith('.jpg')
                ]
                image_files.sort()

                if len(image_files) < sequence_length:
                    continue

                start_idx = np.random.randint(
                    0, len(image_files) - sequence_length + 1
                )
                frames = []
                for i in range(start_idx, start_idx + sequence_length):
                    frame_path = os.path.join(sequence_path, image_files[i])
                    img_array = load_and_preprocess_image(frame_path)
                    frames.append(img_array)

                # Encode the gesture_name before one-hot encoding
                numerical_label = le_sequence.transform([gesture_name])[0]
                batch_images.append(np.array(frames))
                batch_labels.append(numerical_label)

                # Yield a batch when enough samples are accumulated
                if len(batch_images) == batch_size:
                    yield np.array(batch_images), to_categorical(
                        batch_labels, num_classes=num_classes_sequence
                    )
                    batch_images = []  # Reset for the next batch
                    batch_labels = []

# Training with RNN + CNN training model
*hybrid of rnn and cnn: Convolutional Neural Network - Long Short-Term Memory (CNN-LSTM)*

In [12]:
from tensorflow.keras.layers import Reshape

def create_cnn_lstm_model(num_classes_cnn, num_classes_sequence):
    # --- CNN Branch ---
    cnn_input = Input(shape=(IMG_SIZE, IMG_SIZE, 3), name="cnn_input")
    x = Conv2D(64, kernel_size=(3, 3), activation='relu')(cnn_input)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Conv2D(128, kernel_size=(3, 3), activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Conv2D(128, kernel_size=(3, 3), activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    cnn_output = Flatten()(x)
    print("CNN Output Shape:", cnn_output.shape)  # Print shape for debugging

    # --- RNN Branch ---
    rnn_input = Input(shape=(SEQUENCE_LENGTH, IMG_SIZE, IMG_SIZE, 3), name="rnn_input")
    y = TimeDistributed(Conv2D(64, kernel_size=(3, 3), activation='relu'))(rnn_input)
    y = TimeDistributed(MaxPooling2D(pool_size=(2, 2)))(y)
    y = TimeDistributed(Conv2D(128, kernel_size=(3, 3), activation='relu'))(y)
    y = TimeDistributed(MaxPooling2D(pool_size=(2, 2)))(y)
    y = TimeDistributed(Conv2D(128, kernel_size=(3, 3), activation='relu'))(y)
    y = TimeDistributed(MaxPooling2D(pool_size=(2, 2)))(y)
    y = TimeDistributed(Flatten())(y)
    print("Shape after TimeDistributed Flatten:", y.shape)

    y = LSTM(256, return_sequences=False)(y)  # return_sequences=False for feature extraction
    print("Shape after LSTM:", y.shape)

    rnn_output = y
    print("Shape after LSTM (final):", rnn_output.shape)

    # --- Reshape RNN Output ---
    rnn_output = Reshape((BATCH_SIZE, 1, -1))(rnn_output)
    print("Shape after Reshape:", rnn_output.shape)

    # --- Flatten the RNN output ---
    rnn_output = Flatten()(rnn_output)
    print("Shape after Flatten:", rnn_output.shape)

    # --- Concatenate CNN and Flattened RNN Outputs ---
    merged_output = concatenate([cnn_output, rnn_output])
    print("Shape after Concatenate:", merged_output.shape)

    # --- Dense Layers for Classification ---
    cnn_final_output = Dense(num_classes_cnn, activation='softmax', name='cnn_output')(merged_output)
    sequence_final_output = Dense(num_classes_sequence, activation='softmax', name='sequence_output')(merged_output)

    # --- Model Definition ---
    model = Model(inputs=[cnn_input, rnn_input], outputs=[cnn_final_output, sequence_final_output])
    return model

# Data Splitting

In [13]:
# Data Splitting
cnn_gestures_train = cnn_labels[:int(len(cnn_labels) * 0.8)]
cnn_gestures_val = cnn_labels[int(len(cnn_labels) * 0.8):]
sequence_gestures_train = sequence_labels[:int(len(sequence_labels) * 0.8)]
sequence_gestures_val = sequence_labels[int(len(sequence_labels) * 0.8):]

*Mixed Precision*

In [14]:
# Mixed Precision
from tensorflow.keras import mixed_precision
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_global_policy(policy)

# Calculate Steps per Epoch

In [15]:
# Calculate Steps per Epoch
total_training_samples = sum(len(os.listdir(os.path.join(SEQUENCES_DIR, gesture)))
                            for gesture in sequence_gestures_train)
total_validation_samples = sum(len(os.listdir(os.path.join(SEQUENCES_DIR, gesture)))
                             for gesture in sequence_gestures_val)

steps_per_epoch = total_training_samples // BATCH_SIZE
validation_steps = total_validation_samples // BATCH_SIZE

# Create dataset objects

In [16]:
# --- Create tf.data.Dataset objects ---
train_cnn_data = tf.data.Dataset.from_generator(
    lambda: load_cnn_data(IMAGES_DIR, BATCH_SIZE, gestures=cnn_gestures_train),
    output_signature=(
        tf.TensorSpec(shape=(None, IMG_SIZE, IMG_SIZE, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(None, num_classes_cnn), dtype=tf.float32)
    )
)

train_sequence_data = tf.data.Dataset.from_generator(
    lambda: load_gesture_data(SEQUENCES_DIR, SEQUENCE_LENGTH, BATCH_SIZE, gestures=sequence_gestures_train),
    output_signature=(
        tf.TensorSpec(shape=(None, SEQUENCE_LENGTH, IMG_SIZE, IMG_SIZE, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(None, num_classes_sequence), dtype=tf.float32)
    )
)

train_dataset = tf.data.Dataset.zip((train_cnn_data, train_sequence_data))
train_dataset = train_dataset.map(lambda cnn, seq: ({'cnn_input': cnn[0], 'rnn_input': seq[0]},
                                                    {'cnn_output': cnn[1], 'sequence_output': seq[1]}))



# validation dataset

In [17]:
# --- Validation Dataset (Follow the same pattern as training data) ---
test_cnn_data = tf.data.Dataset.from_generator(
    lambda: load_cnn_data(IMAGES_DIR, BATCH_SIZE, gestures=cnn_gestures_test),
    output_signature=(
        tf.TensorSpec(shape=(None, IMG_SIZE, IMG_SIZE, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(None, num_classes_cnn), dtype=tf.float32)
    )
)

test_sequence_data = tf.data.Dataset.from_generator(
    lambda: load_gesture_data(SEQUENCES_DIR, SEQUENCE_LENGTH, BATCH_SIZE, gestures=sequence_gestures_test),
    output_signature=(
        tf.TensorSpec(shape=(None, SEQUENCE_LENGTH, IMG_SIZE, IMG_SIZE, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(None, num_classes_sequence), dtype=tf.float32)
    )
)

test_dataset = tf.data.Dataset.zip((test_cnn_data, test_sequence_data))
test_dataset = test_dataset.map(lambda cnn, seq: ({'cnn_input': cnn[0], 'rnn_input': seq[0]},
                                                    {'cnn_output': cnn[1], 'sequence_output': seq[1]}))

# Model compilation and training

In [None]:
# --- Model Compilation & Training ---
model = create_cnn_lstm_model(num_classes_cnn, num_classes_sequence)
model.compile(
    optimizer='adam',
    loss={
        'cnn_output': 'categorical_crossentropy',
        'sequence_output': 'categorical_crossentropy'
    },
    metrics={
        'cnn_output': ['accuracy'],
        'sequence_output': ['accuracy']
    }
)

history = model.fit(
    train_dataset,
    epochs=EPOCHS,
    steps_per_epoch=steps_per_epoch,
    validation_data=test_dataset,
    validation_steps=validation_steps
)

CNN Output Shape: (None, 86528)
Shape after TimeDistributed Flatten: (None, 30, 86528)
Shape after LSTM: (None, 256)
Shape after LSTM (final): (None, 256)
Shape after Reshape: (None, 16, 1, 16)
Shape after Flatten: (None, 256)
Shape after Concatenate: (None, 86784)
Epoch 1/50


# Test Set Evaluation and saving


In [None]:
# Evaluate the model
test_loss, cnn_test_loss, sequence_test_loss, cnn_test_acc, sequence_test_acc = model.evaluate(val_dataset, steps=validation_steps)
print(f"CNN Test accuracy: {cnn_test_acc:.4f}")
print(f"Sequence Test accuracy: {sequence_test_acc:.4f}")

# Create a new dataset with encoded labels for evaluation

In [None]:
# Create a new dataset with encoded labels for evaluation
def encode_labels(input_batch):
    images, labels = input_batch
    encoded_labels = to_categorical(le_sequence.transform(labels), num_classes=num_classes_sequence)
    return images, encoded_labels

# Use 'map' to apply the label encoding to the test dataset
encoded_test_dataset = test_dataset.map(encode_labels)

# Evaluate the model on the dataset with encoded labels
test_loss, cnn_test_loss, sequence_test_loss, cnn_test_acc, sequence_test_acc = model.evaluate(
    encoded_test_dataset,
    steps=validation_steps
)
print(f"CNN Test accuracy: {cnn_test_acc:.4f}")
print(f"Sequence Test accuracy: {sequence_test_acc:.4f}")

# Saving the model and label encoder

In [None]:
# Save the model
model_save_path = os.path.join(MODEL_DIR, "model.h5")
model.save(model_save_path)
print(f"Model saved to: {model_save_path}")

# Save the label encoders
le_cnn_path = os.path.join(LABELS_DIR, 'le_cnn.pkl')
le_sequence_path = os.path.join(LABELS_DIR, 'le_seq.pkl')

with open(le_cnn_path, 'wb') as f:
    pickle.dump(le_cnn, f)
with open(le_sequence_path, 'wb') as f:
    pickle.dump(le_sequence, f)

print(f"Label encoders saved to: {le_cnn_path} and {le_sequence_path}")

# Calculate total images trained

In [None]:
#Calculate and print total images trained on
total_images_trained = total_training_samples * SEQUENCE_LENGTH
print(f"Total Images Trained On: {total_images_trained}")

# Plot Training and Validation curves

In [None]:
# Plot training history
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history.history['cnn_output_accuracy'], label='CNN Training Accuracy')
plt.plot(history.history['val_cnn_output_accuracy'], label='CNN Validation Accuracy')
plt.plot(history.history['sequence_output_accuracy'], label='Sequence Training Accuracy')
plt.plot(history.history['val_sequence_output_accuracy'], label='Sequence Validation Accuracy')
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history.history['cnn_output_loss'], label='CNN Training Loss')
plt.plot(history.history['val_cnn_output_loss'], label='CNN Validation Loss')
plt.plot(history.history['sequence_output_loss'], label='Sequence Training Loss')
plt.plot(history.history['val_sequence_output_loss'], label='Sequence Validation Loss')
plt.title('Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend()

plt.tight_layout()
plt.show()