In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
import kagglehub
# Download the 'grassknoted/asl-alphabet' dataset through kagglehub and keep
# the value returned by the call.
grassknoted_asl_alphabet_path = kagglehub.dataset_download('grassknoted/asl-alphabet')

print('Data source import complete.')


In [None]:
import numpy as np
import os
import cv2
from sklearn.utils import shuffle
from collections import Counter
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import gc
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, GlobalAveragePooling2D, Activation
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.applications import VGG16, VGG19, ResNet50, MobileNet
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import BatchNormalization
import kagglehub
import seaborn as sns
import pandas as pd
import skimage
from skimage.transform import resize
from sklearn.metrics import classification_report, confusion_matrix
import os

In [None]:
# Download latest version of the dataset; `path` holds the value returned by
# kagglehub, which is printed below as the location of the dataset files.
path = kagglehub.dataset_download("grassknoted/asl-alphabet")

print("Path to dataset files:", path)

In [None]:
# List the class folders of the training set. The location is derived from the
# directory returned by kagglehub (`path`) instead of a hard-coded /kaggle/input
# path, so the cell also works when the notebook runs outside Kaggle.
print(sorted(os.listdir(os.path.join(path, 'asl_alphabet_train', 'asl_alphabet_train'))))

In [None]:
# Define constants for data preprocessing and model training
imageSize = 224  # Desired size (width and height) for resizing images
target_dims = (imageSize, imageSize, 3)  # Target dimensions for input images
num_classes = 27  # Number of classes in the dataset after exclusion

# Training data directory path, resolved relative to the kagglehub download
# location (`path`) so the notebook is not tied to Kaggle's /kaggle/input mount.
train_dir = os.path.join(path, 'asl_alphabet_train', 'asl_alphabet_train')  # Path to the dataset folder

# Function to load and preprocess a reduced dataset
def get_reduced_data(folder, target_samples=10000, excluded_classes=['del', 'nothing'], image_size=None):
    """
    Load a balanced subset of images and labels from the specified folder.

    Every sub-folder of `folder` whose name is a known class (letters 'A'-'Z'
    and 'space') and is not excluded contributes at most
    `target_samples // 27` images. Images are read with OpenCV, converted from
    OpenCV's BGR channel order to RGB, resized to a square, and scaled to
    float32 values in [0, 1].

    Args:
        folder (str): Path to the dataset folder (one sub-folder per class).
        target_samples (int): Total number of samples to include in the reduced dataset.
        excluded_classes (list): Classes to exclude from the dataset.
        image_size (int | None): Side length of the resized images. When None,
            the notebook-level `imageSize` constant is used.

    Returns:
        tuple: Preprocessed images (X) of shape (n, image_size, image_size, 3)
        and their corresponding integer labels (y) of shape (n,), shuffled
        together with a fixed seed.

    Raises:
        ValueError: If no readable image was found under `folder`.
    """
    label_mapping = {
        'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5, 'G': 6, 'H': 7, 'I': 8, 'J': 9,
        'K': 10, 'L': 11, 'M': 12, 'N': 13, 'O': 14, 'P': 15, 'Q': 16, 'R': 17, 'S': 18, 'T': 19,
        'U': 20, 'V': 21, 'W': 22, 'X': 23, 'Y': 24, 'Z': 25, 'space': 26
    }

    # Fall back to the notebook-wide image size so existing calls keep working
    if image_size is None:
        image_size = imageSize

    # Calculate the number of samples per class
    samples_per_class = target_samples // len(label_mapping)

    # Initialize lists to store images and labels
    X, y = [], []

    # os.listdir returns entries in arbitrary order; sorting makes the seeded
    # shuffle below pick the same files on every run and every machine.
    for folderName in sorted(os.listdir(folder)):
        if folderName in excluded_classes or folderName not in label_mapping:
            continue

        folder_path = os.path.join(folder, folderName)
        if not os.path.isdir(folder_path):
            continue
        label = label_mapping[folderName]

        # Shuffle and select a subset of images for the class
        image_files = sorted(os.listdir(folder_path))
        image_files = shuffle(image_files, random_state=42)[:samples_per_class]

        for image_filename in image_files:
            img_file = cv2.imread(os.path.join(folder_path, image_filename))
            if img_file is None:
                # Unreadable / non-image file: skip it
                continue
            # cv2.imread yields BGR; matplotlib and Keras pipelines expect RGB
            img_file = cv2.cvtColor(img_file, cv2.COLOR_BGR2RGB)
            img_file = cv2.resize(img_file, (image_size, image_size))
            X.append(img_file.astype(np.float32) / 255.0)  # Normalize the images
            y.append(label)

    if not X:
        raise ValueError(f"No images could be loaded from {folder!r}")

    # Convert lists to NumPy arrays
    X = np.array(X)
    y = np.array(y)

    # Shuffle the dataset to ensure randomness
    X, y = shuffle(X, y, random_state=42)

    return X, y

# Load the reduced training data: X_train receives the images and y_train the
# labels, using the function's default sample budget and class exclusions
X_train, y_train = get_reduced_data(train_dir)

# Print a success message with the number of labels that were collected
print(f"Images successfully imported. Total images processed: {len(y_train)}")

In [None]:
def check_data_balance(labels, label_mapping):
    """
    Report how many samples each class has.

    One line is printed per distinct label, in the order in which the labels
    are first encountered. Labels without an entry in `label_mapping` are
    reported under the name "Unknown".

    Args:
        labels (array-like): Array of labels.
        label_mapping (dict): Dictionary mapping class names to indices.
    """
    # Invert the mapping so a numeric label can be turned back into its name
    index_to_name = dict(zip(label_mapping.values(), label_mapping.keys()))

    # Tally the occurrences of every label
    tallies = Counter(labels)

    print("Data balance across classes:")
    for label in tallies:
        count = tallies[label]
        class_name = index_to_name[label] if label in index_to_name else "Unknown"
        print(f"Class '{class_name}' (label {label}): {count} samples")


In [None]:
# Define the label mapping: the letters 'A'..'Z' take the indices 0..25 in
# alphabetical order and 'space' takes index 26
label_mapping = {chr(ord('A') + idx): idx for idx in range(26)}
label_mapping['space'] = 26

# Check data balance
check_data_balance(y_train, label_mapping)


In [None]:
def plot_data_balance(labels, label_mapping):
    """
    Draw a bar chart with the number of samples of every class.

    Bars are ordered by ascending label index and captioned with the class
    name that `label_mapping` assigns to that index.

    Args:
        labels (array-like): Array of labels.
        label_mapping (dict): Dictionary mapping class names to indices.
    """
    tallies = Counter(labels)
    index_to_name = dict(zip(label_mapping.values(), label_mapping.keys()))

    # Walk the observed labels once, in ascending order, to build both bar inputs
    class_names, counts = [], []
    for label in sorted(tallies):
        class_names.append(index_to_name[label])
        counts.append(tallies[label])

    # Render the chart
    plt.figure(figsize=(12, 6))
    plt.bar(class_names, counts, color='skyblue')
    plt.title('Class Distribution')
    plt.xlabel('Class Name')
    plt.ylabel('Number of Samples')
    plt.xticks(rotation=45)
    plt.show()

In [None]:
# Plot data balance: bar chart of the per-class sample counts in y_train
plot_data_balance(y_train, label_mapping)

In [None]:
# Show the array shapes of the loaded images and of their labels
print("The shape of X_train is : ", X_train.shape)
print("The shape of y_train is : ", y_train.shape)

In [None]:
# Show the shape of a single image (the first entry of X_train)
print("The shape of one image is : ", X_train[0].shape)

In [None]:
# Display the image stored at index 80 of X_train as a quick visual check
plt.imshow(X_train[80])
plt.show()

In [None]:
# Keep handles to the full dataset under separate names before X_train / y_train
# are re-bound by the split. No .copy() is needed: train_test_split builds new
# arrays and does not modify its inputs, and copying would duplicate the whole
# image array in memory.
X_data = X_train
y_data = y_train

In [None]:
# Split into training (70%) and a temporary hold-out (30%). Stratifying on the
# labels keeps every class at the same share in both parts.
X_train, X_temp, y_train, y_temp = train_test_split(
    X_data, y_data, test_size=0.3, random_state=42, stratify=y_data
)

# Split the hold-out evenly into validation (15%) and testing (15%), again stratified
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

print(f"Training set: {X_train.shape}, Validation set: {X_val.shape}, Testing set: {X_test.shape}")

In [None]:
# Take the number of classes from the label mapping rather than from the labels
# that happen to be present in the training split: if a class were missing from
# y_train, counting unique labels would give a width smaller than the largest
# label index and to_categorical would fail.
num_classes = len(label_mapping)

# One-hot encode the labels for training, validation, and testing
y_cat_train = to_categorical(y_train, num_classes)
y_cat_val = to_categorical(y_val, num_classes)
y_cat_test = to_categorical(y_test, num_classes)

# Print shapes to confirm
print(f"Training labels shape: {y_cat_train.shape}")
print(f"Validation labels shape: {y_cat_val.shape}")
print(f"Testing labels shape: {y_cat_test.shape}")

In [None]:
# Checking the dimensions of all the variables: images and integer labels of
# the train / test / validation splits, followed by the one-hot encoded labels
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)
print(X_val.shape)
print(y_val.shape)
print(y_cat_train.shape)
print(y_cat_test.shape)
print(y_cat_val.shape)

In [None]:
# Delete unused variables to free up memory space
del X_data  # Full pre-split image array (no longer needed)
del y_data  # Full pre-split label array (no longer needed)
del X_temp, y_temp  # Intermediate 30% hold-out; its samples live on in the val/test splits

# Force garbage collection to immediately reclaim the memory occupied by deleted variables
gc.collect()  # Ensures unused memory is cleared to optimize resource usage

In [None]:
# Stop training once val_loss has not improved for 4 consecutive epochs and
# roll the model back to the weights of its best epoch; without
# restore_best_weights the model would keep the weights of the last epoch.
early_stop = EarlyStopping(monitor='val_loss', patience=4, verbose=1, restore_best_weights=True)

In [None]:
# Define the CNN model
model = Sequential([
    # First convolutional layer: 32 filters, 3x3 kernel, fed with images of
    # shape target_dims (imageSize x imageSize x 3)
    Conv2D(32, (3, 3), input_shape=target_dims),
    BatchNormalization(),  # Normalize activations for faster convergence
    Activation('relu'),  # Apply ReLU activation

    # First pooling layer
    MaxPooling2D(pool_size=(2, 2)),  # Downsample feature maps by 2x2

    # Second convolutional layer with ReLU activation
    Conv2D(64, (3, 3)),  # 64 filters, kernel size 3x3
    Activation('relu'),

    # Second pooling layer
    MaxPooling2D(pool_size=(2, 2)),

    # Third convolutional layer with ReLU activation
    Conv2D(64, (3, 3)),  # 64 filters, kernel size 3x3
    Activation('relu'),

    # Third pooling layer
    MaxPooling2D(pool_size=(2, 2)),

    # Flatten layer to convert 3D feature maps to 1D
    Flatten(),

    # Fully connected dense layer with 128 neurons
    Dense(128),
    Activation('relu'),  # ReLU activation for the dense layer

    Dropout(0.5),  # Dropout layer to prevent overfitting

    # Output layer with one neuron per class and softmax activation; sized from
    # num_classes so it always matches the width of the one-hot labels
    Dense(num_classes),
    Activation('softmax')  # Softmax activation for multi-class classification
])
model.summary()

In [None]:
# Configure the CNN for training
model.compile(
    optimizer= Adam(learning_rate= 0.0001),  # Adam optimizer with a learning rate of 1e-4
    loss= 'categorical_crossentropy',  # Loss for multi-class classification with one-hot labels
    metrics= ['accuracy']  # Track accuracy during training
)

In [None]:
# Train the CNN for 30 epochs in mini-batches of 16, validating on the
# validation split after every epoch
history = model.fit(
    X_train, y_cat_train,
    epochs= 30,
    batch_size= 16,
    validation_data=(X_val, y_cat_val),
    # NOTE(review): early stopping is disabled here (the early_stop callback is not passed) - confirm this is intended
    verbose=1
)

In [None]:
# Gather the per-epoch values recorded while training the CNN into a pandas
# DataFrame, reading them from the History object returned by fit()
metrics = pd.DataFrame(history.history)

# Print the model metrics
print("The model metrics for CNN are")
metrics

In [None]:
# Draw one figure per tracked quantity (accuracy first, then loss), each one
# comparing the training curve with the validation curve across epochs
for curve_key, curve_name in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
    plt.plot(metrics[curve_key], label=f'Train {curve_name}')
    plt.plot(metrics[f'val_{curve_key}'], label=f'Validation {curve_name}')
    plt.title(f'Model_CNN {curve_name}')
    plt.xlabel('Epochs')
    plt.ylabel(curve_name)
    plt.legend()
    plt.show()

In [None]:
# Evaluate the CNN on the test split; the result is unpacked into the loss and
# the accuracy metric configured at compile time
loss, accuracy = model.evaluate(X_test, y_cat_test, verbose=0)

# Output the test loss and accuracy
print(f"Test Loss for CNN: {loss}")
print(f"Test Accuracy for CNN: {accuracy}")

In [None]:
# Load the pre-trained VGG16 model without the top classification layers
# NOTE(review): the images fed to this model are scaled to [0, 1] by the loader
# and VGG16's own preprocess_input is not applied - confirm this is intended.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=target_dims)
base_model.trainable = False  # Freeze the base model

# Add custom layers on top
x = Flatten()(base_model.output)  # Flatten the feature maps
x = Dense(256, activation='relu')(x)  # Fully connected layer
x = Dropout(0.5)(x)  # Dropout for regularization
# Output layer sized from num_classes so it always matches the one-hot labels
output = Dense(num_classes, activation='softmax')(x)

# Create the full model
model_vgg16 = Model(inputs=base_model.input, outputs=output)

# Show the model summary
model_vgg16.summary()

In [None]:
# Configure the VGG16-based model for training: Adam with a learning rate of
# 1e-4, categorical cross-entropy on the one-hot labels, accuracy as metric
model_vgg16.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

In [None]:
# Train the VGG16-based model for 30 epochs in mini-batches of 16, validating
# on the validation split after every epoch
history_vgg16 = model_vgg16.fit(
    X_train, y_cat_train,
    epochs= 30,
    batch_size= 16,
    validation_data=(X_val, y_cat_val),
    # NOTE(review): early stopping is disabled here (the early_stop callback is not passed) - confirm this is intended
    verbose=1
)

In [None]:
# Gather the per-epoch values recorded while training the VGG16-based model,
# reading them from the History object returned by fit()
metrics_vgg16 = pd.DataFrame(history_vgg16.history)
print("The model_vgg16 metrics are")
metrics_vgg16

In [None]:
# Draw one figure per tracked quantity (accuracy first, then loss), each one
# comparing the training curve with the validation curve across epochs
for curve_key, curve_name in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
    plt.plot(metrics_vgg16[curve_key], label=f'Train {curve_name}')
    plt.plot(metrics_vgg16[f'val_{curve_key}'], label=f'Validation {curve_name}')
    plt.title(f'Model_vgg16 {curve_name}')
    plt.xlabel('Epochs')
    plt.ylabel(curve_name)
    plt.legend()
    plt.show()

In [None]:
# Evaluate the VGG16-based model on the test split and report loss and accuracy
loss1, accuracy1= model_vgg16.evaluate(X_test, y_cat_test, verbose=0)
print(f"Test Loss for vgg16: {loss1}")
print(f"Test Accuracy for vgg16: {accuracy1}")

In [None]:
# Load the pre-trained VGG19 model without the top classification layers
# NOTE(review): the images fed to this model are scaled to [0, 1] by the loader
# and VGG19's own preprocess_input is not applied - confirm this is intended.
base_model1 = VGG19(weights='imagenet', include_top=False, input_shape=target_dims)
base_model1.trainable = False  # Freeze the base model

# Add custom layers
x = Flatten()(base_model1.output)  # Flatten the feature maps
x = Dense(256, activation='relu')(x)  # Fully connected layer
x = Dropout(0.5)(x)  # Dropout for regularization
# Output layer sized from num_classes so it always matches the one-hot labels
output = Dense(num_classes, activation='softmax')(x)

# Create the model
model_vgg19 = Model(inputs=base_model1.input, outputs=output)

# Show the model summary
model_vgg19.summary()

In [None]:
# Configure the VGG19-based model for training: Adam with a learning rate of
# 1e-4, categorical cross-entropy on the one-hot labels, accuracy as metric
model_vgg19.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

In [None]:
# Train the VGG19-based model for 30 epochs in mini-batches of 16, validating
# on the validation split after every epoch
history_vgg19 = model_vgg19.fit(
    X_train, y_cat_train,
    epochs= 30,
    batch_size= 16,
    validation_data=(X_val, y_cat_val),
    # NOTE(review): early stopping is disabled here (the early_stop callback is not passed) - confirm this is intended
    verbose=1
)

In [None]:
# Gather the per-epoch values recorded while training the VGG19-based model,
# reading them from the History object returned by fit()
metrics_vgg19 = pd.DataFrame(history_vgg19.history)
print("The model_vgg19 metrics are")
metrics_vgg19

In [None]:
# Draw one figure per tracked quantity (accuracy first, then loss), each one
# comparing the training curve with the validation curve across epochs
for curve_key, curve_name in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
    plt.plot(metrics_vgg19[curve_key], label=f'Train {curve_name}')
    plt.plot(metrics_vgg19[f'val_{curve_key}'], label=f'Validation {curve_name}')
    plt.title(f'Model_vgg19 {curve_name}')
    plt.xlabel('Epochs')
    plt.ylabel(curve_name)
    plt.legend()
    plt.show()

In [None]:
# Evaluate the VGG19-based model on the test split and report loss and accuracy
loss2, accuracy2= model_vgg19.evaluate(X_test, y_cat_test, verbose=0)
print(f"Test Loss for vgg19: {loss2}")
print(f"Test Accuracy for vgg19: {accuracy2}")

In [None]:
# Load the pre-trained ResNet50 model without the top layers
# NOTE(review): the images fed to this model are scaled to [0, 1] by the loader
# and ResNet50's own preprocess_input is not applied - confirm this is intended.
base_model2 = ResNet50(weights='imagenet', include_top=False, input_shape=target_dims)
base_model2.trainable = False  # Freeze the base model layers

# Add custom layers
x = GlobalAveragePooling2D()(base_model2.output)  # Global average pooling instead of Flatten
x = Dense(256, activation='relu')(x)             # Fully connected layer
x = Dropout(0.5)(x)                              # Dropout for regularization
# Output layer sized from num_classes so it always matches the one-hot labels
output = Dense(num_classes, activation='softmax')(x)

# Create the full model
model_resnet50 = Model(inputs=base_model2.input, outputs=output)

# Show the model summary
model_resnet50.summary()

In [None]:
# Configure the ResNet50-based model for training: Adam with a learning rate of
# 1e-4, categorical cross-entropy on the one-hot labels, accuracy as metric
model_resnet50.compile(
    optimizer= Adam(learning_rate= 0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

In [None]:
# Train the ResNet50-based model for up to 30 epochs in mini-batches of 16,
# validating on the validation split after every epoch.
# NOTE(review): this is the only fit call that passes the early_stop callback;
# the other models train without it - confirm the difference is intended.
history_resnet50 = model_resnet50.fit(
    X_train, y_cat_train,
    epochs= 30,
    batch_size= 16,
    validation_data=(X_val, y_cat_val),
    callbacks=[early_stop],
    verbose=1
)

In [None]:
# Gather the per-epoch values recorded while training the ResNet50-based model,
# reading them from the History object returned by fit()
metrics_resnet50 = pd.DataFrame(history_resnet50.history)
print("The model_ResNet50 metrics are")
metrics_resnet50

In [None]:
# Draw one figure per tracked quantity (accuracy first, then loss), each one
# comparing the training curve with the validation curve across epochs
for curve_key, curve_name in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
    plt.plot(metrics_resnet50[curve_key], label=f'Train {curve_name}')
    plt.plot(metrics_resnet50[f'val_{curve_key}'], label=f'Validation {curve_name}')
    plt.title(f'Model_ResNet50 {curve_name}')
    plt.xlabel('Epochs')
    plt.ylabel(curve_name)
    plt.legend()
    plt.show()

In [None]:
# Evaluate the ResNet50-based model on the test split and report loss and accuracy
loss3, accuracy3= model_resnet50.evaluate(X_test, y_cat_test, verbose=0)
print(f"Test Loss for ResNet50: {loss3}")
print(f"Test Accuracy for ResNet50: {accuracy3}")

In [None]:
# Load the pre-trained MobileNet model without the top layers
# NOTE(review): the images fed to this model are scaled to [0, 1] by the loader
# and MobileNet's own preprocess_input is not applied - confirm this is intended.
base_model3 = MobileNet(weights='imagenet', include_top=False, input_shape=target_dims)
base_model3.trainable = False  # Freeze the base model layers

# Add custom layers
x = GlobalAveragePooling2D()(base_model3.output)  # Global average pooling instead of Flatten
x = Dense(128, activation='relu')(x)             # Fully connected layer with fewer neurons
x = Dropout(0.3)(x)                              # Dropout for regularization
# Output layer sized from num_classes so it always matches the one-hot labels
output = Dense(num_classes, activation='softmax')(x)

# Create the full model
model_mobilenet = Model(inputs=base_model3.input, outputs=output)

# Show the model summary
model_mobilenet.summary()

In [None]:
# Configure the MobileNet-based model for training: Adam with a learning rate
# of 1e-4, categorical cross-entropy on the one-hot labels, accuracy as metric
model_mobilenet.compile(
    optimizer= Adam(learning_rate= 0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

In [None]:
# Train the MobileNet-based model for 30 epochs in mini-batches of 16,
# validating on the validation split after every epoch
history_mobilenet = model_mobilenet.fit(
    X_train, y_cat_train,
    epochs= 30,
    batch_size= 16,
    validation_data=(X_val, y_cat_val),
    # NOTE(review): early stopping is disabled here (the early_stop callback is not passed) - confirm this is intended
    verbose=1
)

In [None]:
# Gather the per-epoch values recorded while training the MobileNet-based
# model, reading them from the History object returned by fit()
metrics_mobilenet = pd.DataFrame(history_mobilenet.history)
print("The model_MobileNet metrics are")
metrics_mobilenet

In [None]:
# Draw one figure per tracked quantity (accuracy first, then loss), each one
# comparing the training curve with the validation curve across epochs
for curve_key, curve_name in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
    plt.plot(metrics_mobilenet[curve_key], label=f'Train {curve_name}')
    plt.plot(metrics_mobilenet[f'val_{curve_key}'], label=f'Validation {curve_name}')
    plt.title(f'Model_MobileNet {curve_name}')
    plt.xlabel('Epochs')
    plt.ylabel(curve_name)
    plt.legend()
    plt.show()

In [None]:
# Evaluate the MobileNet-based model on the test split and report loss and accuracy
loss4, accuracy4= model_mobilenet.evaluate(X_test, y_cat_test, verbose=0)
print(f"Test Loss for MobileNet: {loss4}")
print(f"Test Accuracy for MobileNet: {accuracy4}")

In [None]:
# Predicted class index per test image: argmax over the last axis of the CNN's predict() output
predictions = np.argmax(model.predict(X_test), axis=-1)

In [None]:
# Predicted class index per test image: argmax over the last axis of the VGG16 model's predict() output
predictions1 = np.argmax(model_vgg16.predict(X_test), axis=-1)

In [None]:
# Predicted class index per test image: argmax over the last axis of the VGG19 model's predict() output
predictions2 = np.argmax(model_vgg19.predict(X_test), axis=-1)

In [None]:
# Predicted class index per test image: argmax over the last axis of the ResNet50 model's predict() output
predictions3 = np.argmax(model_resnet50.predict(X_test), axis=-1)

In [None]:
# Predicted class index per test image: argmax over the last axis of the MobileNet model's predict() output
predictions4 = np.argmax(model_mobilenet.predict(X_test), axis=-1)

In [None]:
# Visualize a few sample predictions of the CNN: each figure shows one test
# image titled with its predicted and its actual label index
for i in range(5):  # Display first 5 predictions
    plt.imshow(X_test[i])
    plt.title(f"Predicted: {predictions[i]}, Actual: {y_test[i]}")
    plt.show()

In [None]:
# Generate classification report comparing the true test labels with the CNN predictions
print("Classification Report for CNN:")
print(classification_report(y_test, predictions))

# Generate confusion matrix for the same label / prediction pair
print("Confusion Matrix for CNN:")
print(confusion_matrix(y_test, predictions))

In [None]:
# Class names ordered by their label index, and the matching label indices
class_names = sorted(label_mapping, key=label_mapping.get)
class_ids = [label_mapping[name] for name in class_names]

# Generate confusion matrix; passing `labels` pins one row/column per known
# class even if a class does not occur in y_test or in the predictions
cm = confusion_matrix(y_test, predictions, labels=class_ids)

# Create the heatmap plot with class names on both axes (the matrix has one
# row/column per class, so the tick labels must have exactly that length)
plt.figure(figsize=(12, 12))  # Set figure size
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names)

# Add labels and title
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix Heatmap for CNN')

# Display the plot
plt.show()

In [None]:
# Generate classification report comparing the true test labels with the VGG16 predictions
print("Classification Report for VGG16:")
print(classification_report(y_test, predictions1))

# Generate confusion matrix for the same label / prediction pair
print("Confusion Matrix for VGG16:")
print(confusion_matrix(y_test, predictions1))

In [None]:
# Class names ordered by their label index, and the matching label indices
class_names = sorted(label_mapping, key=label_mapping.get)
class_ids = [label_mapping[name] for name in class_names]

# Generate confusion matrix; passing `labels` pins one row/column per known
# class even if a class does not occur in y_test or in the predictions
cm1 = confusion_matrix(y_test, predictions1, labels=class_ids)

# Create the heatmap plot with class names on both axes (the matrix has one
# row/column per class, so the tick labels must have exactly that length)
plt.figure(figsize=(12, 12))  # Set figure size
sns.heatmap(cm1, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names)

# Add labels and title
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix Heatmap for VGG16')

# Display the plot
plt.show()

In [None]:
# Generate classification report comparing the true test labels with the VGG19 predictions
print("Classification Report for VGG19:")
print(classification_report(y_test, predictions2))

# Generate confusion matrix for the same label / prediction pair
print("Confusion Matrix for VGG19:")
print(confusion_matrix(y_test, predictions2))

In [None]:
# Class names ordered by their label index, and the matching label indices
class_names = sorted(label_mapping, key=label_mapping.get)
class_ids = [label_mapping[name] for name in class_names]

# Generate confusion matrix; passing `labels` pins one row/column per known
# class even if a class does not occur in y_test or in the predictions
cm2 = confusion_matrix(y_test, predictions2, labels=class_ids)

# Create the heatmap plot with class names on both axes (the matrix has one
# row/column per class, so the tick labels must have exactly that length)
plt.figure(figsize=(12, 12))  # Set figure size
sns.heatmap(cm2, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names)

# Add labels and title
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix Heatmap for VGG19')

# Display the plot
plt.show()

In [None]:
# Generate classification report comparing the true test labels with the ResNet50 predictions
print("Classification Report for ResNet50:")
print(classification_report(y_test, predictions3))

# Generate confusion matrix for the same label / prediction pair
print("Confusion Matrix for ResNet50:")
print(confusion_matrix(y_test, predictions3))

In [None]:
# Class names ordered by their label index, and the matching label indices
class_names = sorted(label_mapping, key=label_mapping.get)
class_ids = [label_mapping[name] for name in class_names]

# Generate confusion matrix; passing `labels` pins one row/column per known
# class even if a class does not occur in y_test or in the predictions
cm3 = confusion_matrix(y_test, predictions3, labels=class_ids)

# Create the heatmap plot with class names on both axes (the matrix has one
# row/column per class, so the tick labels must have exactly that length)
plt.figure(figsize=(12, 12))  # Set figure size
sns.heatmap(cm3, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names)

# Add labels and title
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix Heatmap for ResNet50')

# Display the plot
plt.show()

In [None]:
# Generate classification report comparing the true test labels with the MobileNet predictions
print("Classification Report for MobileNet:")
print(classification_report(y_test, predictions4))

# Generate confusion matrix for the same label / prediction pair
print("Confusion Matrix for MobileNet:")
print(confusion_matrix(y_test, predictions4))

In [None]:
# Class names ordered by their label index, and the matching label indices
class_names = sorted(label_mapping, key=label_mapping.get)
class_ids = [label_mapping[name] for name in class_names]

# Generate confusion matrix; passing `labels` pins one row/column per known
# class even if a class does not occur in y_test or in the predictions
cm4 = confusion_matrix(y_test, predictions4, labels=class_ids)

# Create the heatmap plot with class names on both axes (the matrix has one
# row/column per class, so the tick labels must have exactly that length)
plt.figure(figsize=(12, 12))  # Set figure size
sns.heatmap(cm4, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names)

# Add labels and title
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix Heatmap for MobileNet')

# Display the plot
plt.show()

In [None]:
# Persist every trained model to an HDF5 file in the working directory.
# The ResNet50 model is included so that all five evaluated models are saved.
model.save('Model_CNN.h5')
model_vgg16.save('Model_VGG16.h5')
model_vgg19.save('Model_VGG19.h5')
model_resnet50.save('Model_ResNet50.h5')
model_mobilenet.save('Model_MobileNet.h5')