In [None]:
import numpy as np
import pandas as pd

import os
import cv2

import splitfolders

from tensorflow.keras import models, layers, Input
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization, Flatten, Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.applications import MobileNet
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.optimizers import Nadam
from tensorflow.keras.regularizers import l2

from tensorflow.keras.applications.mobilenet import preprocess_input

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import f1_score, recall_score, accuracy_score

import matplotlib.pyplot as plt
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply matplotlib's 'bmh' style sheet to every figure drawn below.
plt.style.use('bmh')

In [None]:
# Make sure the output root for the train/val/test split exists.
os.makedirs(name='datasets', exist_ok=True)

# Disabled sanity check: walks the whole raw input tree and prints every file.
"""for dirname, _, filenames in os.walk('Data/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))"""

# The raw training directory holds one sub-folder per class,
# so listing it prints the label names.
raw_class_folders = os.listdir('Data/kaggle/input/asl-alphabet/asl_alphabet_train')
print(raw_class_folders)

In [None]:
# Split the raw Kaggle training images into train/val/test folders (80/10/10).
# The split used to be disabled inside a string literal, so a fresh
# "Restart & Run All" had no datasets/asl_alphabet/* folders to read from.
# It now runs only when the output is missing, which keeps re-runs cheap.
train_src = "Data/kaggle/input/asl-alphabet/asl_alphabet_train/"
split_output = "datasets/asl_alphabet"

# NOTE: only the presence of the 'train' folder is checked; delete the output
# directory to force a clean re-split after an interrupted run.
if not os.path.isdir(os.path.join(split_output, "train")):
    splitfolders.ratio(train_src, output=split_output,
        seed=1337, ratio=(.8, .1, .1), group_prefix=None, move=False) # default values

In [None]:
# Locations of the three dataset splits read by the data generators.
train_dir, val_dir, test_dir = (
    'datasets/asl_alphabet/train',
    'datasets/asl_alphabet/val',
    'datasets/asl_alphabet/test',
)

In [None]:
batch_size = 32
target_size = (32,32) # dataset pic = 200x200

# Only the training data is augmented (random horizontal flip).
# Validation and test data are just rescaled: random augmentation there makes
# val_accuracy / val_loss noisy (they drive ModelCheckpoint and EarlyStopping)
# and makes the reported test scores irreproducible.
train_datagen = ImageDataGenerator(rescale=1./255, horizontal_flip=True)
val_datagen   = ImageDataGenerator(rescale=1./255)
test_datagen  = ImageDataGenerator(rescale=1./255)

# Training batches are shuffled; labels are one-hot ('categorical').
train_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=target_size,
        batch_size=batch_size,
        color_mode="rgb",
        class_mode='categorical',
        shuffle=True)

# Validation and test keep directory order (shuffle=False) so that predictions
# line up with generator.classes.
val_generator = val_datagen.flow_from_directory(
        val_dir,
        target_size=target_size,
        batch_size=batch_size,
        color_mode="rgb",
        class_mode='categorical',
        shuffle=False)

test_generator = test_datagen.flow_from_directory(
        test_dir,
        target_size=target_size,
        batch_size=batch_size,
        color_mode="rgb",
        class_mode='categorical',
        shuffle=False)

In [None]:
# Class names, taken from the keys of the generator's class -> index mapping.
labels = [class_name for class_name in train_generator.class_indices]
print(labels)

In [None]:
TRAIN_PATH = train_dir
def sample_images(labels, n_samples=8):
    """Plot a grid with one row per label: the label name, then sample images.

    Images are read from ``TRAIN_PATH/<label>/`` in sorted filename order, so
    the same pictures are shown on every run.

    Args:
        labels: Sequence of class folder names under ``TRAIN_PATH``.
        n_samples: Number of images to show per label (default 8).

    Raises:
        ValueError: If ``labels`` is empty.
    """
    if len(labels) == 0:
        raise ValueError("sample_images needs at least one label")

    # Create Subplots
    # NOTE(review): this value is the figure *width* (first figsize element) and
    # shrinks with fewer labels while the height stays 13 — kept as in the original.
    fig_width = 12
    if len(labels) < 10:
        fig_width = fig_width * len(labels) / 10
    # squeeze=False keeps `axs` two-dimensional even for a single label;
    # without it axs[i, 0] fails when only one row is requested.
    fig, axs = plt.subplots(len(labels), n_samples + 1,
                            figsize=(fig_width, 13), squeeze=False)

    for i, label in enumerate(labels):
        # First column: the label text itself.
        axs[i, 0].text(0.5, 0.5, label, ha='center', va='center', fontsize=8)
        axs[i, 0].axis('off')

        label_path = os.path.join(TRAIN_PATH, label)
        # os.listdir order is arbitrary; sort for a reproducible preview.
        list_files = sorted(os.listdir(label_path))

        for j in range(n_samples):
            ax = axs[i, j+1]
            ax.axis("off")

            # Fewer files than requested: leave the remaining cells blank.
            if j >= len(list_files):
                continue

            img_label = cv2.imread(os.path.join(label_path, list_files[j]))
            # cv2.imread returns None for unreadable / non-image files.
            if img_label is None:
                continue

            # OpenCV loads BGR; matplotlib expects RGB.
            img_label = cv2.cvtColor(img_label, cv2.COLOR_BGR2RGB)
            ax.imshow(img_label)

    # Title
    plt.suptitle("Sample Images in ASL Alphabet Dataset", x=0.55, y=0.92)

    # Show
    plt.show()

In [None]:
# Preview only the first ten classes.
preview_labels = labels[:10]
sample_images(preview_labels)

CNN Model

In [None]:
num_classes = len(labels)
input_shape = (32,32,3)

input_layer = layers.Input(shape=input_shape)

# Build Model: three Conv -> MaxPool -> BatchNorm stages with 32, 64 and 128
# filters, followed by a small dense classifier head.
model = models.Sequential()
model.add(input_layer)

for n_filters in (32, 64, 128):
    model.add(Conv2D(n_filters, kernel_size=(3, 3), activation='relu', padding='same'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(BatchNormalization())

# fully-connected layers
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
# One softmax output per class.
model.add(layers.Dense(num_classes, activation='softmax'))

# Compile Model
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

model.summary()

In [None]:
# File that receives the model each time validation accuracy improves.
checkpoint_path = "best_model.keras"

# Keep only the best model seen so far, judged by the highest val_accuracy.
checkpoint = ModelCheckpoint(
    checkpoint_path,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

In [None]:
# Compile Model (same settings as the compile call in the model-building cell).
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the CNN for 10 epochs; the checkpoint callback saves the best epoch.
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=val_generator,
    callbacks=[checkpoint],
)

In [None]:
# evaluate() returns the loss followed by the compiled metrics (accuracy here).
scores = model.evaluate(test_generator)
test_loss, test_accuracy = scores[0], scores[1]
print('Test loss: ', test_loss)
print('Test accuracy: ', test_accuracy)

In [None]:
# Persist the trained CNN; the ensemble section loads it from this file.
cnn_model_path = "asl_alphabet_cnn.h5"
model.save(cnn_model_path)

MobileNet Transfer Learning

In [None]:
image_size = 32
batch_size = 32
num_classes = 29

base_model = MobileNet(weights='imagenet', include_top=False, input_shape=(image_size, image_size, 3))
x = GlobalAveragePooling2D()(base_model.output)
output = Dense(num_classes, activation='softmax')(x)  # Output layer with softmax activation
mobile_model = Model(inputs=base_model.input, outputs=output)

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(train_generator, epochs=5, batch_size=32, validation_data=val_generator)

In [None]:
# Define checkpoint path
checkpoint_path = "best_model.keras"

# Create ModelCheckpoint callback
checkpoint = ModelCheckpoint(checkpoint_path,
                             monitor='val_accuracy',
                             verbose=1,
                             save_best_only=True,
                             mode='max')

model.save("asl_alphabet_mobilenet.h5")

KNN

In [None]:
def flatten_data(generator):
    """Flatten every batch of an indexable batch source to 2-D arrays.

    Args:
        generator: Object supporting ``len()`` and integer indexing, where
            ``generator[i]`` yields a ``(batch_x, batch_y)`` pair and
            ``batch_x`` has the batch dimension first.

    Returns:
        Tuple ``(flat_batches, label_batches)``: two lists with one entry per
        batch. Each ``flat_batches`` entry has shape ``(batch, features)``;
        each ``label_batches`` entry is the batch's labels, untouched.
    """
    total_batches = len(generator)
    flat_batches, label_batches = [], []

    for batch_no in range(1, total_batches + 1):
        images, batch_labels = generator[batch_no - 1]

        # Keep the batch axis, collapse all remaining axes into one.
        flat_batches.append(images.reshape(images.shape[0], -1))
        label_batches.append(batch_labels)

        # "\r" rewrites the same console line as a lightweight progress display.
        print(f"Flattening progress: {batch_no} of {total_batches} batches", end="\r", flush=True)

    return flat_batches, label_batches

In [None]:
# Flatten the training batches into per-batch feature and label lists for KNN.
train_flat_result = flatten_data(train_generator)
train_x_flat_batches, train_y_batches = train_flat_result

In [None]:
# Flatten the validation batches in the same way as the training batches.
val_flat_result = flatten_data(val_generator)
val_x_flat_batches, val_y_batches = val_flat_result

In [None]:
# Prepare the test split: flatten batch by batch, then join each list of
# batches into one array.
test_x_flat_batches, test_y_batches = flatten_data(test_generator)
test_x_flat, test_y = (
    np.concatenate(test_x_flat_batches),
    np.concatenate(test_y_batches),
)

In [None]:
test_david = 'datasets/asl_alphabet/test_david'

# Evaluation data must be deterministic. The previous random rotation / flip
# meant the KNN features (one pass over the generator) and each network's
# predict() call (separate passes) saw differently transformed images, so the
# ensemble averaged predictions for mismatched inputs and the metrics changed
# from run to run. Only the rescaling shared with the other generators is kept.
test_david_datagenerator = ImageDataGenerator(
    rescale = 1./255
    )

# shuffle=False keeps predictions aligned with test_david_generator.classes.
test_david_generator = test_david_datagenerator.flow_from_directory(
    test_david,
    target_size = (32, 32),
    class_mode = 'categorical',
    shuffle = False
)

In [None]:
# Flatten the "david" test set, then merge its batches into single arrays.
test_d_flat_batches, test_d_y_batches = flatten_data(test_david_generator)
test_d_flat, test_d_y = (
    np.concatenate(test_d_flat_batches),
    np.concatenate(test_d_y_batches),
)

In [None]:
# Join the per-batch lists into one feature matrix and one label matrix per split.
train_x_flat, train_y = np.concatenate(train_x_flat_batches), np.concatenate(train_y_batches)
val_x_flat, val_y = np.concatenate(val_x_flat_batches), np.concatenate(val_y_batches)

# 1-nearest-neighbour classifier on the raw flattened pixels.
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(train_x_flat, train_y)

# Score on the data the model was fitted on.
train_accuracy = knn.score(train_x_flat, train_y)
print("Training Accuracy: ", train_accuracy)

# Evaluate the model on the held-out validation split.
val_accuracy = knn.score(val_x_flat, val_y)
print("Validation Accuracy: ", val_accuracy)

VGG

In [None]:
# ImageNet-pretrained VGG19 backbone (no classifier head, global max pooling),
# frozen so that only the new head is trained.
base_model = VGG19(weights='imagenet', include_top=False, input_shape=(32, 32, 3), pooling='max')
base_model.trainable = False

# training=False runs the frozen backbone in inference mode.
inputs = Input(shape=(32,32,3))
feature_maps = base_model(inputs, training=False)

# Custom head: three Dense -> Dropout stages of decreasing width.
x = feature_maps
for units in (512, 256, 128):
    x = Dense(units, activation='leaky_relu', kernel_regularizer=l2(0.0001), kernel_initializer='he_normal')(x)
    x = Dropout(0.2)(x)

# Softmax over the 29 classes.
predictions = Dense(29, activation='softmax',kernel_regularizer=l2(0.0001))(x)

# Rebinds `model`, which previously referred to the CNN, to the VGG network.
model = Model(inputs=inputs, outputs=predictions)

In [None]:
# Stop when val_loss has not improved for 5 epochs and roll back to the best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

model.compile(optimizer=Nadam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# batch_size and shuffle are omitted on purpose: train_generator already yields
# shuffled batches of its own batch size, and Keras documents that neither
# argument should be given for generator input.
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=val_generator,
    callbacks=[early_stopping],
)

Ensemble

In [None]:
# KNN predictions for the "david" test set.
# Alternative (standard test split): knn.predict(test_x_flat)
knn_eval_features = test_d_flat
predictions_knn = knn.predict(knn_eval_features)

In [None]:
# Load saved models
model_cnn = load_model("asl_alphabet_cnn.h5")
model_mobilenet = load_model("asl_alphabet_mobilenet.h5")

true_labels = test_david_generator.classes

num_train_samples = len(train_generator)
num_val_samples = len(val_generator)

predictions_cnn = model_cnn.predict(test_david_generator)
predictions_mobilenet = model_mobilenet.predict(test_david_generator)
predictions_vgg = model.predict(test_david_generator)

combined_predictions = (predictions_cnn + predictions_mobilenet + predictions_knn + predictions_vgg) / 4

ensemble_labels = np.argmax(combined_predictions, axis=1)

ensemble_accuracy_combined = accuracy_score(true_labels, ensemble_labels)

#print("Ensemble Loss (CNN + MobileNet + KNN):", ensemble_loss_combined)
print("Ensemble Accuracy (CNN + MobileNet + KNN):", ensemble_accuracy_combined)

In [None]:
# Ground truth and predicted class indices for the "david" test set.
true_labels = test_david_generator.classes
ensemble_predicted_labels = np.argmax(combined_predictions, axis=1)

# Weighted averaging weights each class by its number of true samples.
f1 = f1_score(true_labels, ensemble_predicted_labels, average='weighted')
recall = recall_score(true_labels, ensemble_predicted_labels, average='weighted')
accuracy = accuracy_score(true_labels, ensemble_predicted_labels)

for metric_name, metric_value in (("F1-score:", f1), ("Recall:", recall), ("Accuracy:", accuracy)):
    print(metric_name, metric_value)