In [2]:
import os
import random
import cv2
import numpy as np
from tensorflow.keras import layers, models, optimizers, regularizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.preprocessing import image

#Downloading YOLO dataset from roboflow

In [3]:
!pip install -q roboflow
from google.colab import userdata
from roboflow import Roboflow

rf = Roboflow(api_key="IVbw8GePPFfYhH9xk6mu")
project = rf.workspace("majorproject-25tao").project("american-sign-language-v36cz")
version = project.version(2)
dataset = version.download("yolov11", location='dataset')

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/80.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m80.9/80.9 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.8/66.8 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in dataset to yolov11:: 100%|██████████| 462106/462106 [00:24<00:00, 19205.30it/s]





Extracting Dataset Version Zip to dataset in yolov11:: 100%|██████████| 40560/40560 [00:08<00:00, 4656.34it/s]


#YOLO to CNN conversion

In [4]:
class_names = [
    "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P",
    "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "additional", "alcohol", "allergy",
    "bacon", "bag", "barbecue", "bill", "biscuit", "bitter", "bread", "burger", "bye",
    "cake", "cash", "cheese", "chicken", "coke", "cold", "cost", "coupon", "credit card",
    "cup", "dessert", "drink", "drive", "eat", "eggs", "enjoy", "fork", "french fries",
    "fresh", "hello", "hot", "icecream", "ingredients", "juicy", "ketchup", "lactose",
    "lettuce", "lid", "manager", "menu", "milk", "mustard", "napkin", "no", "order",
    "pepper", "pickle", "pizza", "please", "ready", "receipt", "refill", "repeat", "safe",
    "salt", "sandwich", "sauce", "small", "soda", "sorry", "spicy", "spoon", "straw",
    "sugar", "sweet", "thank-you", "tissues", "tomato", "total", "urgent", "vegetables",
    "wait", "warm", "water", "what", "would", "yoghurt", "your"
]

In [5]:
def yolo_to_cnn_format(yolo_annotation, img_width, img_height):
    class_id, x_center, y_center, width, height = map(float, yolo_annotation)
    x_center, y_center, width, height = (
        x_center * img_width,
        y_center * img_height,
        width * img_width,
        height * img_height,
    )
    x_min = int(x_center - width / 2)
    y_min = int(y_center - height / 2)
    x_max = int(x_center + width / 2)
    y_max = int(y_center + height / 2)
    return int(class_id), x_min, y_min, x_max, y_max

def convert_dataset(images_dir, labels_dir, output_dir, class_names, target_size=(255, 255)):
    os.makedirs(output_dir, exist_ok=True)
    for label_file in os.listdir(labels_dir):
        img_file = label_file.replace('.txt', '.jpg')
        img_path = os.path.join(images_dir, img_file)
        label_path = os.path.join(labels_dir, label_file)

        if not os.path.exists(img_path):
            print(f"Image file {img_path} not found, skipping.")
            continue

        img = cv2.imread(img_path)
        img_height, img_width, _ = img.shape

        with open(label_path, 'r') as f:
            for line in f.readlines():
                class_id, x_min, y_min, x_max, y_max = yolo_to_cnn_format(
                    line.strip().split(), img_width, img_height
                )

                class_name = class_names[class_id]
                class_dir = os.path.join(output_dir, class_name)
                os.makedirs(class_dir, exist_ok=True)

                cropped_img = img[y_min:y_max, x_min:x_max]
                resized_img = cv2.resize(cropped_img, target_size)

                output_img_path = os.path.join(
                    class_dir, f"{os.path.splitext(img_file)[0]}_{x_min}_{y_min}.jpg"
                )
                cv2.imwrite(output_img_path, resized_img)


In [6]:
#train set conversion
images_dir = "/content/dataset/train/images"
labels_dir = "/content/dataset/train/labels"
output_dir = "/content/train"

convert_dataset(images_dir, labels_dir, output_dir, class_names, target_size=(255, 255))

In [7]:
#validation set conversion
images_dir = "/content/dataset/valid/images"
labels_dir = "/content/dataset/valid/labels"
output_dir = "/content/valid"

convert_dataset(images_dir, labels_dir, output_dir, class_names, target_size=(255, 255))

In [8]:
#test set conversion
images_dir = "/content/dataset/test/images"
labels_dir = "/content/dataset/test/labels"
output_dir = "/content/test"

convert_dataset(images_dir, labels_dir, output_dir, class_names, target_size=(255, 255))

In [None]:
#!zip -r test_dataset_CNN.zip test/
#from google.colab import files
#files.download('test_dataset_CNN.zip')

In [9]:
train_dir = '/content/train'
val_dir = '/content/valid'

In [16]:
#Data Augmentation and Preprocessing
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

val_datagen = ImageDataGenerator(rescale=1./255)

#train and validation generators
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(255, 255),
    batch_size=32,
    class_mode='sparse'
)

val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=(255, 255),
    batch_size=32,
    class_mode='sparse'
)

Found 18111 images belonging to 106 classes.
Found 1566 images belonging to 106 classes.


In [17]:
#Calculate Class Weights
#Use the training generator to get the labels
training_labels = train_generator.classes
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(training_labels),
    y=training_labels
)
class_weights = dict(enumerate(class_weights))

#Custom CNN Model

In [18]:
#Custom CNN model
model = models.Sequential()

#convolutional and pooling layers with L2 regularization
model.add(layers.Conv2D(32, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.001), input_shape=(255, 255, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.001)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.001)))
model.add(layers.MaxPooling2D((2, 2)))

# Flatten and Dense layers
model.add(layers.Flatten())
model.add(layers.Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
model.add(layers.Dropout(0.3))  # Dropout to prevent overfitting
model.add(layers.Dense(106, activation='softmax'))  # Output layer

Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.


In [19]:
# Compile the model
model.compile(
    optimizer=optimizers.Adam(learning_rate=1e-4),
    loss='sparse_categorical_crossentropy',  # For integer-encoded labels
    metrics=['accuracy']
)

In [20]:
# EarlyStopping to stop training if validation accuracy doesn't improve
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,  # Number of epochs with no improvement
    restore_best_weights=True
)

# Learning rate reduction on plateau to reduce learning rate if no improvement
lr_reduction = ReduceLROnPlateau(
    monitor='val_loss',
    patience=3,
    factor=0.1,
    min_lr=1e-6
)

In [21]:
#Model Training with Class Weights and Callbacks
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // train_generator.batch_size,
    epochs=30,
    validation_data=val_generator,
    validation_steps=val_generator.samples // val_generator.batch_size,
    class_weight=class_weights,
    callbacks=[early_stopping, lr_reduction],
)

Epoch 1/30


Your `PyDataset` class should call `super().__init__(**kwargs)` in its constructor. `**kwargs` can include `workers`, `use_multiprocessing`, `max_queue_size`. Do not pass these arguments to `fit()`, as they will be ignored.


[1m565/565[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m298s[0m 515ms/step - accuracy: 0.0757 - loss: 4.7348 - val_accuracy: 0.4512 - val_loss: 2.6194 - learning_rate: 1.0000e-04
Epoch 2/30
[1m  1/565[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m40s[0m 71ms/step - accuracy: 0.3438 - loss: 3.6621

Your input ran out of data; interrupting training. Make sure that your dataset or generator can generate at least `steps_per_epoch * epochs` batches. You may need to use the `.repeat()` function when building your dataset.


[1m565/565[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.3438 - loss: 3.6621 - val_accuracy: 0.4000 - val_loss: 2.6148 - learning_rate: 1.0000e-04
Epoch 3/30
[1m565/565[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m303s[0m 488ms/step - accuracy: 0.3106 - loss: 3.0848 - val_accuracy: 0.6400 - val_loss: 1.9447 - learning_rate: 1.0000e-04
Epoch 4/30
[1m565/565[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.5000 - loss: 2.6027 - val_accuracy: 0.5667 - val_loss: 2.1315 - learning_rate: 1.0000e-04
Epoch 5/30
[1m565/565[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m319s[0m 488ms/step - accuracy: 0.4190 - loss: 2.6166 - val_accuracy: 0.6940 - val_loss: 1.6592 - learning_rate: 1.0000e-04
Epoch 6/30
[1m565/565[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65us/step - accuracy: 0.4375 - loss: 2.8713 - val_accuracy: 0.6000 - val_loss: 1.9899 - l

In [22]:
model.summary()

In [31]:
#from google.colab import files

#model.save('custom_cnn.h5')
#files.download('custom_cnn.h5')

#Training and Validation metrics

In [24]:
#model evaluation
train_loss, train_accuracy = model.evaluate(train_generator)
print(f"Train Loss: {train_loss}")
print(f"Train Accuracy: {train_accuracy}")

val_loss, val_accuracy = model.evaluate(val_generator)
print(f"Validation Loss: {val_loss}")
print(f"Validation Accuracy: {val_accuracy}")

[1m566/566[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m276s[0m 487ms/step - accuracy: 0.7443 - loss: 1.4325
Train Loss: 1.4262231588363647
Train Accuracy: 0.7428082227706909
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 75ms/step - accuracy: 0.8734 - loss: 1.0105
Validation Loss: 1.0378884077072144
Validation Accuracy: 0.85887610912323


#Model evaluation on Test set

In [25]:
test_dir = '/content/test'

In [27]:
# Data augmentation for test set (only rescaling in this case)
test_datagen = ImageDataGenerator(rescale=1./255)

# Test data generator
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(255, 255),
    batch_size=32,
    class_mode='sparse',
    shuffle=False
)

Found 842 images belonging to 106 classes.


In [28]:
test_loss, test_accuracy = model.evaluate(test_generator, steps=test_generator.samples // test_generator.batch_size)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 63ms/step - accuracy: 0.7563 - loss: 1.4314
Test Loss: 1.085860013961792
Test Accuracy: 0.8521634340286255


#Prediction on a sample from test set

In [30]:
img_path = '/content/test/burger/augmented_video_11_shifting_mp4-6_jpg.rf.69232f921ae38c375ad0dee43bc30f02_237_270.jpg'
#img_path = '/content/test/thank-you/augmented_video_4_darkness_mp4-6_jpg.rf.a4fd4af6858cdfd44c8058f797a1e014_276_246.jpg'

img = image.load_img(img_path, target_size=(255, 255))
img_array = image.img_to_array(img) / 255.0
img_array = np.expand_dims(img_array, axis=0)

actual_class_name = img_path.split('/')[-2]
actual_class_index = test_generator.class_indices[actual_class_name]
print(f"Actual Class Index: {actual_class_index}")
print(f"Actual Class Name: {actual_class_name}")

#prediction
predictions = model.predict(img_array)
predicted_class = np.argmax(predictions)
print(f"Predicted Class Index: {predicted_class}")
class_names = test_generator.class_indices
class_names = {v: k for k, v in class_names.items()}
print(f"Predicted Class Name: {class_names[predicted_class]}")

Actual Class Index: 36
Actual Class Name: burger
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Predicted Class Index: 36
Predicted Class Name: burger
