In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import zipfile
import os

ZIP_PATH = "/content/drive/MyDrive/archive.zip"  # <-- CHANGE if needed
EXTRACT_PATH = "/content/asl_dataset"

os.makedirs(EXTRACT_PATH, exist_ok=True)

with zipfile.ZipFile(ZIP_PATH, 'r') as zip_ref:
    zip_ref.extractall(EXTRACT_PATH)

print("Extracted to:", EXTRACT_PATH)
print("Folders:", os.listdir(EXTRACT_PATH))


Extracted to: /content/asl_dataset
Folders: ['asl_alphabet_train', 'asl_alphabet_test']


In [4]:
TRAIN_DIR = f"{EXTRACT_PATH}/asl_alphabet_train"
TEST_DIR = f"{EXTRACT_PATH}/asl_alphabet_test"


In [5]:
import os

print("Train exists?", os.path.exists(TRAIN_DIR))
print("Train folders:", os.listdir(TRAIN_DIR))
print("-" * 50)
print("Test exists?", os.path.exists(TEST_DIR))
print("Test folders:", os.listdir(TEST_DIR))


Train exists? True
Train folders: ['asl_alphabet_train']
--------------------------------------------------
Test exists? True
Test folders: ['asl_alphabet_test']


In [11]:
# Cell 1: Imports (run this first)

import os
import time
import numpy as np
import cv2
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator


In [7]:
import tensorflow as tf

print("TF version:", tf.__version__)
print("GPUs:", tf.config.list_physical_devices('GPU'))


TF version: 2.19.0
GPUs: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [26]:
# Cell 2: Configuration (UPDATED)

import os

TRAIN_DIR = "/content/asl_dataset/asl_alphabet_train/asl_alphabet_train"
print("TRAIN_DIR:", TRAIN_DIR)
print("Exists?", os.path.exists(TRAIN_DIR))

# Peek inside to see what’s there
print("Sample contents:", os.listdir(TRAIN_DIR)[:20])

IMG_HEIGHT = 200
IMG_WIDTH = 200
BATCH_SIZE = 32
EPOCHS = 30

# Our target classes
CLASSES = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ") + ["Space", "Del", "Nothing"]

print("Number of classes:", len(CLASSES))
print("Classes:", CLASSES)


TRAIN_DIR: /content/asl_dataset/asl_alphabet_train/asl_alphabet_train
Exists? True
Sample contents: ['J', 'V', 'G', 'H', 'M', 'B', 'L', 'X', 'space', 'U', 'E', 'A', 'S', 'P', 'D', 'O', 'Y', 'del', 'I', 'Z']
Number of classes: 29
Classes: ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'Space', 'Del', 'Nothing']


In [27]:
# Cell 3: Create train & validation generators

datagen = ImageDataGenerator(
    rescale=1.0/255.0,
    validation_split=0.2,
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    shear_range=0.1,
    horizontal_flip=True,
    fill_mode="nearest"
)

train_generator = datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    subset="training",
    shuffle=True,
    classes=CLASSES  # force class order
)

val_generator = datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    subset="validation",
    shuffle=False,
    classes=CLASSES
)


Found 62400 images belonging to 29 classes.
Found 15600 images belonging to 29 classes.


In [28]:
# Cell 4: Define an improved CNN model with BatchNorm & Dropout

num_classes = train_generator.num_classes   # should be 29
print("Number of classes in generator:", num_classes)

from tensorflow.keras import layers, models

model = models.Sequential([
    layers.Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3)),

    # Block 1
    layers.Conv2D(32, (3, 3), padding="same"),
    layers.BatchNormalization(),
    layers.Activation("relu"),
    layers.MaxPooling2D((2, 2)),

    # Block 2
    layers.Conv2D(64, (3, 3), padding="same"),
    layers.BatchNormalization(),
    layers.Activation("relu"),
    layers.MaxPooling2D((2, 2)),

    # Block 3
    layers.Conv2D(128, (3, 3), padding="same"),
    layers.BatchNormalization(),
    layers.Activation("relu"),
    layers.MaxPooling2D((2, 2)),

    # Block 4 (light)
    layers.Conv2D(256, (3, 3), padding="same"),
    layers.BatchNormalization(),
    layers.Activation("relu"),
    layers.MaxPooling2D((2, 2)),

    layers.Flatten(),
    layers.Dense(512),
    layers.BatchNormalization(),
    layers.Activation("relu"),
    layers.Dropout(0.5),

    layers.Dense(num_classes, activation="softmax")
])

# If mixed precision is enabled, optimizer will handle scaling
opt = keras.optimizers.Adam(learning_rate=1e-3)

model.compile(
    optimizer=opt,
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

model.summary()


Number of classes in generator: 29


In [None]:
# Cell 5: Train the model with callbacks (best model selection)

from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

BEST_MODEL_PATH = "asl_best_model.h5"

checkpoint_cb = ModelCheckpoint(
    BEST_MODEL_PATH,
    monitor="val_accuracy",
    mode="max",
    save_best_only=True,
    verbose=1
)

earlystop_cb = EarlyStopping(
    monitor="val_loss",
    mode="min",
    patience=5,          # stop if no improvement for 5 epochs
    restore_best_weights=True,
    verbose=1
)

reducelr_cb = ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.3,
    patience=3,          # after 3 bad epochs → reduce LR
    min_lr=1e-6,
    verbose=1
)

steps_per_epoch = train_generator.samples // BATCH_SIZE
validation_steps = val_generator.samples // BATCH_SIZE

history = model.fit(
    train_generator,
    epochs=EPOCHS,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_generator,
    validation_steps=validation_steps,
    callbacks=[checkpoint_cb, earlystop_cb, reducelr_cb]
)


Epoch 1/30
[1m 193/1950[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m9:08[0m 312ms/step - accuracy: 0.1292 - loss: 3.4109

In [None]:
# Cell 6: Save class indices (model already saved via checkpoint)

CLASS_INDICES_PATH = "asl_class_indices.npy"

np.save(CLASS_INDICES_PATH, train_generator.class_indices)

print("✅ Saved best model to:", BEST_MODEL_PATH)
print("✅ Saved class indices to:", CLASS_INDICES_PATH)
print("Class indices:", train_generator.class_indices)


In [None]:
# Cell 7: Load BEST model + class mapping & evaluate

from tensorflow import keras
import numpy as np

BEST_MODEL_PATH = "asl_best_model.h5"
CLASS_INDICES_PATH = "asl_class_indices.npy"

model = keras.models.load_model(BEST_MODEL_PATH)
class_indices = np.load(CLASS_INDICES_PATH, allow_pickle=True).item()

index_to_class = {v: k for k, v in class_indices.items()}

print("✅ Loaded best model from:", BEST_MODEL_PATH)
print("Classes (index_to_class):", index_to_class)

# Quick evaluation on validation set
val_loss, val_acc = model.evaluate(val_generator, verbose=1)
print(f"Validation loss: {val_loss:.4f}")
print(f"Validation accuracy: {val_acc:.4f}")


In [None]:
# Cell 8: Prediction helper for one frame (ROI from camera)

def preprocess_frame(frame):
    # frame is BGR from OpenCV
    img = cv2.resize(frame, (IMG_WIDTH, IMG_HEIGHT))
    img = img.astype("float32") / 255.0
    img = np.expand_dims(img, axis=0)
    return img

def predict_label(frame):
    """
    frame: ROI (BGR) with hand sign
    returns: label (A-Z, Space, Del, Nothing), confidence
    """
    img = preprocess_frame(frame)
    preds = model.predict(img, verbose=0)[0]
    idx = np.argmax(preds)
    conf = float(preds[idx])
    label = index_to_class[idx]
    return label, conf


In [None]:
# Cell 9: Live ASL detection using webcam

CAPTURE_DURATION = 2.0   # seconds per capture
MIN_CONFIDENCE = 0.5     # adjust if needed

def run_asl_live():
    cap = cv2.VideoCapture(0)  # change to 1 if external cam

    if not cap.isOpened():
        print("Error: Cannot open camera")
        return

    current_word = ""
    sentence = ""
    last_sentence = ""
    capturing = False
    start_time = None
    predictions_window = []
    last_action = ""

    print("Controls:")
    print("  's'  - start capturing next sign (2 seconds)")
    print("  'c'  - clear current word & sentence")
    print("  'q'  - quit")
    print("")
    print("Sign meanings:")
    print("  A-Z     -> letters")
    print("  Space   -> space (end word / add space)")
    print("  Del     -> delete last character")
    print("  Nothing -> ENTER (finalize sentence)")

    while True:
        ret, frame = cap.read()
        if not ret:
            print("Failed to grab frame")
            break

        h, w, _ = frame.shape

        # Central ROI box
        box_size = int(min(h, w) * 0.5)
        x1 = w // 2 - box_size // 2
        y1 = h // 2 - box_size // 2
        x2 = x1 + box_size
        y2 = y1 + box_size

        roi = frame[y1:y2, x1:x2]

        # Capture logic
        if capturing:
            elapsed = time.time() - start_time
            if elapsed <= CAPTURE_DURATION:
                if roi.size != 0:
                    label, conf = predict_label(roi)
                    if conf >= MIN_CONFIDENCE:
                        predictions_window.append(label)

                cv2.putText(frame, "CAPTURING...", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
            else:
                capturing = False
                final_label = None
                if predictions_window:
                    final_label = max(set(predictions_window),
                                      key=predictions_window.count)
                predictions_window = []

                if final_label is not None:
                    if final_label in list("ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
                        current_word += final_label
                        last_action = f"Letter: {final_label}"
                        print(f"Captured letter: {final_label}")

                    elif final_label == "Space":
                        if current_word:
                            sentence += current_word + " "
                            print("Word added to sentence:", current_word)
                            current_word = ""
                        else:
                            sentence += " "
                        last_action = "Space (word/space added)"

                    elif final_label == "Del":
                        if current_word:
                            current_word = current_word[:-1]
                            last_action = "Deleted last char in word"
                        else:
                            sentence = sentence[:-1]
                            last_action = "Deleted last char in sentence"
                        print("Delete action.")

                    elif final_label == "Nothing":
                        last_sentence = sentence.strip()
                        print("Sentence ENTERED:", last_sentence)
                        current_word = ""
                        sentence = ""
                        last_action = "Entered sentence (Nothing)"

        # Draw ROI box
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Draw text overlays
        cv2.putText(frame, f"Word: {current_word}", (10, h - 70),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
        cv2.putText(frame, f"Sentence: {sentence}", (10, h - 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 0), 2)
        cv2.putText(frame, f"Last: {last_action}", (10, h - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)

        if last_sentence:
            cv2.putText(frame, f"Entered: {last_sentence}",
                        (10, 40), cv2.FONT_HERSHEY_SIMPLEX,
                        0.7, (0, 200, 255), 2)

        cv2.imshow("ASL Live", frame)

        key = cv2.waitKey(1) & 0xFF

        if key == ord('q'):
            break

        if key == ord('s') and not capturing:
            capturing = True
            start_time = time.time()
            predictions_window = []
            last_action = "Capturing started"

        if key == ord('c'):
            current_word = ""
            sentence = ""
            last_sentence = ""
            last_action = "Cleared all"
            print("Cleared word/sentence.")

    cap.release()
    cv2.destroyAllWindows()
    print("Final sentence buffer:", sentence)
    print("Last entered sentence:", last_sentence)


In [None]:
# Cell 10: Start live ASL detection

run_asl_live()
