In [14]:
from google.colab import files
uploaded = files.upload()


Saving sign_mnist_train.csv to sign_mnist_train (2).csv


In [15]:
from google.colab import files
uploaded = files.upload()


Saving sign_mnist_test.csv to sign_mnist_test (2).csv


In [57]:
# Install required libraries for training, image processing, and display
!pip install tensorflow opencv-python-headless matplotlib



In [58]:
# Import necessary libraries
import numpy as np
import cv2
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model
from IPython.display import display, Javascript, clear_output
from google.colab import output
from base64 import b64decode
import matplotlib.pyplot as plt
import time

In [59]:
# Load the CSV files
try:
    train_df = pd.read_csv("sign_mnist_train.csv")
    test_df = pd.read_csv("sign_mnist_test.csv")
    print("CSV files loaded successfully.")
except FileNotFoundError:
    print("Error: sign_mnist_train.csv or sign_mnist_test.csv not found. Upload them:")
    from google.colab import files
    files.upload()
    train_df = pd.read_csv("sign_mnist_train.csv")
    test_df = pd.read_csv("sign_mnist_test.csv")

# Split features and labels
X_train = train_df.drop('label', axis=1).values
y_train = train_df['label'].values
X_test = test_df.drop('label', axis=1).values
y_test = test_df['label'].values

# Check unique labels to identify invalid values
print("Unique labels in y_train:", np.unique(y_train))
print("Unique labels in y_test:", np.unique(y_test))

# Filter out invalid labels (keep only 0-23)
valid_indices_train = (y_train >= 0) & (y_train < 24)
valid_indices_test = (y_test >= 0) & (y_test < 24)

X_train = X_train[valid_indices_train]
y_train = y_train[valid_indices_train]
X_test = X_test[valid_indices_test]
y_test = y_test[valid_indices_test]

print("After filtering:")
print("Unique labels in y_train:", np.unique(y_train))
print("Unique labels in y_test:", np.unique(y_test))

# Reshape to 28x28x1 and normalize
X_train = X_train.reshape(-1, 28, 28, 1) / 255.0
X_test = X_test.reshape(-1, 28, 28, 1) / 255.0

# One-hot encode labels (24 classes: A-Y, excluding J and Z)
y_train = to_categorical(y_train, num_classes=24)
y_test = to_categorical(y_test, num_classes=24)

print(f"Training data shape: {X_train.shape}, Labels shape: {y_train.shape}")
print(f"Test data shape: {X_test.shape}, Labels shape: {y_test.shape}")

CSV files loaded successfully.
Unique labels in y_train: [ 0  1  2  3  4  5  6  7  8 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24]
Unique labels in y_test: [ 0  1  2  3  4  5  6  7  8 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24]
After filtering:
Unique labels in y_train: [ 0  1  2  3  4  5  6  7  8 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
Unique labels in y_test: [ 0  1  2  3  4  5  6  7  8 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
Training data shape: (26337, 28, 28, 1), Labels shape: (26337, 24)
Test data shape: (6840, 28, 28, 1), Labels shape: (6840, 24)


In [60]:
# Build the CNN model
model = Sequential([
    Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
    MaxPooling2D(2,2),
    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(24, activation='softmax')  # 24 classes (A-Y, excluding J and Z)
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Train the model (~5-10 min with GPU; enable via Runtime > Change runtime type > GPU)
history = model.fit(X_train, y_train, epochs=5, batch_size=128, validation_data=(X_test, y_test))

# Save the model
model.save("sign_language_cnn.h5")
print("Model saved as sign_language_cnn.h5")

# Optional: Download the model
from google.colab import files
files.download("sign_language_cnn.h5")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/5
[1m206/206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 93ms/step - accuracy: 0.2341 - loss: 2.5759 - val_accuracy: 0.7652 - val_loss: 0.7847
Epoch 2/5
[1m206/206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 76ms/step - accuracy: 0.7381 - loss: 0.7939 - val_accuracy: 0.8639 - val_loss: 0.4311
Epoch 3/5
[1m206/206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 74ms/step - accuracy: 0.8635 - loss: 0.4098 - val_accuracy: 0.9066 - val_loss: 0.3015
Epoch 4/5
[1m206/206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 77ms/step - accuracy: 0.9190 - loss: 0.2468 - val_accuracy: 0.9294 - val_loss: 0.2374
Epoch 5/5
[1m206/206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 84ms/step - accuracy: 0.9429 - loss: 0.1773 - val_accuracy: 0.9424 - val_loss: 0.2235




Model saved as sign_language_cnn.h5


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [61]:
# Load the saved model
try:
    model = load_model('sign_language_cnn.h5')
    print("Model loaded successfully.")
except FileNotFoundError:
    print("Error: sign_language_cnn.h5 not found. Ensure Cell 4 ran successfully.")
    raise
except Exception as e:
    print(f"Error loading model: {str(e)}")
    raise



Model loaded successfully.


In [62]:
# Function to capture webcam frame
def capture_webcam_frame(filename='photo.jpg', quality=0.8):
    js = Javascript('''
        async function takePhoto(quality) {
            const div = document.createElement('div');
            const video = document.createElement('video');
            video.width = 400;
            video.height = 300;
            video.style.display = 'block';
            div.appendChild(video);

            const capture = document.createElement('button');
            capture.textContent = 'Capture';
            capture.style.margin = '10px';
            div.appendChild(capture);

            const stopButton = document.createElement('button');
            stopButton.textContent = 'Stop Webcam';
            stopButton.style.margin = '10px';
            div.appendChild(stopButton);

            document.body.appendChild(div);

            const stream = await navigator.mediaDevices.getUserMedia({video: true});
            video.srcObject = stream;
            await video.play();

            google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);

            return new Promise((resolve) => {
                capture.onclick = () => {
                    const canvas = document.createElement('canvas');
                    canvas.width = video.videoWidth;
                    canvas.height = video.videoHeight;
                    canvas.getContext('2d').drawImage(video, 0, 0);
                    resolve(canvas.toDataURL('image/jpeg', quality));
                };
                stopButton.onclick = () => {
                    stream.getTracks().forEach(track => track.stop());
                    div.remove();
                    resolve(null);
                };
            });
        }
    ''')
    display(js)
    data = output.eval_js('takePhoto({})'.format(quality))
    if data is None:
        return None
    binary = b64decode(data.split(',')[1])
    with open(filename, 'wb') as f:
        f.write(binary)
    return filename

In [63]:
# Function to process image and predict sign
def process_and_predict(image, model):
    # Crop to focus on hand (adjust x, y, w, h as needed)
    x, y, w, h = 50, 50, 300, 300  # Adjust based on webcam frame
    cropped = image[y:y+h, x:x+w]

    # Convert to grayscale
    gray = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)

    # Apply Gaussian blur
    blurred = cv2.GaussianBlur(gray, (15, 15), 0)

    # Resize to 28x28
    resized = cv2.resize(blurred, (28, 28), interpolation=cv2.INTER_AREA)

    # Normalize and reshape
    data = np.asarray(resized, dtype="float32") / 255.0
    data = np.expand_dims(data, axis=0)
    data = np.expand_dims(data, axis=-1)

    # Predict
    pred_probab = model.predict(data)[0]
    pred_class = np.argmax(pred_probab)
    probab = np.max(pred_probab)

    # Map class to letter (0=A, 1=B, ..., 8=I, 10=K, ..., 23=Y)
    if pred_class < 9:
        letter = chr(pred_class + 65)
    else:
        letter = chr(pred_class + 66)

    # Return "Unknown" if confidence is low
    if probab < 0.5:  # Adjustable threshold
        letter = "Unknown"

    return letter, probab, cropped

In [64]:
# Real-time webcam prediction with text overlay
while True:
    try:
        # Capture frame
        filename = capture_webcam_frame()
        if filename is None:
            print("Webcam stopped.")
            break

        # Load frame
        frame = cv2.imread(filename)
        if frame is None:
            print("Error: Failed to load captured image.")
            continue

        # Process and predict
        letter, probab, cropped = process_and_predict(frame, model)

        # Overlay prediction text on the frame
        text = f'Predicted: {letter} ({probab:.2f})'
        cv2.putText(frame, text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

        # Display frame with prediction
        plt.figure(figsize=(8, 4))
        plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        plt.title(text)
        plt.axis('off')
        plt.show()

        # Clear previous output for pseudo-real-time effect
        clear_output(wait=True)

        # Continue or stop
        cont = input("Capture another image? (y/n): ").strip().lower()
        if cont != 'y':
            break

    except Exception as err:
        print(f"Error: {str(err)}")
        break

Capture another image? (y/n): n
