In [1]:
import os
import cv2
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define the dataset path
dataset_path = r'C:\Users\ALOK RAI\OneDrive\Desktop\sign\asl_alphabet_train'

# Initialize lists to hold data and labels
data = []
labels = []

# Load the dataset
for label in os.listdir(dataset_path):
    label_path = os.path.join(dataset_path, label)
    if not os.path.isdir(label_path):
        continue  # Skip any non-directory files
    print(f"Processing directory: {label_path}")
    for image_path in os.listdir(label_path):
        # Read the image
        image = cv2.imread(os.path.join(label_path, image_path))
        if image is None:
            continue  # Skip if the image couldn't be read
        # Resize the image
        resized_image = cv2.resize(image, (64, 64))
        # Append the image and label to the lists
        data.append(resized_image)
        first_char = label[0].upper()
        label_index = ord(first_char) - ord('A')
        if label_index < 0 or label_index >= 26:
            print(f"Invalid label: {label} (converted to {label_index})")
        else:
            labels.append(label_index)

# Convert lists to numpy arrays
data = np.array(data)
labels = np.array(labels)

# Normalize the data
data = data / 255.0

# One-hot encode the labels
labels = np.eye(26)[labels]

# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

# Define the model
model = Sequential([
    Conv2D(64, (3, 3), activation='relu', input_shape=(64, 64, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(26, activation='softmax')
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# Create a data generator for data augmentation
train_datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Train the model
train_generator = train_datagen.flow(X_train, y_train, batch_size=32)
model.fit(train_generator, epochs=10, validation_data=(X_test, y_test))

# Save the model
model.save(r'C:\Users\ALOK RAI\OneDrive\Desktop\ML.h5')

Processing directory: C:\Users\ALOK RAI\OneDrive\Desktop\sign\asl_alphabet_train\A
Processing directory: C:\Users\ALOK RAI\OneDrive\Desktop\sign\asl_alphabet_train\B
Processing directory: C:\Users\ALOK RAI\OneDrive\Desktop\sign\asl_alphabet_train\C
Processing directory: C:\Users\ALOK RAI\OneDrive\Desktop\sign\asl_alphabet_train\D
Processing directory: C:\Users\ALOK RAI\OneDrive\Desktop\sign\asl_alphabet_train\del
Processing directory: C:\Users\ALOK RAI\OneDrive\Desktop\sign\asl_alphabet_train\E
Processing directory: C:\Users\ALOK RAI\OneDrive\Desktop\sign\asl_alphabet_train\F
Processing directory: C:\Users\ALOK RAI\OneDrive\Desktop\sign\asl_alphabet_train\G
Processing directory: C:\Users\ALOK RAI\OneDrive\Desktop\sign\asl_alphabet_train\H
Processing directory: C:\Users\ALOK RAI\OneDrive\Desktop\sign\asl_alphabet_train\I
Processing directory: C:\Users\ALOK RAI\OneDrive\Desktop\sign\asl_alphabet_train\J
Processing directory: C:\Users\ALOK RAI\OneDrive\Desktop\sign\asl_alphabet_train\K
Pr

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10


  self._warn_if_super_not_called()


[1m2025/2025[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m113s[0m 55ms/step - accuracy: 0.1841 - loss: 2.7378 - val_accuracy: 0.6538 - val_loss: 1.0588
Epoch 2/10
[1m2025/2025[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m106s[0m 52ms/step - accuracy: 0.5430 - loss: 1.3949 - val_accuracy: 0.7983 - val_loss: 0.5922
Epoch 3/10
[1m2025/2025[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 49ms/step - accuracy: 0.6716 - loss: 0.9692 - val_accuracy: 0.8567 - val_loss: 0.4192
Epoch 4/10
[1m2025/2025[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m100s[0m 49ms/step - accuracy: 0.7454 - loss: 0.7443 - val_accuracy: 0.9027 - val_loss: 0.2860
Epoch 5/10
[1m2025/2025[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 50ms/step - accuracy: 0.7936 - loss: 0.6131 - val_accuracy: 0.9069 - val_loss: 0.2634
Epoch 6/10
[1m2025/2025[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 48ms/step - accuracy: 0.8166 - loss: 0.5389 - val_accuracy: 0.9363 - val_loss: 0.1857
Epoch 7/10




In [2]:
import cv2
import numpy as np
import mediapipe as mp
from tensorflow.keras.models import load_model

# Load the trained model
model = load_model(r'C:\Users\ALOK RAI\OneDrive\Desktop\ML.h5')

# Dictionary to map the output of the model to corresponding sign language letters
sign_dict = {i: chr(65 + i) for i in range(26)}

# Initialize MediaPipe hands
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# Function to preprocess the detected hand region
def preprocess_frame(frame):
    try:
        # Resize the frame to 64x64 pixels
        resized_frame = cv2.resize(frame, (64, 64))
        # Normalize the frame
        normalized_frame = resized_frame / 255.0
        # Expand dimensions to match the input shape of the model
        return np.expand_dims(normalized_frame, axis=0)
    except cv2.error:
        print("Error resizing the frame. Skipping this frame.")
        return None

# Start capturing video from the webcam
cap = cv2.VideoCapture(0)

with mp_hands.Hands(max_num_hands=2, min_detection_confidence=0.5, min_tracking_confidence=0.5) as hands:
    while True:
        ret, frame = cap.read()

        if not ret:
            break

        # Flip the frame horizontally 
        frame = cv2.flip(frame, 1)

        # Convert the frame to RGB as MediaPipe requires it
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Process the frame to detect hands
        result = hands.process(rgb_frame)

        # If hands are detected
        if result.multi_hand_landmarks:
            for hand_landmarks, handedness in zip(result.multi_hand_landmarks, result.multi_handedness):
                # Get the bounding box of the hand
                x_min = min([lm.x for lm in hand_landmarks.landmark]) * frame.shape[1]
                y_min = min([lm.y for lm in hand_landmarks.landmark]) * frame.shape[0]
                x_max = max([lm.x for lm in hand_landmarks.landmark]) * frame.shape[1]
                y_max = max([lm.y for lm in hand_landmarks.landmark]) * frame.shape[0]

                # Convert to integer values
                x_min, y_min, x_max, y_max = int(x_min), int(y_min), int(x_max), int(y_max)

                # Extract the hand region
                hand_roi = frame[y_min:y_max, x_min:x_max]

                # Preprocess the hand region
                preprocessed_hand = preprocess_frame(hand_roi)
                if preprocessed_hand is not None:
                    # Make a prediction
                    prediction = model.predict(preprocessed_hand)

                    # Get the index with the highest probability
                    predicted_index = np.argmax(prediction)

                    # Get the corresponding letter
                    predicted_letter = sign_dict[predicted_index]

                    # Draw the hand landmarks on the frame
                    #mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                    # Display the predicted letter on the frame
                    cv2.putText(frame, predicted_letter, (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                    # Draw a rectangle around the hand
                    cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)

        # Display the frame
        cv2.imshow('Sign Language Detection', frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

# Release the webcam and close the window
cap.release()
cv2.destroyAllWindows()




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17