In [3]:
import pandas as pd
import numpy as np
from tensorflow.keras.utils import to_categorical

# Load the training and test data.
# NOTE(review): these are absolute, machine-specific paths; the cell only runs
# where these exact files exist. Point them at your own copies of the CSVs.
train_file_path = 'C:/Users/saatv/downloads/archive/sign_mnist_train.csv'  # Replace with your actual training file path
test_file_path = 'C:/Users/saatv/downloads/archive/sign_mnist_test.csv'    # Replace with your actual test file path

# Width of the one-hot label vectors. Labels must therefore lie in
# [0, NUM_CLASSES - 1]; this also has to match the width of the softmax layer
# of any model trained on these labels.
NUM_CLASSES = 25

# Each CSV row holds one flattened IMAGE_SIDE x IMAGE_SIDE grayscale image.
IMAGE_SIDE = 28

train_data = pd.read_csv(train_file_path)
test_data = pd.read_csv(test_file_path)

# Separate features (pixels) and labels for training data
X_train = train_data.drop('label', axis=1).values  # Extract pixel values for training
y_train = train_data['label'].values  # Extract labels for training

# Separate features (pixels) and labels for test data
X_test = test_data.drop('label', axis=1).values  # Extract pixel values for testing
y_test = test_data['label'].values  # Extract labels for testing

# Guard the label range explicitly: to_categorical indexes into the one-hot
# matrix with the raw label, so a negative label would silently wrap around
# instead of failing.
assert y_train.min() >= 0 and y_train.max() < NUM_CLASSES, "training labels fall outside [0, NUM_CLASSES)"
assert y_test.min() >= 0 and y_test.max() < NUM_CLASSES, "test labels fall outside [0, NUM_CLASSES)"

# Reshape each flat pixel row to (IMAGE_SIDE, IMAGE_SIDE, 1) and scale the
# values by 1/255 (maps 8-bit pixel values into [0, 1])
X_train_reshaped = X_train.reshape(-1, IMAGE_SIDE, IMAGE_SIDE, 1).astype('float32') / 255.0
X_test_reshaped = X_test.reshape(-1, IMAGE_SIDE, IMAGE_SIDE, 1).astype('float32') / 255.0

# Convert labels to categorical format (one-hot encoding) with NUM_CLASSES classes
y_train_categorical = to_categorical(y_train, num_classes=NUM_CLASSES)
y_test_categorical = to_categorical(y_test, num_classes=NUM_CLASSES)

# Print shapes to confirm successful processing
print(f"Training data shape: {X_train_reshaped.shape}")
print(f"Test data shape: {X_test_reshaped.shape}")
print(f"Training labels shape: {y_train_categorical.shape}")
print(f"Test labels shape: {y_test_categorical.shape}")


Training data shape: (27455, 28, 28, 1)
Test data shape: (7172, 28, 28, 1)
Training labels shape: (27455, 25)
Test labels shape: (7172, 25)


In [5]:
import cv2
import numpy as np
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense
from collections import deque

# Number of consecutive frames that form one LSTM input sequence
SEQUENCE_LENGTH = 10

# Load your pre-trained CNN model
cnn_model = load_model('SignLanguage.h5')  # Replace with your CNN model path

# Length of the flattened vector cnn_model.predict() returns for one frame.
# Read it from the model instead of hard-coding it: the LSTM input layer and
# the per-frame vectors pushed into the queue must have exactly this width,
# otherwise building the LSTM input batch fails.
FEATURE_DIM = int(np.prod(cnn_model.output_shape[1:]))

# Define the LSTM model to capture temporal dependencies.
# NOTE(review): the LSTM is only built and compiled here; nothing in this cell
# fits it or loads weights, so its predictions come from freshly initialised
# weights. Train it (or load trained weights) before trusting the output.
lstm_model = Sequential()
lstm_model.add(LSTM(64, input_shape=(SEQUENCE_LENGTH, FEATURE_DIM), return_sequences=True))
lstm_model.add(LSTM(128, return_sequences=False))
lstm_model.add(Dense(25, activation='softmax'))
lstm_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Initialize the video capture object
cap = cv2.VideoCapture(0)  # Use 0 for the default camera

# Sliding window over the most recent per-frame feature vectors
frame_queue = deque(maxlen=SEQUENCE_LENGTH)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame delivered (camera missing or stream ended)
            break

        # Preprocess the frame: grayscale, 28x28, scaled by 1/255, with batch
        # and channel axes added
        gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        resized_frame = cv2.resize(gray_frame, (28, 28))
        normalized_frame = resized_frame.astype('float32') / 255.0
        reshaped_frame = np.reshape(normalized_frame, (1, 28, 28, 1))

        # Run the CNN on the frame; verbose=0 suppresses the per-call progress
        # bar that would otherwise be printed for every frame
        frame_features = cnn_model.predict(reshaped_frame, verbose=0)

        # Store the flattened frame features in the deque
        frame_queue.append(frame_features.flatten())

        # When we have enough frames, use LSTM to predict the sign language
        if len(frame_queue) == SEQUENCE_LENGTH:
            # Stack to (timesteps, features) and add the batch axis
            lstm_input = np.expand_dims(np.stack(frame_queue), axis=0)

            # Predict the sign language
            prediction = lstm_model.predict(lstm_input, verbose=0)
            predicted_class = np.argmax(prediction)

            # Display the predicted class
            print(f"Predicted sign: {predicted_class}")

        # Display the original video feed
        cv2.imshow('Sign Language Video Translation', frame)

        # Exit on pressing 'q'
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release resources even if the loop raised, so the camera is not left
    # locked and no preview window is left behind
    cap.release()
    cv2.destroyAllWindows()


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 11, 11, 64)        18496     
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 5, 5, 64)          0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 3, 3, 128)         73856     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 1, 1, 128)         0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 128)              

In [5]:
import cv2
import numpy as np
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import LSTM, Dense
from collections import deque

# Number of consecutive frames that form one LSTM input sequence
SEQUENCE_LENGTH = 10

# Load pre-trained CNN model
cnn_model = load_model('SignLanguage.h5')

# Extract features from the second-to-last layer
feature_extractor = Model(inputs=cnn_model.input, outputs=cnn_model.layers[-2].output)

# Length of the flattened feature vector produced for one frame. Read it from
# the extractor rather than hard-coding it so the LSTM input layer always
# matches whatever the second-to-last CNN layer emits.
FEATURE_DIM = int(np.prod(feature_extractor.output_shape[1:]))

# Define the LSTM model for temporal analysis.
# NOTE(review): the LSTM is only built and compiled here; nothing in this cell
# fits it or loads weights, so the letters printed below come from freshly
# initialised weights. Train it (or load trained weights) before relying on
# the translation.
lstm_model = Sequential()
lstm_model.add(LSTM(64, input_shape=(SEQUENCE_LENGTH, FEATURE_DIM), return_sequences=True))
lstm_model.add(LSTM(128, return_sequences=False))
lstm_model.add(Dense(25, activation='softmax'))
lstm_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Initialize the video capture object
cap = cv2.VideoCapture(0)

# Sliding window over the most recent per-frame feature vectors
frame_queue = deque(maxlen=SEQUENCE_LENGTH)

# Translation unit: 0 -> 'a', 1 -> 'b', ..., 25 -> 'z'
letters = {i: chr(97 + i) for i in range(26)}  # chr(97) is 'a'

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame delivered (camera missing or stream ended)
            break

        # Preprocess the frame: grayscale, 28x28, scaled by 1/255, with batch
        # and channel axes added
        gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        resized_frame = cv2.resize(gray_frame, (28, 28))
        normalized_frame = resized_frame.astype('float32') / 255.0
        reshaped_frame = np.reshape(normalized_frame, (1, 28, 28, 1))

        # Extract features from the frame; verbose=0 suppresses the per-call
        # progress bar that would otherwise interleave with the printed letters
        frame_features = feature_extractor.predict(reshaped_frame, verbose=0)

        # Store the flattened frame features in the deque
        frame_queue.append(frame_features.flatten())

        # When we have enough frames, use LSTM to predict the sign language
        if len(frame_queue) == SEQUENCE_LENGTH:
            # Stack to (timesteps, features) and add the batch axis
            lstm_input = np.expand_dims(np.stack(frame_queue), axis=0)

            # Predict the sign language
            prediction = lstm_model.predict(lstm_input, verbose=0)
            predicted_class = int(np.argmax(prediction))  # plain int for the dict lookup

            # Translate the predicted class to the corresponding letter
            predicted_letter = letters.get(predicted_class, '')  # Default to empty if not found
            print(predicted_letter, end=' ', flush=True)  # Print the letter with a space separator

        # Display the original video feed
        cv2.imshow('Sign Language Video Translation', frame)

        # Exit on pressing 'q'
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release resources even if the loop raised, so the camera is not left
    # locked and no preview window is left behind
    cap.release()
    cv2.destroyAllWindows()


c c c c c c c i i i b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i m m m m m m m m m m m m m m m q q q q q m m m m m m m m m q m m m m m m m m m m m q q q q q q q q q q q q q q q q q q q q q q q q q q m m m m m m m m q q q q q q q q q q q q q q q q q q q q q q q q q q q q q q q q m m m m m m m m m m m q m m m m m m m m m m m o m m m m m m m m m o o o o o o m m m m m m o o o o o o o o o o o o o o o o o o o o o o o o o o o o o o o o o o o o o m m m m m m m i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i b b b b b b b b i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i m m m m m m m m m m m m m m b i i i i i i i i i i i i b i i i b b b b b m m m m f f f f f f f f f f f f f f f f f f f f f f f f f f f f f f f f f f f f f f f f f f f f f f f f f f f f f f f b 