# importing libraries

In [1]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from tensorflow.keras.utils import to_categorical

1-OS and NumPy: For file and numerical operations.
2-PyTorch Modules (torch, nn, optim, etc.): For building and training the neural network.
3-Scikit-learn Modules: For data splitting and evaluation metrics.
4-TensorFlow Keras: Specifically to_categorical, for one-hot encoding labels.
5-Matplotlib and Seaborn: For plotting and visualizing data and results (they are not imported in the cell above, so import them before plotting).

# Define the data path and action labels

In [2]:
# Root folder of the dataset; the loading code looks for one sub-folder per action in it.
DATA_PATH = 'F:/NEWAI/sign language/Data_MP/'

# Class names. The position of a name in this list is its integer class index,
# and the name is also used as the sub-folder name under DATA_PATH.
actions = [
    "can_you_help_me",
    "doesnt_matter",
    "good_bey",
    "i_have_to_go",
    "i_think_you_are_wrong",
    "sorry_cant_stay",
    "sorry_for_being_late",
    "speak_slowly",
    "thanks_for_your_concern",
    "wish_you_luck_in_work",
    "wish_you_good_journey",
    "wish_you_good_vacation",
    "please_quickly",
    "explain_again",
    "repeat_again",
    "free_or_busy",
    "happy_to_know_you",
    "i_disagree",
    "i_agree",
    "i_would_like_to_meet_you",
    "any_service",
    "come_quickly",
    "happy_new_year",
    "how_can_i_call_you",
    "wait_please",
    "lets_go_swim",
    "whats_your_name",
    "what_about_going_for_a_walk",
    "unbelievable",
    "can_i_take_from_your_time",
]

# Fixed number of frames per sample: assumes 4-second clips recorded at 30 fps.
sequence_length = 4 * 30

# Lookup table from action name to its integer class index.
label_map = dict(zip(actions, range(len(actions))))

1-DATA_PATH: Specifies where the dataset is stored.
2-actions: A list of all the sign language actions (classes) that the model will recognize.
3-sequence_length: Defines the fixed length for each input sequence (e.g., number of frames).

4-label_map: A dictionary that maps each action label to a unique integer index. This is essential for converting categorical labels into numerical form suitable for model training.

# 2. Data Loading and Preprocessing:
This block defines the function for padding or truncating sequences and loads the dataset from the file path. It also processes and prepares the data for training.

In [5]:
# Function to pad or truncate sequences
def pad_or_truncate(sequence, max_len):
    """Pad with zero frames or truncate so the result has exactly ``max_len`` frames.

    Args:
        sequence: Array-like whose first axis indexes frames, for example a
            ``(num_frames, num_features)`` array. Nested lists and arrays with
            any number of trailing dimensions are accepted.
        max_len: Target number of frames along the first axis; must be >= 0.

    Returns:
        numpy.ndarray: The first ``max_len`` frames when the input is long
        enough, otherwise the input followed by all-zero frames. The padding
        is created as float64 zeros, so a padded result follows NumPy's usual
        type promotion (e.g. float32 input becomes float64).

    Raises:
        ValueError: If ``max_len`` is negative or ``sequence`` has no frame axis.
    """
    if max_len < 0:
        # A negative slice bound would silently drop frames from the end.
        raise ValueError(f"max_len must be non-negative, got {max_len}")

    sequence = np.asarray(sequence)
    if sequence.ndim == 0:
        raise ValueError("sequence must have at least one dimension (frames)")

    num_frames = sequence.shape[0]
    if num_frames >= max_len:
        return sequence[:max_len]

    # Build the padding from the trailing shape so any per-frame layout works.
    padding = np.zeros((max_len - num_frames,) + sequence.shape[1:])
    return np.concatenate((sequence, padding), axis=0)

# Accumulate one fixed-length sequence and one integer class index per sample.
sequences, labels = [], []

# Each action has its own folder; the folder name determines the label.
for action in actions:
    action_path = os.path.join(DATA_PATH, action)
    # isdir (not exists): a stray file with the action's name must not reach listdir.
    if not os.path.isdir(action_path):
        print(f"Directory not found: {action_path}. Skipping this action.")
        continue

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order, and therefore the seeded train/test split, reproducible.
    for video in sorted(os.listdir(action_path)):
        if not video.endswith("_sequence.npy"):  # Only sequence files are samples
            continue
        video_path = os.path.join(action_path, video)
        try:
            # Load the sequence and bring it to the common length
            sequence = np.load(video_path)
            sequence = pad_or_truncate(sequence, sequence_length)
        except Exception as e:
            # Deliberate best effort: report the unreadable file and keep going.
            print(f"Error loading file {video_path}: {e}")
            continue
        sequences.append(sequence)
        labels.append(label_map[action])

# Stop with a non-zero status when nothing could be loaded. The interactive
# exit() helper is not meant for programs and would report success.
if not sequences:
    print("No data loaded. Please check the dataset paths and file availability.")
    raise SystemExit(1)

# Convert sequences and labels to numpy arrays. num_classes pins the one-hot
# width to the full action list even if some action folders were skipped.
X = np.array(sequences)
y = to_categorical(labels, num_classes=len(actions)).astype(int)

# Hold out 20% of the samples for testing (fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


pad_or_truncate: Ensures that each video sequence is of the same length by either padding shorter sequences with zeros or truncating longer ones. This is important because neural networks require inputs of uniform length for batch processing.
#####################################################################################################################################################
1-sequences, labels: Lists that will store the processed video sequences and their corresponding labels.
2-os.path.join: Joins the DATA_PATH and each action to create the full path for each action folder.
3-os.listdir: Lists all the files in the folder corresponding to each action.
4-np.load: Loads the .npy file, which contains the pre-processed video sequence.
5-sequence = pad_or_truncate: Ensures that the sequence has a fixed length.
6-labels.append: Adds the corresponding action label based on the folder name to the labels list.
#######################################################################################################
1-X and y: Convert the lists of sequences and labels into numpy arrays, where X holds the video data and y holds the one-hot encoded labels.
2-to_categorical: Converts the integer labels into one-hot encoded format (needed for classification tasks).
3-train_test_split: Splits the data into training and testing sets. 80% is used for training and 20% for testing.
################################################################################################


# 3. Dataset and DataLoader Definition
This block defines a custom PyTorch dataset class and creates DataLoader objects for both training and testing datasets.

In [6]:
# Define a custom dataset class for PyTorch
class KeypointsToTextDataset(Dataset):
    """Map-style dataset that pairs each feature sequence with its label.

    Args:
        features: Indexable, sized collection of samples (e.g. a tensor whose
            first axis indexes samples).
        labels: Indexable, sized collection of labels, aligned with
            ``features`` position by position.

    Raises:
        ValueError: If ``features`` and ``labels`` differ in length.
    """

    def __init__(self, features, labels):
        # Fail at construction time instead of with an IndexError (or silently
        # ignored labels) somewhere in the middle of an epoch.
        if len(features) != len(labels):
            raise ValueError(
                f"features and labels must have the same length, "
                f"got {len(features)} and {len(labels)}"
            )
        self.features = features
        self.labels = labels

    def __len__(self):
        """Return the number of samples."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        return self.features[idx], self.labels[idx]

def _as_dataset(samples, one_hot_targets):
    """Wrap NumPy samples and one-hot targets as float32 inputs and int64 class ids."""
    inputs = torch.tensor(samples, dtype=torch.float32)
    # The loss works on class indices, so collapse the one-hot rows again.
    class_ids = torch.tensor(np.argmax(one_hot_targets, axis=1), dtype=torch.long)
    return KeypointsToTextDataset(inputs, class_ids)


# Datasets for the two splits
train_dataset = _as_dataset(X_train, y_train)
test_dataset = _as_dataset(X_test, y_test)

# Batches of 8; only the training batches are reshuffled every epoch.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=8)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=8)


KeypointsToTextDataset: A custom PyTorch dataset class. It allows easy loading and batching of sequences (features) and their labels during training and testing. The __len__ function returns the total number of samples, and the __getitem__ function returns the feature and label for a given index.
########################################################################################
DataLoader: A PyTorch object that splits the dataset into batches. The train_loader reshuffles the training data every epoch so that the batches are not biased by the order in which the samples were loaded, while the test_loader keeps the test data in a fixed order.

# 4. Model Definition (Enhanced Bi-LSTM with Attention)
This block defines the model architecture, including a Bi-directional LSTM with an Attention mechanism and a fully connected layer for classification.

In [7]:
# Define the enhanced RNN with Bi-directional LSTM and Attention mechanism
class Attention(nn.Module):
    """Pool a batch of sequences into one vector each using learned softmax weights.

    A single linear layer scores every position of the input; the scores are
    squashed with tanh, normalized with softmax along dimension 1, and used to
    form a weighted sum of the input along that dimension.
    """

    def __init__(self, hidden_dim):
        super().__init__()
        # One scalar score per position.
        self.attn = nn.Linear(hidden_dim, 1)

    def forward(self, lstm_output):
        """Return ``(context, weights)`` for a 3-D input.

        ``lstm_output`` is expected as (batch, steps, hidden_dim); ``weights``
        keeps a trailing singleton dimension and ``context`` has the steps
        dimension summed out.
        """
        scores = self.attn(lstm_output).tanh()
        weights = scores.softmax(dim=1)
        # (batch, 1, steps) @ (batch, steps, hidden) -> (batch, 1, hidden)
        pooled = torch.bmm(weights.permute(0, 2, 1), lstm_output)
        return pooled.squeeze(1), weights

class EnhancedRNNBiLSTMModel(nn.Module):
    """Sequence classifier: RNN -> bidirectional LSTM -> attention pooling -> linear.

    Args:
        input_dim: Number of features per input step.
        hidden_dim: Hidden size of the RNN and of each LSTM direction.
        num_classes: Number of output scores per sample.
        dropout_rate: Dropout probability applied to the pooled vector.
    """

    def __init__(self, input_dim, hidden_dim, num_classes, dropout_rate=0.3):
        super().__init__()
        # The bidirectional LSTM concatenates both directions.
        bidirectional_dim = hidden_dim * 2

        self.rnn = nn.RNN(input_dim, hidden_dim, batch_first=True)
        self.bilstm = nn.LSTM(hidden_dim, hidden_dim, batch_first=True, bidirectional=True)
        self.attention = Attention(bidirectional_dim)
        self.dropout = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(bidirectional_dim, num_classes)

    def forward(self, x):
        """Return one unnormalized score per class for each sample in ``x``."""
        hidden_states = self.rnn(x)[0]
        hidden_states = self.bilstm(hidden_states)[0]
        # The attention weights are not needed for classification.
        pooled, _ = self.attention(hidden_states)
        return self.fc(self.dropout(pooled))


Attention Mechanism: The attention layer is designed to focus on the most important frames in a sequence. It calculates one attention weight per time step (normalized with softmax so that the weights sum to 1 across the sequence) and uses the weights to form a weighted sum of the LSTM outputs. The resulting context vector emphasizes the most significant parts of the sequence.
#############################################################################################
EnhancedRNNBiLSTMModel:
1-The model starts with a simple RNN layer, followed by a Bi-directional LSTM.
2-BiLSTM: Captures both forward and backward context in sequences, which helps in understanding dependencies in time-series data like video frames.
3-Attention: Highlights the most important frames within the sequence.
4-Dropout: Adds regularization to reduce overfitting.
5-Fully Connected Layer: Maps the attention context vector to one raw score (logit) per sign language action; CrossEntropyLoss applies the softmax internally during training.
##########################################################################################

# 5. Training and Evaluation Setup
This block defines the functions for training and evaluating the model. It uses the AdamW optimizer, CrossEntropyLoss, and tracks loss during training. The evaluation function calculates accuracy, classification report, and confusion matrix.

In [8]:
# Model hyper-parameters
num_classes = len(actions)  # One output per action
hidden_dim = 512  # Hidden layer size
input_dim = 258  # Number of features per frame

# Pick the GPU when one is available and build the model directly on it
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = EnhancedRNNBiLSTMModel(input_dim, hidden_dim, num_classes).to(device)

# Loss for multi-class classification and the optimizer over all model weights
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(params=model.parameters(), lr=0.001)

# Training function
def train_model(model, dataloader, criterion, optimizer, epochs=30):
    """Train ``model`` for ``epochs`` passes over ``dataloader``.

    Args:
        model: Module to optimize; it is switched to training mode.
        dataloader: Iterable yielding ``(inputs, labels)`` tensor batches.
        criterion: Callable computing the loss from ``(outputs, labels)``.
        optimizer: Optimizer holding the parameters of ``model``.
        epochs: Number of passes over ``dataloader``.

    Returns:
        list[float]: Mean batch loss of every epoch, in order.

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    # Move batches to wherever the model lives instead of relying on a
    # module-level ``device`` variable; parameter-free models stay on the CPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    model.train()
    epoch_losses = []
    for epoch in range(epochs):
        total_loss = 0.0
        num_batches = 0  # counted by hand so loaders without len() work too
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(target_device), labels.to(target_device)
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Backpropagation
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            raise ValueError("dataloader yielded no batches; cannot train on empty data")

        avg_loss = total_loss / num_batches
        epoch_losses.append(avg_loss)
        print(f'Epoch {epoch + 1}/{epochs}, Loss: {avg_loss:.4f}')

    return epoch_losses

# Fit the model on the training batches, using the function's default number of epochs
train_model(model=model, dataloader=train_loader, criterion=criterion, optimizer=optimizer)

# Evaluation function
def evaluate_model(model, dataloader, class_names=None):
    """Evaluate ``model`` on ``dataloader`` and print the standard metrics.

    Args:
        model: Trained module; it is switched to evaluation mode.
        dataloader: Iterable yielding ``(inputs, labels)`` tensor batches, with
            labels given as integer class indices.
        class_names: Display names ordered by class index. Defaults to the
            module-level ``actions`` list.

    Returns:
        dict: ``{"accuracy": float, "report": str, "confusion_matrix": ndarray}``
        holding the same values that are printed.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    if class_names is None:
        class_names = actions
    # Fixed label ids keep the report and the matrix aligned with class_names
    # even when some class never occurs in this split; without them
    # classification_report rejects target_names of a different length.
    class_ids = list(range(len(class_names)))

    # Move batches to wherever the model lives instead of relying on a
    # module-level ``device`` variable; parameter-free models stay on the CPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for inputs, labels in dataloader:
            outputs = model(inputs.to(target_device))
            preds = outputs.argmax(dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    if not all_labels:
        raise ValueError("dataloader yielded no samples; nothing to evaluate")

    # Calculate evaluation metrics
    accuracy = accuracy_score(all_labels, all_preds)
    class_report = classification_report(
        all_labels,
        all_preds,
        labels=class_ids,
        target_names=class_names,
        zero_division=0,  # classes without samples score 0 instead of warning
    )
    conf_matrix = confusion_matrix(all_labels, all_preds, labels=class_ids)

    print(f"Accuracy: {accuracy:.4f}")
    print("Classification Report:\n", class_report)
    print("Confusion Matrix:\n", conf_matrix)

    return {
        "accuracy": accuracy,
        "report": class_report,
        "confusion_matrix": conf_matrix,
    }

# Print accuracy, the per-class report and the confusion matrix for the test batches
evaluate_model(model=model, dataloader=test_loader)


Epoch 1/30, Loss: 2.7522
Epoch 2/30, Loss: 2.2006
Epoch 3/30, Loss: 1.7888
Epoch 4/30, Loss: 1.3983
Epoch 5/30, Loss: 1.0988
Epoch 6/30, Loss: 0.9746
Epoch 7/30, Loss: 0.8436
Epoch 8/30, Loss: 0.7241
Epoch 9/30, Loss: 0.6983
Epoch 10/30, Loss: 0.5625
Epoch 11/30, Loss: 0.5134
Epoch 12/30, Loss: 0.4365
Epoch 13/30, Loss: 0.4915
Epoch 14/30, Loss: 0.3823
Epoch 15/30, Loss: 0.9297
Epoch 16/30, Loss: 0.6008
Epoch 17/30, Loss: 0.4782
Epoch 18/30, Loss: 0.4586
Epoch 19/30, Loss: 0.5067
Epoch 20/30, Loss: 0.3740
Epoch 21/30, Loss: 0.2966
Epoch 22/30, Loss: 0.3019
Epoch 23/30, Loss: 0.2526
Epoch 24/30, Loss: 0.2369
Epoch 25/30, Loss: 0.2624
Epoch 26/30, Loss: 0.3409
Epoch 27/30, Loss: 0.1867
Epoch 28/30, Loss: 0.2136
Epoch 29/30, Loss: 0.1682
Epoch 30/30, Loss: 0.1811
Accuracy: 0.9754
Classification Report:
                              precision    recall  f1-score   support

            can_you_help_me       0.95      1.00      0.98        21
              doesnt_matter       0.87      1.00 

CrossEntropyLoss: A loss function used for multi-class classification problems.
AdamW: A variant of the Adam optimizer that adapts the step size for each parameter and applies weight decay decoupled from the gradient update.
##################################################################################################
1-train_model: This function handles the model training process. It performs the forward pass, computes the loss, and updates the model weights using backpropagation.
2-zero_grad: Clears the gradients left over from the previous step, which PyTorch would otherwise accumulate.
3-loss.backward: Computes gradients of the loss.
4-optimizer.step: Updates the model weights.
##################################################################################################
evaluate_model: This function evaluates the trained model on the test data. It computes the model’s predictions, calculates accuracy, and provides a classification report and confusion matrix to assess performance.
torch.no_grad(): Disables gradient calculations, which speeds up the evaluation process and reduces memory consumption.