### Real-Time Nepali Sign Language Recognition Using Skeleton-Based BiLSTM with Self-Attention Mechanism

In [3]:
import os
import numpy as np
from tqdm import tqdm # You might need to pip install tqdm for progress bars

# --- CONFIGURATION ---
INPUT_DIR = r'C:\Users\KIIT\Desktop\Major Project\processed_data_skeleton'  # Path to the directory with original .npy files
# Where to save augmented data. 
# OPTION A: Save in same folders (easier for later loading)
# OPTION B: Save separately. Let's use Option A for simplicity in loading later.
OUTPUT_DIR = INPUT_DIR 

AUGMENTATION_FACTOR = 16# How many new samples to create per original sample

def add_gaussian_noise(data, sigma=0.01):
    """Adds random noise to coordinates to simulate slight sensor inaccuracies."""
    noise = np.random.normal(loc=0, scale=sigma, size=data.shape)
    return data + noise

def scale_skeleton(data, scale_range=(0.9, 1.1)):
    """Makes the skeleton slightly bigger or smaller to simulate different distances."""
    scale_factor = np.random.uniform(scale_range[0], scale_range[1])
    return data * scale_factor

# Optional: Simple temporal shift if your data isn't perfectly trimmed
def temporal_shift(data, shift_range=(-2, 2)):
    shift = np.random.randint(shift_range[0], shift_range[1])
    if shift == 0: return data
    if shift > 0: # Shift right, pad left
        return np.vstack([np.zeros((shift, data.shape[1])), data[:-shift]])
    else: # Shift left, pad right
        return np.vstack([data[-shift:], np.zeros((-shift, data.shape[1]))])

def augment_and_save():
    if not os.path.exists(INPUT_DIR):
        print(f"Error: {INPUT_DIR} not found.")
        return

    actions = os.listdir(INPUT_DIR)
    for action in actions:
        action_path = os.path.join(INPUT_DIR, action)
        if not os.path.isdir(action_path): continue

        print(f"Augmenting class: {action}")
        # Only get original files, avoid re-augmenting augmented files if run twice
        npy_files = [f for f in os.listdir(action_path) if f.endswith('.npy') and '_aug' not in f]

        for f_name in tqdm(npy_files):
            original_data = np.load(os.path.join(action_path, f_name))
            
            for i in range(AUGMENTATION_FACTOR):
                # Apply a random combination of augmentations
                aug_data = original_data.copy()
                
                if np.random.rand() > 0.5:
                    aug_data = add_gaussian_noise(aug_data)
                if np.random.rand() > 0.5:
                    aug_data = scale_skeleton(aug_data)
                
                # Save with new name, e.g., "video1_aug0.npy"
                new_name = f"{os.path.splitext(f_name)[0]}_aug{i}.npy"
                np.save(os.path.join(action_path, new_name), aug_data)

if __name__ == '__main__':
    print("Starting augmentation...")
    augment_and_save()
    print("Augmentation complete.")

Starting augmentation...
Augmenting class: कलम


100%|██████████| 50/50 [00:01<00:00, 32.61it/s]


Augmenting class: क्षमा


100%|██████████| 50/50 [00:02<00:00, 23.41it/s]


Augmenting class: खाना


100%|██████████| 50/50 [00:01<00:00, 27.75it/s]


Augmenting class: खुशी


100%|██████████| 50/50 [00:01<00:00, 32.30it/s]


Augmenting class: गन्तव्य


100%|██████████| 50/50 [00:01<00:00, 28.98it/s]


Augmenting class: घर


100%|██████████| 50/50 [00:03<00:00, 15.49it/s]


Augmenting class: जेल


100%|██████████| 50/50 [00:04<00:00, 11.19it/s]


Augmenting class: जोरो


100%|██████████| 50/50 [00:04<00:00, 12.22it/s]


Augmenting class: टाटा


100%|██████████| 50/50 [00:03<00:00, 15.20it/s]


Augmenting class: ढोका


100%|██████████| 50/50 [00:04<00:00, 12.09it/s]


Augmenting class: नमस्ते


100%|██████████| 50/50 [00:18<00:00,  2.64it/s]


Augmenting class: पानी


100%|██████████| 50/50 [00:01<00:00, 48.06it/s]


Augmenting class: पैसा


100%|██████████| 50/50 [00:01<00:00, 46.64it/s]


Augmenting class: पौडी


100%|██████████| 50/50 [00:01<00:00, 49.85it/s]


Augmenting class: फोन


100%|██████████| 50/50 [00:01<00:00, 46.20it/s]


Augmenting class: म


100%|██████████| 50/50 [00:01<00:00, 46.00it/s]


Augmenting class: माया


100%|██████████| 50/50 [00:01<00:00, 33.51it/s]


Augmenting class: समय


100%|██████████| 50/50 [00:01<00:00, 25.77it/s]


Augmenting class: समाचार


100%|██████████| 50/50 [00:01<00:00, 26.47it/s]


Augmenting class: सर्प


100%|██████████| 50/50 [00:01<00:00, 29.67it/s]

Augmentation complete.





In [None]:
import os
import numpy as np
from sklearn.model_selection import train_test_split

# --- CONFIGURATION ---
DATA_PATH = 'processed_data_skeleton'
OUTPUT_FILE = 'sign_language_data.npz'
MAX_SEQUENCE_LENGTH = 75
# FEATURE_DIM will be detected automatically now

def preprocess_and_pack():
    if not os.path.exists(DATA_PATH):
        print("Data path not found.")
        return

    actions = np.array([d for d in os.listdir(DATA_PATH) if os.path.isdir(os.path.join(DATA_PATH, d))])
    action_map = {label:num for num, label in enumerate(actions)}
    
    sequences, labels = [], []
    feature_dim = None # Initialize as None to detect later

    print("Loading and standardizing data...")
    for action in actions:
        action_path = os.path.join(DATA_PATH, action)
        file_list = [f for f in os.listdir(action_path) if f.endswith('.npy')]
        
        for file_name in file_list:
            data = np.load(os.path.join(action_path, file_name))
            
            # --- AUTO-DETECT FEATURE DIMENSION ---
            if feature_dim is None:
                feature_dim = data.shape[1]
                print(f"Detected feature dimension: {feature_dim}")

            # --- STANDARDIZATION ---
            if len(data) > MAX_SEQUENCE_LENGTH:
                data = data[:MAX_SEQUENCE_LENGTH]
            elif len(data) < MAX_SEQUENCE_LENGTH:
                # Use the detected feature_dim for padding
                padding = np.zeros((MAX_SEQUENCE_LENGTH - len(data), feature_dim))
                data = np.vstack((data, padding))
            
            sequences.append(data)
            labels.append(action_map[action])

    X = np.array(sequences).astype(np.float32) # PyTorch likes float32
    
    # One-hot encode labels using pure numpy
    y = np.array(labels)
    num_classes = len(actions)
    y_onehot = np.eye(num_classes)[y]

    print(f"Combined Data Shape: {X.shape}")
    print(f"Labels Shape: {y_onehot.shape}")

    X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, test_size=0.2, stratify=y, random_state=42)

    np.savez_compressed(OUTPUT_FILE, 
                        X_train=X_train, X_test=X_test, 
                        y_train=y_train, y_test=y_test, 
                        action_labels=actions)
    print(f"Data successfully packed into {OUTPUT_FILE}")

if __name__ == '__main__':
    preprocess_and_pack()

Loading and standardizing data...
Detected feature dimension: 258


In [5]:
import torch.nn as nn
import torch.nn.functional as F

class SelfAttention(nn.Module):
    """
    Computes a weighted average of the LSTM outputs.
    This allows the model to focus on the most relevant frames for the gesture.
    """
    def __init__(self, hidden_dim):
        super(SelfAttention, self).__init__()
        self.hidden_dim = hidden_dim
        self.projection = nn.Sequential(
            nn.Linear(hidden_dim, 128), # Increased from 64
            nn.ReLU(True),
            nn.Linear(128, 1)
        )

    def forward(self, encoder_outputs):
        # encoder_outputs shape: (batch_size, sequence_length, hidden_dim)
        
        # Calculate energy for each frame
        energy = self.projection(encoder_outputs) # (batch, seq_len, 1)
        
        # Calculate weights (softmax over the sequence dimension)
        weights = F.softmax(energy.squeeze(-1), dim=1) # (batch, seq_len)
        
        # Reshape weights for broadcasting
        # (batch, 1, seq_len) * (batch, seq_len, hidden_dim) -> (batch, 1, hidden_dim)
        outputs = (encoder_outputs * weights.unsqueeze(-1)).sum(dim=1)
        
        return outputs, weights

class SignLanguageLSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_classes):
        super(SignLanguageLSTM, self).__init__()
        
        # 1. Bidirectional LSTM Layers
        self.lstm1 = nn.LSTM(input_dim, hidden_dim, batch_first=True, bidirectional=True)
        self.ln1 = nn.LayerNorm(hidden_dim * 2)
        self.dropout1 = nn.Dropout(0.4)
        
        self.lstm2 = nn.LSTM(hidden_dim * 2, hidden_dim, batch_first=True, bidirectional=True)
        self.ln2 = nn.LayerNorm(hidden_dim * 2)
        self.dropout2 = nn.Dropout(0.4)
        
        # 2. Attention Mechanism
        # Input to attention is hidden_dim * 2 because LSTM is bidirectional
        self.attention = SelfAttention(hidden_dim * 2)
        
        # 3. Classification Head
        self.fc1 = nn.Linear(hidden_dim * 2, 128) # Increased from 64
        self.relu = nn.ReLU()
        self.dropout3 = nn.Dropout(0.3)
        self.fc2 = nn.Linear(128, num_classes) # Adjusted input to match fc1 output

    def forward(self, x):
        # x shape: (batch, seq_len, input_dim)
        
        # LSTM Layers
        out, _ = self.lstm1(x)
        out = self.ln1(out)
        out = self.dropout1(out)
        
        lstm_out, _ = self.lstm2(out)
        lstm_out = self.ln2(lstm_out)
        lstm_out = self.dropout2(lstm_out)
        
        # Apply Attention
        # Instead of just taking the last state (lstm_out[:, -1, :]), 
        # we take a weighted sum of all states.
        context_vector, attention_weights = self.attention(lstm_out)
        
        # Classification
        out = self.fc1(context_vector)
        out = self.relu(out)
        out = self.dropout3(out)
        out = self.fc2(out) # Raw logits for CrossEntropyLoss
        
        return out

In [6]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import os
import copy
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (
    classification_report, 
    confusion_matrix,
    roc_curve, 
    auc, 
    precision_recall_curve, 
    average_precision_score
)
from sklearn.preprocessing import label_binarize
from itertools import cycle
from tqdm import tqdm

In [8]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
import seaborn as sns

# --- FONT CONFIGURATION for Devanagari ---
# IMPORTANT: You must have a font installed that supports Devanagari.
# 'Noto Sans Devanagari' is a good free choice (from Google Fonts).
# After installing it, Matplotlib should be able to find it.
# If this doesn't work, you may need to find the specific font name
# on your system and replace 'Noto Sans Devanagari' below.
# --- FONT CONFIGURATION for Devanagari (.ttf local file) ---
FONT_PATH = "NotoSansDevanagari-Regular.ttf"

try:
    font_prop = fm.FontProperties(fname=FONT_PATH)
    plt.rcParams['font.family'] = font_prop.get_name()
    plt.rcParams['font.sans-serif'] = [font_prop.get_name()]
    plt.rcParams['axes.unicode_minus'] = False

    print(f"Devanagari font loaded: {font_prop.get_name()}")
except Exception as e:
    print("⚠️ Could not load Devanagari font from file.")
    print("Error:", e)
    print("Graphs may not render Hindi/Marathi text correctly.")
# --- END FONT CONFIGURATION ---

from sklearn.metrics import (
    classification_report, 
    confusion_matrix,
    roc_curve, 
    auc, 
    precision_recall_curve, 
    average_precision_score
)
from sklearn.preprocessing import label_binarize
from itertools import cycle
from tqdm import tqdm




# --- CONFIGURATION ---
DATA_FILE = 'sign_language_data.npz'
MODEL_FILE = 'best_sign_model_attn.pth'
HISTORY_PLOT = 'training_history.png'
CONFUSION_MATRIX_PLOT = 'confusion_matrix.png'
# New plot filenames
ROC_CURVE_PLOT = 'roc_curves.png'
PR_CURVE_PLOT = 'precision_recall_curves.png'

EPOCHS = 60
BATCH_SIZE = 32
LEARNING_RATE = 0.001
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class SignLanguageDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.FloatTensor(X)
        self.y = torch.LongTensor(np.argmax(y, axis=1))

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

def load_data():
    if not os.path.exists(DATA_FILE):
        raise Exception(f"{DATA_FILE} not found. Run preprocessing first.")
    data = np.load(DATA_FILE, allow_pickle=True)
    return data['X_train'], data['X_test'], data['y_train'], data['y_test'], data['action_labels']

def plot_history(train_losses, val_losses, train_accs, val_accs, lrs):
    """Plots training curves and saves them to a file."""
    # Updated figsize for 3 plots
    plt.figure(figsize=(18, 5))
    
    # Loss Plot
    plt.subplot(1, 3, 1)
    plt.plot(train_losses, label='Train Loss')
    plt.plot(val_losses, label='Validation Loss')
    plt.title('Loss Over Epochs')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)
    
    # Accuracy Plot
    plt.subplot(1, 3, 2)
    plt.plot(train_accs, label='Train Accuracy')
    plt.plot(val_accs, label='Validation Accuracy')
    plt.title('Accuracy Over Epochs')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.grid(True)

    # --- NEW: Learning Rate Plot ---
    plt.subplot(1, 3, 3)
    plt.plot(lrs, label='Learning Rate', color='orange')
    plt.title('Learning Rate Over Epochs')
    plt.xlabel('Epoch')
    plt.ylabel('Learning Rate')
    plt.legend()
    plt.grid(True)
    # --- End New Plot ---
    
    plt.tight_layout()
    plt.savefig(HISTORY_PLOT)
    print(f"Training history plot saved to {HISTORY_PLOT}")
    plt.close()

def plot_roc_curves(y_true_bin, y_pred_probs, class_names):
    """Plots and saves multi-class ROC curves."""
    n_classes = len(class_names)
    fpr = dict()
    tpr = dict()
    roc_auc = dict()

    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(y_true_bin[:, i], y_pred_probs[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Compute macro-average ROC curve and ROC area
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(n_classes):
        mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
    mean_tpr /= n_classes

    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

    # Plot all ROC curves
    plt.figure(figsize=(10, 8))
    
    plt.plot(fpr["macro"], tpr["macro"],
             label=f'Macro-average ROC (AUC = {roc_auc["macro"]:.2f})',
             color='deeppink', linestyle=':', linewidth=4)

    colors = cycle(['aqua', 'darkorange', 'cornflowerblue', 'green', 'purple', 'red', 'blue', 'yellow', 'brown', 'pink'])
    for i, color in zip(range(n_classes), colors):
        plt.plot(fpr[i], tpr[i], color=color, lw=2, alpha=0.8,
                 label=f'ROC for {class_names[i]} (AUC = {roc_auc[i]:.2f})')

    plt.plot([0, 1], [0, 1], 'k--', lw=2)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Multi-class Receiver Operating Characteristic (ROC) Curves')
    plt.legend(loc="lower right", fontsize='small')
    plt.grid(True)
    plt.savefig(ROC_CURVE_PLOT)
    print(f"ROC curves plot saved to {ROC_CURVE_PLOT}")
    plt.close()

def plot_pr_curves(y_true_bin, y_pred_probs, class_names):
    """Plots and saves multi-class Precision-Recall curves."""
    n_classes = len(class_names)
    precision = dict()
    recall = dict()
    avg_precision = dict()

    for i in range(n_classes):
        precision[i], recall[i], _ = precision_recall_curve(y_true_bin[:, i], y_pred_probs[:, i])
        avg_precision[i] = average_precision_score(y_true_bin[:, i], y_pred_probs[:, i])

    # Compute macro-average AUPRC
    avg_precision["macro"] = average_precision_score(y_true_bin, y_pred_probs, average="macro")

    # Plot all P-R curves
    plt.figure(figsize=(10, 8))
    
    # Plot macro-average (for the metric, plotting the curve is complex)
    # We will plot individual curves and report the macro-average AP
    
    colors = cycle(['aqua', 'darkorange', 'cornflowerblue', 'green', 'purple', 'red', 'blue', 'yellow', 'brown', 'pink'])
    for i, color in zip(range(n_classes), colors):
        plt.plot(recall[i], precision[i], color=color, lw=2, alpha=0.8,
                 label=f'P-R for {class_names[i]} (AP = {avg_precision[i]:.2f})')

    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title(f'Multi-class Precision-Recall Curves (Macro-AP = {avg_precision["macro"]:.2f})')
    plt.legend(loc="best", fontsize='small')
    plt.grid(True)
    plt.savefig(PR_CURVE_PLOT)
    print(f"P-R curves plot saved to {PR_CURVE_PLOT}")
    plt.close()


def evaluate_model(model, test_loader, class_names):
    """
    Generates a confusion matrix, classification report, ROC curves,
    and Precision-Recall curves on the test set.
    """
    model.eval()
    all_preds = []
    all_labels = []
    all_probs = []  # <-- NEW: Store probabilities
    
    n_classes = len(class_names)
    
    print("\nRunning final evaluation on test set...")
    with torch.no_grad():
        for inputs, labels in tqdm(test_loader, desc="Evaluating"):
            inputs = inputs.to(DEVICE)
            outputs = model(inputs)
            
            # Get probabilities using softmax
            probs = torch.softmax(outputs, dim=1)
            
            # Get hard predictions
            _, preds = torch.max(outputs, 1)
            
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.numpy())
            all_probs.extend(probs.cpu().numpy()) # <-- NEW
    
    # Convert lists to numpy arrays for sklearn
    all_labels = np.array(all_labels)
    all_preds = np.array(all_preds)
    all_probs = np.array(all_probs)
    
    # Binarize the labels for multi-class ROC/PR
    y_true_bin = label_binarize(all_labels, classes=range(n_classes))
    
    # 1. Classification Report (Precision, Recall, F1-Score)
    print("\n--- Classification Report ---")
    print(classification_report(all_labels, all_preds, target_names=class_names))

    # 2. Confusion Matrix Plot
    cm = confusion_matrix(all_labels, all_preds)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.tight_layout()
    plt.savefig(CONFUSION_MATRIX_PLOT)
    print(f"Confusion matrix saved to {CONFUSION_MATRIX_PLOT}")
    plt.close()
    
    # 3. --- NEW: Plot ROC Curves ---
    plot_roc_curves(y_true_bin, all_probs, class_names)
    
    # 4. --- NEW: Plot P-R Curves ---
    plot_pr_curves(y_true_bin, all_probs, class_names)


def train_model():
    print(f"Training on device: {DEVICE}")
    X_train, X_test, y_train, y_test, actions = load_data()
    
    input_dim = X_train.shape[2]
    num_classes = len(actions)
    print(f"Input Dim: {input_dim}, Classes: {num_classes}")

    train_dataset = SignLanguageDataset(X_train, y_train)
    test_dataset = SignLanguageDataset(X_test, y_test)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

    # This line now correctly imports your model
    model = SignLanguageLSTM(input_dim=input_dim, hidden_dim=64, num_classes=num_classes).to(DEVICE)
    
    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
    optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=1e-4)
    
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)

    best_val_loss = float('inf')
    patience = 15
    patience_counter = 0
    
    # Add 'lr' to history tracking
    history = {'train_loss': [], 'val_loss': [], 'train_acc': [], 'val_acc': [], 'lr': []}

    for epoch in range(EPOCHS):
        # --- TRAINING LOOP ---
        model.train()
        train_loss, train_correct = 0.0, 0
        train_loop = tqdm(train_loader, desc=f'Epoch {epoch+1}/{EPOCHS} [Train]', leave=False)
        for inputs, labels in train_loop:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            
            train_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            train_correct += torch.sum(preds == labels.data)
            train_loop.set_postfix(loss=loss.item())

        # --- VALIDATION LOOP ---
        model.eval()
        val_loss, val_correct = 0.0, 0
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)
                _, preds = torch.max(outputs, 1)
                val_correct += torch.sum(preds == labels.data)
        
        epoch_train_loss = train_loss / len(train_dataset)
        epoch_train_acc = (train_correct.double() / len(train_dataset)).cpu().item()
        epoch_val_loss = val_loss / len(test_dataset)
        epoch_val_acc = (val_correct.double() / len(test_dataset)).cpu().item()
        
        # Get the learning rate *before* the scheduler steps
        current_lr = optimizer.param_groups[0]['lr']
        
        history['train_loss'].append(epoch_train_loss)
        history['val_loss'].append(epoch_val_loss)
        history['train_acc'].append(epoch_train_acc)
        history['val_acc'].append(epoch_val_acc)
        history['lr'].append(current_lr) # <-- Log the LR
        
        print(f'Epoch {epoch+1:03d} | Train Loss: {epoch_train_loss:.4f} Acc: {epoch_train_acc:.4f} | Val Loss: {epoch_val_loss:.4f} Acc: {epoch_val_acc:.4f} | LR: {current_lr:.6f}')
        
        # Step the scheduler *after* logging the LR for the epoch
        scheduler.step(epoch_val_loss)
        new_lr = optimizer.param_groups[0]['lr']
        if new_lr != current_lr:
            print(f"Epoch {epoch+1}: Learning rate reduced from {current_lr} to {new_lr}")

        if epoch_val_loss < best_val_loss:
            best_val_loss = epoch_val_loss
            patience_counter = 0
            torch.save(model.state_dict(), MODEL_FILE)
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping triggered after {epoch+1} epochs.")
                break

    print(f"Training complete. Best model saved to {MODEL_FILE}")
    
    # Pass the LR history to the plotting function
    plot_history(history['train_loss'], history['val_loss'], history['train_acc'], history['val_acc'], history['lr'])
    
    print("Loading best model for final evaluation...")
    model.load_state_dict(torch.load(MODEL_FILE))
    evaluate_model(model, test_loader, actions)

if __name__ == '__main__':
    train_model()

Devanagari font loaded: Noto Sans Devanagari
Training on device: cpu
Input Dim: 258, Classes: 20


                                                                                 

Epoch 001 | Train Loss: 1.5925 Acc: 0.6007 | Val Loss: 0.9715 Acc: 0.8426 | LR: 0.001000


                                                                                 

Epoch 002 | Train Loss: 0.8865 Acc: 0.9060 | Val Loss: 0.8076 Acc: 0.9206 | LR: 0.001000


                                                                                 

Epoch 003 | Train Loss: 0.7700 Acc: 0.9532 | Val Loss: 0.6722 Acc: 0.9771 | LR: 0.001000


                                                                                 

Epoch 004 | Train Loss: 0.7303 Acc: 0.9651 | Val Loss: 0.6842 Acc: 0.9688 | LR: 0.001000


                                                                                 

Epoch 005 | Train Loss: 0.6973 Acc: 0.9772 | Val Loss: 0.8179 Acc: 0.9197 | LR: 0.001000


                                                                                 

Epoch 006 | Train Loss: 0.6763 Acc: 0.9835 | Val Loss: 0.6364 Acc: 0.9882 | LR: 0.001000


                                                                                 

Epoch 007 | Train Loss: 0.6540 Acc: 0.9909 | Val Loss: 0.7226 Acc: 0.9618 | LR: 0.001000


                                                                                 

Epoch 008 | Train Loss: 0.6475 Acc: 0.9923 | Val Loss: 0.6045 Acc: 0.9976 | LR: 0.001000


                                                                                 

Epoch 009 | Train Loss: 0.6623 Acc: 0.9869 | Val Loss: 0.6430 Acc: 0.9856 | LR: 0.001000


                                                                                  

Epoch 010 | Train Loss: 0.6506 Acc: 0.9912 | Val Loss: 0.6093 Acc: 0.9953 | LR: 0.001000


                                                                                  

Epoch 011 | Train Loss: 0.6477 Acc: 0.9926 | Val Loss: 0.6223 Acc: 0.9921 | LR: 0.001000


                                                                                  

Epoch 012 | Train Loss: 0.6352 Acc: 0.9955 | Val Loss: 0.7031 Acc: 0.9624 | LR: 0.001000


                                                                                  

Epoch 013 | Train Loss: 0.6520 Acc: 0.9890 | Val Loss: 0.6007 Acc: 0.9994 | LR: 0.001000


                                                                                  

Epoch 014 | Train Loss: 0.6419 Acc: 0.9929 | Val Loss: 0.6032 Acc: 0.9985 | LR: 0.001000


                                                                                  

Epoch 015 | Train Loss: 0.6223 Acc: 0.9988 | Val Loss: 0.5975 Acc: 1.0000 | LR: 0.001000


                                                                                  

Epoch 016 | Train Loss: 0.6358 Acc: 0.9947 | Val Loss: 0.6187 Acc: 0.9929 | LR: 0.001000


                                                                                  

Epoch 017 | Train Loss: 0.6343 Acc: 0.9957 | Val Loss: 0.6121 Acc: 0.9953 | LR: 0.001000


                                                                                  

Epoch 018 | Train Loss: 0.6430 Acc: 0.9928 | Val Loss: 0.6002 Acc: 1.0000 | LR: 0.001000


                                                                                  

Epoch 019 | Train Loss: 0.6305 Acc: 0.9965 | Val Loss: 0.5974 Acc: 1.0000 | LR: 0.001000


                                                                                  

Epoch 020 | Train Loss: 0.6309 Acc: 0.9956 | Val Loss: 0.6031 Acc: 0.9974 | LR: 0.001000


                                                                                  

Epoch 021 | Train Loss: 0.6358 Acc: 0.9943 | Val Loss: 0.6195 Acc: 0.9938 | LR: 0.001000


                                                                                  

Epoch 022 | Train Loss: 0.6289 Acc: 0.9970 | Val Loss: 0.5968 Acc: 1.0000 | LR: 0.001000


                                                                                  

Epoch 023 | Train Loss: 0.6238 Acc: 0.9982 | Val Loss: 0.6135 Acc: 0.9932 | LR: 0.001000


                                                                                  

Epoch 024 | Train Loss: 0.6326 Acc: 0.9952 | Val Loss: 0.6066 Acc: 0.9971 | LR: 0.001000


                                                                                  

Epoch 025 | Train Loss: 0.6242 Acc: 0.9977 | Val Loss: 0.5981 Acc: 1.0000 | LR: 0.001000


                                                                                    

Epoch 026 | Train Loss: 0.6168 Acc: 1.0000 | Val Loss: 0.5959 Acc: 1.0000 | LR: 0.001000


                                                                                  

Epoch 027 | Train Loss: 0.6168 Acc: 1.0000 | Val Loss: 0.5972 Acc: 1.0000 | LR: 0.001000


                                                                                  

Epoch 028 | Train Loss: 0.6758 Acc: 0.9821 | Val Loss: 0.6005 Acc: 1.0000 | LR: 0.001000


                                                                                  

Epoch 029 | Train Loss: 0.6292 Acc: 0.9960 | Val Loss: 0.5989 Acc: 1.0000 | LR: 0.001000


                                                                                  

Epoch 030 | Train Loss: 0.6187 Acc: 0.9995 | Val Loss: 0.5955 Acc: 1.0000 | LR: 0.001000


                                                                                  

Epoch 031 | Train Loss: 0.6184 Acc: 0.9996 | Val Loss: 0.5977 Acc: 1.0000 | LR: 0.001000


                                                                                  

Epoch 032 | Train Loss: 0.6323 Acc: 0.9959 | Val Loss: 0.7459 Acc: 0.9579 | LR: 0.001000


                                                                                  

Epoch 033 | Train Loss: 0.6233 Acc: 0.9981 | Val Loss: 0.5975 Acc: 1.0000 | LR: 0.001000


                                                                                  

Epoch 034 | Train Loss: 0.6245 Acc: 0.9972 | Val Loss: 0.7178 Acc: 0.9582 | LR: 0.001000


                                                                                  

Epoch 035 | Train Loss: 0.6320 Acc: 0.9957 | Val Loss: 0.5975 Acc: 1.0000 | LR: 0.001000


                                                                                  

Epoch 036 | Train Loss: 0.6272 Acc: 0.9965 | Val Loss: 0.5986 Acc: 0.9991 | LR: 0.001000
Epoch 36: Learning rate reduced from 0.001 to 0.0005


                                                                                  

Epoch 037 | Train Loss: 0.6152 Acc: 1.0000 | Val Loss: 0.5953 Acc: 1.0000 | LR: 0.000500


                                                                                  

Epoch 038 | Train Loss: 0.6154 Acc: 1.0000 | Val Loss: 0.5957 Acc: 1.0000 | LR: 0.000500


                                                                                  

Epoch 039 | Train Loss: 0.6152 Acc: 1.0000 | Val Loss: 0.5954 Acc: 1.0000 | LR: 0.000500


                                                                                  

Epoch 040 | Train Loss: 0.6145 Acc: 1.0000 | Val Loss: 0.5954 Acc: 1.0000 | LR: 0.000500


                                                                                  

Epoch 041 | Train Loss: 0.6148 Acc: 1.0000 | Val Loss: 0.5950 Acc: 1.0000 | LR: 0.000500


                                                                                  

Epoch 042 | Train Loss: 0.6147 Acc: 1.0000 | Val Loss: 0.5950 Acc: 1.0000 | LR: 0.000500


                                                                                  

Epoch 043 | Train Loss: 0.6251 Acc: 0.9965 | Val Loss: 0.6122 Acc: 0.9956 | LR: 0.000500


                                                                                  

Epoch 044 | Train Loss: 0.6199 Acc: 0.9987 | Val Loss: 0.5954 Acc: 1.0000 | LR: 0.000500


                                                                                  

Epoch 045 | Train Loss: 0.6147 Acc: 1.0000 | Val Loss: 0.5961 Acc: 1.0000 | LR: 0.000500


                                                                                  

Epoch 046 | Train Loss: 0.6146 Acc: 1.0000 | Val Loss: 0.5951 Acc: 1.0000 | LR: 0.000500


                                                                                  

Epoch 047 | Train Loss: 0.6228 Acc: 0.9973 | Val Loss: 0.5956 Acc: 1.0000 | LR: 0.000500
Epoch 47: Learning rate reduced from 0.0005 to 0.00025


                                                                                  

Epoch 048 | Train Loss: 0.6142 Acc: 1.0000 | Val Loss: 0.5955 Acc: 1.0000 | LR: 0.000250


                                                                                  

Epoch 049 | Train Loss: 0.6144 Acc: 1.0000 | Val Loss: 0.5947 Acc: 1.0000 | LR: 0.000250


                                                                                  

Epoch 050 | Train Loss: 0.6141 Acc: 1.0000 | Val Loss: 0.5954 Acc: 1.0000 | LR: 0.000250


                                                                                  

Epoch 051 | Train Loss: 0.6141 Acc: 1.0000 | Val Loss: 0.5951 Acc: 1.0000 | LR: 0.000250


                                                                                  

Epoch 052 | Train Loss: 0.6141 Acc: 1.0000 | Val Loss: 0.5945 Acc: 1.0000 | LR: 0.000250


                                                                                  

Epoch 053 | Train Loss: 0.6142 Acc: 1.0000 | Val Loss: 0.5948 Acc: 1.0000 | LR: 0.000250


                                                                                  

Epoch 054 | Train Loss: 0.6138 Acc: 1.0000 | Val Loss: 0.5948 Acc: 1.0000 | LR: 0.000250


                                                                                  

Epoch 055 | Train Loss: 0.6138 Acc: 1.0000 | Val Loss: 0.5945 Acc: 1.0000 | LR: 0.000250


                                                                                  

Epoch 056 | Train Loss: 0.6137 Acc: 1.0000 | Val Loss: 0.5950 Acc: 1.0000 | LR: 0.000250


                                                                                  

Epoch 057 | Train Loss: 0.6141 Acc: 1.0000 | Val Loss: 0.5946 Acc: 1.0000 | LR: 0.000250


                                                                                  

Epoch 058 | Train Loss: 0.6152 Acc: 0.9996 | Val Loss: 0.5948 Acc: 1.0000 | LR: 0.000250


                                                                                  

Epoch 059 | Train Loss: 0.6136 Acc: 1.0000 | Val Loss: 0.5943 Acc: 1.0000 | LR: 0.000250


                                                                                  

Epoch 060 | Train Loss: 0.6140 Acc: 1.0000 | Val Loss: 0.5954 Acc: 1.0000 | LR: 0.000250
Training complete. Best model saved to best_sign_model_attn.pth


findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not

Training history plot saved to training_history.png
Loading best model for final evaluation...

Running final evaluation on test set...


Evaluating: 100%|██████████| 107/107 [00:02<00:00, 50.44it/s]
findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
findfont: Font family 'Noto Sans Devanagari' not found.
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
findfont: Font family 'Noto Sans Devanagari' not found.
  fig.canvas.draw()
  fig.canvas.draw()
findfont: Font family 'Noto Sans Devanagari' not found.
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
findfont: Font family 'Noto Sans Devanagari' not found.
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
  fig.canvas.draw()
findfont: Font family 'Noto Sans Devanagari' not found.
  fig.canvas.draw()
  fig.canvas.draw()
findfont: Font family 'Noto Sans Devanagari' not found.
  fig.canvas.draw()
  fig.canvas.draw()
findfont: Font family 'Noto Sans Devanagari' not found.
  fig.canvas.draw()
findfont: Font


--- Classification Report ---
              precision    recall  f1-score   support

         कलम       1.00      1.00      1.00       170
       क्षमा       1.00      1.00      1.00       170
        खाना       1.00      1.00      1.00       170
        खुशी       1.00      1.00      1.00       170
     गन्तव्य       1.00      1.00      1.00       170
          घर       1.00      1.00      1.00       170
         जेल       1.00      1.00      1.00       170
        जोरो       1.00      1.00      1.00       170
        टाटा       1.00      1.00      1.00       170
        ढोका       1.00      1.00      1.00       170
      नमस्ते       1.00      1.00      1.00       170
        पानी       1.00      1.00      1.00       170
        पैसा       1.00      1.00      1.00       170
        पौडी       1.00      1.00      1.00       170
         फोन       1.00      1.00      1.00       170
           म       1.00      1.00      1.00       170
        माया       1.00      1.00      1.00       

  fig.canvas.draw()
findfont: Font family 'Noto Sans Devanagari' not found.
  fig.canvas.draw()
findfont: Font family 'Noto Sans Devanagari' not found.
  fig.canvas.draw()
  fig.canvas.draw()
findfont: Font family 'Noto Sans Devanagari' not found.
  fig.canvas.draw()
findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
  fig.canvas.draw()
findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanag

Confusion matrix saved to confusion_matrix.png


findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
  plt.savefig(ROC_CURVE_PLOT)
  plt.savefig(ROC_CURVE_PLOT)
  plt.savefig(ROC_CURVE_PLOT)
  plt.savefig(ROC_CURVE_PLOT)
findfont: Font family 'Noto Sans Devanagari' not found.
  plt.savefig(ROC_CURVE_PLOT)
  plt.savefig(ROC_CURVE_PLOT)
  plt.savefig(ROC_CURVE_PLOT)
findfont: Font family 'Noto Sans Devanagari' not found.
  plt.savefig(ROC_CURVE_PLOT)
  plt.savefig(ROC_CURVE_PLOT)
findfont: Font family 'Noto Sans Devanagari' not found.
  plt.savefig(ROC_CURVE_PLOT)
  plt.savefig(ROC_CURVE_PLOT)
  plt.savefig(ROC_CURVE_PLOT)
findfont: Font family 'Noto Sans Devanagari' not found.
  plt.savefig(ROC_CURVE_PLOT)
  plt.savefig(ROC_CURVE_PLOT)
  plt.savefig(ROC_CURVE_PLOT)
  plt.savefig(ROC_CURVE_PLOT)
findfont: Font family 'Noto Sans Devanagari' not found.
  plt.savefig(ROC_CURVE_PLOT)
  plt.savefig(ROC_CURVE_PLOT)
findfont: Fo

ROC curves plot saved to roc_curves.png


findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
findfont: Font family 'Noto Sans Devanagari' not found.
  plt.savefig(PR_CURVE_PLOT)
  plt.savefig(PR_CURVE_PLOT)
  plt.savefig(PR_CURVE_PLOT)
  plt.savefig(PR_CURVE_PLOT)
findfont: Font family 'Noto Sans Devanagari' not found.
  plt.savefig(PR_CURVE_PLOT)
  plt.savefig(PR_CURVE_PLOT)
  plt.savefig(PR_CURVE_PLOT)
findfont: Font family 'Noto Sans Devanagari' not found.
  plt.savefig(PR_CURVE_PLOT)
  plt.savefig(PR_CURVE_PLOT)
findfont: Font family 'Noto Sans Devanagari' not found.
  plt.savefig(PR_CURVE_PLOT)
  plt.savefig(PR_CURVE_PLOT)
  plt.savefig(PR_CURVE_PLOT)
findfont: Font family 'Noto Sans Devanagari' not found.
  plt.savefig(PR_CURVE_PLOT)
  plt.savefig(PR_CURVE_PLOT)
  plt.savefig(PR_CURVE_PLOT)
  plt.savefig(PR_CURVE_PLOT)
findfont: Font family 'Noto Sans Devanagari' not found.
  plt.savefig(PR_CURVE_PLOT)
  p

P-R curves plot saved to precision_recall_curves.png


In [None]:
import cv2
import numpy as np
import torch
import mediapipe as mp
import os
import time



# --- CONFIGURATION ---
MODEL_FILE = 'best_sign_model_attn.pth'
DATA_FILE = 'sign_language_data.npz' 
SEQUENCE_LENGTH = 75 # Must match preprocessing
FEATURE_DIM = 258      # Must match preprocessing (33*4 + 21*3 + 21*3)
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

PROCESS_WIDTH = 640
PROCESS_HEIGHT = 480

# --- NEW: FPS Control ---
TARGET_FPS = 30
FRAME_TIME = 1.0 / TARGET_FPS # Time between frames (approx 0.033s)

# --- MEDIAPIPE SETUP ---
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

def mediapipe_detection(image, model):
    """
    Flips, resizes, and processes the image with MediaPipe.
    Returns the *original* flipped image and the results.
    """
    image = cv2.flip(image, 1) # Flip for selfie view
    
    # Resize for performance
    process_image = cv2.resize(image, (PROCESS_WIDTH, PROCESS_HEIGHT))
    process_image = cv2.cvtColor(process_image, cv2.COLOR_BGR2RGB)
    
    process_image.flags.writeable = False
    results = model.process(process_image)
    process_image.flags.writeable = True
    
    # Return the original flipped image for drawing
    return image, results

def extract_keypoints(results):
    """Extracts keypoints into a single numpy array (FEATURE_DIM)."""
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, lh, rh])

def load_classes():
    """Loads action labels from the .npz file."""
    if not os.path.exists(DATA_FILE):
        raise Exception(f"{DATA_FILE} not found. Cannot load class names.")
    data = np.load(DATA_FILE, allow_pickle=True)
    return data['action_labels']

def draw_styled_landmarks(image, results):
    """Draws MediaPipe landmarks with custom styles."""
    # Draw left hand
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                             mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2))
    # Draw right hand
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                             mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2))
    # Draw pose
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(80, 22, 10), thickness=2, circle_radius=4),
                             mp_drawing.DrawingSpec(color=(80, 44, 121), thickness=2, circle_radius=2))

def main():
    actions = load_classes()
    num_classes = len(actions)
    
    # Load your model structure
    model = SignLanguageLSTM(input_dim=FEATURE_DIM, hidden_dim=64, num_classes=num_classes).to(DEVICE)
    
    try:
        model.load_state_dict(torch.load(MODEL_FILE, map_location=DEVICE))
    except RuntimeError as e:
        print(f"\nCRITICAL ERROR: {e}")
        print("Model architecture mismatch. Did you change the model script?")
        print(f"Expected input_dim={FEATURE_DIM}, num_classes={num_classes}. Check your model.py.")
        return
    except FileNotFoundError:
        print(f"\nCRITICAL ERROR: {MODEL_FILE} not found.")
        print("Make sure you have trained the model and the file is in the correct location.")
        return

    model.eval()
    print("Model loaded. Ready for prediction.")

    cap = cv2.VideoCapture(0)
    # Try to set camera to target FPS
    cap.set(cv2.CAP_PROP_FPS, TARGET_FPS)
    
    # State variables
    video_frames_buffer = [] # NEW: Stores raw video frames
    state = "IDLE" # "IDLE", "RECORDING", "PROCESSING"
    last_prediction = "---"
    prediction_conf = 0.0
    last_frame_time = 0.0
    
    with mp_holistic.Holistic(
        min_detection_confidence=0.5, 
        min_tracking_confidence=0.5, 
        smooth_landmarks=True) as holistic:
        
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret: 
                print("Failed to grab frame from camera.")
                break

            key = cv2.waitKey(1) & 0xFF
            current_time = time.time()
            
            # --- State Machine ---
            
            if state == "IDLE":
                # --- IDLE STATE ---
                # Process and display live skeleton
                image, results = mediapipe_detection(frame, holistic)
                draw_styled_landmarks(image, results)
                
                # Draw "Press R" text
                cv2.rectangle(image, (0, 0), (640, 40), (245, 117, 16), -1)
                cv2.putText(image, "Press 'R' to Record", (150, 30), 
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
                
                if key == ord('r'):
                    video_frames_buffer = [] # Clear buffer
                    state = "RECORDING"
                    last_prediction = "..." # Show we are working
                    prediction_conf = 0.0
                    last_frame_time = current_time # Start timer
            
            elif state == "RECORDING":
                # --- RECORDING STATE ---
                # Enforce 30 FPS capture rate
                if (current_time - last_frame_time) < FRAME_TIME:
                    # Skip this loop iteration, wait for next frame time
                    # We still need to show *something*, so re-display the last frame
                    # This is a bit complex, let's simplify and just capture
                    pass # We'll just capture as fast as possible up to 75

                # This logic is simpler: capture 75 frames, paced by the loop
                # This assumes the loop runs fast enough
                
                # --- NEW FPS-Paced Logic ---
                if (current_time - last_frame_time) >= FRAME_TIME:
                    last_frame_time = current_time
                    
                    # Store the *raw* frame for later processing
                    video_frames_buffer.append(frame) 
                    
                # Display simple flipped image (NO SKELETON)
                image = cv2.flip(frame, 1)
                
                # Draw "Recording..." text
                cv2.rectangle(image, (0, 0), (640, 40), (0, 0, 255), -1)
                cv2.putText(image, f"Recording... {len(video_frames_buffer)}/{SEQUENCE_LENGTH}", (100, 30), 
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
                
                # --- Check if buffer is full ---
                if len(video_frames_buffer) == SEQUENCE_LENGTH:
                    state = "PROCESSING"

            elif state == "PROCESSING":
                # --- PROCESSING STATE (runs once) ---
                
                # 1. Display a "Processing..." message on the live feed
                image, results = mediapipe_detection(frame, holistic) # Get live frame
                draw_styled_landmarks(image, results) # Draw live skeleton
                cv2.rectangle(image, (0, 0), (640, 40), (128, 0, 128), -1)
                cv2.putText(image, "Processing...", (200, 30), 
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
                cv2.imshow('Sign Language Prediction', image)
                cv2.waitKey(1) # Force redraw

                # 2. Process the *entire* buffer of 75 frames
                skeleton_buffer = []
                for recorded_frame in video_frames_buffer:
                    # Process each stored frame
                    _, process_results = mediapipe_detection(recorded_frame, holistic)
                    keypoints = extract_keypoints(process_results)
                    skeleton_buffer.append(keypoints)
                
                # 3. --- PREDICTION ---
                np_frames = np.array(skeleton_buffer)
                input_data = torch.FloatTensor(np.expand_dims(np_frames, axis=0)).to(DEVICE)
                
                with torch.no_grad():
                    output = model(input_data)
                    probs = torch.nn.functional.softmax(output, dim=1)
                    conf, predicted_idx = torch.max(probs, 1)
                    
                    prediction_conf = conf.item()
                    last_prediction = actions[predicted_idx.item()]
                
                # 4. Reset state and clear buffer
                state = "IDLE"
                video_frames_buffer = []
                # 'image' will be recreated in the next IDLE loop
                continue # Jump to next loop iteration
            
            # --- UI DRAWING (common to all states) ---
            
            h, w, _ = image.shape
            
            # 1. Show a progress bar (only visible during recording)
            if state == "RECORDING":
                progress = len(video_frames_buffer) / SEQUENCE_LENGTH
                cv2.rectangle(image, (50, h - 80), (w - 50, h - 50), (200, 200, 200), -1)
                cv2.rectangle(image, (50, h - 80), (int(50 + (w - 100) * progress), h - 50), (0, 0, 255), -1)

            # 2. Show Last Prediction
            color = (0, 255, 0) if prediction_conf > 0.7 else (0, 165, 255)
            
            display_text = f"Result: {last_prediction}"
            print(last_prediction)
            if prediction_conf > 0: 
                display_text += f" ({prediction_conf:.2f})"
            
            cv2.rectangle(image, (0, h - 40), (w, h), (50, 50, 50), -1)
            cv2.putText(image, display_text, (10, h - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

            cv2.imshow('Sign Language Prediction', image)

            # 'q' to quit
            if key == ord('q'): 
                break

    cap.release()
    cv2.destroyAllWindows()

if __name__ == '__main__':
    main()

Model loaded. Ready for prediction.
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
---
