# ASL Recognition: End-to-End Workflow

This notebook consolidates the entire workflow for preprocessing, model training, and evaluation for American Sign Language (ASL) recognition.


In [None]:
# Import Required Libraries
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

# Seed the global RNGs so weight initialisation and DataLoader shuffling are
# repeatable across "Restart Kernel -> Run All". Some CUDA kernels may still
# be non-deterministic, so GPU runs can differ slightly.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Preprocessing: Load and Normalize Keypoints

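We load the preprocessed keypoints and their labels from the per-label JSON files. The keypoint coordinates are assumed to be already normalized, so no additional scaling is applied.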


In [None]:
# Configuration
# Root folder holding one sub-directory of keypoint JSON files per label.
# Set the ASL_KEYPOINTS_DIR environment variable to run on another machine;
# the original local path is kept as the fallback so existing setups still work.
DATA_DIR = Path(os.environ.get(
    "ASL_KEYPOINTS_DIR",
    r'f:\Uni_Stuff\6th_Sem\DL\Proj\video-asl-recognition\pose_estimation\data\keypoints',
))

# Load and normalize keypoints
def load_keypoints(data_dir):
    """Load every keypoint sequence and its label found under ``data_dir``.

    Each sub-directory of ``data_dir`` is scanned for ``*.json`` files. Every
    file must contain a ``'keypoints'`` entry (a nested list whose first axis
    is the frame axis) and a ``'label'`` entry.

    Args:
        data_dir: ``pathlib.Path`` of the root keypoints directory.

    Returns:
        Tuple ``(data, labels)`` of NumPy arrays with one entry per JSON file,
        in sorted path order. Each sample is flattened to
        ``(frames, features)`` so the stacked array is
        ``(samples, frames, features)``, which is the layout a
        ``batch_first`` LSTM consumes.
    """
    data = []
    labels = []
    # iterdir()/glob() yield entries in arbitrary, filesystem-dependent order.
    # Sorting keeps the sample order, and therefore any seeded split of it,
    # identical from run to run.
    label_dirs = sorted(path for path in data_dir.iterdir() if path.is_dir())
    for label_dir in tqdm(label_dirs, desc="Loading labels"):
        for json_file in sorted(label_dir.glob('*.json')):
            # Only hold the file open while parsing it.
            with open(json_file, 'r') as f:
                content = json.load(f)
            # Coordinates are assumed to be already normalized, so they are
            # used as stored (the previous division by 1.0 was a no-op).
            keypoints = np.array(content['keypoints'])
            # Collapse all per-frame axes (e.g. joints x coords) into a single
            # feature axis; the LSTM expects one feature vector per frame.
            keypoints = keypoints.reshape(keypoints.shape[0], -1)
            data.append(keypoints)
            labels.append(content['label'])
    # NOTE(review): stacking assumes every sequence has the same number of
    # frames and features; ragged inputs would need padding first.
    return np.array(data), np.array(labels)

# Load data
data, labels = load_keypoints(DATA_DIR)

# The classifier is trained with CrossEntropyLoss, which needs integer class
# indices in [0, num_classes). Map the raw labels (possibly strings or
# non-contiguous ids in the JSON files) onto 0..C-1. Labels that already are
# exactly 0..C-1 come out unchanged. class_names[i] gives the original label
# for class index i.
class_names, labels = np.unique(labels, return_inverse=True)

# Dataset Preparation: Train-Test Split

Split the dataset into training (70%), validation (15%), and test (15%) sets, stratified by label so each split keeps the same class balance.


In [None]:
# First carve off 30% of the samples as a hold-out pool, then halve that pool
# into validation and test sets. Both steps are stratified by label and share
# one seed.
holdout_fraction = 0.3
split_seed = 42

X_train, X_temp, y_train, y_temp = train_test_split(
    data,
    labels,
    test_size=holdout_fraction,
    stratify=labels,
    random_state=split_seed,
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    stratify=y_temp,
    random_state=split_seed,
)

print(f"Training set: {X_train.shape}, Validation set: {X_val.shape}, Test set: {X_test.shape}")

In [None]:
# Define the dataset class
class ASLDataset(Dataset):
    """In-memory dataset pairing feature arrays with integer class labels.

    Args:
        data: Array-like of numeric features whose first axis indexes samples;
            stored as a ``float32`` tensor.
        labels: Array-like of integer class ids, one per sample; stored as an
            ``int64`` tensor.

    Raises:
        ValueError: If ``data`` and ``labels`` do not contain the same number
            of samples.
    """

    def __init__(self, data, labels):
        self.data = torch.tensor(data, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.long)
        # Fail fast here: a mismatch would otherwise only show up as an
        # IndexError part-way through an epoch.
        if len(self.data) != len(self.labels):
            raise ValueError(
                f"data has {len(self.data)} samples but labels has {len(self.labels)}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` tensor pair at position ``idx``."""
        return self.data[idx], self.labels[idx]

# Wrap the training and validation splits as tensor datasets
train_dataset, val_dataset = ASLDataset(X_train, y_train), ASLDataset(X_val, y_val)

# Batch both splits; only the training batches are reshuffled every epoch
batch_size = 32
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=batch_size)

# Model Definition: BiLSTM for Temporal Data

Define a BiLSTM model for ASL recognition.


In [None]:
import torch
import torch.nn as nn
import numpy as np

# Define the model with Batch Normalization
class BiLSTM(nn.Module):
    """Two stacked bidirectional LSTM layers followed by a linear classifier.

    Each LSTM layer is followed by batch normalisation over the feature axis
    and dropout. The sequence is summarised by concatenating the forward
    direction's output at the last time step with the backward direction's
    output at the first time step, and that vector is mapped to class logits.

    Args:
        input_size: Number of features per time step.
        hidden_size: Hidden units per direction in each LSTM layer.
        output_size: Number of logits produced (one per class).
        dropout: Dropout probability applied after each batch-norm layer.
            Defaults to 0.3, the value that was previously hard-coded.
    """

    def __init__(self, input_size, hidden_size, output_size, dropout=0.3):
        super().__init__()
        # Kept so forward() can split the concatenated direction outputs.
        self.hidden_size = hidden_size
        self.lstm1 = nn.LSTM(input_size, hidden_size, batch_first=True, bidirectional=True)
        self.batch_norm1 = nn.BatchNorm1d(hidden_size * 2)
        self.dropout1 = nn.Dropout(dropout)
        self.lstm2 = nn.LSTM(hidden_size * 2, hidden_size, batch_first=True, bidirectional=True)
        self.batch_norm2 = nn.BatchNorm1d(hidden_size * 2)
        self.dropout2 = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_size * 2, output_size)

    def forward(self, x):
        """Return logits of shape ``(batch, output_size)``.

        Args:
            x: Tensor of shape ``(batch, seq_len, input_size)``.
        """
        x, _ = self.lstm1(x)
        # BatchNorm1d normalises dim 1, so move the feature axis there and back.
        x = self.batch_norm1(x.transpose(1, 2)).transpose(1, 2)
        x = self.dropout1(x)
        x, _ = self.lstm2(x)
        x = self.batch_norm2(x.transpose(1, 2)).transpose(1, 2)
        x = self.dropout2(x)
        # The LSTM output concatenates [forward, backward] on the last axis.
        # The forward direction has read the whole sequence at t = -1, but the
        # backward direction has only done so at t = 0. Taking x[:, -1, :]
        # would use a backward state that has processed a single step.
        forward_summary = x[:, -1, :self.hidden_size]
        backward_summary = x[:, 0, self.hidden_size:]
        return self.fc(torch.cat((forward_summary, backward_summary), dim=1))

# Model dimensions: LSTM input width is the size of axis 2 of the training
# array; one output logit per distinct label value.
input_size = X_train.shape[2]
hidden_size = 128
output_size = np.unique(labels).size

# Build the network, print its layer summary, then place it on the selected device
model = BiLSTM(input_size=input_size, hidden_size=hidden_size, output_size=output_size)
print(model)
model = model.to(device)

# Hyperparameter Tuning

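Run a small grid search over the learning rate and the LSTM hidden size. Each configuration is trained for a few epochs with early stopping, and the one with the lowest validation loss is kept for the final training run.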


In [None]:
# Hyperparameter Tuning Code

# Search grid: every learning rate is paired with every hidden size
learning_rates = [0.001, 0.0005]
hidden_sizes = [64, 128]
num_epochs = 10  # short runs are enough to rank the configurations
tuning_patience = 3

best_val_loss = float('inf')
best_params = {}
results = {}

for lr in learning_rates:
    for hidden in hidden_sizes:
        print(f"\nTesting configuration: lr={lr}, hidden_size={hidden}")
        # Each candidate starts from a freshly initialised model and optimizer
        model_tuned = BiLSTM(input_size, hidden, output_size).to(device)
        optimizer_tuned = optim.Adam(model_tuned.parameters(), lr=lr)
        criterion_tuned = nn.CrossEntropyLoss()

        curr_best_val_loss = float('inf')
        early_stop_counter = 0

        for epoch in range(num_epochs):
            # Training pass: accumulate the per-batch loss, then average it
            model_tuned.train()
            train_loss = 0
            for inputs, targets in train_loader:
                inputs = inputs.to(device)
                targets = targets.to(device)
                optimizer_tuned.zero_grad()
                loss = criterion_tuned(model_tuned(inputs), targets)
                loss.backward()
                optimizer_tuned.step()
                train_loss += loss.item()
            train_loss = train_loss / len(train_loader)

            # Validation pass: no gradients, no weight updates
            model_tuned.eval()
            val_loss = 0
            with torch.no_grad():
                for inputs, targets in val_loader:
                    inputs = inputs.to(device)
                    targets = targets.to(device)
                    val_loss += criterion_tuned(model_tuned(inputs), targets).item()
            val_loss = val_loss / len(val_loader)
            print(f"Epoch {epoch+1}/{num_epochs}: Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

            # Early stopping: give up on this configuration after
            # `tuning_patience` consecutive epochs without improvement
            improved = val_loss < curr_best_val_loss
            if improved:
                curr_best_val_loss = val_loss
                early_stop_counter = 0
            else:
                early_stop_counter += 1
            if not improved and early_stop_counter >= tuning_patience:
                print("Early stopping for this configuration.")
                break

        # Record this configuration and promote it if it beats the best so far
        results[(lr, hidden)] = curr_best_val_loss
        if curr_best_val_loss < best_val_loss:
            best_params = {'learning_rate': lr, 'hidden_size': hidden}
            best_val_loss = curr_best_val_loss

print("\nHyperparameter Tuning Results:")
for (lr, hidden), loss in results.items():
    print(f"lr: {lr}, hidden_size: {hidden} => Val Loss: {loss:.4f}")

print(f"\nBest hyperparameters: {best_params} with Val Loss: {best_val_loss:.4f}")

# Model Training

Train the model on the training set and validate on the validation set.


In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim

# Use best hyperparameters from tuning
best_lr = best_params['learning_rate']
best_hidden_size = best_params['hidden_size']

# Recreate model with best hyperparameters
model = BiLSTM(input_size, best_hidden_size, output_size).to(device)
print(f"Training with best hyperparameters: lr={best_lr}, hidden_size={best_hidden_size}")

# Define the loss function and optimizer with best learning rate
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=best_lr)

# Training with Early Stopping
CHECKPOINT_PATH = "best_model.pth"
train_losses = []
val_losses = []
patience = 5
early_stop_counter = 0
best_val_loss = float('inf')
checkpoint_saved = False  # guards against reloading a stale file from an earlier run
epochs = 20
for epoch in range(epochs):
    # Training pass: average the per-batch loss over the epoch
    model.train()
    train_loss = 0
    for batch in train_loader:
        inputs, targets = batch
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    train_loss /= len(train_loader)
    train_losses.append(train_loss)

    # Validation pass: no gradients, no weight updates
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch in val_loader:
            inputs, targets = batch
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            val_loss += loss.item()
    val_loss /= len(val_loader)
    val_losses.append(val_loss)
    print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

    # Save the model if validation loss improves
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), CHECKPOINT_PATH)
        checkpoint_saved = True
        print("Model checkpoint saved!")
        early_stop_counter = 0
    else:
        early_stop_counter += 1
        print(f"Early stopping counter: {early_stop_counter}/{patience}")
    if early_stop_counter >= patience:
        print("Early stopping triggered.")
        break

# The weights left in memory belong to the last epoch run, which may be up to
# `patience` epochs past the best one. Restore the best checkpoint so that
# everything that uses `model` afterwards sees the best-validation weights.
if checkpoint_saved:
    model.load_state_dict(torch.load(CHECKPOINT_PATH, map_location=device))
    print(f"Restored best checkpoint (Val Loss: {best_val_loss:.4f})")

In [None]:
# Plot Training and Validation Loss Curves
import matplotlib.pyplot as plt
# One figure, one axes: per-epoch training and validation loss on a shared scale
fig, ax = plt.subplots(figsize=(10, 6))
for curve, curve_label in ((train_losses, "Training Loss"), (val_losses, "Validation Loss")):
    ax.plot(curve, label=curve_label)
ax.set_xlabel("Epochs")
ax.set_ylabel("Loss")
ax.set_title("Training and Validation Loss Curves")
ax.legend()
plt.show()

# Evaluation

Evaluate the model on the test set and display metrics.


In [None]:
# Evaluate the model
test_dataset = ASLDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
model.eval()
test_loss = 0
correct = 0
total = 0
with torch.no_grad():
    for batch in test_loader:
        inputs, targets = batch
        # The model lives on `device`; batches must be moved there too or the
        # forward pass fails with a device-mismatch error on CUDA.
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        test_loss += loss.item()
        # Predicted class = index of the largest logit
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == targets).sum().item()
        total += targets.size(0)
# Mean of the per-batch losses, and fraction of correctly classified samples
test_loss /= len(test_loader)
accuracy = correct / total
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {accuracy:.4f}")

# Save the model for transfer learning
# Record the hidden size of the model actually being saved. The notebook-level
# `hidden_size` variable is the value from the model-definition cell and does
# not necessarily match the tuned model, which would make the checkpoint
# impossible to load into a correctly sized network.
torch.save({
    'model_state_dict': model.state_dict(),
    'input_size': input_size,
    'hidden_size': model.lstm1.hidden_size,
    'output_size': output_size
}, "transfer_learning_model.pth")
print("Model saved for transfer learning!")

# Inference

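Run the trained model on a single held-out test sample as a minimal inference example. The same steps apply to new data, provided it is preprocessed into the same keypoint format.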


In [None]:
# Example inference
model.eval()
# Keep the batch axis (slice 0:1) and place the sample on the same device as
# the model; a CPU tensor would fail against a CUDA model.
sample = torch.tensor(X_test[0:1], dtype=torch.float32).to(device)
with torch.no_grad():
    prediction = model(sample)
    # Predicted class = index of the largest logit
    predicted_label = torch.argmax(prediction, dim=1).item()
print(f"Predicted label: {predicted_label}, True label: {y_test[0]}")