# ASL Recognition: End-to-End Workflow

This notebook consolidates the entire workflow for preprocessing, model training, and evaluation for American Sign Language (ASL) recognition.


In [1]:
# Import Required Libraries
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from tqdm.notebook import tqdm  # Use notebook-friendly version of tqdm

# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


# Preprocessing: Load and Normalize Keypoints

We will load the preprocessed keypoints from the JSON files and normalize them.


In [2]:
# Configuration
DATA_DIR = Path(r'f:\Uni_Stuff\6th_Sem\DL\Proj\video-asl-recognition\pose_estimation\data\keypoints')

# Load and normalize keypoints
# Now also return sequence lengths for masking

def load_keypoints(data_dir):
    data = []
    labels = []
    lengths = []
    skipped_files = 0
    processed_files = 0
    print(f"Loading keypoints from {data_dir}")
    # Gather all json files from all label directories
    all_json_files = []
    for label_dir in data_dir.iterdir():
        if label_dir.is_dir():
            all_json_files.extend(list(label_dir.glob('*.json')))
    for json_file in tqdm(all_json_files, desc="Loading keypoint files"):
        try:
            with open(json_file, 'r') as f:
                content = json.load(f)
                if 'keypoints' not in content or 'label' not in content:
                    skipped_files += 1
                    continue
                keypoints = content['keypoints']
                label = content['label']
                processed_frames = []
                for frame in keypoints:
                    left_hand_features = np.zeros(63, dtype=np.float32)
                    right_hand_features = np.zeros(63, dtype=np.float32)
                    pose_features = np.zeros(99, dtype=np.float32)
                    if 'hands' in frame and frame['hands']:
                        for i, hand in enumerate(frame['hands']):
                            if i < 2:
                                hand_features = []
                                for point in hand:
                                    if isinstance(point, list) and len(point) == 3:
                                        hand_features.extend(point)
                                if i == 0 and len(hand_features) <= 63:
                                    left_hand_features[:len(hand_features)] = hand_features
                                elif i == 1 and len(hand_features) <= 63:
                                    right_hand_features[:len(hand_features)] = hand_features
                    if 'pose' in frame and frame['pose']:
                        pose_data = []
                        for point in frame['pose']:
                            if isinstance(point, list) and len(point) == 3:
                                pose_data.extend(point)
                        if len(pose_data) <= 99:
                            pose_features[:len(pose_data)] = pose_data
                    frame_features = np.concatenate([left_hand_features, right_hand_features, pose_features])
                    processed_frames.append(frame_features)
                if processed_frames:
                    processed_data = np.array(processed_frames, dtype=np.float32)
                    if processed_data.shape[0] > 0 and processed_data.shape[1] > 0:
                        data.append(processed_data)
                        labels.append(label)
                        lengths.append(processed_data.shape[0])
                        processed_files += 1
                    else:
                        skipped_files += 1
                else:
                    skipped_files += 1
        except Exception as e:
            skipped_files += 1
    print(f"Processed {processed_files} files successfully, skipped {skipped_files} files")
    if not data:
        print("Warning: No valid data was loaded!")
        return np.array([]), np.array([]), np.array([])
    max_seq_len = max(sample.shape[0] for sample in data)
    feature_dim = data[0].shape[1]
    padded_data = []
    for sample in data:
        if sample.shape[0] < max_seq_len:
            padding = np.zeros((max_seq_len - sample.shape[0], feature_dim), dtype=np.float32)
            padded_sample = np.vstack((sample, padding))
        else:
            padded_sample = sample[:max_seq_len]
        padded_data.append(padded_sample)
    return np.array(padded_data), np.array(labels), np.array(lengths)

data, labels, lengths = load_keypoints(DATA_DIR)

Loading keypoints from f:\Uni_Stuff\6th_Sem\DL\Proj\video-asl-recognition\pose_estimation\data\keypoints


Loading keypoint files:   0%|          | 0/3202 [00:00<?, ?it/s]

Processed 3202 files successfully, skipped 0 files


In [7]:
print("First 5 labels:")
print(labels[:5])
print("First 5 lengths:")
print(lengths[:5])


# After data, labels, lengths = load_keypoints(DATA_DIR)
print("Raw data statistics:")
if data.size > 0:
    # Print shapes of the first few samples
    print("Number of samples:", len(data))
    print("Shape of sample 0:", data[0].shape)
    # Compute min, max and mean for the first sample (raw frames)
    sample0 = data[0]
    print("Sample 0 - min:", np.min(sample0), "max:", np.max(sample0), "mean:", np.mean(sample0))
    # Print stddev for all raw data
    print("Raw data stddev:", np.std(data))
else:
    print("No data loaded.")

print("Label distribution:")
unique, counts = np.unique(labels, return_counts=True)
for l, count in zip(unique, counts):
    print(f"Label: {l}, Count: {count}")

print("Sequence lengths statistics:")
print("Min length:", np.min(lengths), "Max length:", np.max(lengths), "Mean length:", np.mean(lengths))

First 5 labels:
['about' 'about' 'about' 'about' 'about']
First 5 lengths:
[60 60 60 60 60]
Raw data statistics:
Number of samples: 3202
Shape of sample 0: (60, 225)
Sample 0 - min: 0.0 max: 1.0 mean: 0.5605123
Raw data stddev: 0.13998298
Label distribution:
Label: about, Count: 8
Label: accident, Count: 13
Label: africa, Count: 13
Label: again, Count: 10
Label: all, Count: 13
Label: always, Count: 9
Label: animal, Count: 10
Label: apple, Count: 13
Label: approve, Count: 11
Label: argue, Count: 10
Label: arrive, Count: 10
Label: baby, Count: 10
Label: back, Count: 7
Label: backpack, Count: 11
Label: bad, Count: 11
Label: bake, Count: 8
Label: balance, Count: 11
Label: ball, Count: 11
Label: banana, Count: 10
Label: bar, Count: 10
Label: basketball, Count: 12
Label: bath, Count: 10
Label: bathroom, Count: 10
Label: beard, Count: 10
Label: because, Count: 7
Label: bed, Count: 13
Label: before, Count: 17
Label: behind, Count: 9
Label: bird, Count: 12
Label: birthday, Count: 9
Label: black

In [4]:
# Feature normalization (per-sample min-max scaling)
def normalize_features(data):
    # data: (num_samples, seq_len, num_features)
    data_min = data.min(axis=(1, 2), keepdims=True)
    data_max = data.max(axis=(1, 2), keepdims=True)
    # Avoid division by zero
    denom = np.where((data_max - data_min) == 0, 1, data_max - data_min)
    normalized = (data - data_min) / denom
    return normalized, data_min, data_max

data, feat_min, feat_max = normalize_features(data)
print(f"Data shape after normalization: {data.shape}")

def create_mask(lengths, max_len):
    # lengths: (num_samples,)
    # returns mask: (num_samples, max_len) with 1 for real, 0 for pad
    mask = np.zeros((len(lengths), max_len), dtype=np.float32)
    for i, l in enumerate(lengths):
        mask[i, :l] = 1.0
    return mask

mask = create_mask(lengths, data.shape[1])

Data shape after normalization: (3202, 60, 225)


In [8]:
# After data, feat_center, feat_scale = normalize_features(data)
print("Normalized data statistics:")
if data.size > 0:
    print("Data shape:", data.shape)
    print("Min value:", np.min(data), "Max value:", np.max(data), "Median value:", np.median(data), "Mean value:", np.mean(data), "Std Dev:", np.std(data))
else:
    print("No data available after normalization.")

Normalized data statistics:
Data shape: (3202, 60, 225)
Min value: 0.0 Max value: 1.0 Median value: 0.5581554 Mean value: 0.56632435 Std Dev: 0.13998298


# Dataset Preparation: Train-Test Split

Split the dataset into training, validation, and test sets.


In [10]:
# Import train_test_split if not already imported
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
import torch
from torch.utils.data import DataLoader
from torch.utils.data.sampler import WeightedRandomSampler

# Create a mapping from string labels to numeric indices
label_to_index = {label: idx for idx, label in enumerate(np.unique(labels))}
index_to_label = {idx: label for label, idx in label_to_index.items()}
print(f"Label to index mapping: {label_to_index}")

# Convert string labels to numeric indices
numeric_labels = np.array([label_to_index[label] for label in labels])
print(f"Converted labels to numeric indices.")

# Split the dataset
X_train, X_temp, y_train, y_temp, mask_train, mask_temp = train_test_split(data, numeric_labels, mask, test_size=0.3, stratify=numeric_labels, random_state=42)
X_val, X_test, y_val, y_test, mask_val, mask_test = train_test_split(X_temp, y_temp, mask_temp, test_size=0.5, stratify=y_temp, random_state=42)

print(f"Training set: {X_train.shape}, Validation set: {X_val.shape}, Test set: {X_test.shape}")

# Compute class weights for balancing
class_weights = compute_class_weight('balanced', classes=np.unique(numeric_labels), y=numeric_labels)
class_weights = torch.tensor(class_weights, dtype=torch.float32).to(device)
print(f"Class weights: {class_weights}")
 
# Update DataLoader to use weighted sampling
# NOTE: train_dataset is not defined yet here, so just define the sampler and sample_weights for later use
sample_weights = class_weights[numeric_labels]
sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)
# Do not create train_loader here; it will be created after train_dataset is defined

Label to index mapping: {'about': 0, 'accident': 1, 'africa': 2, 'again': 3, 'all': 4, 'always': 5, 'animal': 6, 'apple': 7, 'approve': 8, 'argue': 9, 'arrive': 10, 'baby': 11, 'back': 12, 'backpack': 13, 'bad': 14, 'bake': 15, 'balance': 16, 'ball': 17, 'banana': 18, 'bar': 19, 'basketball': 20, 'bath': 21, 'bathroom': 22, 'beard': 23, 'because': 24, 'bed': 25, 'before': 26, 'behind': 27, 'bird': 28, 'birthday': 29, 'black': 30, 'blanket': 31, 'blue': 32, 'book': 33, 'bowling': 34, 'boy': 35, 'bring': 36, 'brother': 37, 'brown': 38, 'business': 39, 'but': 40, 'buy': 41, 'call': 42, 'can': 43, 'candy': 44, 'careful': 45, 'cat': 46, 'catch': 47, 'center': 48, 'cereal': 49, 'chair': 50, 'champion': 51, 'change': 52, 'chat': 53, 'cheat': 54, 'check': 55, 'cheese': 56, 'children': 57, 'christmas': 58, 'city': 59, 'class': 60, 'clock': 61, 'close': 62, 'clothes': 63, 'coffee': 64, 'cold': 65, 'college': 66, 'color': 67, 'computer': 68, 'convince': 69, 'cook': 70, 'cool': 71, 'copy': 72, 'co

In [11]:
# Define the dataset class
class ASLDataset(Dataset):
    def __init__(self, data, labels, mask):
        self.data = torch.tensor(data, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.long)
        self.mask = torch.tensor(mask, dtype=torch.float32)
    def __len__(self):
        return len(self.data)
    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx], self.mask[idx]

# Create the datasets
train_dataset = ASLDataset(X_train, y_train, mask_train)
val_dataset = ASLDataset(X_val, y_val, mask_val)

# Create the data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Model Definition: BiLSTM for Temporal Data

Define a BiLSTM model for ASL recognition.


In [12]:
import torch
import torch.nn as nn
import numpy as np

# Print information about the data shape
print(f"Data shape: {data.shape}")
if len(data) > 0:
    print(f"Each sample has {data[0].shape[0]} frames with {data[0].shape[1]} features per frame")

# Replace RNNModel with BiLSTM
class BiLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=2, dropout_rate=0.3):
        super(BiLSTM, self).__init__()
        self.lstm = nn.LSTM(
            input_size, hidden_size, num_layers=num_layers, batch_first=True, bidirectional=True, dropout=dropout_rate if num_layers > 1 else 0
        )
        self.layer_norm = nn.LayerNorm(hidden_size * 2)
        self.dropout = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(hidden_size * 2, output_size)

    def forward(self, x, mask=None):
        # x: (batch, seq, feat)
        # mask: (batch, seq)
        out, _ = self.lstm(x)  # (batch, seq, hidden*2)
        if mask is not None:
            # For each sample, get the last valid (unpadded) output
            lengths = mask.sum(dim=1).long()  # (batch,)
            last_outputs = []
            for i, l in enumerate(lengths):
                last_outputs.append(out[i, l-1, :])
            out = torch.stack(last_outputs, dim=0)  # (batch, hidden*2)
        else:
            out = out[:, -1, :]
        out = self.layer_norm(out)
        out = self.dropout(out)
        out = self.fc(out)
        return out

# Get the input size from the processed data
input_size = data[0].shape[1] if len(data) > 0 else 0
hidden_size = 128
output_size = len(np.unique(labels))
print(f"Input size for the model: {input_size}")
print(f"Number of unique labels (classes): {output_size}")

model = BiLSTM(input_size, hidden_size, output_size)
print(model)
model = model.to(device)

Data shape: (3202, 60, 225)
Each sample has 60 frames with 225 features per frame
Input size for the model: 225
Number of unique labels (classes): 300
BiLSTM(
  (lstm): LSTM(225, 128, num_layers=2, batch_first=True, dropout=0.3, bidirectional=True)
  (layer_norm): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
  (dropout): Dropout(p=0.3, inplace=False)
  (fc): Linear(in_features=256, out_features=300, bias=True)
)


# Hyperparameter Tuning with Grid Search

We use a custom grid search implementation for hyperparameter optimization.


In [13]:
# Import Optuna for hyperparameter optimization
import optuna
import numpy as np

print("Starting hyperparameter optimization using Optuna...")

# Define the objective function for Optuna
def objective(trial):
    # Define the hyperparameter search space
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
    hidden_size = trial.suggest_categorical('hidden_size', [32, 64, 128])
    dropout_rate = trial.suggest_uniform('dropout_rate', 0.2, 0.8)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64])
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-4, 1e-2)
    num_layers = trial.suggest_int('num_layers', 1, 3)

    # Create the model with the sampled hyperparameters
    model = BiLSTM(input_size, hidden_size, output_size, num_layers=num_layers, dropout_rate=dropout_rate).to(device)

    # Define the loss function and optimizer
    criterion = nn.CrossEntropyLoss(weight=class_weights)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Training loop (simplified for Optuna)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    for epoch in range(10):  # Limit epochs for faster optimization
        model.train()
        for inputs, targets, mask in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs, mask)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

        # Validation loop
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for inputs, targets, mask in val_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = model(inputs, mask)
                loss = criterion(outputs, targets)
                val_loss += loss.item()
        val_loss /= len(val_loader)

        # Report validation loss to Optuna
        trial.report(val_loss, epoch)

        # Handle pruning
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return val_loss

# Run Optuna optimization
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50, timeout=3600)

# Extract the best hyperparameters
best_params = study.best_params
print("Best hyperparameters:", best_params)

# Ensure all required parameters are stored for model training
best_params = {
    'learning_rate': best_params['learning_rate'],
    'hidden_size': best_params['hidden_size'],
    'dropout_rate': best_params['dropout_rate'],
    'batch_size': best_params['batch_size'],
    'weight_decay': best_params['weight_decay'],
    'num_layers': best_params['num_layers']
}




[I 2025-05-19 23:05:40,656] A new study created in memory with name: no-name-525074af-57bb-484f-9af0-c6429ef5cd7e


Starting hyperparameter optimization using Optuna...


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.2, 0.8)
  weight_decay = trial.suggest_loguniform('weight_decay', 1e-4, 1e-2)
[I 2025-05-19 23:05:57,387] Trial 0 finished with value: 8.004262129465738 and parameters: {'learning_rate': 0.0836391381522659, 'hidden_size': 128, 'dropout_rate': 0.24920083011100747, 'batch_size': 16, 'weight_decay': 0.00020274495282427478, 'num_layers': 1}. Best is trial 0 with value: 8.004262129465738.
[I 2025-05-19 23:05:57,387] Trial 0 finished with value: 8.004262129465738 and parameters: {'learning_rate': 0.0836391381522659, 'hidden_size': 128, 'dropout_rate': 0.24920083011100747, 'batch_size': 16, 'weight_decay': 0.00020274495282427478, 'num_layers': 1}. Best is trial 0 with value: 8.004262129465738.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.2, 0.8)
  weight_decay = trial.suggest_logunif

Best hyperparameters: {'learning_rate': 0.0022818287736223433, 'hidden_size': 32, 'dropout_rate': 0.3647233806073735, 'batch_size': 32, 'weight_decay': 0.004327172888931149, 'num_layers': 2}


# Model Training with K-Fold Cross-Validation

Train the model using k-fold cross-validation to better evaluate its performance. This section also switches to an RNN-based architecture and ensures proper batch normalization.


In [None]:
import torch
from torch.utils.data import Dataset, DataLoader, Subset
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import KFold
from tqdm.notebook import tqdm  # Use notebook-friendly version of tqdm

# Define the RNN model for classification
class RNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=2, dropout_rate=0.3):
        super(RNNModel, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(
            input_size, hidden_size, batch_first=True, num_layers=num_layers, dropout=dropout_rate if num_layers > 1 else 0
        )
        self.layer_norm = nn.LayerNorm(hidden_size)
        self.fc = nn.Linear(hidden_size, output_size)
        # Dropout after RNN output
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        x, _ = self.rnn(x)
        x = self.layer_norm(x[:, -1, :])
        x = self.dropout(x)
        x = self.fc(x)
        return x

# Use best hyperparameters from grid search
best_lr = best_params['learning_rate']
best_hidden_size = best_params['hidden_size']
best_dropout_rate = best_params['dropout_rate']
best_batch_size = max(2, best_params['batch_size'])  # Ensure batch size is above 1
best_weight_decay = best_params['weight_decay']
best_num_layers = best_params['num_layers']

print(f"Training with best hyperparameters:")
print(f"  Learning rate: {best_lr}")
print(f"  Hidden size: {best_hidden_size}")
print(f"  Dropout rate: {best_dropout_rate}")
print(f"  Batch size: {best_batch_size}")
print(f"  Weight decay: {best_weight_decay}")
print(f"  Number of layers: {best_num_layers}")

# Initialize k-fold cross-validation
k_folds = 3
kf = KFold(n_splits=k_folds, shuffle=True, random_state=42)

# Training with k-fold cross-validation
fold_results = []
for fold, (train_idx, val_idx) in enumerate(kf.split(train_dataset)):
    print(f"Fold {fold+1}/{k_folds}")

    # Create data loaders for the current fold
    train_subset = Subset(train_dataset, train_idx)
    val_subset = Subset(train_dataset, val_idx)
    train_loader = DataLoader(train_subset, batch_size=best_batch_size, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=best_batch_size, shuffle=False)

    # Recreate model for each fold with best hyperparameters
    model = BiLSTM(
        input_size, best_hidden_size, output_size, num_layers=best_num_layers, dropout_rate=best_dropout_rate).to(device)

    # Define the loss function and optimizer with weight decay
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=best_lr, weight_decay=best_weight_decay)
    # Use ReduceLROnPlateau scheduler for better learning rate adaptation
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)

    # Training loop
    train_losses = []
    val_losses = []
    patience = 10
    early_stop_counter = 0
    best_val_loss = float('inf')
    epochs = 50
    for epoch in range(epochs):
        print(f"Epoch {epoch+1}/{epochs}", end="\r")
        model.train()
        train_loss = 0
        for batch in tqdm(train_loader, desc=f"Training", leave=False):
            inputs, targets, mask = batch
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs, mask)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss /= len(train_loader)
        train_losses.append(train_loss)

        # Validation loop
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch in tqdm(val_loader, desc=f"Validation", leave=False):
                inputs, targets, mask = batch
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = model(inputs, mask)
                loss = criterion(outputs, targets)
                val_loss += loss.item()
        val_loss /= len(val_loader)
        val_losses.append(val_loss)

        # Print progress only once per epoch
        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

        # Step the scheduler
        scheduler.step(val_loss)

        # Save the model if validation loss improves
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), f"best_model_fold{fold+1}.pth")
            print("Model checkpoint saved!")
            early_stop_counter = 0
        else:
            early_stop_counter += 1
            print(f"Early stopping counter: {early_stop_counter}/{patience}")
        if early_stop_counter >= patience:
            print("Early stopping triggered.")
            break

    fold_results.append(best_val_loss)

# Print cross-validation results
print("\nCross-validation results:")
for fold, loss in enumerate(fold_results):
    print(f"Fold {fold+1}: Validation Loss = {loss:.4f}")
print(f"Average Validation Loss: {sum(fold_results)/len(fold_results):.4f}")

In [None]:
# Plot Training and Validation Loss Curves
import matplotlib.pyplot as plt
plt.figure(figsize=(10, 6))
plt.plot(train_losses, label="Training Loss")
plt.plot(val_losses, label="Validation Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("Training and Validation Loss Curves")
plt.legend()
plt.show()

# Evaluation

Evaluate the model on the test set and display metrics.


In [None]:
# Evaluate the model
test_dataset = ASLDataset(X_test, y_test, mask_test)
test_loader = DataLoader(test_dataset, batch_size=best_batch_size, shuffle=False)
model.eval()
test_loss = 0
correct = 0
total = 0
with torch.no_grad():
    for batch in test_loader:
        inputs, targets, mask = batch
        inputs, targets = inputs.to(device), targets.to(device)  # Move inputs to device too
        outputs = model(inputs, mask)
        loss = criterion(outputs, targets)
        test_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == targets).sum().item()
        total += targets.size(0)
test_loss /= len(test_loader)
accuracy = correct / total
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {accuracy:.4f}")

# Save the model for transfer learning
os.makedirs('./models', exist_ok=True)  # Create directory if it doesn't exist
torch.save({
    'model_state_dict': model.state_dict(),
    'input_size': input_size,
    'hidden_size': best_hidden_size,
    'dropout_rate': best_dropout_rate,
    'output_size': output_size,
    'accuracy': accuracy
}, "./models/transfer_learning_model.pth")
print("Model saved for transfer learning!")

# Inference

Test the model on new data or perform real-time inference.


In [None]:
# Example inference
model.eval()
sample = torch.tensor(X_test[0:1], dtype=torch.float32).to(device)  # Move sample to the correct device
with torch.no_grad():
    prediction = model(sample)
    predicted_label = torch.argmax(prediction, dim=1).item()
print(f"Predicted label: {predicted_label}, True label: {y_test[0]}")