# ASL Recognition: End-to-End Workflow

This notebook consolidates the entire workflow for preprocessing, model training, and evaluation for American Sign Language (ASL) recognition.


In [1]:
# Import Required Libraries
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


# Preprocessing: Load and Normalize Keypoints

We will load the preprocessed keypoints from the JSON files and normalize them.


In [None]:
# Configuration
DATA_DIR = Path(r'f:\Uni_Stuff\6th_Sem\DL\Proj\video-asl-recognition\pose_estimation\data\keypoints')

# Load and normalize keypoints
def load_keypoints(data_dir):
    data = []
    labels = []
    skipped_files = 0
    processed_files = 0
    
    print(f"Loading keypoints from {data_dir}")
    
    for label_dir in tqdm(data_dir.iterdir(), desc="Loading labels"):
        if not label_dir.is_dir():
            continue
        
        label_files = list(label_dir.glob('*.json'))
        print(f"Found {len(label_files)} files in {label_dir.name}")
        
        for json_file in tqdm(label_files, desc=f"Processing {label_dir.name}", leave=False):
            try:
                with open(json_file, 'r') as f:
                    content = json.load(f)
                    
                    # Ensure required keys are present
                    if 'keypoints' not in content or 'label' not in content:
                        print(f"Warning: Missing required keys in {json_file}")
                        skipped_files += 1
                        continue
                    
                    keypoints = content['keypoints']
                    label = content['label']
                    
                    # Process the keypoints into a consistent format
                    processed_frames = []
                    
                    for frame in keypoints:
                        # Define the expected full feature size
                        # MediaPipe hands has 21 landmarks per hand with x,y,z (21*3=63 per hand)
                        # MediaPipe pose has 33 landmarks with x,y,z (33*3=99)
                        # Maximum expected: 2 hands (126) + pose (99) = 225 features
                        
                        # Initialize arrays with zeros
                        left_hand_features = np.zeros(63, dtype=np.float32)  # 21 points * 3 coords
                        right_hand_features = np.zeros(63, dtype=np.float32) # 21 points * 3 coords
                        pose_features = np.zeros(99, dtype=np.float32)       # 33 points * 3 coords
                        
                        # Extract hand keypoints if present
                        if 'hands' in frame and frame['hands']:
                            for i, hand in enumerate(frame['hands']):
                                # Each hand has 21 keypoints with x,y,z coordinates
                                if i < 2:  # Only process up to 2 hands
                                    hand_features = []
                                    for point in hand:
                                        if isinstance(point, list) and len(point) == 3:
                                            hand_features.extend(point)  # Add x,y,z for each keypoint
                                    
                                    # Assign to left or right hand array (assuming first hand is left)
                                    if i == 0 and len(hand_features) <= 63:
                                        left_hand_features[:len(hand_features)] = hand_features
                                    elif i == 1 and len(hand_features) <= 63:
                                        right_hand_features[:len(hand_features)] = hand_features
                        
                        # Extract pose keypoints if present
                        if 'pose' in frame and frame['pose']:
                            pose_data = []
                            for point in frame['pose']:
                                if isinstance(point, list) and len(point) == 3:
                                    pose_data.extend(point)  # Add x,y,z for each keypoint
                            
                            # Ensure we don't exceed the expected size
                            if len(pose_data) <= 99:
                                pose_features[:len(pose_data)] = pose_data
                        
                        # Combine all features into a single consistent vector
                        frame_features = np.concatenate([left_hand_features, right_hand_features, pose_features])
                        processed_frames.append(frame_features)
                    
                    # Only add sequences with features
                    if processed_frames:
                        processed_data = np.array(processed_frames, dtype=np.float32)
                        
                        # Additional validation
                        if processed_data.shape[0] > 0 and processed_data.shape[1] > 0:
                            data.append(processed_data)
                            labels.append(label)
                            processed_files += 1
                        else:
                            print(f"Warning: Invalid data shape in {json_file}: {processed_data.shape}")
                            skipped_files += 1
                    else:
                        print(f"Warning: No valid frames found in {json_file}")
                        skipped_files += 1
                        
            except Exception as e:
                print(f"Error processing {json_file}: {str(e)}")
                skipped_files += 1
    
    print(f"Processed {processed_files} files successfully, skipped {skipped_files} files")
    if not data:
        print("Warning: No valid data was loaded!")
        return np.array([]), np.array([])
        
    # Make sure all samples have the same shape for each sequence dimension
    # Find the maximum sequence length
    max_seq_len = max(sample.shape[0] for sample in data)
    feature_dim = data[0].shape[1]  # Should be 225 (63+63+99)
    
    # Create a padded dataset with uniform dimensions
    padded_data = []
    for sample in data:
        # If sequence is shorter than max_len, pad with zeros
        if sample.shape[0] < max_seq_len:
            padding = np.zeros((max_seq_len - sample.shape[0], feature_dim), dtype=np.float32)
            padded_sample = np.vstack((sample, padding))
        else:
            # If longer, truncate
            padded_sample = sample[:max_seq_len]
        padded_data.append(padded_sample)
    
    return np.array(padded_data), np.array(labels)

# Load data
data, labels = load_keypoints(DATA_DIR)

Loading keypoints from f:\Uni_Stuff\6th_Sem\DL\Proj\video-asl-recognition\pose_estimation\data\keypoints


Loading labels: 0it [00:00, ?it/s]

Found 8 files in about


Loading labels: 1it [00:00,  4.66it/s] [00:00<?, ?it/s][A

Found 13 files in accident


Loading labels: 2it [00:00,  6.46it/s]0/13 [00:00<?, ?it/s][A

Found 13 files in africa


Loading labels: 3it [00:00,  7.68it/s]13 [00:00<?, ?it/s][A

Found 10 files in again


Processing again:   0%|          | 0/10 [00:00<?, ?it/s][A

Found 13 files in all


Loading labels: 5it [00:00,  8.11it/s][00:00<?, ?it/s][A

Found 9 files in always


Loading labels: 6it [00:00,  6.26it/s]9 [00:00<?, ?it/s][A

Found 10 files in animal


Loading labels: 7it [00:01,  6.44it/s]10 [00:00<?, ?it/s][A

Found 13 files in apple


Loading labels: 8it [00:01,  6.93it/s]3 [00:00<?, ?it/s][A

Found 11 files in approve


Loading labels: 9it [00:01,  7.34it/s]/11 [00:00<?, ?it/s][A

Found 10 files in argue


Loading labels: 10it [00:01,  7.60it/s] [00:00<?, ?it/s][A

Found 10 files in arrive


Loading labels: 11it [00:01,  6.19it/s]

Found 10 files in baby


Processing baby:   0%|          | 0/10 [00:00<?, ?it/s][A

Found 7 files in back


Loading labels: 13it [00:01,  7.96it/s]00:00<?, ?it/s][A

Found 11 files in backpack


Loading labels: 14it [00:01,  8.06it/s]/11 [00:00<?, ?it/s][A

Found 11 files in bad


[A

Found 8 files in bake


Loading labels: 16it [00:02,  9.27it/s]00:00<?, ?it/s][A

Found 11 files in balance


Loading labels: 17it [00:02,  7.17it/s]11 [00:00<?, ?it/s][A

Found 11 files in ball


Loading labels: 18it [00:02,  7.36it/s][00:00<?, ?it/s][A

Found 10 files in banana


Loading labels: 19it [00:02,  7.64it/s]0 [00:00<?, ?it/s][A

Found 10 files in bar


Processing bar:   0%|          | 0/10 [00:00<?, ?it/s][A

Found 12 files in basketball


Loading labels: 21it [00:02,  6.50it/s] 0/12 [00:00<?, ?it/s][A

Found 10 files in bath


Loading labels: 22it [00:03,  6.63it/s][00:00<?, ?it/s][A

Found 10 files in bathroom


Loading labels: 23it [00:03,  7.24it/s]/10 [00:00<?, ?it/s][A

Found 10 files in beard


Loading labels: 24it [00:03,  7.34it/s] [00:00<?, ?it/s][A

Found 7 files in because


Processing because:   0%|          | 0/7 [00:00<?, ?it/s][A

Found 13 files in bed


Loading labels: 26it [00:03,  5.96it/s]00:00<?, ?it/s][A

Found 17 files in before


Loading labels: 27it [00:03,  5.43it/s]7 [00:00<?, ?it/s][A

Found 9 files in behind


Loading labels: 28it [00:04,  6.00it/s] [00:00<?, ?it/s][A

Found 12 files in bird


Loading labels: 29it [00:04,  6.46it/s][00:00<?, ?it/s][A

Found 9 files in birthday


Loading labels: 30it [00:04,  5.61it/s]

Found 13 files in black


Loading labels: 31it [00:04,  5.49it/s] [00:00<?, ?it/s][A

Found 8 files in blanket


[A

Found 12 files in blue


Loading labels: 33it [00:04,  6.77it/s][00:00<?, ?it/s][A

Found 10 files in book


Loading labels: 34it [00:04,  7.15it/s][00:00<?, ?it/s][A

Found 13 files in bowling


Loading labels: 35it [00:05,  7.31it/s]

Found 10 files in boy


Loading labels: 36it [00:05,  5.47it/s]

Found 10 files in bring


Loading labels: 37it [00:05,  5.82it/s] [00:00<?, ?it/s][A

Found 11 files in brother


Loading labels: 38it [00:05,  6.20it/s]11 [00:00<?, ?it/s][A

Found 11 files in brown


Loading labels: 39it [00:05,  6.34it/s] [00:00<?, ?it/s][A

Found 11 files in business


Loading labels: 40it [00:05,  6.68it/s]

Found 10 files in but


Loading labels: 41it [00:06,  5.51it/s]

Found 11 files in buy


Loading labels: 42it [00:06,  6.14it/s]00:00<?, ?it/s][A

Found 11 files in call


Loading labels: 43it [00:06,  6.53it/s]

Found 12 files in can


Loading labels: 44it [00:06,  6.63it/s]00:00<?, ?it/s][A

Found 14 files in candy


Loading labels: 45it [00:06,  5.24it/s]

Found 8 files in careful


Processing careful:   0%|          | 0/8 [00:00<?, ?it/s][A

Found 11 files in cat


Loading labels: 47it [00:07,  5.58it/s]00:00<?, ?it/s][A

Found 9 files in catch


Loading labels: 48it [00:07,  4.65it/s][00:00<?, ?it/s][A

Found 10 files in center


Loading labels: 49it [00:08,  3.47it/s]0 [00:00<?, ?it/s][A

Found 9 files in cereal


Loading labels: 50it [00:08,  2.97it/s]

Found 9 files in chair


Loading labels: 51it [00:08,  2.92it/s]

Found 8 files in champion


Loading labels: 52it [00:09,  2.83it/s]/8 [00:00<?, ?it/s][A

Found 12 files in change


Loading labels: 53it [00:09,  2.64it/s]2 [00:00<?, ?it/s][A

Found 10 files in chat


Loading labels: 54it [00:10,  2.76it/s][00:00<?, ?it/s][A

Found 12 files in cheat


Loading labels: 55it [00:10,  2.48it/s] [00:00<?, ?it/s][A

Found 13 files in check


Loading labels: 56it [00:10,  2.52it/s] [00:00<?, ?it/s][A

Found 9 files in cheese


Loading labels: 57it [00:11,  2.75it/s] [00:00<?, ?it/s][A

Found 8 files in children


Loading labels: 58it [00:11,  2.82it/s]/8 [00:00<?, ?it/s][A

Found 9 files in christmas


Loading labels: 59it [00:11,  3.07it/s]0/9 [00:00<?, ?it/s][A

Found 13 files in city


Loading labels: 60it [00:12,  2.47it/s][00:00<?, ?it/s][A

Found 10 files in class


Loading labels: 61it [00:12,  2.44it/s] [00:00<?, ?it/s][A

Found 7 files in clock


Loading labels: 62it [00:13,  2.79it/s][00:00<?, ?it/s][A

Found 8 files in close


Loading labels: 63it [00:13,  2.65it/s][00:00<?, ?it/s][A

Found 9 files in clothes


Loading labels: 64it [00:13,  2.53it/s]9 [00:00<?, ?it/s][A

Found 9 files in coffee


Loading labels: 65it [00:14,  2.68it/s] [00:00<?, ?it/s][A

Found 12 files in cold


Loading labels: 66it [00:14,  2.64it/s][00:00<00:00, 32.26it/s][A

Found 11 files in college


Loading labels: 67it [00:15,  2.60it/s]11 [00:00<?, ?it/s][A

Found 14 files in color


Loading labels: 68it [00:15,  2.45it/s] [00:00<?, ?it/s][A

Found 20 files in computer


Loading labels: 69it [00:16,  2.08it/s]/20 [00:00<?, ?it/s][A

Found 11 files in convince


Loading labels: 70it [00:16,  2.34it/s]/11 [00:00<?, ?it/s][A

Found 12 files in cook


Loading labels: 71it [00:16,  2.25it/s][00:00<?, ?it/s][A

Found 13 files in cool


Loading labels: 72it [00:17,  2.04it/s][00:00<?, ?it/s][A

Found 9 files in copy


Loading labels: 73it [00:17,  2.24it/s]00:00<?, ?it/s][A

Found 12 files in corn


Loading labels: 74it [00:18,  2.26it/s][00:00<?, ?it/s][A

Found 9 files in cough


Loading labels: 75it [00:18,  2.47it/s][00:00<?, ?it/s][A

Found 10 files in country


Loading labels: 76it [00:18,  2.64it/s]10 [00:00<?, ?it/s][A

Found 15 files in cousin


Loading labels: 77it [00:19,  2.28it/s]5 [00:00<?, ?it/s][A

Found 13 files in cow


Loading labels: 78it [00:20,  2.16it/s]00:00<?, ?it/s][A

Found 11 files in crash


Loading labels: 79it [00:20,  2.36it/s] [00:00<?, ?it/s][A

Found 9 files in crazy


Loading labels: 80it [00:20,  2.62it/s][00:00<?, ?it/s][A

Found 10 files in cry


Loading labels: 81it [00:21,  2.62it/s]00:00<?, ?it/s][A

Found 8 files in cute


Loading labels: 82it [00:21,  2.45it/s]00:00<?, ?it/s][A

Found 11 files in dance


Loading labels: 83it [00:21,  2.58it/s] [00:00<?, ?it/s][A

Found 13 files in dark


Loading labels: 84it [00:22,  2.45it/s][00:00<?, ?it/s][A

Found 11 files in daughter


Loading labels: 85it [00:22,  2.35it/s]/11 [00:00<?, ?it/s][A

Found 10 files in day


Loading labels: 86it [00:23,  2.56it/s]00:00<?, ?it/s][A

Found 13 files in deaf


Loading labels: 87it [00:23,  2.43it/s][00:00<?, ?it/s][A

Found 11 files in decide


Loading labels: 88it [00:24,  2.29it/s]1 [00:00<?, ?it/s][A

Found 9 files in delay


Loading labels: 89it [00:24,  2.40it/s][00:00<?, ?it/s][A

Found 10 files in delicious


Loading labels: 90it [00:24,  2.43it/s]0/10 [00:00<?, ?it/s][A

Found 11 files in different


Loading labels: 91it [00:25,  2.40it/s]

Found 9 files in disappear


Loading labels: 92it [00:25,  2.65it/s]0/9 [00:00<?, ?it/s][A

Found 11 files in discuss


Loading labels: 93it [00:25,  2.53it/s]11 [00:00<?, ?it/s][A

Found 7 files in divorce


Loading labels: 94it [00:26,  2.95it/s]7 [00:00<?, ?it/s][A

Found 10 files in doctor


Loading labels: 95it [00:26,  2.90it/s]0 [00:00<?, ?it/s][A

Found 14 files in dog


Loading labels: 96it [00:27,  2.41it/s]00:00<?, ?it/s][A

Found 10 files in door


Loading labels: 97it [00:27,  2.59it/s][00:00<?, ?it/s][A

Found 10 files in draw


Loading labels: 98it [00:27,  2.55it/s][00:00<?, ?it/s][A

Found 9 files in dress


Loading labels: 99it [00:28,  2.49it/s][00:00<?, ?it/s][A

Found 21 files in drink


Loading labels: 100it [00:28,  1.96it/s]

Found 8 files in drive


Loading labels: 101it [00:29,  2.26it/s]00:00<?, ?it/s][A

Found 10 files in drop


Loading labels: 102it [00:29,  2.48it/s]00:00<?, ?it/s][A

Found 8 files in east


Loading labels: 103it [00:29,  2.74it/s]0:00<?, ?it/s][A

Found 8 files in easy


Loading labels: 104it [00:30,  2.92it/s]0:00<?, ?it/s][A

Found 9 files in eat


Loading labels: 105it [00:30,  3.15it/s]:00<?, ?it/s][A

Found 8 files in egg


Loading labels: 106it [00:30,  2.72it/s]:00<?, ?it/s][A

Found 10 files in enjoy


Loading labels: 107it [00:31,  2.62it/s][00:00<?, ?it/s][A

Found 9 files in environment


Loading labels: 108it [00:31,  2.71it/s] 0/9 [00:00<?, ?it/s][A

Found 8 files in example


Loading labels: 109it [00:31,  3.02it/s] [00:00<?, ?it/s][A

Found 14 files in family


Loading labels: 110it [00:32,  2.25it/s] [00:00<?, ?it/s][A

Found 12 files in far


Loading labels: 111it [00:33,  2.11it/s]

Found 11 files in fat


Loading labels: 112it [00:33,  1.78it/s]0:00<?, ?it/s][A

Found 10 files in father


Loading labels: 113it [00:34,  2.06it/s] [00:00<?, ?it/s][A

Found 7 files in fault


Loading labels: 114it [00:34,  2.21it/s]00:00<?, ?it/s][A

Found 9 files in feel


Loading labels: 115it [00:34,  2.37it/s]0:00<?, ?it/s][A

Found 14 files in fine


Loading labels: 116it [00:35,  2.25it/s]

Found 12 files in finish


Loading labels: 117it [00:35,  2.29it/s] [00:00<?, ?it/s][A

Found 9 files in first


Loading labels: 118it [00:36,  2.46it/s]00:00<?, ?it/s][A

Found 14 files in fish


Loading labels: 119it [00:36,  2.43it/s]00:00<?, ?it/s][A

Found 9 files in flower


Loading labels: 120it [00:36,  2.49it/s][00:00<?, ?it/s][A

Found 9 files in football


Loading labels: 121it [00:37,  2.56it/s]9 [00:00<?, ?it/s][A

Found 14 files in forget


Loading labels: 122it [00:37,  2.48it/s] [00:00<?, ?it/s][A

Found 8 files in friend


Loading labels: 123it [00:38,  2.81it/s][00:00<?, ?it/s][A

Found 8 files in friendly


Loading labels: 124it [00:38,  3.04it/s]8 [00:00<?, ?it/s][A

Found 12 files in full


Loading labels: 125it [00:38,  2.73it/s]00:00<?, ?it/s][A

Found 9 files in future


Loading labels: 126it [00:39,  2.98it/s][00:00<?, ?it/s][A

Found 9 files in game


Loading labels: 127it [00:39,  2.88it/s]0:00<?, ?it/s][A

Found 10 files in girl


Loading labels: 128it [00:39,  3.02it/s]00:00<?, ?it/s][A

Found 13 files in give


Loading labels: 129it [00:40,  2.49it/s]00:00<?, ?it/s][A

Found 10 files in glasses


Loading labels: 130it [00:40,  2.31it/s]0 [00:00<?, ?it/s][A

Found 17 files in go


Loading labels: 131it [00:41,  2.13it/s]:00<?, ?it/s][A

Found 10 files in good


Loading labels: 132it [00:41,  2.34it/s]00:00<?, ?it/s][A

Found 11 files in government


Loading labels: 133it [00:41,  2.66it/s]0/11 [00:00<?, ?it/s][A

Found 12 files in graduate


Loading labels: 134it [00:42,  2.52it/s]12 [00:00<?, ?it/s][A

Found 10 files in green


Loading labels: 135it [00:42,  2.79it/s][00:00<?, ?it/s][A

Found 14 files in hair


Loading labels: 136it [00:43,  2.48it/s]00:00<?, ?it/s][A

Found 9 files in halloween


Loading labels: 137it [00:43,  2.79it/s]/9 [00:00<?, ?it/s][A

Found 10 files in happy


Loading labels: 138it [00:43,  2.98it/s][00:00<?, ?it/s][A

Found 7 files in hard


Loading labels: 139it [00:43,  3.26it/s]0:00<?, ?it/s][A

Found 13 files in hat


Loading labels: 140it [00:44,  2.47it/s]0:00<?, ?it/s][A

Found 8 files in have


Loading labels: 141it [00:44,  2.46it/s]0:00<?, ?it/s][A

Found 13 files in headache


Loading labels: 142it [00:45,  2.40it/s]13 [00:00<?, ?it/s][A

Found 10 files in hear


Loading labels: 143it [00:45,  2.62it/s]00:00<?, ?it/s][A

Found 14 files in hearing


Loading labels: 144it [00:46,  2.46it/s]4 [00:00<?, ?it/s][A

Found 9 files in heart


Loading labels: 145it [00:46,  2.46it/s]00:00<?, ?it/s][A

Found 14 files in help


Loading labels: 146it [00:47,  2.37it/s]00:00<?, ?it/s][A

Found 9 files in here


Loading labels: 147it [00:47,  2.50it/s]0:00<?, ?it/s][A

Found 9 files in home


Loading labels: 148it [00:47,  2.86it/s]0:00<?, ?it/s][A

Found 11 files in hope


Loading labels: 149it [00:48,  2.61it/s]00:00<?, ?it/s][A

Found 15 files in hot


Loading labels: 150it [00:48,  2.37it/s]0:00<?, ?it/s][A

Found 9 files in hour


Loading labels: 151it [00:48,  2.63it/s]0:00<?, ?it/s][A

Found 9 files in house


Loading labels: 152it [00:49,  2.81it/s]00:00<?, ?it/s][A

Found 9 files in how


Loading labels: 153it [00:49,  2.67it/s]:00<?, ?it/s][A

Found 10 files in humble


Loading labels: 154it [00:50,  2.50it/s] [00:00<?, ?it/s][A

Found 7 files in hurry


Loading labels: 155it [00:50,  2.72it/s]00:00<?, ?it/s][A

Found 9 files in husband


Loading labels: 156it [00:50,  2.89it/s] [00:00<?, ?it/s][A

Found 9 files in improve


Loading labels: 157it [00:50,  2.88it/s] [00:00<?, ?it/s][A

Found 9 files in inform


Loading labels: 158it [00:51,  3.09it/s][00:00<?, ?it/s][A

Found 12 files in interest


Loading labels: 159it [00:51,  2.56it/s]12 [00:00<?, ?it/s][A

Found 10 files in internet


Loading labels: 160it [00:52,  2.71it/s]10 [00:00<00:00, 31.27it/s][A

Found 10 files in jacket


Loading labels: 161it [00:52,  2.95it/s] [00:00<?, ?it/s][A

Found 10 files in join


Loading labels: 162it [00:52,  2.95it/s]                       [A

Found 8 files in jump


Loading labels: 163it [00:53,  2.98it/s]0:00<?, ?it/s][A

Found 9 files in kill


Loading labels: 164it [00:53,  2.71it/s]0:00<?, ?it/s][A

Found 11 files in kiss


Loading labels: 165it [00:53,  2.56it/s]00:00<00:00, 23.83it/s][A

Found 6 files in knife


Loading labels: 166it [00:54,  2.88it/s]00:00<?, ?it/s][A

Found 11 files in know


Loading labels: 167it [00:54,  2.68it/s]00:00<?, ?it/s][A

Found 13 files in language


Loading labels: 168it [00:55,  2.38it/s]13 [00:00<?, ?it/s][A

Found 12 files in last


Loading labels: 169it [00:55,  2.48it/s]00:00<?, ?it/s][A

Found 10 files in late


Loading labels: 170it [00:55,  2.60it/s]00:00<?, ?it/s][A

Found 13 files in later


Loading labels: 171it [00:56,  2.29it/s][00:00<?, ?it/s][A

Found 12 files in laugh


Loading labels: 172it [00:56,  2.29it/s][00:00<?, ?it/s][A

Found 10 files in law


Loading labels: 173it [00:57,  2.22it/s]0:00<?, ?it/s][A

Found 11 files in learn


Loading labels: 174it [00:57,  2.41it/s][00:00<?, ?it/s][A

Found 12 files in leave


Loading labels: 175it [00:57,  2.56it/s][00:00<?, ?it/s][A

Found 12 files in letter


Loading labels: 176it [00:58,  2.54it/s] [00:00<00:00, 33.84it/s][A

Found 11 files in light


Loading labels: 177it [00:58,  2.23it/s][00:00<?, ?it/s][A

Found 17 files in like


Loading labels: 178it [00:59,  2.05it/s]00:00<?, ?it/s][A

Found 12 files in list


Loading labels: 179it [01:00,  2.07it/s][00:00<00:00, 24.83it/s][A

Found 6 files in live


Loading labels: 180it [01:00,  2.34it/s]0:00<?, ?it/s][A

Found 12 files in lose


Loading labels: 181it [01:00,  2.16it/s][A

Found 9 files in make


Loading labels: 182it [01:01,  2.05it/s]0:00<?, ?it/s][A

Found 13 files in man


Loading labels: 183it [01:01,  2.01it/s][A

Found 13 files in many


Loading labels: 184it [01:02,  1.99it/s]00:00<?, ?it/s][A

Found 10 files in match


Loading labels: 185it [01:02,  1.97it/s][00:00<?, ?it/s][A

Found 8 files in mean


Loading labels: 186it [01:03,  2.10it/s]0:00<?, ?it/s][A

Found 9 files in meat


Loading labels: 187it [01:03,  2.36it/s]0:00<?, ?it/s][A

Found 9 files in medicine


Loading labels: 188it [01:03,  2.59it/s]9 [00:00<?, ?it/s][A

Found 13 files in meet


Loading labels: 189it [01:04,  2.43it/s]00:00<?, ?it/s][A

Found 9 files in milk


Loading labels: 190it [01:04,  2.69it/s]0:00<?, ?it/s][A

Found 8 files in money


Loading labels: 191it [01:05,  2.62it/s]00:00<?, ?it/s][A

Found 9 files in more


Loading labels: 192it [01:05,  2.63it/s]0:00<?, ?it/s][A

Found 10 files in most


Loading labels: 193it [01:05,  2.64it/s]00:00<?, ?it/s][A

Found 16 files in mother


Loading labels: 194it [01:06,  2.28it/s] [00:00<?, ?it/s][A

Found 7 files in movie


Loading labels: 195it [01:06,  2.61it/s]00:00<?, ?it/s][A

Found 10 files in music


Processing music:   0%|          | 0/10 [00:00<?, ?it/s][A

# Dataset Preparation: Train-Test Split

Split the dataset into training, validation, and test sets.


In [None]:
# Split the dataset
X_train, X_temp, y_train, y_temp = train_test_split(data, labels, test_size=0.3, stratify=labels, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42)

print(f"Training set: {X_train.shape}, Validation set: {X_val.shape}, Test set: {X_test.shape}")

In [None]:
# Define the dataset class
class ASLDataset(Dataset):
    def __init__(self, data, labels):
        self.data = torch.tensor(data, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.long)
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

# Create the datasets
train_dataset = ASLDataset(X_train, y_train)
val_dataset = ASLDataset(X_val, y_val)

# Create the data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Model Definition: BiLSTM for Temporal Data

Define a BiLSTM model for ASL recognition.


In [None]:
import torch
import torch.nn as nn
import numpy as np

# Print information about the data shape
print(f"Data shape: {data.shape}")
if len(data) > 0:
    print(f"Each sample has {data[0].shape[0]} frames with {data[0].shape[1]} features per frame")

# Define the model with Batch Normalization
class BiLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(BiLSTM, self).__init__()
        self.lstm1 = nn.LSTM(input_size, hidden_size, batch_first=True, bidirectional=True)
        self.batch_norm1 = nn.BatchNorm1d(hidden_size * 2)
        self.dropout1 = nn.Dropout(0.3)
        self.lstm2 = nn.LSTM(hidden_size * 2, hidden_size, batch_first=True, bidirectional=True)
        self.batch_norm2 = nn.BatchNorm1d(hidden_size * 2)
        self.dropout2 = nn.Dropout(0.3)
        self.fc = nn.Linear(hidden_size * 2, output_size)
    
    def forward(self, x):
        x, _ = self.lstm1(x)
        x = self.batch_norm1(x.transpose(1, 2)).transpose(1, 2)
        x = self.dropout1(x)
        x, _ = self.lstm2(x)
        x = self.batch_norm2(x.transpose(1, 2)).transpose(1, 2)
        x = self.dropout2(x)
        x = self.fc(x[:, -1, :])
        return x

# Get the input size from the processed data
input_size = data[0].shape[1] if len(data) > 0 else 0
print(f"Input size for the model: {input_size}")
hidden_size = 128
output_size = len(np.unique(labels))
print(f"Number of unique labels (classes): {output_size}")

model = BiLSTM(input_size, hidden_size, output_size)
print(model)
# Move model to selected device
model = model.to(device)

# Hyperparameter Tuning

Tune Hyperparams.


In [None]:
# Hyperparameter optimization with Optuna
import optuna

# Define the objective function for Optuna
def objective(trial):
    # Define hyperparameters to search
    lr = trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True)
    hidden_size = trial.suggest_categorical('hidden_size', [64, 128, 256])
    dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64])
    
    # Create a new model with trial hyperparameters
    model_trial = BiLSTM(input_size, hidden_size, output_size).to(device)
    
    # Update dropout rates based on trial
    model_trial.dropout1 = nn.Dropout(dropout_rate)
    model_trial.dropout2 = nn.Dropout(dropout_rate)
    
    # Setup optimizer
    optimizer_trial = optim.Adam(model_trial.parameters(), lr=lr)
    criterion_trial = nn.CrossEntropyLoss()
    
    # Create dataloaders with trial batch size
    train_loader_trial = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader_trial = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    
    # Training loop with early stopping
    num_epochs = 10  # Fewer epochs for tuning
    patience = 3
    best_val_loss = float('inf')
    early_stop_counter = 0
    
    for epoch in range(num_epochs):
        model_trial.train()
        train_loss = 0
        for batch in train_loader_trial:
            inputs, targets = batch
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer_trial.zero_grad()
            outputs = model_trial(inputs)
            loss = criterion_trial(outputs, targets)
            loss.backward()
            optimizer_trial.step()
            train_loss += loss.item()
        train_loss /= len(train_loader_trial)
        
        model_trial.eval()
        val_loss = 0
        with torch.no_grad():
            for batch in val_loader_trial:
                inputs, targets = batch
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = model_trial(inputs)
                loss = criterion_trial(outputs, targets)
                val_loss += loss.item()
        val_loss /= len(val_loader_trial)
        
        # Report intermediate metric to Optuna
        trial.report(val_loss, epoch)
        
        # Handle pruning (early stopping for this trial)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()
        
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            early_stop_counter = 0
        else:
            early_stop_counter += 1
            if early_stop_counter >= patience:
                break
    
    return best_val_loss

# Create a study object and optimize the objective function
study = optuna.create_study(direction='minimize', pruner=optuna.pruners.MedianPruner())
study.optimize(objective, n_trials=20, timeout=3600)  # Run for max of 20 trials or 1 hour

# Print optimization results
print("\nBest trial:")
trial = study.best_trial
print(f"  Value (validation loss): {trial.value:.4f}")
print("  Params:")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

# Store best parameters for model training
best_params = {
    'learning_rate': trial.params['learning_rate'],
    'hidden_size': trial.params['hidden_size'],
    'dropout_rate': trial.params['dropout_rate'],
    'batch_size': trial.params['batch_size']
}

# Visualize the optimization history
plt.figure(figsize=(10, 6))
optuna.visualization.matplotlib.plot_optimization_history(study)
plt.title('Optimization History')
plt.show()

# Visualize parameter importances
plt.figure(figsize=(10, 6))
optuna.visualization.matplotlib.plot_param_importances(study)
plt.title('Parameter Importances')
plt.show()

# Model Training

Train the model on the training set and validate on the validation set.


In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim

# Use best hyperparameters from Optuna tuning
best_lr = best_params['learning_rate']
best_hidden_size = best_params['hidden_size']
best_dropout_rate = best_params['dropout_rate']
best_batch_size = best_params['batch_size']

print(f"Training with best hyperparameters:")
print(f"  Learning rate: {best_lr}")
print(f"  Hidden size: {best_hidden_size}")
print(f"  Dropout rate: {best_dropout_rate}")
print(f"  Batch size: {best_batch_size}")

# Recreate model with best hyperparameters
model = BiLSTM(input_size, best_hidden_size, output_size).to(device)
model.dropout1 = nn.Dropout(best_dropout_rate)
model.dropout2 = nn.Dropout(best_dropout_rate)

# Define the loss function and optimizer with best learning rate
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=best_lr)

# Create data loaders with best batch size
train_loader = DataLoader(train_dataset, batch_size=best_batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=best_batch_size, shuffle=False)

# Training with Early Stopping
train_losses = []
val_losses = []
patience = 5
early_stop_counter = 0
best_val_loss = float('inf')
epochs = 20
for epoch in range(epochs):
    model.train()
    train_loss = 0
    for batch in train_loader:
        inputs, targets = batch
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    train_loss /= len(train_loader)
    train_losses.append(train_loss)
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch in val_loader:
            inputs, targets = batch
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            val_loss += loss.item()
    val_loss /= len(val_loader)
    val_losses.append(val_loss)
    print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")
    # Save the model if validation loss improves
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), "best_model.pth")
        print("Model checkpoint saved!")
        early_stop_counter = 0
    else:
        early_stop_counter += 1
        print(f"Early stopping counter: {early_stop_counter}/{patience}")
    if early_stop_counter >= patience:
        print("Early stopping triggered.")
        break

In [None]:
# Plot Training and Validation Loss Curves
import matplotlib.pyplot as plt
plt.figure(figsize=(10, 6))
plt.plot(train_losses, label="Training Loss")
plt.plot(val_losses, label="Validation Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("Training and Validation Loss Curves")
plt.legend()
plt.show()

# Evaluation

Evaluate the model on the test set and display metrics.


In [None]:
# Evaluate the model
test_dataset = ASLDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=best_batch_size, shuffle=False)
model.eval()
test_loss = 0
correct = 0
total = 0
with torch.no_grad():
    for batch in test_loader:
        inputs, targets = batch
        inputs, targets = inputs.to(device), targets.to(device)  # Move inputs to device too
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        test_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == targets).sum().item()
        total += targets.size(0)
test_loss /= len(test_loader)
accuracy = correct / total
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {accuracy:.4f}")

# Save the model for transfer learning
os.makedirs('./models', exist_ok=True)  # Create directory if it doesn't exist
torch.save({
    'model_state_dict': model.state_dict(),
    'input_size': input_size,
    'hidden_size': best_hidden_size,
    'dropout_rate': best_dropout_rate,
    'output_size': output_size,
    'accuracy': accuracy
}, "./models/transfer_learning_model.pth")
print("Model saved for transfer learning!")

# Inference

Test the model on new data or perform real-time inference.


In [None]:
# Example inference
model.eval()
sample = torch.tensor(X_test[0:1], dtype=torch.float32).to(device)  # Move sample to the correct device
with torch.no_grad():
    prediction = model(sample)
    predicted_label = torch.argmax(prediction, dim=1).item()
print(f"Predicted label: {predicted_label}, True label: {y_test[0]}")