# Train ASL Classifier (Deep Learning)

This notebook trains a Keras MLP on 126-dimensional hand-landmark features (2 hands × 21 landmarks × 3 coordinates) and saves a `.keras` model, the class names, and the fitted feature scaler for realtime inference.


In [None]:
# Optional: install deps
# %pip install numpy tensorflow scikit-learn


In [None]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers


In [None]:
# Parameters

# --- File locations ---
DATA_PATH = "train_landmarks.npz"      # input archive with landmarks and labels
MODEL_PATH = "asl_landmark_mlp.keras"  # best checkpoint is written here
CLASSES_PATH = "label_classes.npy"     # class-name ordering used for label encoding

# --- Split / reproducibility ---
TEST_SIZE = 0.15   # fraction of samples held out for validation
RANDOM_STATE = 42  # seed for the split and for Keras

# --- Optimisation ---
EPOCHS = 80
BATCH_SIZE = 256
BASE_LR = 1e-3       # initial learning rate of the cosine schedule
WEIGHT_DECAY = 1e-4  # AdamW weight decay

# --- Regularisation ---
DROPOUT1 = 0.3  # dropout after the first hidden block
DROPOUT2 = 0.3  # dropout after the second hidden block

# --- Augmentation params ---
AUGMENT = True
AUGMENT_FACTOR = 2  # times to replicate training set with aug
JITTER_STD = 0.01   # additive Gaussian noise on normalized coords
SCALE_MIN = 0.9     # lower bound of the random xy scale factor
SCALE_MAX = 1.1     # upper bound of the random xy scale factor
ROTATE_DEG = 10.0   # +/- degrees (2D rotation on x,y)
MIRROR_PROB = 0.5   # probability of flipping a hand along x


In [None]:
# Load data
# NOTE: normalize_per_hand() and make_augmented_dataset() are defined in the
# "Normalization and augmentation utilities" cell further down this notebook;
# that cell must have been executed before this one, otherwise the calls below
# raise NameError.
import joblib

SCALER_PATH = "feature_scaler.joblib"

if not os.path.exists(DATA_PATH):
    raise FileNotFoundError(f"Dataset not found at {DATA_PATH}")

# np.load on an .npz returns an NpzFile that keeps the archive open; the
# context manager closes it once the arrays have been copied out via astype().
with np.load(DATA_PATH) as data:
    X = data['hand_landmarks'].astype(np.float32)
    y = data['labels'].astype(str)

# Encode classes and save class ordering
# np.unique returns the sorted class names plus, for every sample, the index of
# its class in that sorted array; those indices are the training targets.
classes, y_indices = np.unique(y, return_inverse=True)
np.save(CLASSES_PATH, classes)

# Per-hand normalization before split
X = normalize_per_hand(X)

# Train/val split (stratified so both splits keep the class proportions)
X_train, X_val, y_train, y_val = train_test_split(
    X, y_indices, test_size=TEST_SIZE, random_state=RANDOM_STATE, stratify=y_indices
)

# Optional augmentation on training only (after normalization)
if AUGMENT:
    # The augmentation helpers draw from NumPy's global RNG; seed it here so the
    # augmented training set is identical on every run of this cell.
    np.random.seed(RANDOM_STATE)
    X_train, y_train = make_augmented_dataset(X_train, y_train, AUGMENT_FACTOR)

# Standardize features (fit on train only, so validation statistics never leak in)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Save scaler for inference
joblib.dump(scaler, SCALER_PATH)
print(f"Scaler saved to: {SCALER_PATH}")

input_dim = X_train.shape[1]
num_classes = classes.shape[0]
print("Train shape:", X_train.shape, "Val shape:", X_val.shape, "Classes:", num_classes)


In [None]:
# Build model (MLP)
# Seed the RNGs so weight initialisation and dropout masks are repeatable.
keras.utils.set_random_seed(RANDOM_STATE)

# Three Dense -> BatchNorm -> ReLU -> Dropout blocks followed by a softmax head.
# The hidden Dense layers are created without a bias term; each is immediately
# followed by a BatchNormalization layer.
model = keras.Sequential([
    layers.Input(shape=(input_dim,)),
    layers.Dense(512, use_bias=False),
    layers.BatchNormalization(),
    layers.Activation("relu"),
    layers.Dropout(DROPOUT1),

    layers.Dense(512, use_bias=False),
    layers.BatchNormalization(),
    layers.Activation("relu"),
    layers.Dropout(DROPOUT2),

    layers.Dense(256, use_bias=False),
    layers.BatchNormalization(),
    layers.Activation("relu"),
    layers.Dropout(0.2),

    layers.Dense(num_classes, activation="softmax"),
])

# Cosine decay LR and AdamW
# fit() also runs the final, partially filled batch of every epoch, so the
# number of optimizer steps per epoch is the ceiling of samples / batch size.
# Using floor division would make the schedule reach zero before training ends.
steps_per_epoch = max(1, (X_train.shape[0] + BATCH_SIZE - 1) // BATCH_SIZE)
total_steps = steps_per_epoch * EPOCHS
lr_schedule = keras.optimizers.schedules.CosineDecay(
    initial_learning_rate=BASE_LR,
    decay_steps=total_steps,
)
optimizer = keras.optimizers.AdamW(learning_rate=lr_schedule, weight_decay=WEIGHT_DECAY)

# Targets are integer class indices, hence the sparse cross-entropy loss.
model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy", metrics=["accuracy"])
model.summary()


In [None]:
# Callbacks

# Write the model to MODEL_PATH only when validation accuracy improves.
ckpt_cb = keras.callbacks.ModelCheckpoint(
    MODEL_PATH,
    monitor="val_accuracy",
    save_best_only=True,
)

# Stop after 10 epochs without a validation-accuracy improvement and roll the
# in-memory model back to its best weights.
es_cb = keras.callbacks.EarlyStopping(
    monitor="val_accuracy",
    patience=10,
    restore_best_weights=True,
)

# Halve the learning rate after 3 epochs without a validation-loss improvement.
# NOTE(review): this callback needs a settable optimizer learning rate, while the
# optimizer in this notebook is built with a CosineDecay schedule — confirm the
# two are really meant to be combined.
rlr_cb = keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.5,
    patience=3,
)


In [None]:
# Train
# rlr_cb (ReduceLROnPlateau) is deliberately not passed to fit(): the optimizer's
# learning rate is driven by a CosineDecay schedule, and a schedule-backed
# learning rate cannot be overwritten by the callback (Keras raises a TypeError
# the first time a plateau is detected). The cosine schedule already handles
# the decay on its own.
train_callbacks = [ckpt_cb, es_cb]

history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=train_callbacks,
    verbose=1,
)

# Evaluate the in-memory model on the held-out validation split
val_loss, val_acc = model.evaluate(X_val, y_val, verbose=0)
print(f"Validation accuracy: {val_acc:.4f}")
print(f"Model saved to: {MODEL_PATH}\nClasses saved to: {CLASSES_PATH}")


In [None]:
# Normalization and augmentation utilities
# The "Load data" cell calls normalize_per_hand() and make_augmented_dataset()
# from this cell, so this cell has to be executed before that one.
import math

# Layout of one flattened feature vector: hands x landmarks x coordinates.
NUM_HANDS = 2
NUM_LANDMARKS = 21
COORDS = 3
VEC_LEN = NUM_HANDS * NUM_LANDMARKS * COORDS  # length of the flat vector

# Landmark rows used as reference points during normalization.
WRIST_IDX = 0       # origin for the xy translation
MIDDLE_MCP_IDX = 9  # used for approximate scale


def reshape_landmarks(vec):
    """Reshape one flat landmark vector to (NUM_HANDS, NUM_LANDMARKS, COORDS)."""
    target_shape = (NUM_HANDS, NUM_LANDMARKS, COORDS)
    return vec.reshape(target_shape)


def normalize_per_hand(X_arr: np.ndarray) -> np.ndarray:
    """Translate and scale every hand in a batch of flattened landmark vectors.

    For each hand that is not all (close to) zero:
      * x,y of every landmark are shifted so the wrist landmark sits at the
        origin (z is left untouched);
      * x,y are then divided by the xy distance wrist -> middle MCP landmark,
        unless that distance is <= 1e-6.
    Hands whose values are all within ``np.isclose`` tolerance of zero are
    returned unchanged.

    The whole batch is processed with array operations rather than a Python
    loop over samples and hands.

    Args:
        X_arr: array whose total size is a multiple of ``VEC_LEN``; it is
            interpreted as (n_samples, NUM_HANDS, NUM_LANDMARKS, COORDS).
            The input is not modified.

    Returns:
        A new array of shape (n_samples, VEC_LEN) with the same dtype as
        ``X_arr``.
    """
    hands = X_arr.copy().reshape(-1, NUM_HANDS, NUM_LANDMARKS, COORDS)

    # (n_samples, NUM_HANDS) mask: True where the hand slot carries real data.
    present = ~np.isclose(hands, 0.0).all(axis=(2, 3))

    xy = hands[..., :2]  # view into `hands`; in-place ops below update it

    # Snapshot the wrist offsets before subtracting so the subtraction never
    # reads from memory it is writing to; empty slots get a zero offset.
    wrist_xy = xy[:, :, WRIST_IDX:WRIST_IDX + 1, :].copy()
    wrist_xy[~present] = 0.0
    xy -= wrist_xy

    # scale by distance wrist->middle_mcp on xy plane (wrist is now the origin)
    scale = np.linalg.norm(xy[:, :, MIDDLE_MCP_IDX, :], axis=-1)
    usable = present & (scale > 1e-6)
    # Dividing by 1 leaves empty or degenerate hands exactly as they are.
    divisor = np.where(usable, scale, 1.0).astype(hands.dtype)
    xy /= divisor[:, :, None, None]

    return hands.reshape(-1, VEC_LEN)


def random_rotate_xy(points: np.ndarray, max_deg: float) -> np.ndarray:
    """Return a copy of ``points`` with its x,y columns rotated about the origin.

    The rotation angle is drawn uniformly from [-max_deg, +max_deg] degrees
    using NumPy's global random state. Columns after the first two are copied
    through unchanged, and the input array is not modified.
    """
    angle = np.deg2rad(np.random.uniform(-max_deg, max_deg))
    cos_a = math.cos(angle)
    sin_a = math.sin(angle)
    rotation = np.array(
        [[cos_a, -sin_a],
         [sin_a, cos_a]],
        dtype=np.float32,
    )
    rotated = points.copy()
    xy_in = rotated[:, :2]
    # Row vectors are rotated by right-multiplying with the transposed matrix.
    rotated[:, :2] = np.matmul(xy_in, rotation.T)
    return rotated


def augment_sample(vec: np.ndarray) -> np.ndarray:
    """Return a randomly perturbed copy of one flattened landmark vector.

    Each hand that is not all (close to) zero goes through, in this order:
    an optional x mirror (probability ``MIRROR_PROB``), a uniform xy scale in
    [``SCALE_MIN``, ``SCALE_MAX``], a random xy rotation of up to
    ``ROTATE_DEG`` degrees, and Gaussian jitter (std ``JITTER_STD``) on x, y
    and z. Empty hand slots are left as they are. All randomness comes from
    NumPy's global random state.
    """
    hands = reshape_landmarks(vec).copy()
    for slot, current in enumerate(hands):
        # An all-zero slot is skipped so it stays all-zero.
        if np.allclose(current, 0.0):
            continue

        # random mirror on x
        mirror = np.random.rand() < MIRROR_PROB
        if mirror:
            current[:, 0] *= -1.0

        # scale xy
        factor = np.random.uniform(SCALE_MIN, SCALE_MAX)
        current[:, :2] *= factor

        # rotate on xy (returns a fresh array, detached from `hands`)
        rotated = random_rotate_xy(current, ROTATE_DEG)

        # jitter x,y,z
        noise = np.random.normal(0.0, JITTER_STD, size=rotated.shape).astype(np.float32)
        rotated += noise

        hands[slot] = rotated
    return hands.reshape(-1)


def make_augmented_dataset(X_tr: np.ndarray, y_tr: np.ndarray, factor: int) -> tuple[np.ndarray, np.ndarray]:
    """Grow a training set to ``factor`` times its size using augmentation.

    The output starts with the original samples, followed by ``factor - 1``
    independently augmented copies of the whole set (one ``augment_sample``
    call per row). Labels are repeated in the same order. With
    ``factor <= 1`` the inputs are returned unchanged.
    """
    if factor <= 1:
        return X_tr, y_tr

    feature_parts = [X_tr]
    label_parts = [y_tr]
    for _round in range(1, factor):
        augmented_rows = [augment_sample(sample) for sample in X_tr]
        feature_parts.append(np.vstack(augmented_rows))
        label_parts.append(y_tr)

    return np.vstack(feature_parts), np.concatenate(label_parts)
