In [None]:
# Cell 1: Upload preprocessed training data
# Run `python ml/preprocess_for_colab.py` locally first to create training_data.npz
# This is ~15-20 MB instead of hundreds of MB for the raw zip.
import os
import numpy as np
from google.colab import files

# Prompt for the preprocessed archive and load its arrays into notebook globals.
print('Select your training_data.npz file...')
uploaded = files.upload()

# Nothing to index if the upload produced no files (e.g. the dialog was
# dismissed); fail with a readable message instead of an IndexError on [0].
if not uploaded:
    raise RuntimeError('No file was uploaded - re-run this cell and select training_data.npz')

# Only the first uploaded file is used.
file_name = next(iter(uploaded))
assert file_name.endswith('.npz'), f'Expected .npz file, got: {file_name}'
print(f'Uploaded: {file_name} ({len(uploaded[file_name]) / (1024*1024):.1f} MB)')

# np.load on an .npz keeps the archive open until it is closed, so read the
# arrays inside a context manager and release the file handle right away.
with np.load(file_name) as data:
    missing_keys = {'X', 'y', 'actions'} - set(data.files)
    if missing_keys:
        raise KeyError(f'{file_name} is missing expected arrays: {sorted(missing_keys)}')
    X = data['X']
    y_raw = data['y']
    ACTIONS = data['actions']

# The two shape lookups below index axes 1 and 2, so X must be 3-D.
if X.ndim != 3:
    raise ValueError(f'Expected X with 3 dimensions (sequences, frames, features), got shape {X.shape}')
if len(X) != len(y_raw):
    raise ValueError(f'X has {len(X)} sequences but y has {len(y_raw)} labels')

SEQUENCE_LENGTH = X.shape[1]
NUM_FEATURES = X.shape[2]

# Output directory for the exported models.
MODELS_DIR = '/content/models'
os.makedirs(MODELS_DIR, exist_ok=True)

print(f'\nX shape: {X.shape}')
print(f'Actions ({len(ACTIONS)}): {list(ACTIONS)}')
print('Sequences per action:')
for i, action in enumerate(ACTIONS):
    print(f'  {action}: {np.sum(y_raw == i)}')

In [None]:
# Cell 2: Imports
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, Bidirectional
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

# Set random seeds for reproducibility
# The same value seeds NumPy's legacy global RNG and TensorFlow's global seed.
_SEED = 42
np.random.seed(_SEED)
tf.random.set_seed(_SEED)

In [None]:
# Cell 3: Configuration
# Action name -> integer class index, in the order the names appear in ACTIONS.
label_map = dict(zip(ACTIONS, range(len(ACTIONS))))

# Dataset layout:
#   Per class: 90 sequences (Person A: 0-29, Person B: 30-89)
#   Demo target: Person B
# SEQS_PER_CLASS is measured on class 0 only; the other classes are not checked here.
SEQS_PER_CLASS = int(np.count_nonzero(y_raw == 0))
PERSON_A = (0, 30)    # sequences 0-29
PERSON_B = (30, SEQS_PER_CLASS)  # sequences 30-89

# Ranges are stored half-open (start, stop); the summary prints them inclusive.
print(
    f'Actions ({len(ACTIONS)}): {list(ACTIONS)}',
    f'Frames per sequence: {SEQUENCE_LENGTH}',
    f'Features per frame: {NUM_FEATURES} (face landmarks stripped)',
    f'Total sequences: {len(X)}',
    f'Sequences per class: {SEQS_PER_CLASS}',
    f'Person A: seqs {PERSON_A[0]}-{PERSON_A[1]-1}, Person B: seqs {PERSON_B[0]}-{PERSON_B[1]-1}',
    sep='\n',
)

In [None]:
# Cell 4: Prepare labels
# One-hot encode the integer labels for the full dataset.
y = to_categorical(y_raw, num_classes=len(ACTIONS))

print(
    f'X shape: {X.shape}',
    f'y shape: {y.shape}',
    'Data already face-stripped and normalized by preprocess_for_colab.py',
    sep='\n',
)

In [None]:
# Cell 5: Person-aware Train/Val/Test Split
#
# CHANGE: Replaced random train_test_split with person-aware split.
# This prevents data leakage from near-duplicate sequences (recorded
# back-to-back by the same person) ending up in both train and test.
#
# Layout per class (90 sequences):
#   Train: Person A (0-29) + Person B early (30-71) = 72 sequences
#   Val:   Person B (72-80) = 9 sequences
#   Test:  Person B (81-89) = 9 sequences
#
# Val/Test are Person B only since the demo targets Person B.

# Split boundaries, as indices into one class's sequences in file order.
# They encode the layout described above (90 sequences per class).
TRAIN_END = 72   # train = [0, 72):  Person A (0-29) + Person B early (30-71)
VAL_END = 81     # val   = [72, 81): Person B
TEST_END = 90    # test  = [81, 90): Person B

X_train_list, y_train_list = [], []
X_val_list, y_val_list = [], []
X_test_list, y_test_list = [], []

for i, action in enumerate(ACTIONS):
    mask = y_raw == i
    class_data = X[mask]

    # NumPy slicing never raises on short input, so without this check a class
    # with too few sequences would silently get a truncated or empty
    # validation/test split.
    if len(class_data) < TEST_END:
        raise ValueError(
            f"Class '{action}' has {len(class_data)} sequences; "
            f'the person-aware split needs at least {TEST_END}'
        )
    # Extra sequences beyond the expected layout are not assigned to any split.
    if len(class_data) > TEST_END:
        print(f"WARNING: class '{action}' has {len(class_data)} sequences; "
              f'only the first {TEST_END} are used')

    # Train: Person A (0-29) + Person B early (30-71)
    train_data = class_data[:TRAIN_END]
    # Val: Person B (72-80)
    val_data = class_data[TRAIN_END:VAL_END]
    # Test: Person B (81-89)
    test_data = class_data[VAL_END:TEST_END]

    X_train_list.append(train_data)
    X_val_list.append(val_data)
    X_test_list.append(test_data)
    y_train_list.extend([i] * len(train_data))
    y_val_list.extend([i] * len(val_data))
    y_test_list.extend([i] * len(test_data))

X_train = np.concatenate(X_train_list)
X_val = np.concatenate(X_val_list)
X_test = np.concatenate(X_test_list)
y_train = to_categorical(np.array(y_train_list), num_classes=len(ACTIONS))
y_val = to_categorical(np.array(y_val_list), num_classes=len(ACTIONS))
y_test = to_categorical(np.array(y_test_list), num_classes=len(ACTIONS))

# Shuffle training data (samples and labels with the same permutation)
rng = np.random.default_rng(42)
idx = rng.permutation(len(X_train))
X_train, y_train = X_train[idx], y_train[idx]

print(f'Training samples:   {X_train.shape[0]} (Person A + Person B seqs 30-71)')
print(f'Validation samples: {X_val.shape[0]} (Person B seqs 72-80)')
print(f'Test samples:       {X_test.shape[0]} (Person B seqs 81-89)')
print('\nPer-class split:')
# Decode the one-hot labels once instead of once per class.
train_labels = np.argmax(y_train, axis=1)
val_labels = np.argmax(y_val, axis=1)
test_labels = np.argmax(y_test, axis=1)
for i, action in enumerate(ACTIONS):
    tr = np.sum(train_labels == i)
    va = np.sum(val_labels == i)
    te = np.sum(test_labels == i)
    print(f'  {action:15s}  train={tr}  val={va}  test={te}')

In [None]:
# Cell 5b: Data Augmentation (on 258-feature preprocessed data)
#
# CHANGES:
#   - AUGMENT_MULTIPLIER: 5 -> 7 (adds diversity to make up for mirroring being off)
#   - USE_MIRROR: False (ASL signs are dominant-hand specific)

# Number of augmented passes over the training set; 0 disables augmentation.
AUGMENT_MULTIPLIER = 7
# Disabled: ASL signs are hand-specific.
# NOTE(review): nothing in the visible cells reads this flag.
USE_MIRROR = False

# Hand indices in 258-feature space: [pose(0:132), lh(132:195), rh(195:258)]
LH_START = 132
LH_END = 195
RH_START = 195
RH_END = 258

def add_gaussian_noise(seq, std=0.005, rng=None):
    """Return a jittered copy of ``seq``; entries equal to 0 are left untouched.

    Args:
        seq: NumPy array to perturb. It is not modified.
        std: Standard deviation of the zero-mean Gaussian noise.
        rng: Random generator providing ``normal``; a fresh
            ``np.random.default_rng()`` is created when omitted.

    Returns:
        A new array with the same shape and dtype as ``seq``.
    """
    if not rng:
        rng = np.random.default_rng()
    # Draw noise for every entry, then apply it only where the input is non-zero.
    jitter = rng.normal(0, std, size=seq.shape).astype(seq.dtype)
    return np.where(seq != 0, seq + jitter, seq)

def temporal_shift(seq, max_shift=3, rng=None):
    """Shift ``seq`` along its first axis by a random number of frames.

    A positive shift delays the sequence and fills the leading frames with the
    first frame; a negative shift advances it and fills the trailing frames
    with the last frame. The output has the same shape and dtype as ``seq``.

    Args:
        seq: NumPy array whose first axis is time. It is not modified.
        max_shift: Largest shift magnitude; the shift is drawn uniformly from
            ``[-max_shift, max_shift]``.
        rng: Random generator providing ``integers``; a fresh
            ``np.random.default_rng()`` is created when omitted.

    Returns:
        A new, shifted array (a plain copy when the effective shift is 0).
    """
    if rng is None:
        rng = np.random.default_rng()
    shift = int(rng.integers(-max_shift, max_shift + 1))
    sl = len(seq)
    # A shift of sl - 1 already leaves a single original frame. Clamping keeps
    # the slice arithmetic below valid for sequences shorter than max_shift,
    # and makes empty or one-frame inputs fall through to the copy.
    limit = max(sl - 1, 0)
    shift = max(-limit, min(limit, shift))
    if shift == 0:
        return seq.copy()
    aug = np.zeros_like(seq)
    if shift > 0:
        aug[:shift] = seq[0]
        aug[shift:] = seq[:sl - shift]
    else:
        lead = -shift
        aug[:sl - lead] = seq[lead:]
        aug[sl - lead:] = seq[-1]
    return aug

def mirror_hands(seq):
    """Return a copy of ``seq`` with the left-hand and right-hand column blocks swapped.

    The blocks are the column ranges ``LH_START:LH_END`` and
    ``RH_START:RH_END``; all other columns are copied unchanged. ``seq`` is
    not modified.
    """
    left_cols = slice(LH_START, LH_END)
    right_cols = slice(RH_START, RH_END)
    aug = seq.copy()
    # Read from the untouched input so neither assignment sees swapped data.
    aug[:, left_cols] = seq[:, right_cols]
    aug[:, right_cols] = seq[:, left_cols]
    return aug

def speed_variation(seq, factor_range=(0.85, 1.15), rng=None):
    """Resample ``seq`` in time so the motion plays faster or slower.

    The sequence is stretched to ``int(len(seq) * factor)`` frames (at least 2)
    and read back at the original frame count, starting from the first frame:

    * ``factor > 1`` - slower playback; the tail of the motion is cut off.
    * ``factor < 1`` - faster playback; the final frame is held to fill the
      remaining frames.

    The previous version resampled the stretched sequence back over its full
    span, which maps every frame onto itself and returned the input unchanged.

    Args:
        seq: 2-D NumPy array, time on axis 0. It is not modified.
        factor_range: ``(low, high)`` bounds for the uniformly drawn factor.
        rng: Random generator providing ``uniform``; a fresh
            ``np.random.default_rng()`` is created when omitted.

    Returns:
        A new array with the same shape and dtype as ``seq``.
    """
    if rng is None:
        rng = np.random.default_rng()
    sl = len(seq)
    # Draw before the early return so the random stream is consumed the same
    # way for every input.
    factor = rng.uniform(*factor_range)
    if sl < 2:
        # Nothing to resample with fewer than two frames.
        return seq.copy()
    new_len = max(int(sl * factor), 2)
    # Source position advanced per output frame: above 1 plays faster, below 1 slower.
    step = (sl - 1) / (new_len - 1)
    # Positions past the last frame are clamped, which holds the final frame.
    src_pos = np.minimum(np.arange(sl) * step, sl - 1)
    frame_idx = np.arange(sl)
    aug = np.zeros_like(seq)
    for i in range(seq.shape[1]):
        aug[:, i] = np.interp(src_pos, frame_idx, seq[:, i])
    return aug

def frame_dropout(seq, drop_rate=0.1, rng=None):
    """Return a copy of ``seq`` where random interior frames repeat the previous frame.

    The first and last frames are never replaced. Frames are visited in order
    on the working copy, so consecutive drops keep repeating the same earlier
    frame.

    Args:
        seq: NumPy array whose first axis is time. It is not modified.
        drop_rate: Per-frame probability of replacement.
        rng: Random generator providing ``random``; a fresh
            ``np.random.default_rng()`` is created when omitted.
    """
    if not rng:
        rng = np.random.default_rng()
    aug = seq.copy()
    # prev runs over 0 .. len-3, so cur covers exactly the interior frames.
    for prev in range(len(aug) - 2):
        cur = prev + 1
        if rng.random() < drop_rate:
            aug[cur] = aug[prev]
    return aug

def augment_sequence(seq, rng=None):
    """Apply a random subset of the augmentations to a copy of ``seq``.

    Each step is gated by its own random draw, always in this order:
    Gaussian noise (p=0.8, std drawn from [0.002, 0.008]), temporal shift
    (p=0.5), speed variation (p=0.4), frame dropout (p=0.3).
    ``mirror_hands`` / ``USE_MIRROR`` are not used here.

    Args:
        seq: Sequence array to augment. It is not modified.
        rng: Random generator shared by all steps; a fresh
            ``np.random.default_rng()`` is created when omitted.

    Returns:
        The augmented sequence.
    """
    if not rng:
        rng = np.random.default_rng()
    result = seq.copy()

    if rng.random() < 0.8:
        noise_std = rng.uniform(0.002, 0.008)
        result = add_gaussian_noise(result, std=noise_std, rng=rng)

    if rng.random() < 0.5:
        result = temporal_shift(result, max_shift=3, rng=rng)

    if rng.random() < 0.4:
        result = speed_variation(result, rng=rng)

    if rng.random() < 0.3:
        result = frame_dropout(result, drop_rate=0.1, rng=rng)

    return result

# --- Apply augmentation ---
# Adds AUGMENT_MULTIPLIER augmented copies of the training set to the
# originals, then reshuffles. X_train / y_train are overwritten, so running
# this cell again without re-running the split augments already-augmented data.
if AUGMENT_MULTIPLIER <= 0:
    print('Augmentation disabled (AUGMENT_MULTIPLIER=0)')
else:
    rng = np.random.default_rng(42)
    original_count = len(X_train)

    # Originals first, followed by one augmented pass per multiplier step.
    all_X = [X_train] + [
        np.array([augment_sequence(sample, rng=rng) for sample in X_train])
        for _ in range(AUGMENT_MULTIPLIER)
    ]
    # Augmentation does not change labels, so every pass reuses y_train.
    all_y = [y_train] * (AUGMENT_MULTIPLIER + 1)

    X_train = np.concatenate(all_X, axis=0)
    y_train = np.concatenate(all_y, axis=0)

    # One permutation for both arrays keeps samples and labels aligned.
    idx = rng.permutation(len(X_train))
    X_train = X_train[idx]
    y_train = y_train[idx]

    print(f'Augmented: {original_count} -> {len(X_train)} training samples ({len(X_train)/original_count:.1f}x)')

In [None]:
# Cell 6: Build Bidirectional LSTM Model
#
# CHANGES:
#   - l2 regularization: 1e-4 -> 5e-4 (stronger regularization)
#   - First dropout: 0.5 -> 0.6 (reduce overfitting)
#   - Loss: categorical_crossentropy -> CategoricalCrossentropy(label_smoothing=0.1)

# L2 penalty shared by the LSTM kernels and the hidden Dense layer.
reg = l2(5e-4)

# Architecture: BiLSTM(64) -> BatchNorm -> Dropout -> Dense(32) -> Dropout -> softmax.
# The input shape is declared with an explicit Input object rather than the
# deprecated `input_shape=` layer argument.
model = Sequential([
    tf.keras.Input(shape=(SEQUENCE_LENGTH, NUM_FEATURES)),
    Bidirectional(LSTM(64, activation='tanh',
                       kernel_regularizer=reg, recurrent_regularizer=reg)),
    BatchNormalization(),
    Dropout(0.6),

    Dense(32, activation='relu', kernel_regularizer=reg),
    Dropout(0.4),
    # One output unit per action.
    Dense(len(ACTIONS), activation='softmax')
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0003),
    loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1),
    metrics=['categorical_accuracy']
)
model.summary()

In [None]:
# Cell 7: Train
#
# CHANGES:
#   - batch_size: 16 -> 64 (smoother gradients, reduces val loss spikes)
#   - Added ReduceLROnPlateau (halves LR when val_loss plateaus)

# Callbacks. Early stopping and checkpointing both follow validation accuracy;
# the learning-rate schedule follows validation loss.
tb_callback = TensorBoard(log_dir='./logs')
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss', factor=0.5, patience=10, min_lr=1e-6, verbose=1,
)
checkpoint = ModelCheckpoint(
    'best_model.h5', monitor='val_categorical_accuracy', save_best_only=True, verbose=1,
)
early_stop = EarlyStopping(
    monitor='val_categorical_accuracy', patience=50, restore_best_weights=True, verbose=1,
)

# epochs=500 is an upper bound; early_stop may end training sooner.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=500,
    batch_size=64,
    callbacks=[early_stop, checkpoint, reduce_lr, tb_callback],
    verbose=1,
)

# Best values over all epochs; the two may come from different epochs.
best_val_acc = max(history.history["val_categorical_accuracy"])
best_val_loss = min(history.history["val_loss"])
print(f'\nBest validation accuracy: {best_val_acc:.4f}')
print(f'Best validation loss: {best_val_loss:.4f}')

In [None]:
# Cell 8: Evaluate on TEST set
from tensorflow.keras.models import load_model

# Reload the checkpoint written by ModelCheckpoint during training.
best_model = load_model('best_model.h5')

test_loss, test_acc = best_model.evaluate(X_test, y_test, verbose=0)
print(f'Test Loss:     {test_loss:.4f}')
print(f'Test Accuracy: {test_acc:.4f}')

# Turn softmax outputs and one-hot targets back into integer class ids.
y_pred = best_model.predict(X_test, verbose=0)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_test, axis=1)

# Pass the full label set explicitly. Otherwise scikit-learn infers the
# classes from the data, and a class that appears in neither the targets nor
# the predictions would break `target_names` and misalign the heatmap ticks.
class_ids = np.arange(len(ACTIONS))

print('\nClassification Report:')
print(classification_report(
    y_true_classes,
    y_pred_classes,
    labels=class_ids,
    target_names=ACTIONS.tolist(),
    zero_division=0
))

# Confusion matrix heatmap (rows = actual class, columns = predicted class)
cm = confusion_matrix(y_true_classes, y_pred_classes, labels=class_ids)
plt.figure(figsize=(12, 10))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=ACTIONS, yticklabels=ACTIONS)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix (Person B held-out test set)')
plt.tight_layout()
plt.savefig('confusion_matrix.png', dpi=150)
plt.show()
print('Confusion matrix saved to confusion_matrix.png')

In [None]:
# Cell 9: Training History Plots
# Two side-by-side panels: accuracy on the left, loss on the right.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

panels = (
    {
        'ax': axes[0],
        'train_key': 'categorical_accuracy',
        'val_key': 'val_categorical_accuracy',
        'train_label': 'Train Accuracy',
        'val_label': 'Val Accuracy',
        'title': 'Model Accuracy',
        'ylabel': 'Accuracy',
    },
    {
        'ax': axes[1],
        'train_key': 'loss',
        'val_key': 'val_loss',
        'train_label': 'Train Loss',
        'val_label': 'Val Loss',
        'title': 'Model Loss',
        'ylabel': 'Loss',
    },
)

# Both panels share the same layout; only the metric and labels differ.
for panel in panels:
    ax = panel['ax']
    ax.plot(history.history[panel['train_key']], label=panel['train_label'])
    ax.plot(history.history[panel['val_key']], label=panel['val_label'])
    ax.set_title(panel['title'])
    ax.set_xlabel('Epoch')
    ax.set_ylabel(panel['ylabel'])
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('training_history.png', dpi=150)
plt.show()
print('Training history saved to training_history.png')

In [None]:
# Cell 10: Save + Convert to TFLite + Download
import tensorflow as tf
from tensorflow.keras.models import load_model

# Save the Keras model and the label array.
MODEL_PATH = os.path.join(MODELS_DIR, 'action_model.keras')
best_model.save(MODEL_PATH)
np.save('actions.npy', ACTIONS)

# Clone model onto CPU to avoid CuDNN ops that TFLite can't handle
with tf.device('/cpu:0'):
    cpu_model = tf.keras.models.clone_model(best_model)
    # clone_model copies the architecture only; the weights are set separately.
    cpu_model.set_weights(best_model.get_weights())

# Convert with TensorFlow ops allowed as a fallback next to the TFLite builtins.
converter = tf.lite.TFLiteConverter.from_keras_model(cpu_model)
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]
# NOTE(review): private converter flag; presumably needed for the LSTM's
# tensor-list ops on the Select-TF-ops path - confirm against the TFLite docs.
converter._experimental_lower_tensor_list_ops = False
tflite_model = converter.convert()

tflite_path = os.path.join(MODELS_DIR, 'action_model.tflite')
with open(tflite_path, 'wb') as f:
    f.write(tflite_model)
print(f'TFLite model saved: {len(tflite_model)/1024:.1f} KB')

# Download to your PC
from google.colab import files
for artifact in (MODEL_PATH, tflite_path, 'actions.npy', 'confusion_matrix.png'):
    files.download(artifact)
print('Downloads started - place action_model.tflite and actions.npy in ml/models/ locally')