# Cell 1: Imports

In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models, utils
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

2025-11-11 15:53:03.496179: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-11 15:53:04.699850: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-11-11 15:53:07.426248: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


# Cell 2: Load the new dataset

In [2]:
print("Loading landmark dataset...")
data = pd.read_csv('asl_landmarks_v2.csv')

# Split the frame into the target column and the feature columns.
# Everything except 'label' is treated as one feature vector per row.
y = data['label']
X = data.drop(columns=['label'])

# Sanity check on the shapes: the Input layer defined further down uses
# shape=(63,), so the features must be (n_rows, 63). The row count depends on
# the CSV; the recorded run of this cell printed (69347, 63).
print(f"Features shape: {X.shape}")
print(f"Labels shape: {y.shape}")

Loading landmark dataset...
Features shape: (69347, 63)
Labels shape: (69347,)


# Cell 3: Pre-process the data

In [3]:
print("Processing labels...")
# 1. Map each distinct label value to an integer id (0, 1, ...).
#    `label_encoder.classes_` holds the original labels in id order, which is
#    what turns a predicted class index back into a label later on.
label_encoder = LabelEncoder()
y_int = label_encoder.fit_transform(y)

# 2. One-hot encode the integer ids (e.g., 2 -> [0, 0, 1, 0...]); this is the
#    target format the 'categorical_crossentropy' loss is compiled with.
num_classes = len(label_encoder.classes_)
y_categorical = utils.to_categorical(y_int, num_classes=num_classes)

# 3. Hold out 20% of the rows. The hold-out keeps the names X_test / y_test,
#    but the training cell passes it as `validation_data`, i.e. it acts as
#    the validation set.
#    Stratify on the integer labels so each class keeps the same share in
#    both parts; a plain random split can under-represent (or entirely miss)
#    rare classes in the hold-out. Stratification requires at least two rows
#    per class, so fall back to the plain random split when that is not met
#    instead of raising.
can_stratify = len(y_int) > 0 and np.bincount(y_int).min() >= 2
stratify_labels = y_int if can_stratify else None

X_train, X_test, y_train, y_test = train_test_split(
    X.values,  # DataFrame -> numpy array
    y_categorical,
    test_size=0.2,
    random_state=42,  # fixed seed so the split is reproducible
    stratify=stratify_labels,
)
print("Data ready.")

Processing labels...
Data ready.


# Cell 4: Build the new, simple "MLP" model

In [4]:
# Fully connected classifier over the flat 63-value feature vector.
# Hidden stack as (units, dropout_rate) pairs; None means "no dropout after
# this layer". Layers are created in the same order as they are stacked.
hidden_spec = ((128, 0.3), (256, 0.3), (128, None))

mlp_layers = [layers.Input(shape=(63,))]
for units, drop_rate in hidden_spec:
    mlp_layers.append(layers.Dense(units, activation='relu'))
    if drop_rate is not None:
        mlp_layers.append(layers.Dropout(drop_rate))

# Output layer: one softmax unit per class. The width comes from
# `num_classes`, which is computed from the encoded labels.
mlp_layers.append(layers.Dense(num_classes, activation='softmax'))

model = models.Sequential(mlp_layers)

model.summary()

I0000 00:00:1762876422.605944  983363 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5561 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


# Cell 5: Compile the model

In [5]:
# Training configuration. The targets are one-hot vectors (produced with
# to_categorical), so the loss is the 'categorical' cross-entropy rather than
# the 'sparse' variant that takes integer class ids.
compile_kwargs = {
    'optimizer': 'adam',
    'loss': 'categorical_crossentropy',
    'metrics': ['accuracy'],
}
model.compile(**compile_kwargs)

# Cell 6: Train the model (This will be VERY fast)

In [6]:
print("Training new landmark model...")

# One criterion for "best epoch", shared by both callbacks. ModelCheckpoint
# monitors 'val_loss' unless told otherwise, so it must be set explicitly to
# agree with EarlyStopping; otherwise the file on disk and the stopping
# decision can be based on different epochs.
monitor_metric = 'val_accuracy'

history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,  # upper bound; EarlyStopping may end training sooner
    batch_size=32,
    callbacks=[
        # Stop after 5 epochs without improvement and roll the in-memory
        # model back to the best epoch's weights, so `model` is not left
        # with the weights of a later, worse epoch.
        tf.keras.callbacks.EarlyStopping(
            monitor=monitor_metric,
            mode='max',  # higher accuracy is better
            patience=5,
            restore_best_weights=True,
        ),
        # Overwrite the saved model only when the monitored metric improves.
        tf.keras.callbacks.ModelCheckpoint(
            'asl_landmark_model.keras',
            monitor=monitor_metric,
            mode='max',
            save_best_only=True,
        ),
    ],
)

print("New landmark model trained and saved as 'asl_landmark_model.keras'!")

Training new landmark model...
Epoch 1/50


2025-11-11 15:54:26.207903: I external/local_xla/xla/service/service.cc:163] XLA service 0x738dc001c9d0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-11-11 15:54:26.207941: I external/local_xla/xla/service/service.cc:171]   StreamExecutor device (0): NVIDIA GeForce RTX 4060 Laptop GPU, Compute Capability 8.9
2025-11-11 15:54:26.258845: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-11-11 15:54:26.504131: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:473] Loaded cuDNN version 91500
2025-11-11 15:54:26.626972: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.
2025-11-11 15:54:26.

[1m  57/1734[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m4s[0m 3ms/step - accuracy: 0.1046 - loss: 3.2449    

I0000 00:00:1762876473.380590  983557 device_compiler.h:196] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m1725/1734[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.6403 - loss: 1.1648

2025-11-11 15:54:38.385288: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.
2025-11-11 15:54:38.385353: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.
2025-11-11 15:54:38.385381: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.
2025-11-11 15:54:38.385391: I external/l

[1m1734/1734[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6412 - loss: 1.1620

2025-11-11 15:54:43.240815: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.
2025-11-11 15:54:43.240901: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.





[1m1734/1734[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 8ms/step - accuracy: 0.7999 - loss: 0.6152 - val_accuracy: 0.9301 - val_loss: 0.2192
Epoch 2/50
[1m1734/1734[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9172 - loss: 0.2379 - val_accuracy: 0.9263 - val_loss: 0.1907
Epoch 3/50
[1m1734/1734[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.9267 - loss: 0.2047 - val_accuracy: 0.9406 - val_loss: 0.1633
Epoch 4/50
[1m1734/1734[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.9311 - loss: 0.1858 - val_accuracy: 0.9447 - val_loss: 0.1480
Epoch 5/50
[1m1734/1734[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9350 - loss: 0.1740 - val_accuracy: 0.9461 - val_loss: 0.1394
Epoch 6/50
[1m1734/1734[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9376 - loss: 0.1644 - val_accuracy: 0.9469 - val_loss: 0.1332
Epoch 7/50
[1m1734/1734[