## Code to check GPU integration


In [1]:
import tensorflow as tf

# Ask TensorFlow which physical GPUs it can see. When at least one is present,
# switch each of them to on-demand memory growth; any failure while doing so is
# reported and otherwise ignored (best effort).
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    print("No GPUs found")
else:
    try:
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, True)
        print("Enabled memory growth for GPUs:", gpus)
    except Exception as err:
        print("Could not set memory growth:", err)


2025-11-10 15:15:20.710657: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-10 15:15:20.922188: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-11-10 15:15:22.469049: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


Enabled memory growth for GPUs: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


## Imports


In [2]:
import pathlib, os
from pathlib import Path
import tensorflow as tf
from collections import Counter
import numpy as np
from tensorflow.keras import layers, models, optimizers, callbacks, regularizers
import datetime

## Constants

In [3]:
# Notebook-wide configuration used by the dataset-loading and model cells.
DATA_DIR = "asl_alphabet_train"  # root folder containing one sub-folder per class
IMG_SIZE = (200, 200)            # (height, width) passed to the loader as image_size
BATCH_SIZE = 32                  # number of images per batch
SEED = 42                        # seed passed to the loader for the train/validation split
VAL_SPLIT = 0.15                 # fraction of files reserved for validation

## Load training and validation datasets

In [4]:
# Load the training and validation splits with ONE call.
# subset="both" makes the utility return a (training, validation) tuple, so the
# directory is indexed once instead of twice and both splits are guaranteed to
# come from the same VAL_SPLIT / SEED configuration.
# tf.keras.utils.image_dataset_from_directory is the documented entry point;
# the tf.keras.preprocessing spelling used previously is a legacy alias.
train_ds, val_ds = tf.keras.utils.image_dataset_from_directory(
    DATA_DIR,
    validation_split=VAL_SPLIT,
    subset="both",
    seed=SEED,  # required so the split is reproducible across runs
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
)

Found 87000 files belonging to 29 classes.
Using 73950 files for training.


I0000 00:00:1762787728.935234  462448 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5561 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


Found 87000 files belonging to 29 classes.
Using 13050 files for validation.


## Class checks

In [5]:
# Keep the class names carried by the training dataset and show how many there are.
class_names = train_ds.class_names
print(f"Found classes: {len(class_names)}")
print(class_names)

Found classes: 29
['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'del', 'nothing', 'space']


In [6]:
# Per-class sample counts read straight from the directory tree, as a class
# balance check that does not depend on the tf.data loader.
# Only regular files inside each class folder are counted, so a nested
# directory inside a class folder cannot inflate that class's total.
p = pathlib.Path(DATA_DIR)
counts = {
    class_dir.name: sum(1 for entry in class_dir.iterdir() if entry.is_file())
    for class_dir in p.iterdir()
    if class_dir.is_dir()
}
# Print in alphabetical order of class name, name left-aligned in 20 columns.
for c, n in sorted(counts.items()):
    print(f"{c:20s} {n}")


A                    3000
B                    3000
C                    3000
D                    3000
E                    3000
F                    3000
G                    3000
H                    3000
I                    3000
J                    3000
K                    3000
L                    3000
M                    3000
N                    3000
O                    3000
P                    3000
Q                    3000
R                    3000
S                    3000
T                    3000
U                    3000
V                    3000
W                    3000
X                    3000
Y                    3000
Z                    3000
del                  3000
nothing              3000
space                3000


In [7]:
# Display the concrete Python class of the training dataset object.
type(train_ds)

tensorflow.python.data.ops.prefetch_op._PrefetchDataset

In [8]:
# Pull one (images, labels) batch from the training dataset and report the
# tensor type and shapes as a sanity check before building the model.
iterator = iter(train_ds)
first_batch = next(iterator)
images_batch, labels_batch = first_batch

batch_report = [
    f"Type of images_batch: {type(images_batch)}",
    f"Images batch shape: {images_batch.shape}",
    f"Labels batch shape: {labels_batch.shape}",
    "\n--- Example from the batch ---",
    f"Shape of one image: {images_batch[0].shape}",
    f"Label for first image: {labels_batch[0]}",
]
print("\n".join(batch_report))

Type of images_batch: <class 'tensorflow.python.framework.ops.EagerTensor'>
Images batch shape: (32, 200, 200, 3)
Labels batch shape: (32,)

--- Example from the batch ---
Shape of one image: (200, 200, 3)
Label for first image: 25


## Optimize pipeline

In [9]:
AUTOTUNE = tf.data.AUTOTUNE

# Random geometric (rotation, zoom, translation) and photometric (brightness,
# contrast) perturbations. Used on the training data only.
data_augmentation = tf.keras.Sequential(
    [
        tf.keras.layers.RandomRotation(0.1),
        tf.keras.layers.RandomZoom(0.1),
        tf.keras.layers.RandomTranslation(height_factor=0.2, width_factor=0.2),
        tf.keras.layers.RandomBrightness(0.1),
        tf.keras.layers.RandomContrast(0.1),
    ]
)

# Scales pixel values by 1/255.
preprocess_rescale = tf.keras.layers.Rescaling(1./255)


def _augment(images, labels):
    """Apply the random augmentation stack to a batch; labels pass through."""
    return data_augmentation(images, training=True), labels


def _rescale(images, labels):
    """Rescale a batch of images by 1/255; labels pass through."""
    return preprocess_rescale(images), labels


# NOTE: this cell rebinds train_ds / val_ds. Re-running it without first
# re-running the dataset-loading cell stacks a second augmentation and a second
# 1/255 rescale on top of the first.

# Training pipeline: augment first, THEN rescale, then prefetch.
train_ds = (
    train_ds
    .map(_augment, num_parallel_calls=AUTOTUNE)
    .map(_rescale, num_parallel_calls=AUTOTUNE)
    .prefetch(buffer_size=AUTOTUNE)
)

# Validation pipeline: rescale only (no augmentation), then prefetch.
val_ds = (
    val_ds
    .map(_rescale, num_parallel_calls=AUTOTUNE)
    .prefetch(buffer_size=AUTOTUNE)
)

In [10]:
num_classes = len(class_names)
input_shape = tuple(IMG_SIZE) + (3,)  # image height, width, plus 3 channels

# Three convolutional stages with 32, 64 and 128 filters. Each stage is two
# 3x3 'same'-padded ReLU convolutions followed by a 2x2 max-pool.
# No L2 regularisation is applied anywhere in the network.
conv_stages = []
for n_filters in (32, 64, 128):
    conv_stages.extend([
        layers.Conv2D(n_filters, (3, 3), activation='relu', padding='same'),
        layers.Conv2D(n_filters, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),
    ])

# Classifier head: global average pooling, a 512-unit ReLU layer, light
# dropout, then a softmax over the classes.
classifier_head = [
    layers.GlobalAveragePooling2D(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(num_classes, activation='softmax'),
]

model = models.Sequential(
    [layers.Input(shape=input_shape), *conv_stages, *classifier_head]
)

model.summary()

In [11]:
# Training configuration: Adam optimizer, sparse categorical cross-entropy
# (the labels are integers, not one-hot vectors), and accuracy as the metric.
compile_kwargs = {
    "optimizer": 'adam',
    "loss": 'sparse_categorical_crossentropy',
    "metrics": ['accuracy'],
}
model.compile(**compile_kwargs)

In [12]:
# Each run logs into its own timestamped folder under logs/fit/, so separate
# runs show up as separate entries in TensorBoard.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = f"logs/fit/{run_stamp}"

# histogram_freq=1 -> weight histograms are written every epoch.
tensorboard_callback = callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

In [13]:
# Halt training once validation accuracy has gone 3 epochs without improving,
# and roll the model back to the weights from its best epoch.
early_stopping = callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=3,
    restore_best_weights=True,
)

# Write the model to 'model1.keras', overwriting it only when validation
# accuracy improves.
model_checkpoint = callbacks.ModelCheckpoint(
    filepath='model1.keras',
    monitor='val_accuracy',
    save_best_only=True,
)

In [14]:
# Upper bound on the number of epochs; EarlyStopping may end training sooner.
EPOCHS = 20

# Helpers invoked by Keras during training, in this order.
training_callbacks = [
    early_stopping,
    model_checkpoint,
    tensorboard_callback,
]

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS,
    callbacks=training_callbacks,
)

Epoch 1/20


2025-11-10 15:15:38.513549: I external/local_xla/xla/service/service.cc:163] XLA service 0x78d98c002390 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-11-10 15:15:38.513629: I external/local_xla/xla/service/service.cc:171]   StreamExecutor device (0): NVIDIA GeForce RTX 4060 Laptop GPU, Compute Capability 8.9
2025-11-10 15:15:38.670057: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-11-10 15:15:39.311742: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:473] Loaded cuDNN version 91500
2025-11-10 15:15:39.414943: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.
2025-11-10 15:15:39.

[1m2310/2311[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 71ms/step - accuracy: 0.1017 - loss: 3.0923

2025-11-10 15:18:36.260959: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.




[1m2311/2311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step - accuracy: 0.1018 - loss: 3.0920

2025-11-10 15:18:46.872807: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.




2025-11-10 15:18:58.070775: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.





[1m2311/2311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m205s[0m 82ms/step - accuracy: 0.2346 - loss: 2.5507 - val_accuracy: 0.6499 - val_loss: 0.9913
Epoch 2/20
[1m2311/2311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m172s[0m 74ms/step - accuracy: 0.7135 - loss: 0.8587 - val_accuracy: 0.9249 - val_loss: 0.2465
Epoch 3/20
[1m2311/2311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m174s[0m 75ms/step - accuracy: 0.8364 - loss: 0.4851 - val_accuracy: 0.9396 - val_loss: 0.1888
Epoch 4/20
[1m2311/2311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m173s[0m 75ms/step - accuracy: 0.8820 - loss: 0.3483 - val_accuracy: 0.9595 - val_loss: 0.1148
Epoch 5/20
[1m2311/2311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m174s[0m 75ms/step - accuracy: 0.9067 - loss: 0.2762 - val_accuracy: 0.9474 - val_loss: 0.1461
Epoch 6/20
[1m   2/2311[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3:12[0m 83ms/step - accuracy: 0.9062 - loss: 0.2862  

2025-11-10 15:30:35.024325: W tensorflow/core/kernels/data/prefetch_autotuner.cc:55] Prefetch autotuner tried to allocate 15360256 bytes after encountering the first element of size 15360256 bytes.This already causes the autotune ram budget to be exceeded. To stay within the ram budget, either increase the ram budget or reduce element size


[1m2311/2311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m174s[0m 75ms/step - accuracy: 0.9235 - loss: 0.2310 - val_accuracy: 0.9703 - val_loss: 0.0854
Epoch 7/20
[1m2311/2311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m182s[0m 79ms/step - accuracy: 0.9350 - loss: 0.1949 - val_accuracy: 0.9762 - val_loss: 0.0644
Epoch 8/20
[1m2311/2311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m170s[0m 73ms/step - accuracy: 0.9432 - loss: 0.1706 - val_accuracy: 0.9932 - val_loss: 0.0274
Epoch 9/20
[1m2311/2311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m181s[0m 78ms/step - accuracy: 0.9504 - loss: 0.1482 - val_accuracy: 0.9877 - val_loss: 0.0367
Epoch 10/20
[1m 116/2311[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m4:25[0m 121ms/step - accuracy: 0.9537 - loss: 0.1582

KeyboardInterrupt: 