In [1]:
!nvidia-smi
import tensorflow as tf
print("TensorFlow version:", tf.__version__)
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))  
print("GPU Details:", tf.config.list_physical_devices('GPU'))


Thu Jan 22 17:19:24 2026       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 570.195.03             Driver Version: 570.195.03     CUDA Version: 12.8     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4060 ...    Off |   00000000:01:00.0  On |                  N/A |
| N/A   38C    P5              5W /   55W |    6302MiB /   8188MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

2026-01-22 17:19:24.713883: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  if not hasattr(np, "object"):


TensorFlow version: 2.20.0
Num GPUs Available:  1
GPU Details: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


# Load dataset

In [3]:
from load_dataset import load_dataset

# Batch size forwarded to the project-local loader.
BATCH_SIZE = 128

# load_dataset is defined outside this notebook. It returns three values: the
# training generator, the validation generator, and an object whose len() is
# used further down as the number of classes.
# NOTE(review): the third value is presumably the class-name -> index mapping
# of the training generator -- confirm in load_dataset.py.
train_gen, val_gen, train_gen_class_indc = load_dataset(batch_size=BATCH_SIZE)

Found 33240 images belonging to 8 classes.
Found 8306 images belonging to 8 classes.


# Model creation

In [4]:
from tensorflow.keras import layers, models

# ============================================================
# SPATIAL CORE
# - This is the DEPLOYMENT model (sliding window FCN)
# - Input is dynamic: works on any image size of at least 222x222
# - Output is (h, w, num_classes)
# ============================================================

def build_spatial_core(num_classes=8):
    """Build the fully-convolutional deployment network ("spatial core").

    The input has unspecified height and width and three channels. Three
    unpadded 3x3 conv + 2x2 max-pool stages are followed by a 26x26
    convolution that acts as a fixed-size sliding window and a 1x1
    convolution that emits one raw score per class for every window
    position. No softmax is applied here.

    Args:
        num_classes: Number of output channels of the final 1x1 convolution.

    Returns:
        A Keras ``Model`` named ``"spatial_core"`` whose output has shape
        ``(batch, h, w, num_classes)``; ``h == w == 1`` for a 224x224 input.
    """
    image = layers.Input(shape=(None, None, 3))

    # --------------------------------------------------------
    # BACKBONE START
    # This loop can be swapped for any base model later, e.g.
    # ResNet50 (include_top=False), MobileNetV2 or EfficientNet.
    #
    # Contract: backbone(image) -> feature map (h, w, c)
    # --------------------------------------------------------
    features = image
    for n_filters in (32, 64, 128):
        # Unpadded 3x3 conv trims 2 pixels per axis; the 2x2 pool then halves it.
        features = layers.Conv2D(n_filters, 3, activation="relu")(features)
        features = layers.MaxPooling2D(2)(features)
    # --------------------------------------------------------
    # BACKBONE END
    # --------------------------------------------------------

    # --------------------------------------------------------
    # FIXED WINDOW COLLAPSE
    # A 224x224 input reaches this point as a 26x26 feature map
    # (224 -> 222 -> 111 -> 109 -> 54 -> 52 -> 26), so a 26x26
    # kernel reduces it to 1x1. Larger inputs leave a larger
    # feature map and therefore produce a grid of window outputs.
    # --------------------------------------------------------
    window = layers.Conv2D(128, 26, activation="relu")(features)

    # --------------------------------------------------------
    # CLASS CONFIDENCE PER WINDOW
    # Raw scores only: softmax must NOT be applied here.
    # --------------------------------------------------------
    scores = layers.Conv2D(num_classes, 1)(window)   # (h, w, num_classes)

    return models.Model(inputs=image, outputs=scores, name="spatial_core")


# ============================================================
# TRAINING MODEL
# - This is ONLY for training
# - Uses fixed input size because of ImageDataGenerator
# - Collapses spatial output to match image-level labels
# ============================================================

def build_training_model(spatial_core, num_classes=8):
    """Wrap ``spatial_core`` in a fixed-size image classifier for training.

    The wrapper feeds a 224x224x3 input through ``spatial_core``, averages
    the resulting score map over its spatial dimensions and applies softmax,
    giving one probability vector per image.

    Args:
        spatial_core: Callable Keras model/layer mapping an image tensor to a
            ``(h, w, classes)`` score map.
        num_classes: Not read by this function; the width of the output is
            whatever ``spatial_core`` produces.

    Returns:
        A Keras ``Model`` named ``"training_model"``.
    """
    # Fixed spatial size, matching what the data generator yields.
    image = layers.Input(shape=(224, 224, 3))

    # For a 224x224 input the core yields a (1, 1, classes) score map.
    score_map = spatial_core(image)

    # Drop the spatial axes so the loss sees a flat per-class vector.
    logits = layers.GlobalAveragePooling2D()(score_map)

    # Image-level class probabilities.
    probabilities = layers.Activation("softmax")(logits)

    return models.Model(inputs=image, outputs=probabilities, name="training_model")


In [None]:
# Deployment network: the class count is taken from the dataset's class mapping.
spatial_core = build_spatial_core(
    num_classes=len(train_gen_class_indc),
)

# Training wrapper that shares the deployment network's layers.
training_model = build_training_model(
    spatial_core=spatial_core,
    num_classes=len(train_gen_class_indc),
)

In [None]:
from tensorflow.keras.callbacks import (
    EarlyStopping,
    ModelCheckpoint,
    ReduceLROnPlateau,
)

# Write the full model (not just weights) to disk, keeping only the best val_loss.
checkpoint_cb = ModelCheckpoint(
    filepath="best_model.keras",
    monitor="val_loss",
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
)

# Stop after 10 epochs without val_loss improvement and restore the best weights.
early_stop_cb = EarlyStopping(
    monitor="val_loss",
    patience=10,
    restore_best_weights=True,
    verbose=1,
)

# Multiply the learning rate by 0.3 after 7 stagnant epochs. The patience is
# shorter than early stopping's, so a reduction is tried before training ends.
lr_plateau_cb = ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.3,
    patience=7,
    min_lr=1e-7,
    verbose=1,
)

callbacks = [checkpoint_cb, early_stop_cb, lr_plateau_cb]


In [None]:
# Adam with a fixed starting learning rate of 1e-4.
adam = tf.keras.optimizers.Adam(learning_rate=1e-4)

# The model ends in softmax, so the loss takes probabilities rather than logits.
# NOTE(review): categorical_crossentropy assumes the generators yield one-hot
# labels -- confirm in load_dataset.
training_model.compile(
    optimizer=adam,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)


In [None]:
# Train the image-level classifier. Checkpointing, early stopping and
# learning-rate reduction are handled by `callbacks`.
#
# steps_per_epoch / validation_steps are intentionally not passed: the earlier
# expressions used X_train, X_val and batch_size, none of which are defined in
# the visible notebook, so the cell raised NameError on a fresh kernel. Without
# explicit step counts Keras takes the number of batches per epoch from the
# generators themselves.
# NOTE(review): assumes train_gen / val_gen are finite, len()-aware Keras
# iterators (e.g. from ImageDataGenerator.flow_from_directory) -- confirm in
# load_dataset.
history = training_model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=100,
    callbacks=callbacks,
)