In [1]:
import os

# Set before TensorFlow is imported so the native (C++) logging layer reads it
# at load time; "2" is intended to reduce the verbosity of TensorFlow's
# backend logs.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

from pathlib import Path
from typing import cast

import keras
import tensorflow as tf

  if not hasattr(np, "object"):


In [2]:
# Cap how much memory TensorFlow may allocate on the first physical GPU by
# exposing it as a single logical device with a fixed memory limit (in MB).
GPU_MEMORY_LIMIT_MB = 12 * 1024

gpus = tf.config.list_physical_devices("GPU")
if gpus:
    try:
        tf.config.set_logical_device_configuration(
            gpus[0],
            [tf.config.LogicalDeviceConfiguration(memory_limit=GPU_MEMORY_LIMIT_MB)],
        )
    except RuntimeError as err:
        # Logical devices cannot be reconfigured once the runtime has
        # initialised them (e.g. when this cell is re-run in a live kernel);
        # report it and keep the existing configuration instead of aborting.
        print(err)
else:
    # No GPU visible: skip the memory cap instead of failing on gpus[0].
    print("No GPU detected; running on CPU.")

logical_gpus = tf.config.list_logical_devices("GPU")
print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")

1 Physical GPUs, 1 Logical GPUs


I0000 00:00:1768202515.299455   22450 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 12288 MB memory:  -> device: 0, name: AMD Radeon RX 7900 XTX, pci bus id: 0000:03:00.0


In [3]:
# Seed passed to the dataset loaders so the train/validation split is stable.
RANDOM_SEED = 709

# Dataset locations, resolved relative to the current working directory.
BASE_DIR = Path().resolve()
DATA_DIR = BASE_DIR / "data/cat-and-dog"
TRAIN_DIR = DATA_DIR / "training_set/training_set"
TEST_DIR = DATA_DIR / "test_set/test_set"
CATEGORIES = ["cats", "dogs"]

# IMAGE_TARGET_SIZE is used as the model's input shape, which Keras reads as
# (height, width, channels), so the tuple is unpacked in that same order.
# Unpacking it as (width, height, ...) would silently swap the axes for any
# non-square target size.
img_h, img_w, img_ch = IMAGE_TARGET_SIZE = (224, 224, 3)
BATCH_SIZE = 128

# Upper bound only; training is expected to be ended by early stopping.
EPOCHS = 1_000_000

## Load data

In [4]:
# Training split: the "training" subset of an 80/20 split of TRAIN_DIR,
# resized to the model's input size, batched, and prefetched so input loading
# overlaps with the training step.
train_ds = cast(
    tf.data.Dataset,
    keras.utils.image_dataset_from_directory(
        directory=TRAIN_DIR,
        image_size=(img_h, img_w),
        batch_size=BATCH_SIZE,
        validation_split=0.2,
        subset="training",
        seed=RANDOM_SEED,
    ),
).prefetch(tf.data.AUTOTUNE)

Found 8005 files belonging to 2 classes.
Using 6404 files for training.


In [5]:
# Validation split: the "validation" subset of the same 80/20 split of
# TRAIN_DIR. The seed and split fraction must match the training loader so
# the two subsets do not overlap.
val_ds = cast(
    tf.data.Dataset,
    keras.utils.image_dataset_from_directory(
        directory=TRAIN_DIR,
        image_size=(img_h, img_w),
        batch_size=BATCH_SIZE,
        validation_split=0.2,
        subset="validation",
        seed=RANDOM_SEED,
    ),
).prefetch(tf.data.AUTOTUNE)

Found 8005 files belonging to 2 classes.
Using 1601 files for validation.


In [6]:
# len() of a batched dataset is its number of batches, so these are sample
# counts rounded up to a whole number of batches (an upper bound on the real
# file counts), not the exact number of images.
total_train, total_val = (len(split) * BATCH_SIZE for split in (train_ds, val_ds))

total_train, total_val

(6528, 1664)

## Model definition

In [7]:
# Seed Python, NumPy and the backend RNGs immediately before the layers are
# created, so weight initialisation and dropout masks are reproducible and
# re-running this cell always rebuilds the same initial model.
keras.utils.set_random_seed(RANDOM_SEED)

# Small CNN binary classifier: pixel rescaling to [0, 1], four
# Conv2D -> MaxPooling2D stages with growing filter counts, then a
# dropout-regularised dense head ending in a single sigmoid unit.
model = keras.Sequential(
    [
        keras.layers.Input(shape=IMAGE_TARGET_SIZE),
        keras.layers.Rescaling(1 / 255),
        keras.layers.Conv2D(filters=32, kernel_size=3, activation="relu"),
        keras.layers.MaxPooling2D(pool_size=3, strides=2),
        keras.layers.Conv2D(filters=64, kernel_size=3, activation="relu"),
        keras.layers.MaxPooling2D(pool_size=3, strides=2),
        keras.layers.Conv2D(filters=128, kernel_size=3, activation="relu"),
        keras.layers.MaxPooling2D(pool_size=3, strides=2),
        keras.layers.Conv2D(filters=128, kernel_size=3, activation="relu"),
        keras.layers.MaxPooling2D(pool_size=3, strides=2),
        keras.layers.Flatten(),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(512, activation="relu"),
        # Single sigmoid output: one probability for the binary label.
        keras.layers.Dense(1, activation="sigmoid"),
    ],
    name="catdog",
)

model.summary()

In [8]:
# Optional TensorBoard logging (currently disabled; add it to `callbacks`
# below to enable):
# tensorboard = keras.callbacks.TensorBoard(
#     str(BASE_DIR / ".tensorboard"),
#     histogram_freq=5,
#     write_images=True,
# )

# SGD with momentum; the starting learning rate is lowered by `lr_red`.
sgd = keras.optimizers.SGD(
    learning_rate=1e-2,
    momentum=0.9,
)

# Divide the learning rate by 10 when val_loss plateaus (Keras' default
# patience), but never go below 1e-4.
lr_red = keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss",
    factor=1 / 10,
    mode="min",
    min_lr=1e-4,
    verbose=True,
)

# Stop once val_loss has failed to improve by at least `min_delta` for 5
# consecutive epochs; the first 10 epochs are exempt from this check.
early_stop = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    # Must be a float: int(1e-4) truncates to 0, which removes the
    # minimum-improvement threshold altogether.
    min_delta=1e-4,
    patience=5,
    mode="min",
    start_from_epoch=10,
)

# Keep only the best model (lowest val_loss) on disk.
checkpoint = keras.callbacks.ModelCheckpoint(
    filepath=str(DATA_DIR / "catdog.keras"),
    monitor="val_loss",
    verbose=True,
    save_best_only=True,
    mode="min",
)

model.compile(
    loss="binary_crossentropy",
    optimizer=sgd,
    metrics=["accuracy", "precision", "recall"],
)

# train_ds and val_ds are finite datasets, so every epoch (and every
# validation pass) simply runs until the dataset is exhausted. Explicit
# steps_per_epoch / validation_steps are therefore unnecessary, and omitting
# them avoids "input ran out of data" problems when the step count and the
# dataset length disagree.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS,
    callbacks=[lr_red, early_stop, checkpoint],
)
results = history.history
results

Epoch 1/1000000


I0000 00:00:1768202562.237955   22627 service.cc:148] XLA service 0x7d299c008470 initialized for platform ROCM (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1768202562.238025   22627 service.cc:156]   StreamExecutor device (0): AMD Radeon RX 7900 XTX, AMDGPU ISA version: gfx1100
I0000 00:00:1768202595.631055   22627 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 169ms/step - accuracy: 0.5193 - loss: 0.6918 - precision: 0.5148 - recall: 0.8810
Epoch 1: val_loss improved from None to 0.68239, saving model to /home/kvdomingo/mlops-play/data/cat-and-dog/catdog.keras

Epoch 1: finished saving model to /home/kvdomingo/mlops-play/data/cat-and-dog/catdog.keras
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 256ms/step - accuracy: 0.5364 - loss: 0.6908 - precision: 0.5238 - recall: 0.7192 - val_accuracy: 0.5428 - val_loss: 0.6824 - val_precision: 0.5319 - val_recall: 0.9674 - learning_rate: 0.0100
Epoch 2/1000000
[1m50/51[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 158ms/step - accuracy: 0.5554 - loss: 0.6837 - precision: 0.5425 - recall: 0.7835
Epoch 2: val_loss did not improve from 0.68239
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 161ms/step - accuracy: 0.5637 - loss: 0.6824 - precision: 0.5500 - recall: 0.6629 - val_accuracy: 0.5315 - va

{'accuracy': [0.5363835096359253,
  0.5637101531028748,
  0.5110868215560913,
  0.5399749875068665,
  0.6057151556015015,
  0.603997528553009,
  0.6074328422546387,
  0.6360087394714355,
  0.6556839346885681,
  0.6717676520347595,
  0.6955028176307678,
  0.7150218486785889,
  0.6984696984291077,
  0.7406308650970459,
  0.7556214928627014,
  0.7692067623138428,
  0.7573391795158386,
  0.7877888679504395,
  0.7823235392570496,
  0.7849781513214111,
  0.8107432723045349,
  0.8148032426834106,
  0.8021548986434937,
  0.8163647651672363,
  0.8347907662391663,
  0.8552467226982117,
  0.8591505289077759,
  0.8582136034965515,
  0.8749219179153442,
  0.8810118436813354,
  0.8794503211975098,
  0.8970955610275269,
  0.8763272762298584,
  0.908650815486908,
  0.896314799785614,
  0.8950656056404114],
 'loss': [0.6907550096511841,
  0.6823778748512268,
  0.6941961646080017,
  0.6857131123542786,
  0.6708730459213257,
  0.6621268391609192,
  0.6581111550331116,
  0.6427939534187317,
  0.6195077896

In [9]:
# Held-out test set, loaded in directory order (no shuffling), with the same
# image size and batch size as the training data.
test_ds = cast(
    tf.data.Dataset,
    keras.utils.image_dataset_from_directory(
        directory=TEST_DIR,
        image_size=(img_h, img_w),
        batch_size=BATCH_SIZE,
        shuffle=False,
        seed=RANDOM_SEED,
    ),
).prefetch(tf.data.AUTOTUNE)

# Number of batches times the batch size: the sample count rounded up to a
# whole number of batches, not the exact number of files.
total_test = BATCH_SIZE * len(test_ds)
total_test

Found 2023 files belonging to 2 classes.


2048

In [10]:
# Score the in-memory model on every test batch, showing a progress bar;
# total_test // BATCH_SIZE recovers the number of batches in test_ds.
evals = model.evaluate(test_ds, steps=total_test // BATCH_SIZE, verbose=1)

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 375ms/step - accuracy: 0.8433 - loss: 0.4035 - precision: 0.8557 - recall: 0.8261  
