# Pneumonia CNN Baseline
Clean multi‑cell notebook version

In [1]:
import os
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

SEED = 42
tf.random.set_seed(SEED)

DATA_DIR = "chest_xray"   # <-- CHANGE THIS
IMG_SIZE = (160, 160)
BATCH_SIZE = 32
AUTOTUNE = tf.data.AUTOTUNE

train_dir = os.path.join(DATA_DIR, "train")
val_dir   = os.path.join(DATA_DIR, "val")
test_dir  = os.path.join(DATA_DIR, "test")

2025-12-03 19:00:26.436750: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-12-03 19:00:26.903269: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-12-03 19:00:29.386979: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.


## Load datasets

In [2]:
train_ds = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    labels='inferred',
    label_mode='binary',
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=SEED,
)

val_ds = tf.keras.utils.image_dataset_from_directory(
    val_dir,
    labels='inferred',
    label_mode='binary',
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

test_ds = tf.keras.utils.image_dataset_from_directory(
    test_dir,
    labels='inferred',
    label_mode='binary',
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

class_names = train_ds.class_names
class_names

Found 5216 files belonging to 2 classes.
Found 16 files belonging to 2 classes.
Found 624 files belonging to 2 classes.


2025-12-03 19:00:30.181578: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


['NORMAL', 'PNEUMONIA']

In [3]:
train_ds = train_ds.prefetch(AUTOTUNE)
val_ds   = val_ds.prefetch(AUTOTUNE)
test_ds  = test_ds.prefetch(AUTOTUNE)

## Data augmentation

In [4]:
data_augmentation = keras.Sequential([
    layers.RandomFlip('horizontal'),
    layers.RandomRotation(0.05),
    layers.RandomZoom(0.1),
])

## Build CNN model

In [5]:
def build_cnn_model(
    input_shape=IMG_SIZE + (3,),
    num_filters=(16, 32, 64),
    dense_units=64,
    dropout_rate=0.5,
):
    inputs = keras.Input(shape=input_shape)

    x = data_augmentation(inputs)
    x = layers.Rescaling(1.0/255)(x)

    for f in num_filters:
        x = layers.Conv2D(f, 3, padding='same', activation='relu')(x)
        x = layers.BatchNormalization()(x)
        x = layers.MaxPooling2D()(x)

    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(dense_units, activation='relu')(x)
    x = layers.Dropout(dropout_rate)(x)

    outputs = layers.Dense(1, activation='sigmoid')(x)

    model = keras.Model(inputs, outputs)
    model.compile(
        optimizer=keras.optimizers.Adam(1e-4),
        loss='binary_crossentropy',
        metrics=['accuracy', keras.metrics.AUC(), keras.metrics.Precision(), keras.metrics.Recall()]
    )
    return model

model = build_cnn_model()
model.summary()

## Class weights

In [6]:
normal_count = 1341
pneu_count = 3875
total = normal_count + pneu_count

class_weight = {
    0: total / (2 * normal_count),
    1: total / (2 * pneu_count),
}
class_weight

{0: 1.9448173005219984, 1: 0.6730322580645162}

## Callbacks

In [7]:
callbacks = [
    keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True),
    keras.callbacks.ModelCheckpoint('best_pneumonia_cnn.keras', save_best_only=True),
]

## Train

In [8]:
EPOCHS = 10

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS,
    class_weight=class_weight,
    callbacks=callbacks,
)

Epoch 1/10


2025-12-03 19:00:38.151759: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 52428800 exceeds 10% of free system memory.
2025-12-03 19:00:38.366563: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 52428800 exceeds 10% of free system memory.
2025-12-03 19:00:38.445372: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 52428800 exceeds 10% of free system memory.
2025-12-03 19:00:39.691510: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 52428800 exceeds 10% of free system memory.
2025-12-03 19:00:39.692389: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 52428800 exceeds 10% of free system memory.


[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m196s[0m 1s/step - accuracy: 0.7496 - auc: 0.8235 - loss: 0.5248 - precision: 0.9020 - recall: 0.7437 - val_accuracy: 0.5000 - val_auc: 0.7109 - val_loss: 0.9328 - val_precision: 0.5000 - val_recall: 1.0000
Epoch 2/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m194s[0m 1s/step - accuracy: 0.8332 - auc: 0.9137 - loss: 0.3693 - precision: 0.9624 - recall: 0.8070 - val_accuracy: 0.5000 - val_auc: 0.8906 - val_loss: 1.2745 - val_precision: 0.5000 - val_recall: 1.0000
Epoch 3/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m178s[0m 1s/step - accuracy: 0.8606 - auc: 0.9298 - loss: 0.3243 - precision: 0.9713 - recall: 0.8372 - val_accuracy: 0.5000 - val_auc: 0.8984 - val_loss: 0.9912 - val_precision: 0.5000 - val_recall: 1.0000
Epoch 4/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m159s[0m 972ms/step - accuracy: 0.8645 - auc: 0.9366 - loss: 0.3078 - precision: 0.9726 - recall: 0.8413 - val

## Evaluate on test set

In [9]:
test_metrics = model.evaluate(test_ds)
list(zip(model.metrics_names, test_metrics))

[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 184ms/step - accuracy: 0.7196 - auc: 0.8932 - loss: 0.5696 - precision: 0.9694 - recall: 0.5692


[('loss', 0.5696073770523071), ('compile_metrics', 0.7195512652397156)]