# Pneumonia classifier CNN builder

In [1]:
from sklearn.utils import class_weight
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np
import tensorflow as tf
from tensorflow.keras import regularizers
from tensorflow.keras.preprocessing import image_dataset_from_directory
from sklearn.metrics import precision_recall_curve

In [2]:
img_height, img_width = 180, 180
batch_size = 32

# Load datasets
train_ds = image_dataset_from_directory(
    "datasets/chest_xray/train",
    label_mode="binary",
    image_size=(img_height, img_width),
    batch_size=batch_size,
    shuffle=True
)

val_ds = image_dataset_from_directory(
    "datasets/chest_xray/val",
    label_mode="binary",
    image_size=(img_height, img_width),
    batch_size=batch_size
)

test_ds = image_dataset_from_directory(
    "datasets/chest_xray/test",
    label_mode="binary",
    image_size=(img_height, img_width),
    batch_size=batch_size
)

Found 5216 files belonging to 2 classes.
Found 16 files belonging to 2 classes.
Found 624 files belonging to 2 classes.


In [3]:
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.prefetch(buffer_size=AUTOTUNE)

data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.1),
    tf.keras.layers.RandomZoom(0.1),
])

In [4]:
y_train = []
for _, label in train_ds.unbatch():
    y_train.append(int(label.numpy()))

weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train),
    y=y_train
)
class_weights = dict(enumerate(weights))

  y_train.append(int(label.numpy()))


In [5]:
model = tf.keras.models.Sequential([
    data_augmentation,
    tf.keras.layers.Rescaling(1./255, input_shape=(img_height, img_width, 3)),
    tf.keras.layers.Conv2D(32, 3, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(64, 3, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(128, 3, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dropout(0.5),  # Dropout before dense layer
    tf.keras.layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy', tf.keras.metrics.AUC(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall()]
)

early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True
)

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=30,  # Let it run longer, early stopping will stop it safely
    class_weight=class_weights,
    callbacks=[early_stop]
)

Epoch 1/30


  super().__init__(**kwargs)


[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 264ms/step - accuracy: 0.4138 - auc: 0.5447 - loss: 0.9770 - precision: 0.7504 - recall: 0.2965 - val_accuracy: 0.6875 - val_auc: 0.8438 - val_loss: 0.6250 - val_precision: 0.8000 - val_recall: 0.5000
Epoch 2/30
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 252ms/step - accuracy: 0.8021 - auc: 0.8870 - loss: 0.5029 - precision: 0.9270 - recall: 0.7937 - val_accuracy: 0.6875 - val_auc: 0.8438 - val_loss: 0.6609 - val_precision: 0.6154 - val_recall: 1.0000
Epoch 3/30
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 252ms/step - accuracy: 0.8496 - auc: 0.9371 - loss: 0.3982 - precision: 0.9499 - recall: 0.8397 - val_accuracy: 0.8750 - val_auc: 0.8047 - val_loss: 0.5865 - val_precision: 0.8750 - val_recall: 0.8750
Epoch 4/30
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 257ms/step - accuracy: 0.8758 - auc: 0.9496 - loss: 0.3612 - precision: 0.9573 - recall: 0.8699 

In [6]:
test_loss, test_acc, test_auc, test_prec, test_rec = model.evaluate(test_ds)

# Confusion matrix
y_true = []
y_pred_probs = []

for images, labels in test_ds:
    preds = model.predict(images)
    y_pred_probs.extend(preds.flatten())        # predicted probabilities
    y_true.extend(labels.numpy().flatten())     # actual ground truth labels

# Compute precision-recall pairs for different thresholds
precisions, recalls, thresholds = precision_recall_curve(y_true, y_pred_probs)
f1s = [2*(p*r)/(p + r + 1e-8) for p, r in zip(precisions, recalls)]

# Find best threshold
best_idx = np.argmax(f1s)
best_threshold = thresholds[best_idx]

print(f"\nBest Threshold: {best_threshold:.2f}")
print(f"Precision: {precisions[best_idx]:.2f}, Recall: {recalls[best_idx]:.2f}, F1: {f1s[best_idx]:.2f}")

[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 77ms/step - accuracy: 0.8824 - auc: 0.9442 - loss: 0.3951 - precision: 0.9046 - recall: 0.9036
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 126ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[

In [7]:
# Recompute predictions using the new threshold
y_pred_opt = [1 if prob > best_threshold else 0 for prob in y_pred_probs]

# New confusion matrix and report
cm = confusion_matrix(y_true, y_pred_opt)

print("\nUpdated Classification Report:")
print(classification_report(y_true, y_pred_opt, target_names=["Normal", "Pneumonia"]))


Updated Classification Report:
              precision    recall  f1-score   support

      Normal       0.87      0.73      0.79       234
   Pneumonia       0.85      0.94      0.89       390

    accuracy                           0.86       624
   macro avg       0.86      0.83      0.84       624
weighted avg       0.86      0.86      0.85       624



In [9]:
model.save("models/chest_xray_cnn.keras")