In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset paths used later in this
# notebook live under /content/drive/MyDrive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import tensorflow as tf
from tensorflow import keras

In [None]:
# paths
train_dir = "/content/drive/MyDrive/chest_xray_dataset/train"
test_dir = "/content/drive/MyDrive/chest_xray_dataset/test"
# val_dir = "/content/drive/MyDrive/chest_xray_dataset/val"

# Settings shared by all three splits, kept in one place so the splits cannot
# drift apart (same label encoding, image size and batch size everywhere).
loader_kwargs = dict(
    labels='inferred',
    label_mode='binary',
    image_size=(150, 150),
    batch_size=32,
)

# load training dataset
# The training and validation subsets are both carved out of train_dir. The two
# calls must use the same validation_split and seed so the subsets are
# complementary and do not overlap.
train_ds = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    validation_split=0.15,
    subset='training',
    shuffle=True,
    seed=123,
    **loader_kwargs
)

val_ds = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    validation_split=0.15,
    subset='validation',
    shuffle=True,
    seed=123,
    **loader_kwargs
)

# The test split is only used for evaluation, so it is loaded without
# shuffling: the sample order is then deterministic and any labels collected
# from the dataset stay aligned with predictions made on it.
test_ds = tf.keras.utils.image_dataset_from_directory(
    test_dir,
    shuffle=False,
    **loader_kwargs
)

Found 5216 files belonging to 2 classes.
Using 4434 files for training.
Found 5216 files belonging to 2 classes.
Using 782 files for validation.
Found 624 files belonging to 2 classes.


In [None]:
# Function to normalize images
def normalize_img(image, label):
  """Cast an image batch to float32 and divide it by 255.

  Args:
    image: image tensor as yielded by the dataset.
    label: label tensor; passed through unchanged.

  Returns:
    Tuple ``(scaled_image, label)``.
  """
  image = tf.cast(image, tf.float32) / 255.
  return image, label


# These calls must run at cell level. When they were indented under the
# function they sat after its `return`, so they never executed and the
# datasets were left unscaled.
# NOTE: this cell is not idempotent -- re-running it divides by 255 again.
# Re-run the dataset-loading cell first if you need to execute it twice.
train_ds = train_ds.map(normalize_img)
val_ds = val_ds.map(normalize_img)
test_ds = test_ds.map(normalize_img)

In [None]:
# Input-pipeline tuning: cache each split after its first pass and let tf.data
# pick the prefetch depth so batches are prepared while the model is busy.
AUTOTUNE = tf.data.AUTOTUNE

# Training split: cache, then shuffle the cached batches (buffer of 1000), then prefetch.
train_ds = train_ds.cache()
train_ds = train_ds.shuffle(1000)
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)

# Validation and test splits: cache and prefetch only.
val_ds = val_ds.cache()
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)

test_ds = test_ds.cache()
test_ds = test_ds.prefetch(buffer_size=AUTOTUNE)

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, BatchNormalization
from tensorflow.keras import layers
from tensorflow.keras import regularizers

In [None]:
from tensorflow.keras import layers

# Random augmentation stack, packaged as its own Sequential model so it can be
# dropped into a larger model as a single layer.
data_augmentation = tf.keras.Sequential()
data_augmentation.add(layers.RandomFlip("horizontal"))  # mirror left/right
data_augmentation.add(layers.RandomRotation(0.1))       # rotation factor 0.1
data_augmentation.add(layers.RandomZoom(0.1))           # zoom factor 0.1
data_augmentation.add(layers.RandomContrast(0.2))       # contrast factor 0.2

In [None]:
model = Sequential()

# 1) Explicit input specification.
# The input shape has to be declared on the *first* entry of a Sequential
# model. Passing `input_shape=` to the first Conv2D does not work here because
# the augmentation block precedes it; Keras emits a warning and the model is
# left unbuilt.
model.add(tf.keras.Input(shape=(150, 150, 3)))

# 2) Augmentation as the first processing layer
model.add(data_augmentation)

# 3) Convolution blocks: Conv -> BatchNorm -> MaxPool -> Dropout, with the
# filter count doubling from one block to the next.
for n_filters in (32, 64, 128):
    model.add(Conv2D(n_filters, kernel_size=(3, 3), activation='relu',
                     kernel_regularizer=regularizers.l2(0.0001)))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

# Dense head
model.add(Flatten())
model.add(Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.0001)))
model.add(BatchNormalization())
model.add(Dropout(0.5))

# Output: a single sigmoid unit for the binary label
model.add(Dense(1, activation='sigmoid'))  # binary


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
from tensorflow.keras.metrics import AUC
from tensorflow.keras.optimizers import Adam

# Adam with an explicit learning rate of 1e-4. The optimizer *instance* must be
# handed to compile(); passing the string 'adam' builds a fresh Adam with the
# default learning rate and silently ignores the one configured here.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy', AUC(name='auc')],
)

In [None]:
# Explicitly build the model for batches of 150x150 3-channel inputs
# (the batch dimension is left as None, i.e. any batch size).
model.build(input_shape=(None,150,150,3))

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Stop training once val_loss has not improved for 3 consecutive epochs and
# roll the model back to the weights of the best epoch.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True
)

# Lower the learning rate when val_loss stalls for 2 epochs, never going
# below min_lr.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,         # new_lr = lr * 0.2, i.e. an 80% reduction per trigger
    patience=2,
    min_lr=1e-6
)

In [None]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

# Walk the training dataset once and flatten every label batch into one
# 1-D array of labels.
labels = np.concatenate(
    [batch_labels.numpy().flatten() for _batch_images, batch_labels in train_ds]
)

# 'balanced' weighting: classes that appear less often get a larger weight.
present_classes = np.unique(labels)
balanced_weights = compute_class_weight('balanced', classes=present_classes, y=labels)

# Keras expects a {class_index: weight} mapping.
class_weights = {idx: weight for idx, weight in enumerate(balanced_weights)}
print(class_weights)


{0: np.float64(1.916162489196197), 1: np.float64(0.6765334147085749)}


In [None]:
# Train for at most 30 epochs with class weighting; the callbacks may stop
# training early or lower the learning rate based on validation loss.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=30,
    callbacks=[early_stop, reduce_lr],
    class_weight=class_weights,
)

Epoch 1/30
[1m139/139[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 143ms/step - accuracy: 0.7658 - auc: 0.8820 - loss: 0.5363 - val_accuracy: 0.2417 - val_auc: 0.6881 - val_loss: 5.2893 - learning_rate: 0.0010
Epoch 2/30
[1m139/139[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 65ms/step - accuracy: 0.8804 - auc: 0.9538 - loss: 0.3497 - val_accuracy: 0.3107 - val_auc: 0.8344 - val_loss: 3.2771 - learning_rate: 0.0010
Epoch 3/30
[1m139/139[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 65ms/step - accuracy: 0.9148 - auc: 0.9702 - loss: 0.2975 - val_accuracy: 0.5205 - val_auc: 0.9817 - val_loss: 1.1684 - learning_rate: 0.0010
Epoch 4/30
[1m139/139[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 64ms/step - accuracy: 0.9227 - auc: 0.9753 - loss: 0.2779 - val_accuracy: 0.5281 - val_auc: 0.9785 - val_loss: 1.2453 - learning_rate: 0.0010
Epoch 5/30
[1m139/139[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 64ms/step - accuracy: 0.9283 - auc: 0.9792 -

In [None]:
# Score the trained model on the held-out test split. The three returned
# values are unpacked as loss, accuracy and AUC.
test_metrics = model.evaluate(test_ds)
test_loss, test_acc, test_auc = test_metrics
print(f"Test Accuracy: {test_acc:.4f}, Test AUC: {test_auc:.4f}")

[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m183s[0m 9s/step - accuracy: 0.8072 - auc: 0.9172 - loss: 0.7634
Test Accuracy: 0.8029, Test AUC: 0.9239


In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Predict once.
# Labels and predictions are collected in the SAME pass over test_ds so that
# row i of y_true always belongs to row i of y_pred_probs. Reading the labels
# in one iteration and calling model.predict(test_ds) in another is only
# correct if both iterations yield the batches in the same order, which a
# shuffled dataset does not guarantee on its own.
true_batches = []
prob_batches = []
for batch_images, batch_labels in test_ds:
    true_batches.append(batch_labels.numpy())
    prob_batches.append(model.predict_on_batch(batch_images))

y_true = np.concatenate(true_batches, axis=0)
y_pred_probs = np.concatenate(prob_batches, axis=0)

# Threshold the sigmoid outputs at 0.5 to obtain hard class labels.
y_pred_label = (y_pred_probs >= 0.5).astype(int)

print("Classification Report:\n", classification_report(y_true, y_pred_label, digits=2))
cm = confusion_matrix(y_true, y_pred_label)
print("Confusion Matrix:\n", cm)


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
Classification Report:
               precision    recall  f1-score   support

         0.0       0.98      0.48      0.65       234
         1.0       0.76      0.99      0.86       390

    accuracy                           0.80       624
   macro avg       0.87      0.74      0.76       624
weighted avg       0.84      0.80      0.78       624

Confusion Matrix:
 [[113 121]
 [  2 388]]
