In [2]:
import os
import numpy as np
import cv2
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import classification_report, confusion_matrix
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

In [None]:
# data loading & stratified split
def load_data(image_dir, img_size=(150, 150)):
    images, labels = [], []
    for label in ["NORMAL", "PNEUMONIA"]:
        folder = os.path.join('/content/drive/MyDrive/Colab Notebooks/chest_xray', label)
        for fname in os.listdir(folder):
            if not fname.lower().endswith(".jpeg"):
                continue
            img = cv2.imread(os.path.join(folder, fname))
            img = cv2.resize(img, img_size)
            images.append(img)
            labels.append(0 if label == "NORMAL" else 1)
    return np.array(images), np.array(labels)

image_dir = "./chest_xray"
images, labels = load_data(image_dir)

sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, test_idx = next(sss.split(images, labels))
X_train, y_train = images[train_idx], labels[train_idx]
X_test,  y_test  = images[test_idx],  labels[test_idx]

In [None]:
#data generators with normalization + augmentation

train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=25,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.25,
    zoom_range=0.25,
    brightness_range=[0.8, 1.2],
    horizontal_flip=True,
    fill_mode='nearest'
)
test_datagen = ImageDataGenerator(rescale=1./255)

train_gen = train_datagen.flow(X_train, y_train, batch_size=32)
val_gen   = test_datagen.flow(X_test,  y_test,  batch_size=32)

In [None]:
#build model with transfer learning + batchnorm

base_model = MobileNetV2(input_shape=(150,150,3), include_top=False, weights='imagenet')
base_model.trainable = False  # freeze

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
output = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=output)
model.compile(optimizer=Adam(learning_rate=1e-3),
              loss='binary_crossentropy',
              metrics=['accuracy'])
model.summary()

  base_model = MobileNetV2(input_shape=(150,150,3), include_top=False, weights='imagenet')


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
#callbacks

callbacks = [
    EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6)
]


In [None]:
#train

history = model.fit(
    train_gen,
    steps_per_epoch=len(X_train)//32,
    epochs=25,
    validation_data=val_gen,
    validation_steps=len(X_test)//32,
    callbacks=callbacks
)

  self._warn_if_super_not_called()


Epoch 1/25
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m147s[0m 948ms/step - accuracy: 0.7629 - loss: 0.5384 - val_accuracy: 0.7569 - val_loss: 0.5521 - learning_rate: 0.0010
Epoch 2/25
[1m  1/146[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:44[0m 718ms/step - accuracy: 0.7812 - loss: 0.5632



[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 279ms/step - accuracy: 0.7812 - loss: 0.5632 - val_accuracy: 0.7552 - val_loss: 0.5599 - learning_rate: 0.0010
Epoch 3/25
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m155s[0m 916ms/step - accuracy: 0.8935 - loss: 0.2629 - val_accuracy: 0.8394 - val_loss: 0.3990 - learning_rate: 0.0010
Epoch 4/25
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 164ms/step - accuracy: 0.8750 - loss: 0.2893 - val_accuracy: 0.8516 - val_loss: 0.3808 - learning_rate: 0.0010
Epoch 5/25
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 902ms/step - accuracy: 0.8902 - loss: 0.2651 - val_accuracy: 0.9080 - val_loss: 0.2451 - learning_rate: 0.0010
Epoch 6/25
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 280ms/step - accuracy: 0.8750 - loss: 0.2922 - val_accuracy: 0.9054 - val_loss: 0.2580 - learnin

In [None]:
# fine tune

base_model.trainable = True
for layer in base_model.layers[:-30]:
    layer.trainable = False

model.compile(optimizer=Adam(learning_rate=1e-4),
              loss='binary_crossentropy',
              metrics=['accuracy'])
history_fine = model.fit(
    train_gen,
    steps_per_epoch=len(X_train)//32,
    epochs=10,
    validation_data=val_gen,
    validation_steps=len(X_test)//32,
    callbacks=callbacks
)

Epoch 1/10
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m188s[0m 1s/step - accuracy: 0.8752 - loss: 0.3242 - val_accuracy: 0.8724 - val_loss: 0.3912 - learning_rate: 1.0000e-04
Epoch 2/10
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 273ms/step - accuracy: 0.8125 - loss: 0.6111 - val_accuracy: 0.8759 - val_loss: 0.3816 - learning_rate: 1.0000e-04
Epoch 3/10
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m259s[0m 1s/step - accuracy: 0.9118 - loss: 0.2484 - val_accuracy: 0.9375 - val_loss: 0.1739 - learning_rate: 1.0000e-04
Epoch 4/10
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 142ms/step - accuracy: 0.9688 - loss: 0.1254 - val_accuracy: 0.9392 - val_loss: 0.1685 - learning_rate: 1.0000e-04
Epoch 5/10
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 1s/step - accuracy: 0.9160 - loss: 0.2243 - val_accuracy: 0.9184 - val_loss: 0.2184 - learning_rate: 1.0000e-04
Epoch 6/10
[1m146/146[0m [32m━━━━━━━━━

In [None]:
# evaluate

X_test_norm = X_test.astype('float32') / 255.0
y_pred_prob = model.predict(X_test_norm).flatten()
y_pred = (y_pred_prob > 0.5).astype(int)

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=["NORMAL","PNEUMONIA"]))

[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 642ms/step
Confusion Matrix:
[[294  23]
 [ 34 822]]

Classification Report:
              precision    recall  f1-score   support

      NORMAL       0.90      0.93      0.91       317
   PNEUMONIA       0.97      0.96      0.97       856

    accuracy                           0.95      1173
   macro avg       0.93      0.94      0.94      1173
weighted avg       0.95      0.95      0.95      1173



In [None]:
# save model 

model.save("pneumonia_detection_model.h5")
print("\nModel saved to pneumonia_detection_model.h5")




Model saved to pneumonia_detection_model.h5
