
# Pneumonia Detection using Deep Learning (Chest X-Ray)

**Disclaimer:** This notebook is for **research and educational purposes only**.  
It is **not approved for clinical use**. Do not use the models trained here for live patient care without rigorous validation and regulatory approval.

We will use the **Kaggle Chest X-Ray Pneumonia dataset**:
- [Kaggle Dataset Link](https://www.kaggle.com/paultimothymooney/chest-xray-pneumonia)



## Dataset Source

The dataset consists of chest X-ray images from pediatric patients, each labeled as one of two classes:
- **NORMAL** (healthy lungs)  
- **PNEUMONIA** (bacterial or viral infection)  

Dataset structure after extraction:
```
chest_xray/
    train/
        PNEUMONIA/
        NORMAL/
    val/
        PNEUMONIA/
        NORMAL/
    test/
        PNEUMONIA/
        NORMAL/
```


### Importing Libraries

In [None]:

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve

import joblib
from pathlib import Path


### Loading Dataset

In [None]:

# Change this path after downloading and unzipping the dataset from Kaggle
DATASET_PATH = Path("chest_xray")

train_dir = DATASET_PATH / "train"
val_dir = DATASET_PATH / "val"
test_dir = DATASET_PATH / "test"


def list_class_dirs(split_dir):
    """Return the sub-directories of ``split_dir`` sorted by name.

    Only real directories are returned, so stray files sitting next to the
    class folders (for example ``.DS_Store``) are not reported as classes
    and are never listed as if they were directories.
    """
    return sorted((p for p in split_dir.iterdir() if p.is_dir()), key=lambda p: p.name)


def count_files(class_dir):
    """Count the regular, non-hidden files directly inside ``class_dir``."""
    return sum(
        1 for p in class_dir.iterdir()
        if p.is_file() and not p.name.startswith(".")
    )


print("Train folders:", [p.name for p in list_class_dirs(train_dir)])
print("Val folders:", [p.name for p in list_class_dirs(val_dir)])
print("Test folders:", [p.name for p in list_class_dirs(test_dir)])

# Count images per class
for split in [train_dir, val_dir, test_dir]:
    print(f"\nCounts in {split.name}:")
    for class_dir in list_class_dirs(split):
        n_images = count_files(class_dir)
        print(f"  {class_dir.name}: {n_images}")


### Exploratory Data Analysis (EDA)

In [None]:

import random
from tensorflow.keras.preprocessing import image

# Show sample images: SAMPLES_PER_CLASS random X-rays per class, so that
# every panel of the 1x4 grid is used.
SAMPLES_PER_CLASS = 2
sample_labels = ['NORMAL', 'PNEUMONIA']

fig, axes = plt.subplots(1, len(sample_labels) * SAMPLES_PER_CLASS, figsize=(12,4))
for ax in axes:
    # Hide ticks/frames up front so a slot left empty does not show a bare axis.
    ax.axis('off')

for i, label in enumerate(sample_labels):
    candidates = list((train_dir/label).glob("*.jpeg"))
    # Never ask for more images than the folder contains.
    chosen = random.sample(candidates, k=min(SAMPLES_PER_CLASS, len(candidates)))
    for j, img_path in enumerate(chosen):
        img = image.load_img(img_path, target_size=(150,150))
        ax = axes[i*SAMPLES_PER_CLASS + j]
        ax.imshow(img, cmap='gray')
        ax.set_title(label)
plt.show()

# Class distribution plot
# Only real sub-directories count as classes, and hidden files are skipped,
# so stray entries such as ".DS_Store" neither crash the listing nor inflate
# the counts.
train_class_dirs = sorted(p for p in train_dir.iterdir() if p.is_dir())
counts = {
    class_dir.name: sum(
        1 for f in class_dir.iterdir()
        if f.is_file() and not f.name.startswith(".")
    )
    for class_dir in train_class_dirs
}

fig, ax = plt.subplots()
sns.barplot(x=list(counts.keys()), y=list(counts.values()), ax=ax)
ax.set_title("Class distribution in Training set")
ax.set_xlabel("Class")
ax.set_ylabel("Number of images")
plt.show()


### Data Preprocessing & Augmentation

In [None]:

# Spatial size every image is resized to, and the mini-batch size.
IMG_SIZE = (224,224)
BATCH_SIZE = 16

# Mild geometric augmentation, applied to the training split only.
augmentation_kwargs = dict(
    rotation_range=10,
    width_shift_range=0.05,
    height_shift_range=0.05,
    shear_range=0.05,
    zoom_range=0.1,
    horizontal_flip=True,
)
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_kwargs)

# Validation and test images are only rescaled to [0, 1].
val_datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by all three directory iterators.
flow_kwargs = dict(target_size=IMG_SIZE, batch_size=BATCH_SIZE, class_mode='categorical')

train_gen = train_datagen.flow_from_directory(train_dir, **flow_kwargs)
val_gen = val_datagen.flow_from_directory(val_dir, **flow_kwargs)
# shuffle=False keeps test predictions aligned with test_gen.classes.
test_gen = val_datagen.flow_from_directory(test_dir, shuffle=False, **flow_kwargs)

# Mapping from class folder name to integer label, as assigned by Keras.
class_indices = train_gen.class_indices
print("Class indices:", class_indices)


### Building Transfer Learning Model (DenseNet121)

In [None]:

from tensorflow.keras.applications import DenseNet121

# Build the ImageNet-pretrained backbone at the same resolution the data
# generators produce, so changing IMG_SIZE cannot desynchronise the two.
base_model = DenseNet121(weights='imagenet', include_top=False,
                         input_shape=tuple(IMG_SIZE) + (3,))

# Freeze everything except the last 10 backbone layers; only those and the
# new classification head below are updated during training.
for layer in base_model.layers[:-10]:
    layer.trainable = False

# Classification head: pooled features -> dropout -> dense -> batch norm -> softmax.
x = layers.GlobalAveragePooling2D()(base_model.output)
x = layers.Dropout(0.5)(x)
x = layers.Dense(128, activation='relu')(x)
x = layers.BatchNormalization()(x)
# Two output units, matching the one-hot labels from class_mode='categorical'.
output = layers.Dense(2, activation='softmax')(x)

model = models.Model(inputs=base_model.input, outputs=output)

# The metric is named 'auc' so that callbacks can monitor it as 'val_auc'.
model.compile(optimizer=tf.keras.optimizers.Adam(1e-4),
              loss='categorical_crossentropy',
              metrics=['accuracy', tf.keras.metrics.AUC(name='auc')])

model.summary()


### Training the Model

In [None]:

# Every callback tracks validation AUC, where a larger value is better.
monitor_kwargs = dict(monitor='val_auc', mode='max')

callbacks = [
    # Keep the best weights seen so far on disk.
    tf.keras.callbacks.ModelCheckpoint("best_model.h5", save_best_only=True, **monitor_kwargs),
    # Stop after 5 epochs without improvement and roll back to the best epoch.
    tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True, **monitor_kwargs),
    # Halve the learning rate after 3 epochs without improvement.
    tf.keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=3, **monitor_kwargs),
]

history = model.fit(train_gen, validation_data=val_gen, epochs=10, callbacks=callbacks)

# Plot training curves
# Each entry: (figure title, train key, train legend label, val key, val legend label)
curve_specs = [
    ("Accuracy", 'accuracy', 'train_acc', 'val_accuracy', 'val_acc'),
    ("Loss", 'loss', 'train_loss', 'val_loss', 'val_loss'),
]
for title, train_key, train_label, val_key, val_label in curve_specs:
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.legend()
    plt.title(title)
    plt.show()


### Model Evaluation

In [None]:

# Predictions on test set
# test_gen was created with shuffle=False, so the rows of y_pred_probs line up
# with test_gen.classes.
y_pred_probs = model.predict(test_gen)
y_pred = np.argmax(y_pred_probs, axis=1)
y_true = np.asarray(test_gen.classes)

# Class names ordered by their integer label, rather than relying on the
# insertion order of the class_indices dict.
class_names = sorted(class_indices, key=class_indices.get)
label_ids = list(range(len(class_names)))

# Passing labels= keeps the report and matrix well-formed even if one class
# never appears in the predictions.
print(classification_report(y_true, y_pred, labels=label_ids, target_names=class_names))

cm = confusion_matrix(y_true, y_pred, labels=label_ids)
fig, ax = plt.subplots()
sns.heatmap(cm, annot=True, fmt='d', xticklabels=class_names, yticklabels=class_names, ax=ax)
# confusion_matrix puts true labels on rows and predicted labels on columns.
ax.set_xlabel("Predicted label")
ax.set_ylabel("True label")
ax.set_title("Confusion Matrix")
plt.show()

# Look up the positive-class column by name instead of hard-coding index 1.
pneumonia_idx = class_indices['PNEUMONIA']
auc = roc_auc_score((y_true == pneumonia_idx).astype(int), y_pred_probs[:, pneumonia_idx])
print("Test ROC AUC:", auc)


### Explainability with Grad-CAM

In [None]:

import cv2

def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    """Compute a Grad-CAM heatmap for the first image in ``img_array``.

    Args:
        img_array: Batch of preprocessed images fed to ``model``. Only the
            first sample's activations are used for the returned map, so a
            batch of exactly one image is expected.
        model: Keras model whose prediction is being explained.
        last_conv_layer_name: Name of the layer in ``model`` whose output
            feature maps are weighted to build the heatmap.
        pred_index: Index of the output unit to explain. When ``None``, the
            unit with the highest score for the first sample is used.

    Returns:
        NumPy array holding the rectified, max-normalised heatmap at the
        spatial resolution of the chosen layer. Values lie in ``[0, 1]``;
        the map is all zeros when no location contributes positively.
    """
    # model.inputs is already a list; wrapping it in another list creates a
    # nested input structure that newer Keras versions warn about or reject.
    grad_model = tf.keras.models.Model(
        model.inputs,
        [model.get_layer(last_conv_layer_name).output, model.output],
    )

    with tf.GradientTape() as tape:
        # Explicit inference mode so the explanation matches model.predict.
        conv_outputs, predictions = grad_model(img_array, training=False)
        if pred_index is None:
            pred_index = tf.argmax(predictions[0])
        class_channel = predictions[:, pred_index]

    # Gradient of the selected score w.r.t. the feature maps, averaged over
    # batch and spatial axes to obtain one importance weight per channel.
    grads = tape.gradient(class_channel, conv_outputs)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # Channel-weighted sum of the first sample's feature maps.
    conv_outputs = conv_outputs[0]
    heatmap = conv_outputs @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)

    # Rectify first, then normalise by the rectified maximum. Dividing by the
    # raw maximum could divide by a non-positive number when the whole map is
    # negative.
    heatmap = tf.maximum(heatmap, 0)
    heatmap = heatmap / (tf.math.reduce_max(heatmap) + 1e-8)
    return heatmap.numpy()

# Example usage
# Sort the matches so the same image is picked on every run and machine
# (glob order depends on the filesystem), and fail with a clear message if
# the folder holds no matching images.
pneumonia_paths = sorted((test_dir/'PNEUMONIA').glob('*.jpeg'))
if not pneumonia_paths:
    raise FileNotFoundError(f"No .jpeg images found in {test_dir/'PNEUMONIA'}")
img_path = pneumonia_paths[0]

# Same preprocessing as the generators: resize to IMG_SIZE and scale to [0, 1].
img = image.load_img(img_path, target_size=IMG_SIZE)
arr = image.img_to_array(img)/255.0
arr = np.expand_dims(arr, axis=0)  # add the batch dimension

heatmap = make_gradcam_heatmap(arr, model, 'conv5_block16_concat')

plt.imshow(img)
# cv2.resize takes dsize as (width, height) whereas IMG_SIZE follows the Keras
# (height, width) convention, so the tuple is reversed.
plt.imshow(cv2.resize(heatmap, IMG_SIZE[::-1]), cmap='jet', alpha=0.5)
plt.title("Grad-CAM")
plt.axis('off')
plt.show()


### Save Model & Preprocessor

In [None]:

# Save final model
model.save("output_pneumonia_model.h5")

# Save preprocessing info
# Everything needed to turn a raw image into a model input is stored next to
# the model, so inference does not have to re-hardcode it.
preprocessor = {
    'target_size': IMG_SIZE,
    'color_mode': 'rgb',
    # Must match the rescale factor passed to the ImageDataGenerators.
    'rescale': 1./255,
    'class_indices': class_indices
}
joblib.dump(preprocessor, "output_preprocessor.pkl")

print("Artifacts saved: output_pneumonia_model.h5 and output_preprocessor.pkl")


### Inference Demo

In [None]:

loaded_model = tf.keras.models.load_model("output_pneumonia_model.h5")
# NOTE: joblib.load unpickles the file; only load artifacts you produced or trust.
loaded_preproc = joblib.load("output_preprocessor.pkl")

# Test on the first NORMAL test image (sorted so the choice is reproducible)
normal_paths = sorted((test_dir/'NORMAL').glob('*.jpeg'))
if not normal_paths:
    raise FileNotFoundError(f"No .jpeg images found in {test_dir/'NORMAL'}")
sample_path = normal_paths[0]

img = image.load_img(sample_path, target_size=loaded_preproc['target_size'], color_mode=loaded_preproc['color_mode'])
# Use the stored rescale factor when the artifact has one; otherwise fall back
# to the 1/255 scaling used at training time.
arr = image.img_to_array(img) * loaded_preproc.get('rescale', 1./255)
arr = np.expand_dims(arr, axis=0)  # add the batch dimension

pred = loaded_model.predict(arr)

# Map integer labels back to class names explicitly instead of relying on the
# key order of the class_indices dict.
saved_class_indices = loaded_preproc['class_indices']
index_to_label = {idx: name for name, idx in saved_class_indices.items()}

prob = float(pred[0, saved_class_indices['PNEUMONIA']])
label = index_to_label[int(np.argmax(pred[0]))]

print("Prediction:", label, "Probability Pneumonia:", prob)
