# 🩺 Pneumonia Detection from Chest X-Ray Images
## Deep Learning with EfficientNetB0 Transfer Learning

This notebook trains a pneumonia detection model using chest X-ray images.

### Instructions:
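1. Have your dataset ZIP file (`archive.zip`) ready; you will be prompted to upload it in Section 2
2. Run all cells in order
3. Download the results ZIP (trained model, plots, and evaluation report) produced in Section 12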

## 1. Setup & Install Dependencies

In [None]:
# Report the TensorFlow build and the GPUs visible to this runtime
import tensorflow as tf

print(f"TensorFlow version: {tf.__version__}")
gpu_devices = tf.config.list_physical_devices('GPU')  # empty list when no GPU is attached
print(f"GPU available: {gpu_devices}")

In [None]:
# Import libraries
import os
import zipfile
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

from tensorflow import keras
from tensorflow.keras import layers, models, optimizers, callbacks
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, BatchNormalization, Input

from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

print("‚úÖ All libraries imported successfully!")

## 2. Upload & Extract Dataset

Upload your `archive.zip` file containing the chest X-ray dataset.

In [None]:
# Upload dataset
# This cell only works inside Google Colab (it imports `google.colab`).
# `uploaded` is consumed by the next cell, which treats its keys as the names
# of the uploaded files and opens the first one as a ZIP archive.
from google.colab import files
print("üì§ Please upload your archive.zip file:")
uploaded = files.upload()

In [None]:
# Extract dataset
# Fail early with a clear message if the upload cell produced nothing
# (e.g. the file picker was cancelled).
if not uploaded:
    raise RuntimeError(
        "No file was uploaded. Re-run the upload cell and select archive.zip."
    )

# The first uploaded file is taken to be the dataset archive.
zip_file = next(iter(uploaded))
print(f"üì¶ Extracting {zip_file}...")

with zipfile.ZipFile(zip_file, 'r') as zip_ref:
    zip_ref.extractall('dataset')

# Find the chest_xray folder: the first directory containing both 'train' and 'test'.
# DATA_DIR is reset first so a failed search cannot silently reuse a stale value
# left over from an earlier run of this cell.
DATA_DIR = None
for root, dirs, files_list in os.walk('dataset'):
    # Archives created on macOS carry a '__MACOSX' tree that mirrors the real
    # folder layout but holds only metadata stubs; prune it so it is never
    # chosen as the dataset root. Sorting makes the traversal deterministic.
    dirs[:] = sorted(d for d in dirs if d != '__MACOSX')
    if 'train' in dirs and 'test' in dirs:
        DATA_DIR = root
        break

if DATA_DIR is None:
    raise FileNotFoundError(
        f"Could not find a folder containing both 'train' and 'test' inside {zip_file}."
    )

print(f"‚úÖ Dataset extracted to: {DATA_DIR}")
print(f"\nüìÅ Contents:")
for item in sorted(os.listdir(DATA_DIR)):
    item_path = os.path.join(DATA_DIR, item)
    if os.path.isdir(item_path):
        # Count every file below the split folder, across all class sub-folders.
        count = sum(len(f) for r, d, f in os.walk(item_path))
        print(f"   {item}/: {count} images")

## 3. Configuration

In [None]:
# Configuration
IMG_SIZE = 224                      # images are resized to IMG_SIZE x IMG_SIZE
BATCH_SIZE = 32
EPOCHS = 20                         # maximum number of phase-1 (frozen base) epochs
LEARNING_RATE = 0.0001              # Adam learning rate for phase 1
CLASSES = ['NORMAL', 'PNEUMONIA']   # label 0 = NORMAL, label 1 = PNEUMONIA
SEED = 42

# Seed the Python, NumPy and TensorFlow random generators so that shuffling,
# augmentation and weight initialisation are repeatable across
# "Restart & Run All" executions.
tf.keras.utils.set_random_seed(SEED)

TRAIN_DIR = os.path.join(DATA_DIR, 'train')
VAL_DIR = os.path.join(DATA_DIR, 'val')
TEST_DIR = os.path.join(DATA_DIR, 'test')

print("‚öôÔ∏è Configuration:")
print(f"   Image Size: {IMG_SIZE}x{IMG_SIZE}")
print(f"   Batch Size: {BATCH_SIZE}")
print(f"   Epochs: {EPOCHS}")
print(f"   Learning Rate: {LEARNING_RATE}")
print(f"   Random Seed: {SEED}")

## 4. Data Visualization

In [None]:
# Visualize sample images: one row per class, up to four images per row
fig, axes = plt.subplots(2, 4, figsize=(16, 8))
fig.suptitle('Sample Chest X-Ray Images', fontsize=16, fontweight='bold')

# Only real image files are shown; stray entries such as '.DS_Store' would
# make plt.imread fail.
image_exts = ('.png', '.jpg', '.jpeg', '.bmp')

for idx, class_name in enumerate(CLASSES):
    class_dir = os.path.join(TRAIN_DIR, class_name)
    # Sort so the same four images are picked on every run.
    images = sorted(
        name for name in os.listdir(class_dir)
        if name.lower().endswith(image_exts)
    )[:4]

    for i, img_name in enumerate(images):
        img_path = os.path.join(class_dir, img_name)
        img = plt.imread(img_path)
        axes[idx, i].imshow(img, cmap='gray')
        axes[idx, i].set_title(class_name)

# Hide ticks and frames on every panel, including any left empty because a
# class folder holds fewer than four images.
for ax in axes.ravel():
    ax.axis('off')

plt.tight_layout()
plt.savefig('sample_images.png', dpi=150)
plt.show()

In [None]:
# Class distribution
def count_images(directory):
    """Return the number of image files directly inside `directory`.

    Only files with a common image extension are counted, so hidden or
    metadata files (e.g. '.DS_Store') do not inflate the totals.
    """
    image_exts = ('.png', '.jpg', '.jpeg', '.bmp')
    return sum(1 for name in os.listdir(directory) if name.lower().endswith(image_exts))


train_normal = count_images(os.path.join(TRAIN_DIR, 'NORMAL'))
train_pneumonia = count_images(os.path.join(TRAIN_DIR, 'PNEUMONIA'))
train_total = train_normal + train_pneumonia

# The percentages below divide by the total, so an empty training set must be
# reported explicitly instead of surfacing as a ZeroDivisionError.
if train_total == 0:
    raise ValueError(f"No training images found under {TRAIN_DIR}")

plt.figure(figsize=(8, 6))
plt.bar(CLASSES, [train_normal, train_pneumonia], color=['#2ecc71', '#e74c3c'])
plt.title('Training Set Class Distribution', fontsize=14, fontweight='bold')
plt.ylabel('Number of Images')
for i, v in enumerate([train_normal, train_pneumonia]):
    # Place the count just above the top of each bar.
    plt.text(i, v + 50, str(v), ha='center', fontweight='bold')
plt.tight_layout()
plt.savefig('class_distribution.png', dpi=150)
plt.show()

print(f"\nüìä Class Distribution:")
print(f"   NORMAL: {train_normal} images ({train_normal/train_total*100:.1f}%)")
print(f"   PNEUMONIA: {train_pneumonia} images ({train_pneumonia/train_total*100:.1f}%)")

## 5. Data Generators with Augmentation

In [None]:
# Random augmentations applied to training images only
augmentation_options = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    brightness_range=[0.8, 1.2],
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)

# Validation and test images are only rescaled, never augmented
val_test_datagen = ImageDataGenerator(rescale=1./255)

print("üìÅ Loading datasets...")

# Settings shared by all three directory iterators. Passing `classes`
# explicitly fixes the label order: NORMAL -> 0, PNEUMONIA -> 1.
flow_options = dict(
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='binary',
    classes=CLASSES,
)

# Only the training stream is shuffled; validation and test keep directory
# order so predictions can be aligned with `.classes` later.
train_generator = train_datagen.flow_from_directory(TRAIN_DIR, shuffle=True, **flow_options)
val_generator = val_test_datagen.flow_from_directory(VAL_DIR, shuffle=False, **flow_options)
test_generator = val_test_datagen.flow_from_directory(TEST_DIR, shuffle=False, **flow_options)

print(f"\n‚úÖ Data loaded:")
print(f"   Training: {train_generator.samples} images")
print(f"   Validation: {val_generator.samples} images")
print(f"   Test: {test_generator.samples} images")

## 6. Build EfficientNetB0 Model

In [None]:
def create_model():
    """Create EfficientNetB0-based model for pneumonia detection.

    The model takes images of shape (IMG_SIZE, IMG_SIZE, 3) with pixel values
    scaled to [0, 1] (as produced by the notebook's data generators) and
    outputs a single sigmoid probability for the positive class.

    Returns:
        tuple: ``(model, base_model)`` where ``model`` is the compiled
        classifier and ``base_model`` is the frozen EfficientNetB0 backbone,
        returned separately so it can be unfrozen for fine-tuning.
    """
    print("üèóÔ∏è Building EfficientNetB0 model...")

    # Load pre-trained EfficientNetB0 without its ImageNet classification head
    base_model = EfficientNetB0(
        weights='imagenet',
        include_top=False,
        input_shape=(IMG_SIZE, IMG_SIZE, 3)
    )

    # Freeze base model layers so only the new head is trained at first
    base_model.trainable = False

    inputs = Input(shape=(IMG_SIZE, IMG_SIZE, 3))

    # Keras EfficientNet models contain their own input preprocessing and
    # expect raw pixel values in [0, 255]. Every input path in this notebook
    # (the ImageDataGenerators and the single-image prediction cell) divides
    # pixels by 255, so map them back to [0, 255] here; otherwise the
    # pretrained backbone would see almost-black images.
    x = layers.Rescaling(255.0)(inputs)

    # training=False keeps the backbone's BatchNormalization layers in
    # inference mode, also later when the backbone is unfrozen.
    x = base_model(x, training=False)

    # Build classification head
    x = GlobalAveragePooling2D()(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    x = Dense(256, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.3)(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.2)(x)
    outputs = Dense(1, activation='sigmoid')(x)

    model = Model(inputs, outputs)

    model.compile(
        optimizer=optimizers.Adam(learning_rate=LEARNING_RATE),
        loss='binary_crossentropy',
        metrics=['accuracy',
                 tf.keras.metrics.Precision(name='precision'),
                 tf.keras.metrics.Recall(name='recall'),
                 tf.keras.metrics.AUC(name='auc')]
    )

    return model, base_model

# Build the network; the backbone handle is kept for the fine-tuning phase
model, base_model = create_model()

# Number of weights that will actually be updated while the backbone is frozen
trainable_count = sum(
    tf.keras.backend.count_params(weight) for weight in model.trainable_weights
)

print(f"\n‚úÖ Model created!")
print(f"   Total parameters: {model.count_params():,}")
print(f"   Trainable parameters: {trainable_count:,}")

In [None]:
# Model summary: print the layer-by-layer architecture of the classifier built above
model.summary()

## 7. Training - Phase 1 (Frozen Base)

In [None]:
# Balanced class weights for the imbalanced training set:
# weight = total_samples / (2 * samples_in_class), so the rarer class gets
# the larger weight.
total = train_generator.samples
class_counts = np.bincount(train_generator.classes)
class_weights = {label: total / (2 * class_counts[label]) for label in (0, 1)}
print(f"üìä Class weights: {class_weights}")

In [None]:
# Phase-1 callbacks

# Keep the weights of the epoch with the highest validation accuracy on disk
checkpoint_cb = callbacks.ModelCheckpoint(
    'pneumonia_model_best.keras',
    monitor='val_accuracy',
    save_best_only=True,
    mode='max',
    verbose=1,
)

# Stop once validation loss has not improved for 5 epochs
early_stopping_cb = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

# Multiply the learning rate by 0.2 after 3 stagnant epochs, down to 1e-7
reduce_lr_cb = callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=3,
    min_lr=1e-7,
    verbose=1,
)

callbacks_list = [checkpoint_cb, early_stopping_cb, reduce_lr_cb]

In [None]:
banner = "=" * 50
print("\n" + banner)
print("   PHASE 1: Training with Frozen Base Model")
print(banner + "\n")

# Train only the classification head; the EfficientNet backbone stays frozen.
phase1_fit_options = dict(
    epochs=EPOCHS,
    validation_data=val_generator,
    callbacks=callbacks_list,
    class_weight=class_weights,
    verbose=1,
)
history1 = model.fit(train_generator, **phase1_fit_options)

## 8. Training - Phase 2 (Fine-tuning)

In [None]:
banner = "=" * 50
print("\n" + banner)
print("   PHASE 2: Fine-tuning")
print(banner + "\n")

# Make the whole backbone trainable, then re-freeze everything below
# `fine_tune_at` so that only the top layers are fine-tuned.
fine_tune_at = 100
base_model.trainable = True
frozen_layers = base_model.layers[:fine_tune_at]
for layer in frozen_layers:
    layer.trainable = False

# Changing `trainable` only takes effect after compiling again; a lower
# learning rate is used for fine-tuning.
fine_tune_metrics = [
    'accuracy',
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
    tf.keras.metrics.AUC(name='auc'),
]
model.compile(
    optimizer=optimizers.Adam(learning_rate=1e-5),
    loss='binary_crossentropy',
    metrics=fine_tune_metrics,
)

trainable_params = sum(
    tf.keras.backend.count_params(weight) for weight in model.trainable_weights
)
print(f"üîì Trainable parameters after unfreezing: {trainable_params:,}")

In [None]:
# Phase-2 (fine-tuning) callbacks: same scheme as phase 1, but the checkpoint
# goes to its own file and the learning rate is reduced after 2 stagnant
# epochs instead of 3.
finetune_checkpoint_cb = callbacks.ModelCheckpoint(
    'pneumonia_model_finetuned.keras',
    monitor='val_accuracy',
    save_best_only=True,
    mode='max',
    verbose=1,
)
finetune_early_stopping_cb = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
    verbose=1,
)
finetune_reduce_lr_cb = callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=2,
    min_lr=1e-7,
    verbose=1,
)
callbacks_list2 = [
    finetune_checkpoint_cb,
    finetune_early_stopping_cb,
    finetune_reduce_lr_cb,
]

# Continue training with the top backbone layers unfrozen (at most 10 epochs)
phase2_fit_options = dict(
    epochs=10,
    validation_data=val_generator,
    callbacks=callbacks_list2,
    class_weight=class_weights,
    verbose=1,
)
history2 = model.fit(train_generator, **phase2_fit_options)

## 9. Training History Visualization

In [None]:
# Concatenate the per-epoch metrics of both phases into one timeline
history = {
    key: values + history2.history[key]
    for key, values in history1.history.items()
}

# One panel per metric: (axes position, history key, display label,
# whether to draw the phase-1 / phase-2 boundary marker)
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
panels = [
    (axes[0, 0], 'accuracy', 'Accuracy', True),
    (axes[0, 1], 'loss', 'Loss', True),
    (axes[1, 0], 'precision', 'Precision', False),
    (axes[1, 1], 'recall', 'Recall', False),
]

for ax, metric, label, mark_fine_tune in panels:
    ax.plot(history[metric], label='Train', linewidth=2)
    ax.plot(history[f'val_{metric}'], label='Validation', linewidth=2)
    if mark_fine_tune:
        # Dashed line at the last phase-1 epoch, where fine-tuning takes over
        ax.axvline(
            x=len(history1.history[metric]) - 1,
            color='r',
            linestyle='--',
            label='Fine-tune start',
        )
    ax.set_title(f'Model {label}', fontsize=14, fontweight='bold')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(label)
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('training_history.png', dpi=150)
plt.show()

## 10. Model Evaluation

In [None]:
banner = "=" * 50
print("\n" + banner)
print("   MODEL EVALUATION ON TEST SET")
print(banner + "\n")

# Predicted probabilities for the test set. The test generator is not
# shuffled, so predictions line up with `test_generator.classes`.
test_generator.reset()
predictions = model.predict(test_generator, verbose=1)
y_pred_proba = predictions.flatten()
y_pred = (y_pred_proba > 0.5).astype(int)  # probability above 0.5 -> class 1 (PNEUMONIA)
y_true = test_generator.classes

# Threshold-based metrics computed with scikit-learn
accuracy, precision, recall, f1 = (
    metric_fn(y_true, y_pred)
    for metric_fn in (accuracy_score, precision_score, recall_score, f1_score)
)

# Keras-side evaluation; returns the loss followed by the compiled metrics in order
test_loss, test_acc, test_precision, test_recall, test_auc = model.evaluate(
    test_generator, verbose=0
)

print("\n" + banner)
print("         FINAL RESULTS")
print(banner)
print(f"  ‚úÖ Accuracy:  {accuracy:.4f} ({accuracy*100:.2f}%)")
print(f"  ‚úÖ Precision: {precision:.4f} ({precision*100:.2f}%)")
print(f"  ‚úÖ Recall:    {recall:.4f} ({recall*100:.2f}%)")
print(f"  ‚úÖ F1-Score:  {f1:.4f} ({f1*100:.2f}%)")
print(f"  ‚úÖ AUC:       {test_auc:.4f}")
print(f"  ‚úÖ Loss:      {test_loss:.4f}")
print(banner)

In [None]:
# Per-class precision / recall / F1 table
report_text = classification_report(y_true, y_pred, target_names=CLASSES)
print("\nüìã Classification Report:")
print(report_text)

In [None]:
# Confusion matrix: rows are true labels, columns are predicted labels
cm = confusion_matrix(y_true, y_pred)

heatmap_options = dict(
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=CLASSES,
    yticklabels=CLASSES,
    annot_kws={'size': 20},
)

plt.figure(figsize=(10, 8))
sns.heatmap(cm, **heatmap_options)
plt.title('Confusion Matrix', fontsize=16, fontweight='bold')
plt.xlabel('Predicted Label', fontsize=14)
plt.ylabel('True Label', fontsize=14)
plt.tight_layout()
plt.savefig('confusion_matrix.png', dpi=150)
plt.show()

# Row-major unpacking of the 2x2 matrix, with NORMAL as the negative class
true_neg, false_pos, false_neg, true_pos = cm.ravel()

print(f"\nüìä Confusion Matrix Analysis:")
print(f"   True Negatives (Normal ‚Üí Normal): {true_neg}")
print(f"   False Positives (Normal ‚Üí Pneumonia): {false_pos}")
print(f"   False Negatives (Pneumonia ‚Üí Normal): {false_neg}")
print(f"   True Positives (Pneumonia ‚Üí Pneumonia): {true_pos}")

In [None]:
# ROC curve computed from the predicted probabilities (threshold-independent)
fpr, tpr, _ = roc_curve(y_true, y_pred_proba)
roc_auc = auc(fpr, tpr)
curve_label = f'ROC curve (AUC = {roc_auc:.4f})'

plt.figure(figsize=(10, 8))
plt.plot(fpr, tpr, color='darkorange', lw=2, label=curve_label)
# Diagonal reference line
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')

plt.title('Receiver Operating Characteristic (ROC) Curve', fontsize=16, fontweight='bold')
plt.xlabel('False Positive Rate', fontsize=14)
plt.ylabel('True Positive Rate', fontsize=14)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.legend(loc='lower right', fontsize=12)
plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('roc_curve.png', dpi=150)
plt.show()

## 11. Save Final Model

In [None]:
# Persist the final model in both the native Keras format and HDF5
saved_model_paths = ['pneumonia_model_final.keras', 'pneumonia_model_final.h5']
for model_path in saved_model_paths:
    model.save(model_path)

print("‚úÖ Model saved as:")
for model_path in saved_model_paths:
    print(f"   - {model_path}")

In [None]:
# Write the test-set metrics and the classification report to a text file
heavy_rule = "=" * 60
light_rule = "-" * 40

with open('evaluation_report.txt', 'w') as f:
    f.writelines([
        # Header
        heavy_rule + "\n",
        "    PNEUMONIA DETECTION MODEL - EVALUATION REPORT\n",
        heavy_rule + "\n\n",
        f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n",
        # Headline metrics
        "PERFORMANCE METRICS\n",
        light_rule + "\n",
        f"Accuracy:  {accuracy:.4f} ({accuracy*100:.2f}%)\n",
        f"Precision: {precision:.4f} ({precision*100:.2f}%)\n",
        f"Recall:    {recall:.4f} ({recall*100:.2f}%)\n",
        f"F1-Score:  {f1:.4f} ({f1*100:.2f}%)\n",
        f"AUC:       {test_auc:.4f}\n",
        f"Loss:      {test_loss:.4f}\n\n",
        # Per-class breakdown
        "CLASSIFICATION REPORT\n",
        light_rule + "\n",
        classification_report(y_true, y_pred, target_names=CLASSES),
    ])

print("‚úÖ Evaluation report saved!")

## 12. Download Results

In [None]:
# Bundle every artefact produced by the notebook into one archive
import shutil

results_files = [
    'pneumonia_model_final.keras',
    'pneumonia_model_final.h5',
    'confusion_matrix.png',
    'roc_curve.png',
    'training_history.png',
    'sample_images.png',
    'class_distribution.png',
    'evaluation_report.txt'
]

# Artefacts that were never created (e.g. a skipped cell) are silently left out
available_results = [path for path in results_files if os.path.exists(path)]

with zipfile.ZipFile('pneumonia_detection_results.zip', 'w') as zipf:
    for result_path in available_results:
        zipf.write(result_path)

print("üì¶ Results packaged!")
print("\nüì• Downloading results...")
# `files` is the google.colab helper imported in the upload cell
files.download('pneumonia_detection_results.zip')

## 13. Test Prediction on Sample Image

In [None]:
# Test on a random image from test set
from tensorflow.keras.preprocessing import image
import random

# Pick a random class, then a random image of that class.
# Only real image files are candidates: stray entries such as '.DS_Store'
# would make load_img fail. Sorting gives a stable candidate order.
image_exts = ('.png', '.jpg', '.jpeg', '.bmp')
test_class = random.choice(CLASSES)
test_images = sorted(
    name for name in os.listdir(os.path.join(TEST_DIR, test_class))
    if name.lower().endswith(image_exts)
)
if not test_images:
    raise FileNotFoundError(
        f"No image files found in {os.path.join(TEST_DIR, test_class)}"
    )
test_img_name = random.choice(test_images)
test_img_path = os.path.join(TEST_DIR, test_class, test_img_name)

# Load and preprocess exactly like the data generators:
# resize to IMG_SIZE x IMG_SIZE and scale pixels to [0, 1]
img = image.load_img(test_img_path, target_size=(IMG_SIZE, IMG_SIZE))
img_array = image.img_to_array(img)
img_array = np.expand_dims(img_array, axis=0) / 255.0  # add the batch dimension

# Predict: the sigmoid output is the probability of class 1 (PNEUMONIA)
prediction = float(model.predict(img_array, verbose=0)[0][0])

if prediction > 0.5:
    result = "PNEUMONIA"
    confidence = prediction
else:
    result = "NORMAL"
    confidence = 1 - prediction  # probability of the NORMAL class

# Display the image at its original resolution
plt.figure(figsize=(8, 8))
plt.imshow(image.load_img(test_img_path), cmap='gray')
plt.title(f"Prediction: {result} ({confidence:.1%})\nActual: {test_class}", fontsize=14, fontweight='bold')
plt.axis('off')
plt.tight_layout()
plt.show()

print(f"\nüîç Sample Prediction:")
print(f"   Image: {test_img_name}")
print(f"   Actual Label: {test_class}")
print(f"   Predicted: {result}")
print(f"   Confidence: {confidence:.2%}")
print(f"   Correct: {'‚úÖ YES' if result == test_class else '‚ùå NO'}")

---
## 🎉 Training Complete!

Your trained model and results have been downloaded. The zip file contains:
- `pneumonia_model_final.keras` - Trained model (Keras format)
- `pneumonia_model_final.h5` - Trained model (H5 format)
- `confusion_matrix.png` - Confusion matrix visualization
- `roc_curve.png` - ROC curve plot
- `training_history.png` - Training metrics over epochs
- `sample_images.png` - Sample X-ray images from each class
- `class_distribution.png` - Training set class distribution
- `evaluation_report.txt` - Detailed metrics report