In [None]:
# Core libraries
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Deep learning libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, optimizers, callbacks
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.applications import VGG16, ResNet50
from tensorflow.keras.utils import to_categorical

# Computer vision and image processing
import cv2
from PIL import Image
import albumentations as A

# Machine learning utilities
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
from sklearn.model_selection import train_test_split

# Set random seeds for reproducibility.
# Only NumPy's legacy global RNG and TensorFlow's global seed are fixed here;
# Python's built-in `random` module is not seeded by this cell.
np.random.seed(42)
tf.random.set_seed(42)

# Check GPU availability: print the TensorFlow version and the list of
# physical GPU devices TensorFlow can see.
print(f"TensorFlow version: {tf.__version__}")
print(f"GPU Available: {tf.config.list_physical_devices('GPU')}")

In [None]:
# Dataset configuration
# DATASET_PATH is a placeholder and must be edited before running; the three
# split directories below are derived from it by joining 'train', 'val', 'test'.
DATASET_PATH = '/path/to/chest_xray'  # Update this path
TRAIN_DIR = os.path.join(DATASET_PATH, 'train')
VAL_DIR = os.path.join(DATASET_PATH, 'val')
TEST_DIR = os.path.join(DATASET_PATH, 'test')

# Model parameters
IMG_SIZE = (224, 224)   # (height, width) passed as target_size to the image loaders
BATCH_SIZE = 32         # batch size used by every data generator
EPOCHS = 50             # upper bound on training epochs passed to fit()
LEARNING_RATE = 0.001   # default learning rate used by compile_model()

# Class names
# Each name is also used as a sub-directory name when exploring the dataset.
CLASS_NAMES = ['NORMAL', 'PNEUMONIA']
NUM_CLASSES = len(CLASS_NAMES)

# Create results directory
# 'results/models' receives model checkpoints, 'results/plots' receives figures.
# exist_ok=True makes this cell safe to re-run.
RESULTS_DIR = 'results'
os.makedirs(RESULTS_DIR, exist_ok=True)
os.makedirs(os.path.join(RESULTS_DIR, 'models'), exist_ok=True)
os.makedirs(os.path.join(RESULTS_DIR, 'plots'), exist_ok=True)

# Echo the effective configuration so it is recorded in the notebook output.
print(f"Train directory: {TRAIN_DIR}")
print(f"Validation directory: {VAL_DIR}")
print(f"Test directory: {TEST_DIR}")
print(f"Image size: {IMG_SIZE}")
print(f"Classes: {CLASS_NAMES}")

In [None]:
def explore_dataset(data_dir, split_name):
    """Print and return the per-class entry counts of one dataset split.

    Args:
        data_dir: Directory expected to hold one sub-directory per entry of
            the module-level ``CLASS_NAMES``.
        split_name: Label for the split; printed upper-cased in the header.

    Returns:
        dict mapping class name -> number of directory entries found in that
        class folder. Classes whose folder does not exist are omitted.
    """
    header = split_name.upper()
    print(f"\n=== {header} SET ANALYSIS ===")

    tally = {}
    grand_total = 0

    for label in CLASS_NAMES:
        folder = os.path.join(data_dir, label)
        if not os.path.exists(folder):
            # Missing class folders are skipped silently.
            continue
        # Every directory entry is counted, whatever its type.
        n_entries = len(os.listdir(folder))
        tally[label] = n_entries
        grand_total += n_entries
        print(f"{label}: {n_entries} images")

    print(f"Total images: {grand_total}")

    # Percentages are only meaningful when at least one entry was found.
    if grand_total <= 0:
        return tally

    for label, n_entries in tally.items():
        share = (n_entries / grand_total) * 100
        print(f"{label}: {share:.1f}%")

    return tally

# Explore all splits
# Each call prints a per-class summary and returns a {class_name: count} dict.
train_counts = explore_dataset(TRAIN_DIR, 'train')
val_counts = explore_dataset(VAL_DIR, 'validation')
test_counts = explore_dataset(TEST_DIR, 'test')

# Visualize class distribution
# One bar chart per split, laid out side by side.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
splits = ['Train', 'Validation', 'Test']
counts = [train_counts, val_counts, test_counts]

for i, (split, count_dict) in enumerate(zip(splits, counts)):
    # An empty dict (no class folder found for this split) leaves the panel blank.
    if count_dict:
        classes = list(count_dict.keys())
        values = list(count_dict.values())
        
        bars = axes[i].bar(classes, values, color=['skyblue', 'lightcoral'])
        axes[i].set_title(f'{split} Set Distribution')
        axes[i].set_ylabel('Number of Images')
        
        # Add value labels on bars
        # The label is centred horizontally and placed just above the bar top.
        for bar, value in zip(bars, values):
            axes[i].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.1,
                        str(value), ha='center', va='bottom')

# Save the figure to the plots directory before showing it.
plt.tight_layout()
plt.savefig(os.path.join(RESULTS_DIR, 'plots', 'class_distribution.png'), dpi=300, bbox_inches='tight')
plt.show()

In [None]:
def display_sample_images(data_dir, class_names, samples_per_class=3):
    """Display a grid of sample images, one row per class.

    Args:
        data_dir: Directory containing one sub-directory per class.
        class_names: Class sub-directory names; one grid row is drawn per name.
        samples_per_class: Number of grid columns, i.e. the maximum number of
            images shown for each class.

    Side effects:
        Saves the figure to ``RESULTS_DIR/plots/sample_images.png`` and shows it.
    """
    # File extensions treated as images; anything else in a class folder
    # (hidden files, sub-directories, ...) is ignored instead of crashing load_img.
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.ppm', '.tif', '.tiff')

    # squeeze=False always yields a 2-D axes array, so axes[i, j] is valid even
    # with a single class or a single sample per class.
    fig, axes = plt.subplots(len(class_names), samples_per_class,
                             figsize=(15, len(class_names) * 4),
                             squeeze=False)

    # Hide every panel up front so cells left without an image stay blank.
    for ax in axes.ravel():
        ax.axis('off')

    for i, class_name in enumerate(class_names):
        class_path = os.path.join(data_dir, class_name)
        if not os.path.isdir(class_path):
            continue

        # Sort so the same images are shown on every run (os.listdir order is
        # arbitrary), then keep the first `samples_per_class` of them.
        image_files = sorted(
            name for name in os.listdir(class_path)
            if name.lower().endswith(image_extensions)
        )[:samples_per_class]

        for j, img_file in enumerate(image_files):
            img_path = os.path.join(class_path, img_file)
            img = load_img(img_path, target_size=IMG_SIZE)

            axes[i, j].imshow(img)
            axes[i, j].set_title(f'{class_name}\n{img_file}')

    plt.suptitle('Sample Images from Dataset', fontsize=16)
    plt.tight_layout()
    plt.savefig(os.path.join(RESULTS_DIR, 'plots', 'sample_images.png'), dpi=300, bbox_inches='tight')
    plt.show()

# Display sample images
# Shows up to four training images for each class in CLASS_NAMES.
display_sample_images(TRAIN_DIR, CLASS_NAMES, samples_per_class=4)

In [None]:
def analyze_image_statistics(data_dir, class_names, num_samples=100):
    """Collect size and grayscale-intensity statistics for a sample of images.

    Args:
        data_dir: Directory containing one sub-directory per class.
        class_names: Class sub-directory names to inspect.
        num_samples: Maximum number of directory entries read per class.

    Returns:
        pandas.DataFrame with one row per successfully read image and the
        columns ``widths``, ``heights``, ``mean_intensities``,
        ``std_intensities`` and ``classes``.
    """
    column_names = ('widths', 'heights', 'mean_intensities',
                    'std_intensities', 'classes')
    collected = {column: [] for column in column_names}

    for label in class_names:
        folder = os.path.join(data_dir, label)
        if not os.path.exists(folder):
            continue

        for entry in os.listdir(folder)[:num_samples]:
            full_path = os.path.join(folder, entry)
            try:
                pixels = cv2.imread(full_path, cv2.IMREAD_GRAYSCALE)
                if pixels is None:
                    # Entries for which imread returns None are skipped.
                    continue
                n_rows, n_cols = pixels.shape[0], pixels.shape[1]
                collected['widths'].append(n_cols)
                collected['heights'].append(n_rows)
                collected['mean_intensities'].append(np.mean(pixels))
                collected['std_intensities'].append(np.std(pixels))
                collected['classes'].append(label)
            except Exception as err:
                print(f"Error processing {full_path}: {err}")

    return pd.DataFrame(collected)

# Analyze image statistics
# Reads at most 200 directory entries per class from the training split.
stats_df = analyze_image_statistics(TRAIN_DIR, CLASS_NAMES, num_samples=200)

# Visualize statistics
# 2x2 grid: dimensions histogram, two per-class box plots, and a scatter plot.
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

# Image dimensions
# Width and height histograms are overlaid on the same axes.
axes[0, 0].hist(stats_df['widths'], bins=30, alpha=0.7, label='Width')
axes[0, 0].hist(stats_df['heights'], bins=30, alpha=0.7, label='Height')
axes[0, 0].set_title('Image Dimensions Distribution')
axes[0, 0].set_xlabel('Pixels')
axes[0, 0].set_ylabel('Frequency')
axes[0, 0].legend()

# Mean intensity by class
sns.boxplot(data=stats_df, x='classes', y='mean_intensities', ax=axes[0, 1])
axes[0, 1].set_title('Mean Intensity Distribution by Class')
axes[0, 1].set_ylabel('Mean Pixel Intensity')

# Standard deviation by class
sns.boxplot(data=stats_df, x='classes', y='std_intensities', ax=axes[1, 0])
axes[1, 0].set_title('Pixel Intensity Std Distribution by Class')
axes[1, 0].set_ylabel('Std of Pixel Intensity')

# Intensity scatter plot
# One scatter series per class so the legend separates the classes.
for class_name in CLASS_NAMES:
    class_data = stats_df[stats_df['classes'] == class_name]
    axes[1, 1].scatter(class_data['mean_intensities'], class_data['std_intensities'], 
                      alpha=0.6, label=class_name)
axes[1, 1].set_title('Mean vs Std Intensity')
axes[1, 1].set_xlabel('Mean Intensity')
axes[1, 1].set_ylabel('Std Intensity')
axes[1, 1].legend()

# Save the combined figure to the plots directory, then display it.
plt.tight_layout()
plt.savefig(os.path.join(RESULTS_DIR, 'plots', 'image_statistics.png'), dpi=300, bbox_inches='tight')
plt.show()

# Print summary statistics
# Per-class aggregates of the sampled images, rounded to two decimals.
print("\nImage Statistics Summary:")
print(stats_df.groupby('classes').agg({
    'widths': ['mean', 'std', 'min', 'max'],
    'heights': ['mean', 'std', 'min', 'max'],
    'mean_intensities': ['mean', 'std'],
    'std_intensities': ['mean', 'std']
}).round(2))

In [None]:
def create_baseline_generators():
    """Create train/validation/test generators with minimal augmentation (baseline).

    Validation data comes from ``VAL_DIR`` when that directory exists and is
    non-empty. Only otherwise is 20% of ``TRAIN_DIR`` held out for validation;
    previously the 20% was removed from training in both cases, and the
    held-out fallback images were augmented.

    Returns:
        tuple ``(train_generator, validation_generator, test_generator)`` of
        directory iterators yielding ``class_mode='binary'`` batches.
    """
    # Decide once where validation data comes from, so the training generator
    # only gives up part of TRAIN_DIR when it is really needed.
    has_val_dir = os.path.isdir(VAL_DIR) and len(os.listdir(VAL_DIR)) > 0
    holdout_fraction = 0.0 if has_val_dir else 0.2

    # Baseline: only rescaling and minimal augmentation
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=5,
        width_shift_range=0.05,
        height_shift_range=0.05,
        horizontal_flip=True,
        validation_split=holdout_fraction
    )

    # Validation and test: only rescaling, no augmentation
    val_test_datagen = ImageDataGenerator(rescale=1./255)

    train_generator = train_datagen.flow_from_directory(
        TRAIN_DIR,
        target_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='binary',
        # Use the full training directory when a separate validation set exists.
        subset=None if has_val_dir else 'training',
        seed=42,
        shuffle=True
    )

    if has_val_dir:
        validation_generator = val_test_datagen.flow_from_directory(
            VAL_DIR,
            target_size=IMG_SIZE,
            batch_size=BATCH_SIZE,
            class_mode='binary',
            shuffle=False
        )
    else:
        # Hold out part of TRAIN_DIR, but read it through a rescale-only
        # generator so validation images are not augmented.
        # NOTE(review): this relies on subset membership depending only on
        # validation_split and the directory listing, so the same fraction
        # selects the complement of the 'training' subset — confirm against
        # the installed Keras version.
        holdout_datagen = ImageDataGenerator(
            rescale=1./255,
            validation_split=holdout_fraction
        )
        validation_generator = holdout_datagen.flow_from_directory(
            TRAIN_DIR,
            target_size=IMG_SIZE,
            batch_size=BATCH_SIZE,
            class_mode='binary',
            subset='validation',
            seed=42,
            shuffle=False
        )

    test_generator = val_test_datagen.flow_from_directory(
        TEST_DIR,
        target_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='binary',
        shuffle=False
    )

    return train_generator, validation_generator, test_generator

# Create baseline generators
# The three generators are kept as notebook-level names and reused by the
# training and evaluation cells further down.
train_gen_baseline, val_gen_baseline, test_gen_baseline = create_baseline_generators()

# Report how many images each generator found and the class-name -> index mapping.
print(f"Training samples: {train_gen_baseline.samples}")
print(f"Validation samples: {val_gen_baseline.samples}")
print(f"Test samples: {test_gen_baseline.samples}")
print(f"Class indices: {train_gen_baseline.class_indices}")

In [None]:
def create_strong_augmentation_generators():
    """Create train/validation/test generators with strong training augmentation.

    Validation data comes from ``VAL_DIR`` when that directory exists and is
    non-empty. Only otherwise is 20% of ``TRAIN_DIR`` held out for validation;
    previously the 20% was removed from training in both cases, and the
    held-out fallback images were augmented.

    Returns:
        tuple ``(train_generator, validation_generator, test_generator)`` of
        directory iterators yielding ``class_mode='binary'`` batches.
    """
    # Decide once where validation data comes from, so the training generator
    # only gives up part of TRAIN_DIR when it is really needed.
    has_val_dir = os.path.isdir(VAL_DIR) and len(os.listdir(VAL_DIR)) > 0
    holdout_fraction = 0.0 if has_val_dir else 0.2

    # Strong augmentation for training
    train_datagen_strong = ImageDataGenerator(
        rescale=1./255,
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.15,
        zoom_range=0.15,
        horizontal_flip=True,
        brightness_range=[0.8, 1.2],
        channel_shift_range=20,
        fill_mode='nearest',
        validation_split=holdout_fraction
    )

    # Validation and test: only rescaling, no augmentation
    val_test_datagen = ImageDataGenerator(rescale=1./255)

    train_generator = train_datagen_strong.flow_from_directory(
        TRAIN_DIR,
        target_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='binary',
        # Use the full training directory when a separate validation set exists.
        subset=None if has_val_dir else 'training',
        seed=42,
        shuffle=True
    )

    if has_val_dir:
        validation_generator = val_test_datagen.flow_from_directory(
            VAL_DIR,
            target_size=IMG_SIZE,
            batch_size=BATCH_SIZE,
            class_mode='binary',
            shuffle=False
        )
    else:
        # Hold out part of TRAIN_DIR, but read it through a rescale-only
        # generator so validation images are not augmented.
        # NOTE(review): this relies on subset membership depending only on
        # validation_split and the directory listing, so the same fraction
        # selects the complement of the 'training' subset — confirm against
        # the installed Keras version.
        holdout_datagen = ImageDataGenerator(
            rescale=1./255,
            validation_split=holdout_fraction
        )
        validation_generator = holdout_datagen.flow_from_directory(
            TRAIN_DIR,
            target_size=IMG_SIZE,
            batch_size=BATCH_SIZE,
            class_mode='binary',
            subset='validation',
            seed=42,
            shuffle=False
        )

    test_generator = val_test_datagen.flow_from_directory(
        TEST_DIR,
        target_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='binary',
        shuffle=False
    )

    return train_generator, validation_generator, test_generator

# Create strong augmentation generators
# Kept as notebook-level names; test_gen_strong is used by the evaluation cell.
train_gen_strong, val_gen_strong, test_gen_strong = create_strong_augmentation_generators()

# Report how many images each generator found.
print(f"Strong augmentation - Training samples: {train_gen_strong.samples}")
print(f"Strong augmentation - Validation samples: {val_gen_strong.samples}")
print(f"Strong augmentation - Test samples: {test_gen_strong.samples}")

In [None]:
def visualize_augmentations(generator, num_samples=8):
    """Visualize the effect of data augmentation on one batch.

    Args:
        generator: Iterator whose ``next()`` yields ``(images, labels)``;
            one batch is consumed from it. Labels above 0.5 are titled
            "PNEUMONIA", the rest "NORMAL".
        num_samples: Maximum number of images to draw. The grid grows to fit
            it instead of being fixed at 2x4, so values above 8 no longer
            raise IndexError.

    Returns:
        The matplotlib Figure holding the image grid (it is not saved or shown
        here; the caller does that).
    """
    # Get a batch of augmented images
    batch_x, batch_y = next(generator)

    # Size the grid from the number of images actually drawn: four columns,
    # as many rows as needed (the default of 8 still gives a 2x4, 16x8 figure).
    n_show = min(num_samples, len(batch_x))
    n_cols = 4
    n_rows = max(1, -(-n_show // n_cols))  # ceiling division

    fig, axes = plt.subplots(n_rows, n_cols, figsize=(16, 4 * n_rows), squeeze=False)
    axes = axes.ravel()

    # Hide every panel first so unused cells stay blank.
    for ax in axes:
        ax.axis('off')

    for i in range(n_show):
        img = batch_x[i]
        label = "PNEUMONIA" if batch_y[i] > 0.5 else "NORMAL"

        axes[i].imshow(img)
        axes[i].set_title(f'{label}')

    plt.suptitle('Augmented Training Images', fontsize=16)
    plt.tight_layout()
    return fig

# Visualize baseline augmentation
# visualize_augmentations() pulls one batch from the generator and returns the
# figure; plt.savefig then writes the current figure to disk.
print("Baseline Augmentation Examples:")
fig1 = visualize_augmentations(train_gen_baseline)
plt.savefig(os.path.join(RESULTS_DIR, 'plots', 'baseline_augmentation.png'), dpi=300, bbox_inches='tight')
plt.show()

# Visualize strong augmentation
# Same procedure with the strongly augmented training generator.
print("\nStrong Augmentation Examples:")
fig2 = visualize_augmentations(train_gen_strong)
plt.savefig(os.path.join(RESULTS_DIR, 'plots', 'strong_augmentation.png'), dpi=300, bbox_inches='tight')
plt.show()

In [None]:
def create_custom_cnn(input_shape=(224, 224, 3), num_classes=1):
    """
    Build the custom CNN used for chest X-ray classification.

    Architecture: four double-convolution blocks (32, 64, 128, 256 filters),
    one single-convolution block (512 filters), global average pooling, two
    dense layers (512, 256) and a sigmoid output layer.

    Args:
        input_shape: Shape of one input image, passed to the first Conv2D layer.
        num_classes: Number of units in the sigmoid output layer.

    Returns:
        An uncompiled Keras Sequential model.
    """
    stack = []

    # Convolutional blocks 1-4: Conv -> BatchNorm -> Conv -> MaxPool -> Dropout.
    # Only the very first convolution carries the input shape.
    for position, n_filters in enumerate((32, 64, 128, 256)):
        if position == 0:
            opening_conv = layers.Conv2D(n_filters, (3, 3), activation='relu',
                                         input_shape=input_shape)
        else:
            opening_conv = layers.Conv2D(n_filters, (3, 3), activation='relu')
        stack.append(opening_conv)
        stack.append(layers.BatchNormalization())
        stack.append(layers.Conv2D(n_filters, (3, 3), activation='relu'))
        stack.append(layers.MaxPooling2D((2, 2)))
        stack.append(layers.Dropout(0.25))

    # Convolutional block 5: a single convolution before pooling.
    stack.append(layers.Conv2D(512, (3, 3), activation='relu'))
    stack.append(layers.BatchNormalization())
    stack.append(layers.MaxPooling2D((2, 2)))
    stack.append(layers.Dropout(0.25))

    # Classifier head: global average pooling followed by two dense stages.
    stack.append(layers.GlobalAveragePooling2D())
    for n_units in (512, 256):
        stack.append(layers.Dense(n_units, activation='relu'))
        stack.append(layers.BatchNormalization())
        stack.append(layers.Dropout(0.5))

    # Output layer (sigmoid for binary classification)
    stack.append(layers.Dense(num_classes, activation='sigmoid'))

    return models.Sequential(stack)

# Create model instance
# This instance is only used for inspection (summary and diagram); the
# training cell below builds its own model.
model_architecture = create_custom_cnn(input_shape=(*IMG_SIZE, 3), num_classes=1)

# Display model summary
model_architecture.summary()

# Visualize model architecture
# Writes a top-to-bottom layer diagram to the plots directory.
# NOTE(review): plot_model presumably needs pydot and Graphviz installed —
# confirm they are available in the target environment.
tf.keras.utils.plot_model(
    model_architecture, 
    to_file=os.path.join(RESULTS_DIR, 'plots', 'model_architecture.png'),
    show_shapes=True,
    show_layer_names=True,
    rankdir='TB',
    dpi=200
)

In [None]:
def get_callbacks(model_name):
    """Build the list of Keras callbacks used during training.

    Args:
        model_name: Prefix for the checkpoint file
            (``RESULTS_DIR/models/<model_name>_best.h5``) and the CSV log
            (``RESULTS_DIR/<model_name>_training_log.csv``).

    Returns:
        list of callbacks in this order: ModelCheckpoint, ReduceLROnPlateau,
        EarlyStopping, CSVLogger.
    """
    checkpoint_path = os.path.join(RESULTS_DIR, 'models', f'{model_name}_best.h5')
    log_path = os.path.join(RESULTS_DIR, f'{model_name}_training_log.csv')

    # Keep the full model (not just weights) with the best validation accuracy.
    best_model_saver = callbacks.ModelCheckpoint(
        filepath=checkpoint_path,
        monitor='val_accuracy',
        save_best_only=True,
        save_weights_only=False,
        verbose=1
    )

    # Halve the learning rate after 5 epochs without validation-loss improvement.
    lr_scheduler = callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=5,
        min_lr=1e-7,
        verbose=1
    )

    # Stop after 10 epochs without validation-loss improvement and roll back
    # to the best weights seen.
    early_stopper = callbacks.EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True,
        verbose=1
    )

    # Per-epoch metrics are written to a CSV file.
    csv_logger = callbacks.CSVLogger(log_path)

    return [best_model_saver, lr_scheduler, early_stopper, csv_logger]

# Compile model function
def compile_model(model, learning_rate=LEARNING_RATE):
    """Compile the model for binary classification.

    Uses the Adam optimizer, binary cross-entropy loss, and accuracy,
    precision and recall metrics.

    Args:
        model: Keras model to compile (compiled in place).
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        The same model instance, compiled.
    """
    model.compile(
        optimizer=optimizers.Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=[
            'accuracy',
            # Explicit metric objects instead of the 'precision'/'recall'
            # string aliases: they do not depend on alias resolution, and the
            # fixed names pin the history keys ('precision', 'val_precision',
            # 'recall', 'val_recall') that the plotting code reads.
            tf.keras.metrics.Precision(name='precision'),
            tf.keras.metrics.Recall(name='recall'),
        ]
    )

    return model

# Calculate class weights to handle imbalance
def calculate_class_weights(generator):
    """Calculate 'balanced' class weights for an imbalanced dataset.

    Each class gets ``n_samples / (n_classes * class_count)``, the same
    formula as scikit-learn's ``class_weight='balanced'``, so rarer classes
    receive larger weights.

    Args:
        generator: Object exposing ``classes``, an array-like of integer class
            labels (one per sample).

    Returns:
        dict mapping each label actually present in ``generator.classes``
        (as ``int``) to its weight (as ``float``). Empty if there are no labels.
    """
    # Get all labels
    labels = np.asarray(generator.classes)

    # Count samples per distinct label; `present_labels` is sorted ascending.
    present_labels, label_counts = np.unique(labels, return_counts=True)
    weights = labels.size / (present_labels.size * label_counts.astype(np.float64))

    # Key by the real label value rather than by position, so the mapping stays
    # correct even when the labels are not exactly 0..k-1.
    class_weight_dict = {
        int(label): float(weight)
        for label, weight in zip(present_labels, weights)
    }
    print(f"Class weights: {class_weight_dict}")

    return class_weight_dict

# Calculate class weights for baseline training
# Derived from the labels of the baseline training generator; passed to fit()
# as `class_weight` in the training cell.
class_weights = calculate_class_weights(train_gen_baseline)

In [None]:
# Create and compile baseline model
model_baseline = create_custom_cnn(input_shape=(*IMG_SIZE, 3), num_classes=1)
model_baseline = compile_model(model_baseline)

# Get callbacks for baseline model
# The name determines the checkpoint file: results/models/baseline_model_best.h5
baseline_callbacks = get_callbacks('baseline_model')

# Train baseline model
print("Training Baseline Model (Minimal Augmentation)...")
print("=" * 50)

# Runs for at most EPOCHS epochs; the EarlyStopping callback may end it sooner.
# class_weight re-weights the loss per class to compensate for class imbalance.
history_baseline = model_baseline.fit(
    train_gen_baseline,
    epochs=EPOCHS,
    validation_data=val_gen_baseline,
    callbacks=baseline_callbacks,
    class_weight=class_weights,
    verbose=1
)

# Save training history
# The history is a dict, so np.save stores it as a pickled object array;
# reading it back requires np.load(..., allow_pickle=True).
np.save(os.path.join(RESULTS_DIR, 'baseline_history.npy'), history_baseline.history)
print("Baseline model training completed!")

In [None]:
def plot_training_history(history, model_name, save_path=None):
    """Plot training and validation curves for four metrics in a 2x2 grid.

    Args:
        history: Mapping of metric name -> per-epoch values. Must contain
            'accuracy', 'loss', 'precision', 'recall' and their 'val_' variants.
        model_name: Text used as the prefix of every panel title.
        save_path: Optional file path; when given, the figure is saved there.
    """
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))

    # (history key, display name) for each panel, in row-major grid order.
    metric_panels = (
        ('accuracy', 'Accuracy'),
        ('loss', 'Loss'),
        ('precision', 'Precision'),
        ('recall', 'Recall'),
    )

    for panel, (key, display_name) in zip(axes.ravel(), metric_panels):
        panel.plot(history[key], label=f'Training {display_name}')
        panel.plot(history[f'val_{key}'], label=f'Validation {display_name}')
        panel.set_title(f'{model_name} - {display_name}')
        panel.set_xlabel('Epoch')
        panel.set_ylabel(display_name)
        panel.legend()
        panel.grid(True)

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
    plt.show()

# Plot baseline model training history
print("Baseline Model Training History:")
plot_training_history(
    history_baseline.history, 
    'Baseline Model (Minimal Augmentation)',
    os.path.join(RESULTS_DIR, 'plots', 'baseline_training_history.png')
)

# Plot strong augmentation model training history
# NOTE(review): `history_strong` is not assigned in any cell visible above this
# one (only `history_baseline` is). Unless a strong-augmentation training cell
# exists elsewhere, this call raises NameError on a fresh "Restart & Run All".
print("\nStrong Augmentation Model Training History:")
plot_training_history(
    history_strong.history, 
    'Strong Augmentation Model',
    os.path.join(RESULTS_DIR, 'plots', 'strong_augmentation_training_history.png')
)

In [None]:
def evaluate_model(model, test_generator, model_name):
    """Evaluate a binary classifier on the test set and print a full report.

    Args:
        model: Compiled Keras model whose ``evaluate`` returns, in order,
            loss, accuracy, precision and recall.
        test_generator: Non-shuffled generator exposing ``reset()`` and
            ``classes`` (the true labels, in iteration order).
        model_name: Name printed in the report header.

    Returns:
        dict with keys 'predictions', 'predicted_classes', 'true_classes',
        'accuracy', 'precision', 'recall', 'f1', 'loss', 'confusion_matrix',
        'sensitivity' and 'specificity'. Ratios with a zero denominator are
        reported as 0.0 instead of raising or producing NaN.
    """

    def _safe_ratio(numerator, denominator):
        # Degenerate cases (e.g. no positive predictions or an absent class)
        # would otherwise divide by zero.
        return numerator / denominator if denominator else 0.0

    print(f"\n{'='*50}")
    print(f"EVALUATING {model_name.upper()}")
    print(f"{'='*50}")

    # Reset generator so predictions line up with test_generator.classes
    test_generator.reset()

    # Get predictions; probabilities above 0.5 are labelled as class 1
    predictions = model.predict(test_generator, verbose=1)
    predicted_classes = (predictions > 0.5).astype(int).flatten()
    true_classes = test_generator.classes

    # Calculate metrics
    test_loss, test_accuracy, test_precision, test_recall = model.evaluate(
        test_generator, verbose=0
    )

    # Calculate F1 score (harmonic mean of precision and recall)
    f1 = _safe_ratio(2 * (test_precision * test_recall), test_precision + test_recall)

    print(f"\nTest Metrics:")
    print(f"Accuracy: {test_accuracy:.4f}")
    print(f"Precision: {test_precision:.4f}")
    print(f"Recall: {test_recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Loss: {test_loss:.4f}")

    # Classification report
    # Passing the label ids explicitly keeps target_names aligned even if one
    # class never occurs in the true or predicted labels.
    print("\nClassification Report:")
    print(classification_report(
        true_classes, predicted_classes,
        labels=list(range(len(CLASS_NAMES))),
        target_names=CLASS_NAMES
    ))

    # Confusion matrix
    # labels=[0, 1] guarantees a 2x2 matrix, so the four-way unpack below
    # cannot fail when only one class is present.
    cm = confusion_matrix(true_classes, predicted_classes, labels=[0, 1])

    # Calculate additional metrics from confusion matrix
    tn, fp, fn, tp = cm.ravel()
    sensitivity = _safe_ratio(tp, tp + fn)  # Recall/True Positive Rate
    specificity = _safe_ratio(tn, tn + fp)  # True Negative Rate
    false_positive_rate = _safe_ratio(fp, fp + tn)
    false_negative_rate = _safe_ratio(fn, fn + tp)

    print(f"\nAdditional Metrics:")
    print(f"Sensitivity (True Positive Rate): {sensitivity:.4f}")
    print(f"Specificity (True Negative Rate): {specificity:.4f}")
    print(f"False Positive Rate: {false_positive_rate:.4f}")
    print(f"False Negative Rate: {false_negative_rate:.4f}")

    return {
        'predictions': predictions,
        'predicted_classes': predicted_classes,
        'true_classes': true_classes,
        'accuracy': test_accuracy,
        'precision': test_precision,
        'recall': test_recall,
        'f1': f1,
        'loss': test_loss,
        'confusion_matrix': cm,
        'sensitivity': sensitivity,
        'specificity': specificity
    }

def plot_confusion_matrix(cm, class_names, model_name, save_path=None):
    """Draw a confusion matrix as an annotated heatmap.

    Args:
        cm: Confusion matrix of integer counts (rows: true, columns: predicted).
        class_names: Tick labels used on both axes.
        model_name: Name appended to the figure title.
        save_path: Optional file path; when given, the figure is saved there.
    """
    fig, ax = plt.subplots(figsize=(8, 6))

    heatmap_options = dict(
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_names,
        yticklabels=class_names,
    )
    sns.heatmap(cm, ax=ax, **heatmap_options)

    ax.set_title(f'Confusion Matrix - {model_name}')
    ax.set_xlabel('Predicted Label')
    ax.set_ylabel('True Label')

    if save_path:
        fig.savefig(save_path, dpi=300, bbox_inches='tight')
    plt.show()

def plot_roc_curve(true_classes, predictions, model_name, save_path=None):
    """Plot the ROC curve and return the area under it.

    Args:
        true_classes: True binary labels.
        predictions: Predicted scores/probabilities for the positive class.
        model_name: Name appended to the figure title.
        save_path: Optional file path; when given, the figure is saved there.

    Returns:
        The area under the ROC curve.
    """
    false_pos_rate, true_pos_rate, _ = roc_curve(true_classes, predictions)
    roc_auc = auc(false_pos_rate, true_pos_rate)

    fig, ax = plt.subplots(figsize=(8, 6))

    # Model curve, plus the diagonal as a visual reference line.
    ax.plot(false_pos_rate, true_pos_rate, color='darkorange', lw=2,
            label=f'ROC curve (AUC = {roc_auc:.4f})')
    ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')

    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title(f'ROC Curve - {model_name}')
    ax.legend(loc="lower right")
    ax.grid(True)

    if save_path:
        fig.savefig(save_path, dpi=300, bbox_inches='tight')
    plt.show()

    return roc_auc

In [None]:
# Load best baseline model
# This is the checkpoint written by the ModelCheckpoint callback created via
# get_callbacks('baseline_model'), not the in-memory `model_baseline`.
model_baseline_best = tf.keras.models.load_model(
    os.path.join(RESULTS_DIR, 'models', 'baseline_model_best.h5')
)

# Evaluate baseline model
# Returns a dict of predictions and metrics reused by the plots below and by
# the comparison cell.
baseline_results = evaluate_model(model_baseline_best, test_gen_baseline, 'Baseline Model')

# Plot confusion matrix for baseline
plot_confusion_matrix(
    baseline_results['confusion_matrix'], 
    CLASS_NAMES,
    'Baseline Model',
    os.path.join(RESULTS_DIR, 'plots', 'baseline_confusion_matrix.png')
)

# Plot ROC curve for baseline
# The returned AUC is kept for the model comparison table.
baseline_auc = plot_roc_curve(
    baseline_results['true_classes'],
    baseline_results['predictions'],
    'Baseline Model',
    os.path.join(RESULTS_DIR, 'plots', 'baseline_roc_curve.png')
)

In [None]:
# Load best strong augmentation model
# NOTE(review): no cell visible above trains a strong-augmentation model or
# calls get_callbacks('strong_augmentation_model'), so this checkpoint file is
# presumably produced by a cell that is missing here — confirm it exists before
# running, otherwise load_model fails.
model_strong_best = tf.keras.models.load_model(
    os.path.join(RESULTS_DIR, 'models', 'strong_augmentation_model_best.h5')
)

# Evaluate strong augmentation model
# Returns a dict of predictions and metrics reused by the plots below and by
# the comparison cell.
strong_results = evaluate_model(model_strong_best, test_gen_strong, 'Strong Augmentation Model')

# Plot confusion matrix for strong augmentation
plot_confusion_matrix(
    strong_results['confusion_matrix'], 
    CLASS_NAMES,
    'Strong Augmentation Model',
    os.path.join(RESULTS_DIR, 'plots', 'strong_augmentation_confusion_matrix.png')
)

# Plot ROC curve for strong augmentation
# The returned AUC is kept for the model comparison table.
strong_auc = plot_roc_curve(
    strong_results['true_classes'],
    strong_results['predictions'],
    'Strong Augmentation Model',
    os.path.join(RESULTS_DIR, 'plots', 'strong_augmentation_roc_curve.png')
)

In [None]:
def create_comparison_table(baseline_results, strong_results, baseline_auc, strong_auc):
    """Build a side-by-side metric table for the two models.

    Args:
        baseline_results: Mapping with keys 'accuracy', 'precision', 'recall',
            'f1', 'sensitivity', 'specificity' and 'loss' for the baseline model.
        strong_results: Same keys, for the strong-augmentation model.
        baseline_auc: AUC of the baseline model.
        strong_auc: AUC of the strong-augmentation model.

    Returns:
        pandas.DataFrame with columns 'Metric', 'Baseline Model' and
        'Strong Augmentation Model'; values are strings with four decimals.
    """
    # (row label, results key); AUC has no key because it is passed separately.
    row_spec = (
        ('Accuracy', 'accuracy'),
        ('Precision', 'precision'),
        ('Recall', 'recall'),
        ('F1 Score', 'f1'),
        ('Sensitivity', 'sensitivity'),
        ('Specificity', 'specificity'),
        ('AUC', None),
        ('Loss', 'loss'),
    )

    def _formatted_column(results, auc_value):
        cells = []
        for _, key in row_spec:
            value = auc_value if key is None else results[key]
            cells.append(f"{value:.4f}")
        return cells

    table = {
        'Metric': [row_label for row_label, _ in row_spec],
        'Baseline Model': _formatted_column(baseline_results, baseline_auc),
        'Strong Augmentation Model': _formatted_column(strong_results, strong_auc),
    }

    return pd.DataFrame(table)

# Create comparison table
# Values in the table are pre-formatted strings with four decimals.
comparison_df = create_comparison_table(baseline_results, strong_results, baseline_auc, strong_auc)
print("MODEL COMPARISON RESULTS")
print("=" * 60)
print(comparison_df.to_string(index=False))

# Save comparison to CSV
comparison_df.to_csv(os.path.join(RESULTS_DIR, 'model_comparison.csv'), index=False)

# Visualize comparison
# The numeric values are taken from the result dicts again, in the same order
# as `metrics_to_plot`, because the table holds strings.
metrics_to_plot = ['Accuracy', 'Precision', 'Recall', 'F1 Score', 'AUC']
baseline_values = [
    baseline_results['accuracy'], baseline_results['precision'], 
    baseline_results['recall'], baseline_results['f1'], baseline_auc
]
strong_values = [
    strong_results['accuracy'], strong_results['precision'], 
    strong_results['recall'], strong_results['f1'], strong_auc
]

# Grouped bar chart: each metric gets two bars, offset by half a bar width
# to either side of its tick position.
x = np.arange(len(metrics_to_plot))
width = 0.35

fig, ax = plt.subplots(figsize=(12, 8))
bars1 = ax.bar(x - width/2, baseline_values, width, label='Baseline Model', alpha=0.8)
bars2 = ax.bar(x + width/2, strong_values, width, label='Strong Augmentation Model', alpha=0.8)

ax.set_xlabel('Metrics')
ax.set_ylabel('Score')
ax.set_title('Model Performance Comparison')
ax.set_xticks(x)
ax.set_xticklabels(metrics_to_plot)
ax.legend()
ax.grid(True, alpha=0.3)

# Add value labels on bars
# Helper that writes each bar's height (three decimals) just above the bar.
# It draws on the notebook-level `ax` created above, not on an argument.
def add_value_labels(bars):
    for bar in bars:
        height = bar.get_height()
        ax.annotate(f'{height:.3f}',
                   xy=(bar.get_x() + bar.get_width() / 2, height),
                   xytext=(0, 3),  # 3 points vertical offset
                   textcoords="offset points",
                   ha='center', va='bottom', fontsize=9)

add_value_labels(bars1)
add_value_labels(bars2)

# Save the chart to the plots directory, then display it.
plt.tight_layout()
plt.savefig(os.path.join(RESULTS_DIR, 'plots', 'model_comparison.png'), dpi=300, bbox_inches='tight')
plt.show()

In [None]:
def plot_learning_curves_comparison():
    """Plot learning curves of both models in a 2x2 grid.

    Reads the notebook-level ``history_baseline`` and ``history_strong``
    training histories. Top row: validation accuracy and validation loss of
    both models. Bottom row: train vs. validation accuracy for each model.
    The figure is saved to ``RESULTS_DIR/plots/learning_curves_comparison.png``
    and shown.
    """
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))

    baseline_log = history_baseline.history
    strong_log = history_strong.history

    # One entry per panel: (axes, [(history, key, legend label), ...], title, y label)
    panel_specs = [
        (axes[0, 0],
         [(baseline_log, 'val_accuracy', 'Baseline'),
          (strong_log, 'val_accuracy', 'Strong Aug')],
         'Validation Accuracy Comparison', 'Accuracy'),
        (axes[0, 1],
         [(baseline_log, 'val_loss', 'Baseline'),
          (strong_log, 'val_loss', 'Strong Aug')],
         'Validation Loss Comparison', 'Loss'),
        (axes[1, 0],
         [(baseline_log, 'accuracy', 'Train'),
          (baseline_log, 'val_accuracy', 'Val')],
         'Baseline Model - Train vs Val Accuracy', 'Accuracy'),
        (axes[1, 1],
         [(strong_log, 'accuracy', 'Train'),
          (strong_log, 'val_accuracy', 'Val')],
         'Strong Aug Model - Train vs Val Accuracy', 'Accuracy'),
    ]

    for panel, curves, title, y_label in panel_specs:
        for log, key, legend_label in curves:
            panel.plot(log[key], label=legend_label, linewidth=2)
        panel.set_title(title)
        panel.set_xlabel('Epoch')
        panel.set_ylabel(y_label)
        panel.legend()
        panel.grid(True, alpha=0.3)

    plt.tight_layout()
    output_file = os.path.join(RESULTS_DIR, 'plots', 'learning_curves_comparison.png')
    plt.savefig(output_file, dpi=300, bbox_inches='tight')
    plt.show()

# Draw and save the learning-curve comparison; needs both `history_baseline`
# and `history_strong` to exist at notebook level.
plot_learning_curves_comparison()

In [None]:
def analyze_predictions(model, test_generator, model_name, num_examples=12):
    """Score the full test set and plot sample correct / incorrect predictions.

    Args:
        model: Binary classifier; its ``predict`` output is thresholded at 0.5
            (class 1 when the score is above 0.5).
        test_generator: Directory iterator exposing ``reset()``, ``classes``,
            ``filenames`` and ``directory``.
            NOTE(review): predictions are matched to ``classes`` by position,
            which presumably requires an unshuffled generator - confirm.
        model_name: Label used in the printed summary, the figure title and
            the file name of the saved plot.
        num_examples: Total number of example images to draw. Half of them go
            to the two "correct" columns and half to the two "incorrect"
            columns (the default 12 gives the usual 3x4 grid).

    Returns:
        dict with ``correct_indices``, ``incorrect_indices``,
        ``confidence_scores`` and ``filenames`` for further inspection.
    """

    # Rewind so the prediction order lines up with .classes / .filenames
    test_generator.reset()

    # Get predictions for the entire test set
    predictions = model.predict(test_generator, verbose=1)
    probabilities = predictions.flatten()
    predicted_classes = (probabilities > 0.5).astype(int)
    true_classes = test_generator.classes
    filenames = test_generator.filenames

    # Split the test set into hits and misses
    correct_mask = predicted_classes == true_classes
    incorrect_mask = ~correct_mask
    correct_indices = np.where(correct_mask)[0]
    incorrect_indices = np.where(incorrect_mask)[0]

    # Confidence is the probability assigned to whichever class was predicted
    confidence_scores = np.where(predicted_classes == 1, probabilities, 1 - probabilities)

    print(f"\n{model_name} - Prediction Analysis:")
    print(f"Total predictions: {len(predictions)}")
    print(f"Correct predictions: {np.sum(correct_mask)}")
    print(f"Incorrect predictions: {np.sum(incorrect_mask)}")
    print(f"Accuracy: {np.sum(correct_mask) / len(predictions):.4f}")

    # Two columns per side; the row count follows from the requested examples
    per_side = max(1, num_examples // 2)
    n_rows = -(-per_side // 2)  # ceiling division
    fig, axes = plt.subplots(n_rows, 4, figsize=(16, 4 * n_rows), squeeze=False)

    # Hide every axis up front so cells left without an image stay blank
    for ax in axes.ravel():
        ax.axis('off')

    def _pick(indices):
        """Randomly sample up to ``per_side`` indices without replacement."""
        if len(indices) >= per_side:
            return np.random.choice(indices, per_side, replace=False)
        return indices

    def _draw(selected, col_offset, marker):
        """Draw the selected test images into the two columns at ``col_offset``."""
        for i, idx in enumerate(selected):
            ax = axes[i // 2, i % 2 + col_offset]

            img_path = os.path.join(test_generator.directory, filenames[idx])
            ax.imshow(load_img(img_path, target_size=IMG_SIZE))

            true_label = CLASS_NAMES[true_classes[idx]]
            pred_label = CLASS_NAMES[predicted_classes[idx]]
            confidence = confidence_scores[idx]
            ax.set_title(f'{marker} True: {true_label}\nPred: {pred_label}\nConf: {confidence:.3f}')

    # Sample correct first, then incorrect, so seeded runs draw the same images
    selected_correct = _pick(correct_indices)
    selected_incorrect = _pick(incorrect_indices)

    _draw(selected_correct, 0, '✓')
    _draw(selected_incorrect, 2, '✗')

    # Add column headers
    axes[0, 0].text(-0.1, 1.1, 'CORRECT PREDICTIONS', transform=axes[0, 0].transAxes, 
                   fontsize=14, fontweight='bold', ha='center')
    axes[0, 2].text(0.1, 1.1, 'INCORRECT PREDICTIONS', transform=axes[0, 2].transAxes, 
                   fontsize=14, fontweight='bold', ha='center')

    plt.suptitle(f'{model_name} - Prediction Examples', fontsize=16)
    plt.tight_layout()
    plt.savefig(os.path.join(RESULTS_DIR, 'plots', f'{model_name.lower().replace(" ", "_")}_predictions.png'), 
                dpi=300, bbox_inches='tight')
    plt.show()

    return {
        'correct_indices': correct_indices,
        'incorrect_indices': incorrect_indices,
        'confidence_scores': confidence_scores,
        'filenames': filenames
    }

# Prediction analysis on the test set: baseline model
baseline_analysis = analyze_predictions(
    model=model_baseline_best,
    test_generator=test_gen_baseline,
    model_name='Baseline Model',
)

# Prediction analysis on the test set: strong-augmentation model
strong_analysis = analyze_predictions(
    model=model_strong_best,
    test_generator=test_gen_strong,
    model_name='Strong Augmentation Model',
)

In [None]:
def visualize_feature_maps(model, test_generator, layer_names=None, num_images=2):
    """Plot the first channel of intermediate feature maps for a few test images.

    One 2x3 figure is drawn per image, with one panel per layer (at most six
    layers). Each figure is saved as ``feature_maps_img_<n>.png`` under
    RESULTS_DIR/plots; the file name does not include the model, so a later
    call for another model overwrites the earlier files.

    Args:
        model: Keras model whose intermediate layer outputs are inspected.
        test_generator: Iterator yielding ``(images, labels)`` batches; images
            are taken from its first batch.
        layer_names: Names of the layers to visualize. Defaults to the first
            six layers whose name contains ``'conv2d'``.
        num_images: Number of images from the first batch to visualize.
    """

    if layer_names is None:
        layer_names = [layer.name for layer in model.layers if 'conv2d' in layer.name]

    # The figure has six panels, so extra layers would never be drawn
    layer_names = list(layer_names)[:6]
    if not layer_names:
        print("No convolutional layer found!")
        return

    # Create a model that outputs feature maps
    feature_map_model = tf.keras.Model(
        inputs=model.input,
        outputs=[model.get_layer(name).output for name in layer_names]
    )

    # Get some test images (labels are not needed here)
    test_generator.reset()
    batch_x, _ = next(test_generator)

    for img_idx in range(min(num_images, len(batch_x))):
        img = batch_x[img_idx:img_idx+1]  # Keep batch dimension
        feature_maps = feature_map_model.predict(img, verbose=0)

        # With a single output layer Keras may return a bare array instead of
        # a list; normalise so the zip below always pairs layer <-> feature map.
        if not isinstance(feature_maps, (list, tuple)):
            feature_maps = [feature_maps]

        fig, axes = plt.subplots(2, 3, figsize=(15, 10))
        axes = axes.ravel()

        # Hide every axis first so panels without a layer stay blank
        for ax in axes:
            ax.axis('off')

        for ax, layer_name, feature_map in zip(axes, layer_names, feature_maps):
            # Only the first channel of each layer is shown
            ax.imshow(feature_map[0, :, :, 0], cmap='viridis')
            ax.set_title(f'{layer_name}\nShape: {feature_map.shape[1:]}')

        plt.suptitle(f'Feature Maps - Image {img_idx + 1}')
        plt.tight_layout()
        plt.savefig(os.path.join(RESULTS_DIR, 'plots', f'feature_maps_img_{img_idx + 1}.png'), 
                    dpi=300, bbox_inches='tight')
        plt.show()
        # Release the figure so figures do not accumulate across iterations
        plt.close(fig)

# Feature-map inspection is run for the baseline model only
print("Feature Maps - Baseline Model:")
visualize_feature_maps(
    model=model_baseline_best,
    test_generator=test_gen_baseline,
    num_images=2,
)

In [None]:
def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    """Generate a Grad-CAM heatmap for one image.

    Args:
        img_array: Input batch passed straight to the model; only the first
            sample's activations are used for the heatmap.
        model: Keras model to explain.
        last_conv_layer_name: Name of the layer whose activations are weighted.
        pred_index: Index of the output unit to explain. Defaults to the
            arg-max of the first sample's prediction. For a single-unit output
            that is always 0, so the heatmap explains that unit's score
            whichever class is finally reported.

    Returns:
        2-D NumPy array scaled to [0, 1]; all zeros when no location
        contributes positively to the chosen output.
    """

    # Model mapping the input image to (conv activations, final predictions)
    grad_model = tf.keras.models.Model(
        model.inputs, [model.get_layer(last_conv_layer_name).output, model.output]
    )

    # Record the forward pass so the score can be differentiated w.r.t. the activations
    with tf.GradientTape() as tape:
        last_conv_layer_output, preds = grad_model(img_array)
        if pred_index is None:
            pred_index = tf.argmax(preds[0])
        class_channel = preds[:, pred_index]

    # Gradient of the selected output with respect to the conv feature map
    grads = tape.gradient(class_channel, last_conv_layer_output)

    # One importance weight per channel: gradient averaged over batch and space
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # Weighted sum of the channels of the first sample's activations
    last_conv_layer_output = last_conv_layer_output[0]
    heatmap = last_conv_layer_output @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)

    # Keep positive evidence only, then scale to [0, 1]. divide_no_nan yields
    # zeros instead of NaN when the map is entirely zero (e.g. vanished gradients).
    heatmap = tf.maximum(heatmap, 0)
    heatmap = tf.math.divide_no_nan(heatmap, tf.math.reduce_max(heatmap))
    return heatmap.numpy()

def display_gradcam(model, test_generator, model_name, num_examples=6):
    """Display and save Grad-CAM overlays for images from the first test batch.

    Args:
        model: Binary classifier; its output is thresholded at 0.5.
        test_generator: Iterator yielding ``(images, labels)`` batches.
            NOTE(review): the overlay blends the batch image directly with a
            [0, 1] colour map, so images are presumably already rescaled to
            [0, 1] - confirm against the generator configuration.
        model_name: Label used in the figure title and the saved file name.
        num_examples: Number of images to show (capped by the batch size).
            The grid grows in rows of three to fit them.
    """

    # Find the last convolutional layer (matched by name)
    last_conv_layer = next(
        (layer.name for layer in reversed(model.layers) if 'conv2d' in layer.name),
        None,
    )
    if last_conv_layer is None:
        print("No convolutional layer found!")
        return

    print(f"Using last conv layer: {last_conv_layer}")

    # Get some test images (labels are not needed here)
    test_generator.reset()
    batch_x, _ = next(test_generator)

    n_shown = min(num_examples, len(batch_x))
    if n_shown < 1:
        print("No images to display!")
        return

    # Size the grid from the number of images instead of a fixed 2x3 layout
    n_cols = 3
    n_rows = -(-n_shown // n_cols)  # ceiling division
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 5 * n_rows), squeeze=False)
    axes = axes.ravel()

    # Hide every axis first so unused panels stay blank
    for ax in axes:
        ax.axis('off')

    for i in range(n_shown):
        img = batch_x[i:i+1]  # Keep batch dimension

        # Index down to the scalar score before comparing / converting
        score = float(model.predict(img, verbose=0)[0][0])
        predicted_class = int(score > 0.5)
        confidence = score if predicted_class == 1 else 1 - score

        # Generate heatmap; replace any NaN so the colour map stays well defined
        heatmap = np.nan_to_num(make_gradcam_heatmap(img, model, last_conv_layer))

        original_img = batch_x[i]

        # cv2.resize takes (width, height)
        heatmap_resized = cv2.resize(heatmap, (IMG_SIZE[1], IMG_SIZE[0]))

        # Blend the colour-mapped heatmap (RGB only, alpha dropped) with the image
        heatmap_colored = plt.cm.jet(heatmap_resized)[:, :, :3]
        superimposed = np.clip(heatmap_colored * 0.4 + original_img * 0.6, 0.0, 1.0)

        axes[i].imshow(superimposed)
        axes[i].set_title(f'Pred: {CLASS_NAMES[predicted_class]}\nConf: {confidence:.3f}')

    plt.suptitle(f'Grad-CAM Visualizations - {model_name}')
    plt.tight_layout()
    plt.savefig(os.path.join(RESULTS_DIR, 'plots', f'{model_name.lower().replace(" ", "_")}_gradcam.png'), 
                dpi=300, bbox_inches='tight')
    plt.show()

# Grad-CAM overlays for the baseline model, then the strong-augmentation model
print("Grad-CAM Visualizations:")
display_gradcam(
    model=model_baseline_best,
    test_generator=test_gen_baseline,
    model_name='Baseline Model',
)
display_gradcam(
    model=model_strong_best,
    test_generator=test_gen_strong,
    model_name='Strong Augmentation Model',
)

In [None]:
def analyze_model_complexity():
    """Report parameter counts and approximate in-memory size of both models.

    Reads the notebook-level ``model_baseline_best`` and ``model_strong_best``.
    Size is estimated as 4 bytes per parameter (float32) and expressed in MB
    of 1024 * 1024 bytes.

    Returns:
        dict with ``baseline_params``, ``strong_params``, ``baseline_size_mb``
        and ``strong_size_mb``.
    """

    def _size_mb(param_count):
        # 4 bytes per parameter, assuming float32 weights
        return param_count * 4 / (1024 * 1024)

    baseline_params = model_baseline_best.count_params()
    strong_params = model_strong_best.count_params()
    baseline_size_mb = _size_mb(baseline_params)
    strong_size_mb = _size_mb(strong_params)

    print("MODEL COMPLEXITY ANALYSIS")
    print("=" * 40)
    sections = (
        ("Baseline Model:", baseline_params, baseline_size_mb),
        ("\nStrong Augmentation Model:", strong_params, strong_size_mb),
    )
    for heading, param_count, size_mb in sections:
        print(heading)
        print(f"  - Total Parameters: {param_count:,}")
        print(f"  - Model Size: {size_mb:.2f} MB")

    return dict(
        baseline_params=baseline_params,
        strong_params=strong_params,
        baseline_size_mb=baseline_size_mb,
        strong_size_mb=strong_size_mb,
    )

def calculate_inference_time(model, test_generator, model_name, num_batches=10):
    """Measure average ``model.predict`` latency on batches from the test set.

    Args:
        model: Object exposing ``predict(batch, verbose=0)``.
        test_generator: Iterator yielding ``(images, labels)`` batches and
            exposing ``reset()``, ``samples`` and ``batch_size``.
        model_name: Label used in the printed summary.
        num_batches: Maximum number of batches to time; never more than one
            pass over the test set.

    Returns:
        Average inference time per image in seconds, computed from the number
        of images actually processed (so a smaller final batch is not counted
        as a full one).

    Raises:
        ValueError: If there is no batch to time.
    """

    import time

    # Batches in one pass over the data (ceiling division); capping here keeps
    # the iterator from wrapping around into a second epoch.
    batches_per_epoch = -(-test_generator.samples // test_generator.batch_size)
    n_batches = min(num_batches, batches_per_epoch)
    if n_batches < 1:
        raise ValueError("No batches available to time inference")

    test_generator.reset()
    times = []
    n_images = 0

    for i in range(n_batches):
        batch_x, _ = next(test_generator)

        if i == 0:
            # Untimed warm-up so one-off setup cost does not skew the average
            model.predict(batch_x, verbose=0)

        # perf_counter is monotonic and high resolution, unlike time.time()
        start_time = time.perf_counter()
        _ = model.predict(batch_x, verbose=0)
        times.append(time.perf_counter() - start_time)

        n_images += len(batch_x)

    total_time = float(np.sum(times))
    avg_time = np.mean(times)
    avg_time_per_image = total_time / n_images
    images_per_second = n_images / total_time if total_time > 0 else float('inf')

    print(f"\n{model_name} - Inference Time:")
    print(f"  - Average batch time: {avg_time:.4f} seconds")
    print(f"  - Average per image: {avg_time_per_image:.4f} seconds")
    print(f"  - Images per second: {images_per_second:.2f}")

    return avg_time_per_image

# Parameter counts and approximate sizes for both models
complexity_stats = analyze_model_complexity()

# Per-image inference latency for each model on its own test generator
baseline_inference_time = calculate_inference_time(
    model=model_baseline_best,
    test_generator=test_gen_baseline,
    model_name='Baseline Model',
)
strong_inference_time = calculate_inference_time(
    model=model_strong_best,
    test_generator=test_gen_strong,
    model_name='Strong Augmentation Model',
)

In [None]:
def generate_final_report():
    """Build the final experiment report, save it and print it.

    Reads notebook-level results: ``baseline_results`` / ``strong_results``
    (metric dicts), ``baseline_auc`` / ``strong_auc``, ``complexity_stats``,
    ``baseline_inference_time`` and the configuration constants.

    The strong-augmentation model is reported as the winner only when its
    test accuracy is strictly higher; ties go to the baseline.

    The report is written to RESULTS_DIR/final_report.txt as UTF-8, because
    it contains emoji that many platform default encodings cannot represent.
    """

    # Evaluate the comparison once so every section of the report agrees
    strong_is_better = strong_results['accuracy'] > baseline_results['accuracy']

    report = f"""
# CHEST X-RAY PNEUMONIA CLASSIFICATION - FINAL REPORT
{'='*60}

## EXPERIMENT OVERVIEW
This study compared a custom CNN architecture with two different data augmentation strategies:
1. **Baseline Model**: Minimal augmentation (rotation, shifts, horizontal flip)
2. **Strong Augmentation Model**: Extensive augmentation (rotation, shifts, shear, zoom, brightness, etc.)

## DATASET INFORMATION
- **Source**: Kaggle Chest X-Ray Images (Pneumonia) by Paul Timothy Mooney
- **Total Images**: 5,863 X-ray images (JPEG format)
- **Classes**: NORMAL vs PNEUMONIA (binary classification)
- **Split**: Train/Validation/Test folders with class subfolders
- **Image Size**: {IMG_SIZE[0]}x{IMG_SIZE[1]} pixels
- **Class Distribution**: Imbalanced dataset (more pneumonia cases)

## MODEL ARCHITECTURE
- **Type**: Custom Convolutional Neural Network
- **Layers**: 5 Convolutional blocks + Dense layers
- **Parameters**: {complexity_stats['baseline_params']:,}
- **Features**: Batch normalization, dropout, global average pooling
- **Activation**: ReLU (hidden), Sigmoid (output)
- **Optimizer**: Adam with learning rate {LEARNING_RATE}

## TRAINING CONFIGURATION
- **Epochs**: {EPOCHS}
- **Batch Size**: {BATCH_SIZE}
- **Loss Function**: Binary crossentropy
- **Class Weights**: Applied to handle class imbalance
- **Callbacks**: ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
- **Metrics**: Accuracy, Precision, Recall

## RESULTS SUMMARY

### Baseline Model (Minimal Augmentation)
- **Test Accuracy**: {baseline_results['accuracy']:.4f}
- **Test Precision**: {baseline_results['precision']:.4f}
- **Test Recall**: {baseline_results['recall']:.4f}
- **Test F1 Score**: {baseline_results['f1']:.4f}
- **AUC**: {baseline_auc:.4f}
- **Sensitivity**: {baseline_results['sensitivity']:.4f}
- **Specificity**: {baseline_results['specificity']:.4f}

### Strong Augmentation Model
- **Test Accuracy**: {strong_results['accuracy']:.4f}
- **Test Precision**: {strong_results['precision']:.4f}
- **Test Recall**: {strong_results['recall']:.4f}
- **Test F1 Score**: {strong_results['f1']:.4f}
- **AUC**: {strong_auc:.4f}
- **Sensitivity**: {strong_results['sensitivity']:.4f}
- **Specificity**: {strong_results['specificity']:.4f}

## PERFORMANCE COMPARISON
"""
    
    # Determine which model performed better
    if strong_is_better:
        better_model = "Strong Augmentation Model"
        accuracy_improvement = strong_results['accuracy'] - baseline_results['accuracy']
        report += f"✅ **Winner**: {better_model}\n"
        report += f"📈 **Accuracy Improvement**: +{accuracy_improvement:.4f} ({accuracy_improvement*100:.2f}%)\n"
    else:
        better_model = "Baseline Model"
        accuracy_difference = baseline_results['accuracy'] - strong_results['accuracy']
        report += f"✅ **Winner**: {better_model}\n"
        report += f"📉 **Accuracy Difference**: +{accuracy_difference:.4f} ({accuracy_difference*100:.2f}%)\n"
    
    report += f"""
## KEY FINDINGS

### Data Augmentation Impact
"""
    
    if strong_is_better:
        report += "- Strong data augmentation **improved** model performance\n"
        report += "- Enhanced generalization and reduced overfitting\n"
        report += "- Better handling of dataset variability\n"
    else:
        report += "- Strong data augmentation **did not improve** model performance\n"
        report += "- Possible over-augmentation or dataset-specific characteristics\n"
        report += "- Baseline augmentation was sufficient for this dataset\n"
    
    report += f"""
### Medical Relevance
- **Sensitivity (True Positive Rate)**: Critical for medical diagnosis
  - Baseline: {baseline_results['sensitivity']:.4f}
  - Strong Aug: {strong_results['sensitivity']:.4f}
- **Specificity (True Negative Rate)**: Important to avoid false alarms
  - Baseline: {baseline_results['specificity']:.4f}
  - Strong Aug: {strong_results['specificity']:.4f}

### Model Efficiency
- **Inference Time**: {baseline_inference_time:.4f} seconds per image
- **Model Size**: {complexity_stats['baseline_size_mb']:.2f} MB
- **Suitable for**: Real-time clinical applications

## RECOMMENDATIONS

### For Clinical Deployment
1. **Model Selection**: Use {better_model.lower()} for production
2. **Threshold Tuning**: Consider adjusting prediction threshold based on clinical needs
3. **Validation**: Perform extensive validation on diverse datasets
4. **Integration**: Ensure proper integration with hospital PACS systems

### For Future Research
1. **Architecture**: Experiment with transfer learning (ResNet, DenseNet)
2. **Data**: Collect more diverse and balanced datasets
3. **Preprocessing**: Investigate lung segmentation and enhancement techniques
4. **Ensemble**: Combine multiple models for improved performance

## LIMITATIONS
- Limited to single dataset source
- Binary classification only (Normal vs Pneumonia)
- No distinction between bacterial and viral pneumonia
- Pediatric focus (1-5 years old patients)

## CONCLUSION
This study demonstrates the {'positive' if strong_is_better else 'limited'} impact of strong data augmentation on chest X-ray pneumonia classification. The custom CNN architecture achieved {'good' if max(baseline_results['accuracy'], strong_results['accuracy']) > 0.85 else 'moderate'} performance with potential for clinical applications after further validation and optimization.

## FILES GENERATED
- Model weights: `results/models/`
- Training logs: `results/*.csv`
- Visualizations: `results/plots/`
- Comparison table: `results/model_comparison.csv`
- This report: `results/final_report.txt`

{'='*60}
Report generated on: {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')}
"""
    
    # Save report to file; explicit UTF-8 because the text contains emoji
    with open(os.path.join(RESULTS_DIR, 'final_report.txt'), 'w', encoding='utf-8') as f:
        f.write(report)
    
    print(report)

# Build the final report, write it to RESULTS_DIR/final_report.txt, and print it
generate_final_report()

In [None]:
def prepare_for_deployment(best_model, model_name):
    """Export a trained model and its metadata into RESULTS_DIR/deployment.

    Writes four artifacts, each prefixed with ``model_name``: an HDF5 file, a
    SavedModel directory, a TensorFlow Lite flatbuffer and a JSON info file
    describing input/output shapes, pre/post-processing and test metrics.

    Args:
        best_model: Trained Keras model to export.
        model_name: File-name prefix. The metrics recorded in the info file
            come from ``strong_results`` when this equals
            ``'strong_augmentation'`` and from ``baseline_results`` otherwise.
    """
    import json

    deployment_dir = os.path.join(RESULTS_DIR, 'deployment')
    os.makedirs(deployment_dir, exist_ok=True)

    def _artifact_path(suffix):
        # All artifacts live side by side and share the model-name prefix
        return os.path.join(deployment_dir, f'{model_name}{suffix}')

    # HDF5 export
    model_path_h5 = _artifact_path('_final.h5')
    best_model.save(model_path_h5)
    print(f"Model saved as HDF5: {model_path_h5}")

    # SavedModel export (for TensorFlow Serving)
    model_path_saved = _artifact_path('_savedmodel')
    best_model.save(model_path_saved, save_format='tf')
    print(f"Model saved as SavedModel: {model_path_saved}")

    # TensorFlow Lite export for mobile deployment
    tflite_model = tf.lite.TFLiteConverter.from_keras_model(best_model).convert()
    tflite_path = _artifact_path('_model.tflite')
    with open(tflite_path, 'wb') as tflite_file:
        tflite_file.write(tflite_model)
    print(f"Model saved as TensorFlow Lite: {tflite_path}")

    # Choose the metrics dict once instead of once per metric
    metrics = strong_results if model_name == 'strong_augmentation' else baseline_results
    metric_keys = (
        ('accuracy', 'accuracy'),
        ('precision', 'precision'),
        ('recall', 'recall'),
        ('f1_score', 'f1'),
    )

    # Metadata consumed by the inference script
    model_info = {
        'model_name': model_name,
        'input_shape': list(IMG_SIZE) + [3],
        'output_shape': [1],
        'classes': CLASS_NAMES,
        'preprocessing': {
            'rescale': '1./255',
            'resize': IMG_SIZE
        },
        'postprocessing': {
            'threshold': 0.5,
            'class_0': 'NORMAL',
            'class_1': 'PNEUMONIA'
        },
        'performance': {
            info_key: float(metrics[result_key])
            for info_key, result_key in metric_keys
        }
    }

    with open(_artifact_path('_info.json'), 'w') as info_file:
        json.dump(model_info, info_file, indent=2)

    print(f"Model info saved: {model_name}_info.json")

def create_inference_script():
    """Write a standalone inference script to RESULTS_DIR/deployment/inference.py.

    The generated script defines ``ChestXrayClassifier``, which loads a saved
    model together with its ``*_info.json`` metadata and classifies single
    images or lists of images.

    The deployment directory is created if it does not exist yet, so this
    function can be run on its own.
    """
    
    # Source of the generated script. It is a plain (non-f) string, so the
    # braces inside it are written to the file untouched.
    inference_code = '''
import tensorflow as tf
import numpy as np
from PIL import Image
import json
import os

class ChestXrayClassifier:
    def __init__(self, model_path, model_info_path):
        """Initialize the classifier with model and info"""
        self.model = tf.keras.models.load_model(model_path)
        
        with open(model_info_path, 'r') as f:
            self.model_info = json.load(f)
        
        self.input_shape = tuple(self.model_info['input_shape'][:2])
        self.classes = self.model_info['classes']
        self.threshold = self.model_info['postprocessing']['threshold']
    
    def preprocess_image(self, image_path):
        """Preprocess image for prediction"""
        # Load and resize image
        img = Image.open(image_path).convert('RGB')
        # PIL expects (width, height); input_shape is stored as (height, width)
        img = img.resize((self.input_shape[1], self.input_shape[0]))
        
        # Convert to array and normalize
        img_array = np.array(img) / 255.0
        img_array = np.expand_dims(img_array, axis=0)
        
        return img_array
    
    def predict(self, image_path):
        """Make prediction on a single image"""
        # Preprocess
        img_array = self.preprocess_image(image_path)
        
        # Predict
        prediction = self.model.predict(img_array, verbose=0)[0][0]
        
        # Convert to class
        predicted_class = int(prediction > self.threshold)
        class_name = self.classes[predicted_class]
        confidence = prediction if predicted_class == 1 else 1 - prediction
        
        return {
            'class': class_name,
            'confidence': float(confidence),
            'raw_prediction': float(prediction),
            'pneumonia_probability': float(prediction)
        }
    
    def predict_batch(self, image_paths):
        """Make predictions on multiple images"""
        results = []
        for img_path in image_paths:
            try:
                result = self.predict(img_path)
                result['image_path'] = img_path
                results.append(result)
            except Exception as e:
                results.append({
                    'image_path': img_path,
                    'error': str(e)
                })
        return results

# Example usage
if __name__ == "__main__":
    # Initialize classifier
    classifier = ChestXrayClassifier(
        model_path="path/to/model.h5",
        model_info_path="path/to/model_info.json"
    )
    
    # Single prediction
    result = classifier.predict("path/to/chest_xray.jpg")
    print(f"Prediction: {result['class']} (confidence: {result['confidence']:.3f})")
    
    # Batch prediction
    image_paths = ["image1.jpg", "image2.jpg", "image3.jpg"]
    results = classifier.predict_batch(image_paths)
    
    for result in results:
        if 'error' not in result:
            print(f"{result['image_path']}: {result['class']} ({result['confidence']:.3f})")
        else:
            print(f"{result['image_path']}: Error - {result['error']}")
'''
    
    # Make sure the target directory exists even if no model was exported first
    deployment_dir = os.path.join(RESULTS_DIR, 'deployment')
    os.makedirs(deployment_dir, exist_ok=True)
    
    with open(os.path.join(deployment_dir, 'inference.py'), 'w', encoding='utf-8') as f:
        f.write(inference_code)
    
    print("Inference script created: deployment/inference.py")

# Package the better-scoring model for deployment
print("Preparing models for deployment...")

# The strong-augmentation model is selected only on strictly higher test
# accuracy; a tie falls back to the baseline.
if not (strong_results['accuracy'] > baseline_results['accuracy']):
    primary_model, primary_name = model_baseline_best, "baseline"
    print("Using Baseline Model as primary deployment model")
else:
    primary_model, primary_name = model_strong_best, "strong_augmentation"
    print("Using Strong Augmentation Model as primary deployment model")

# Export the selected model and its metadata
prepare_for_deployment(best_model=primary_model, model_name=primary_name)

# Write the standalone inference helper
create_inference_script()

# Closing summary of what was written
print("\n".join([
    "\nDeployment preparation completed!",
    "Files available in 'results/deployment/' directory:",
    "- Model weights (.h5, SavedModel, .tflite)",
    "- Model info (.json)",
    "- Inference script (.py)",
]))

In [None]:
def cleanup_and_summary():
    """Print the closing project summary.

    Lists every file under RESULTS_DIR with its size, then prints quick-access
    paths, headline numbers, suggested next steps and a metric-by-metric
    comparison of the two models. Reads the notebook-level result variables
    (``baseline_results``, ``strong_results``, ``baseline_auc``,
    ``strong_auc``, ``complexity_stats``, ``baseline_inference_time``).
    Nothing is deleted or modified.
    """

    def _format_size(num_bytes):
        # Megabytes above 1 MiB, kilobytes otherwise
        if num_bytes > 1024*1024:
            return f"({num_bytes/1024/1024:.1f}MB)"
        return f"({num_bytes/1024:.1f}KB)"

    def _metric_winner(base_val, strong_val):
        # Strict comparisons; equal values are reported as a tie
        if base_val > strong_val:
            return "🏆 Baseline"
        return "🏆 Strong" if strong_val > base_val else "🤝 Tie"

    print("🎉 CHEST X-RAY PNEUMONIA CLASSIFICATION PROJECT COMPLETED!")
    print("=" * 60)

    # Directory tree of everything the notebook produced
    print("\n📁 GENERATED FILES AND DIRECTORIES:")
    for dir_path, _subdirs, file_names in os.walk(RESULTS_DIR):
        depth = dir_path.replace(RESULTS_DIR, '').count(os.sep)
        dir_indent = '  ' * depth
        print(f"{dir_indent}{os.path.basename(dir_path)}/")

        file_indent = '  ' * (depth + 1)
        for file_name in file_names:
            num_bytes = os.path.getsize(os.path.join(dir_path, file_name))
            print(f"{file_indent}{file_name} {_format_size(num_bytes)}")

    print("\n🔍 QUICK ACCESS PATHS:")
    for path_line in (
        f"├── Best Models: {RESULTS_DIR}/models/",
        f"├── Training Plots: {RESULTS_DIR}/plots/",
        f"├── Training Logs: {RESULTS_DIR}/*.csv",
        f"├── Deployment Ready: {RESULTS_DIR}/deployment/",
        f"└── Final Report: {RESULTS_DIR}/final_report.txt",
    ):
        print(path_line)

    # Headline numbers; the overall winner is decided on accuracy alone
    strong_acc = strong_results['accuracy']
    baseline_acc = baseline_results['accuracy']
    winner = "Strong Augmentation" if strong_acc > baseline_acc else "Baseline"
    winner_acc = max(strong_acc, baseline_acc)

    print("\n🎯 KEY TAKEAWAYS:")
    print(f"✅ Best Model: {winner} Model ({winner_acc:.1%} accuracy)")
    print(f"🔬 Architecture: Custom CNN with {complexity_stats['baseline_params']:,} parameters")
    print(f"⚡ Inference Speed: ~{baseline_inference_time*1000:.0f}ms per image")
    print(f"💾 Model Size: ~{complexity_stats['baseline_size_mb']:.1f}MB")

    print("\n🚀 NEXT STEPS:")
    for step in (
        "1. Review the final report for detailed analysis",
        "2. Test the deployment-ready model with new images",
        "3. Consider transfer learning with pre-trained models",
        "4. Expand to multi-class classification (bacterial vs viral)",
        "5. Implement grad-CAM for better interpretability",
    ):
        print(step)

    # Metric-by-metric comparison table
    print("\n📊 EXPERIMENT METRICS:")
    print(f"{'Metric':<20} {'Baseline':<12} {'Strong Aug':<12} {'Winner'}")
    print("-" * 50)
    comparison_rows = [
        (label, baseline_results[key], strong_results[key])
        for label, key in (
            ('Accuracy', 'accuracy'),
            ('Precision', 'precision'),
            ('Recall', 'recall'),
            ('F1 Score', 'f1'),
        )
    ]
    comparison_rows.append(('AUC', baseline_auc, strong_auc))

    for metric, base_val, strong_val in comparison_rows:
        print(f"{metric:<20} {base_val:<12.4f} {strong_val:<12.4f} {_metric_winner(base_val, strong_val)}")

    print("\n💡 TIPS FOR IMPROVEMENT:")
    for tip in (
        "• Try transfer learning with ImageNet pre-trained models",
        "• Experiment with different optimizers (SGD, AdamW)",
        "• Implement cross-validation for more robust evaluation",
        "• Add external validation with different hospital datasets",
        "• Consider ensemble methods combining multiple models",
    ):
        print(tip)

    print("\n✅ Project successfully completed! All results saved.")

# Print the closing project summary
cleanup_and_summary()

# Ask the garbage collector to release unreferenced objects
import gc
gc.collect()

# End-of-notebook banner: blank line, rule, title, rule
print(
    "\n" + "="*60,
    "CHEST X-RAY PNEUMONIA CLASSIFICATION - END OF NOTEBOOK",
    "="*60,
    sep="\n",
)