## Medical image classification (Kaggle Chest X-Ray Pneumonia)

In [None]:
Expected dataset directory layout:

chest_xray/
  train/
    NORMAL/
    PNEUMONIA/
  test/
    NORMAL/
    PNEUMONIA/
  val/
    NORMAL/
    PNEUMONIA/

In [None]:
# Import libraries
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models
from sklearn.metrics import classification_report, confusion_matrix

# Root folder of the dataset; the three split folders are derived from it.
dataset_dir = "path_to_chest_xray_dataset"
train_dir, val_dir, test_dir = (
    os.path.join(dataset_dir, split_name)
    for split_name in ("train", "val", "test")
)

# Step 1: Data Visualization and Exploration
def display_images_simple(directory):
    """Show one sample image per class side by side.

    For each of the ``NORMAL`` and ``PNEUMONIA`` sub-folders of *directory*,
    the first image file in sorted filename order is loaded and drawn in a
    1x2 matplotlib grid.

    Args:
        directory: Path of a split folder containing ``NORMAL/`` and
            ``PNEUMONIA/`` sub-folders.

    Raises:
        FileNotFoundError: If a class folder is missing or contains no
            image files.
    """
    categories = ["NORMAL", "PNEUMONIA"]
    # Only consider real image files: os.listdir() also returns hidden files
    # (e.g. ".DS_Store") and sub-directories, which plt.imread cannot open.
    image_exts = (".png", ".jpg", ".jpeg", ".bmp", ".ppm", ".tif", ".tiff")

    plt.figure(figsize=(10, 5))
    for i, category in enumerate(categories):
        folder_path = os.path.join(directory, category)
        # os.listdir() order is arbitrary, so sort to make the choice of the
        # "first" image deterministic across runs and platforms.
        image_names = sorted(
            name
            for name in os.listdir(folder_path)
            if name.lower().endswith(image_exts)
        )
        if not image_names:
            raise FileNotFoundError(f"No image files found in {folder_path!r}")
        img = plt.imread(os.path.join(folder_path, image_names[0]))
        plt.subplot(1, 2, i + 1)
        plt.imshow(img, cmap='gray')
        plt.title(category)
        plt.axis('off')
    plt.tight_layout()
    plt.show()

# Display images from training data: one example image for each of the two
# classes (NORMAL and PNEUMONIA), drawn side by side.
print("Sample images from training data:")
display_images_simple(train_dir)

# Count images in each class
def count_images_simple(directory):
    """Count the image files in each class folder of a dataset split.

    Only regular files with a common image extension are counted, so hidden
    files (e.g. ``.DS_Store``), sub-directories and stray non-image files do
    not inflate the totals.

    Args:
        directory: Path of a split folder containing ``NORMAL/`` and
            ``PNEUMONIA/`` sub-folders.

    Returns:
        A dict mapping each class name (``"NORMAL"``, ``"PNEUMONIA"``) to the
        number of image files found in its folder.

    Raises:
        FileNotFoundError: If a class folder does not exist.
    """
    categories = ["NORMAL", "PNEUMONIA"]
    image_exts = (".png", ".jpg", ".jpeg", ".bmp", ".ppm", ".tif", ".tiff")

    counts = {}
    for category in categories:
        folder_path = os.path.join(directory, category)
        with os.scandir(folder_path) as entries:
            counts[category] = sum(
                1
                for entry in entries
                if entry.is_file() and entry.name.lower().endswith(image_exts)
            )
    return counts

# Count the training images per class and print the resulting
# {class name: count} dict so the class balance can be inspected.
train_counts = count_images_simple(train_dir)
print("Training Data Distribution:", train_counts)


# Step 2: Data Preprocessing
# No augmentation is applied: each split is only rescaled so that pixel
# values fall in [0, 1].
IMAGE_SIZE = (128, 128)  # height/width every image is resized to on load
BATCH_SIZE = 32

train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Load images; class labels are inferred from the sub-folder names.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
)

val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=False,  # Ensure test data order is preserved for evaluation
)
    
    
# Step 3: Build the CNN Model
# Three Conv2D (3x3, ReLU) + 2x2 max-pooling stages with 32, 64 and 128
# filters, then a 128-unit dense layer and a single sigmoid output unit.
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

# Binary cross-entropy pairs with the single sigmoid output unit.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Step 4: Train the Model
# `history` keeps the per-epoch metrics for both the training and the
# validation generator.
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=val_generator,
)

# Step 5: Evaluate the Model
# Plot Accuracy and Loss
def plot_history(history):
    """Plot training vs. validation accuracy and loss in two side-by-side panels.

    Args:
        history: Object whose ``history`` attribute is a dict holding the
            per-epoch series ``'accuracy'``, ``'val_accuracy'``, ``'loss'``
            and ``'val_loss'`` (as returned by ``model.fit``).
    """
    # (panel title, train key, train legend, validation key, validation legend)
    panels = (
        ('Accuracy', 'accuracy', 'Train Accuracy', 'val_accuracy', 'Validation Accuracy'),
        ('Loss', 'loss', 'Train Loss', 'val_loss', 'Validation Loss'),
    )
    curves = history.history

    plt.figure(figsize=(12, 4))
    for position, (title, train_key, train_label, val_key, val_label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(curves[train_key], label=train_label)
        plt.plot(curves[val_key], label=val_label)
        plt.legend()
        plt.title(title)
    plt.show()

plot_history(history)

# Test the Model
# Rewind the iterator so prediction starts at the first test batch. The test
# generator is created with shuffle=False, so the predictions line up with
# test_generator.classes.
test_generator.reset()
y_true = test_generator.classes
predictions = model.predict(test_generator)
# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 class labels.
y_pred = (predictions.ravel() > 0.5).astype(int)

# Confusion Matrix
cm = confusion_matrix(y_true, y_pred)
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=["Normal", "Pneumonia"],
    yticklabels=["Normal", "Pneumonia"],
)
plt.title("Confusion Matrix")
plt.show()

# Classification Report
print("\nClassification Report:")
print(
    classification_report(
        y_true,
        y_pred,
        target_names=["Normal", "Pneumonia"],
    )
)

# Example Prediction Visualization
def visualize_predictions(generator, predictions, num_samples=9):
    """Show sample images titled with their predicted and true labels.

    The first image of each of the first ``num_samples`` batches is drawn in
    a grid three columns wide. Each image is paired with the prediction at
    that image's own position in the dataset (the running count of samples
    in the preceding batches), not with the batch number.

    Args:
        generator: Indexable batch iterator; ``generator[i]`` must return an
            ``(images, labels)`` pair and ``len(generator)`` the number of
            batches. Assumed to iterate in a fixed order (``shuffle=False``)
            so that it matches the order of ``predictions``.
        predictions: Sequence of hard 0/1 class predictions, one per sample,
            in the generator's sample order.
        num_samples: Maximum number of images to show. It is capped at the
            number of available batches.
    """
    # One image is taken per batch, so there cannot be more panels than batches.
    num_to_show = min(num_samples, len(generator))
    n_cols = 3
    n_rows = -(-num_to_show // n_cols)  # ceiling division so every panel fits

    plt.figure(figsize=(10, 10))
    sample_offset = 0  # dataset index of the first sample in the current batch
    for batch_idx in range(num_to_show):
        images, labels = generator[batch_idx]
        plt.subplot(n_rows, n_cols, batch_idx + 1)
        plt.imshow(images[0], cmap='gray')
        # predictions is indexed per sample, so use the sample offset here;
        # indexing by batch number would label a different image.
        pred_label = "Pneumonia" if predictions[sample_offset] == 1 else "Normal"
        true_label = "Pneumonia" if labels[0] == 1 else "Normal"
        plt.title(f"Pred: {pred_label}\nTrue: {true_label}")
        plt.axis('off')
        sample_offset += len(images)
    plt.show()

# Show a grid of test images titled with predicted vs. true labels, using the
# thresholded 0/1 predictions (y_pred) computed during evaluation.
visualize_predictions(test_generator, y_pred)
