### Analyze chest X-ray scans to detect pneumonia using transfer learning
Base model: VGG19\
Dataset: NIH X-ray dataset, loaded from Hugging Face as `keremberke/chest-xray-classification`

In [None]:
# Step 1: Import necessary libraries
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from datasets import load_dataset
import os
from PIL import Image
import io
from tensorflow.keras.applications import VGG19
from tensorflow.keras.layers import GlobalAveragePooling2D

In [None]:
# Step 2: Download the dataset from Hugging Face and export it to disk
# in the one-subfolder-per-class layout that `flow_from_directory` reads.
dataset = load_dataset('keremberke/chest-xray-classification', 'full')

SPLITS = ('train', 'validation')
# Label 0 is written to 'normal'; every other label is written to 'pneumonia'.
CLASS_FOLDERS = ('normal', 'pneumonia')

# Prepare directories
for split in SPLITS:
    for label_folder in CLASS_FOLDERS:
        os.makedirs(f'xray_dataset/{split}/{label_folder}', exist_ok=True)

# Assuming the dataset splits are already defined in Hugging Face dataset
for split in SPLITS:
    # Walk the split row by row instead of materializing the full
    # 'image' and 'labels' columns as lists before writing anything.
    for i, example in enumerate(dataset[split]):
        img = example['image']
        label = example['labels']
        label_folder = 'normal' if label == 0 else 'pneumonia'
        img_path = f'xray_dataset/{split}/{label_folder}/{split}_{label}_{i}.jpeg'

        # JPEG cannot store alpha or palette data; saving such an image
        # raises an error, so normalize anything unusual to RGB first.
        if img.mode not in ('L', 'RGB'):
            img = img.convert('RGB')

        # Let Pillow write the file directly; no in-memory buffer is needed.
        img.save(img_path, format='JPEG')


In [None]:

# Step 3: Preprocess the dataset
# The ImageNet weights of VGG19 were trained on inputs prepared by
# `vgg19.preprocess_input` (RGB -> BGR, per-channel ImageNet mean
# subtraction, no scaling). Feeding pixels rescaled to [0, 1] instead does
# not match what the frozen convolutional base expects, so both generators
# apply the model's own preprocessing function rather than `rescale=1./255`.
vgg19_preprocess = tf.keras.applications.vgg19.preprocess_input

IMAGE_SIZE = (224, 224)  # spatial size the images are resized to
BATCH_SIZE = 20

# Training images are randomly augmented on the fly.
train_datagen = ImageDataGenerator(
    preprocessing_function=vgg19_preprocess,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Validation images get the same preprocessing but no augmentation.
validation_datagen = ImageDataGenerator(preprocessing_function=vgg19_preprocess)

train_dir = 'xray_dataset/train'
validation_dir = 'xray_dataset/validation'

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary'
)

validation_generator = validation_datagen.flow_from_directory(
    validation_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=False  # Keep file order fixed so predictions line up with `.classes`
)


In [None]:
# Instantiate VGG19 with its ImageNet weights, leaving out the top
# (classifier) layers so that a new head can be attached.
base_model = VGG19(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)

# Mark the whole base as non-trainable so its weights stay fixed.
base_model.trainable = False

In [None]:

# Step 4: Build the CNN model
# Stack a small classification head on top of the base model, one layer
# at a time.
model = Sequential()
model.add(base_model)
model.add(GlobalAveragePooling2D())
model.add(Dense(1024, activation='relu'))
# Single sigmoid unit: assumes binary classification for pneumonia.
model.add(Dense(1, activation='sigmoid'))

In [None]:


# Step 5: Compile the model
# Adam optimizer, binary cross-entropy loss, accuracy reported as the metric.
model.compile(
    optimizer=Adam(learning_rate=1e-3),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


In [None]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

In [None]:

# Step 6: Train the model
# Fit on the training generator for two epochs, evaluating on the
# validation generator along the way; the returned object is kept in
# `history`.
history = model.fit(
    x=train_generator,
    validation_data=validation_generator,
    epochs=2,
)


In [None]:
# Evaluate the trained model on the validation generator; with the
# compile settings used in this notebook this reports loss and accuracy.
model.evaluate(validation_generator)

In [None]:
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np

# The model ends in a single sigmoid unit, so predictions come back with a
# trailing axis of size 1; threshold at 0.5 and flatten to a 1-D label vector.
predictions = model.predict(validation_generator)
predicted_classes = np.where(predictions > 0.5, 1, 0).ravel()
true_classes = validation_generator.classes

# Take the class names from the generator, ordered by their integer index,
# instead of hard-coding them: `flow_from_directory` assigns indices to the
# sub-directory names in alphanumeric order ('normal' -> 0, 'pneumonia' -> 1),
# so a hand-written ['Pneumonia', 'Normal'] list mislabels the report rows.
class_indices = validation_generator.class_indices
class_ids = sorted(class_indices.values())
target_names = [
    name.capitalize() for name in sorted(class_indices, key=class_indices.get)
]

# Evaluating the model. `labels` is pinned so the matrix/report keep one
# row per class even if a class is missing from the predictions.
conf_matrix = confusion_matrix(true_classes, predicted_classes, labels=class_ids)
report = classification_report(
    true_classes,
    predicted_classes,
    labels=class_ids,
    target_names=target_names,
)

# Print the confusion matrix and the per-class report
print("Confusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(report)