In [None]:
#import necessary modules
import numpy as np
import os
import matplotlib.pyplot as plt
import tensorflow as tf
from keras import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNet
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adamax
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
# List the entries under the Kaggle input directory; as the last expression
# of the cell, the returned list is displayed as the cell output.
os.listdir('/kaggle/input/')

> This cell creates image data generators for training, validating, and testing a deep learning model on chest X-ray images, resizing every image to 224x224 pixels. The training data is augmented using random transformations such as rotation, shifting, zooming, and flipping to improve generalization and reduce overfitting. A portion of the training data (20%) is reserved for validation using the `validation_split` argument. The test data is loaded separately without augmentation; only its pixel values are rescaled for normalization. This setup supports training, validation, and testing of a binary classification model.

In [None]:
# Image dimensions expected by the network input
IMG_HEIGHT, IMG_WIDTH = 224, 224

# Fraction of the training directory held out for validation. The training and
# validation generators must share this value so that their 'training' and
# 'validation' subsets are complementary.
VALIDATION_SPLIT = 0.2

# Create ImageDataGenerators for the training, validation and test sets.

# Training: rescale pixels to [0, 1] and apply random augmentation.
# NOTE(review): vertical_flip produces upside-down images — confirm this
# augmentation is intended for chest X-rays.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.25,
    height_shift_range=0.25,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
    validation_split=VALIDATION_SPLIT  # Use 20% of the training data for validation
)

# Validation and test: rescaling only, no augmentation, so that the monitored
# validation loss reflects performance on unmodified images.
val_datagen = ImageDataGenerator(rescale=1./255, validation_split=VALIDATION_SPLIT)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    '/kaggle/input/chest-xray-pneumonia/chest_xray/train',
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=32,
    class_mode='binary',  # Use 'categorical' if you have more than two classes
    subset='training'
)

# Built from val_datagen (not train_datagen) so the held-out images are not
# randomly transformed.
val_generator = val_datagen.flow_from_directory(
    '/kaggle/input/chest-xray-pneumonia/chest_xray/train',
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=32,
    class_mode='binary',
    subset='validation',
    shuffle=True
)

# shuffle=False keeps the prediction order aligned with test_generator.classes.
test_generator = test_datagen.flow_from_directory(
    '/kaggle/input/chest-xray-pneumonia/chest_xray/test',
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=32,
    class_mode='binary',
    shuffle=False
)

> Here we build a binary classification model using the pre-trained **MobileNet** as the base, excluding its top classification layer. The last ten layers of MobileNet are set to be trainable, while the rest are frozen to retain learned features. The model is constructed sequentially by adding dense layers with *ReLU* activation and dropout layers for regularization. The output layer uses a sigmoid activation for binary classification. After compilation with the **Adamax** optimizer and binary cross-entropy loss, the model is run once on dummy input data to ensure proper shape inference.

In [None]:
# Load the MobileNet base model (ImageNet weights) without the top
# (classification) layer; pooling='avg' makes it output one feature vector per image.
base_model = MobileNet(
    weights='imagenet',
    include_top=False,
    pooling='avg',
    input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
)

# Fine-tune only the last layers of the base model. Keras layers are trainable
# by default, so the earlier layers have to be frozen explicitly — marking the
# last ones as trainable is not enough on its own.
N_TRAINABLE_BASE_LAYERS = 10
for layer in base_model.layers[:-N_TRAINABLE_BASE_LAYERS]:
    layer.trainable = False
for layer in base_model.layers[-N_TRAINABLE_BASE_LAYERS:]:
    layer.trainable = True

# Build Model
model = Sequential()

# Base Model
model.add(base_model)

# Dense Layer 1
model.add(Dense(256, activation='relu'))

model.add(Dropout(0.45))

# Dense Layer 2
model.add(Dense(128, activation='relu'))

model.add(Dropout(0.3))

# Dense Layer 3
model.add(Dense(64, activation='relu'))

# Output Layer: a single sigmoid unit for binary classification
model.add(Dense(1, activation='sigmoid'))

# Compile (after setting the trainable flags, so they take effect in training)
model.compile(optimizer=Adamax(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Run the model once with dummy data to ensure shape inference
dummy_input = np.random.random((1, IMG_HEIGHT, IMG_WIDTH, 3))
model(dummy_input)

# Check the model summary; the trainable/non-trainable parameter counts show
# the effect of the freezing above
model.summary()

> We train the model for up to 20 epochs with early stopping and model checkpointing. Both callbacks monitor the validation loss: training stops if it does not improve for 5 consecutive epochs, and only the best model is saved.

In [None]:
# Callbacks, both driven by the validation loss:
# - early stopping gives up after 5 epochs without improvement and restores
#   the best weights seen;
# - the checkpoint writes 'best_model.keras' only when the monitored value improves.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
model_checkpoint = ModelCheckpoint(
    filepath='best_model.keras',
    monitor='val_loss',
    save_best_only=True,
)
training_callbacks = [early_stopping, model_checkpoint]

# Fit for at most 20 epochs, validating on the held-out generator after each one
history = model.fit(
    train_generator,
    epochs=20,
    validation_data=val_generator,
    callbacks=training_callbacks,
)

In [None]:
# Restore the weights stored in the checkpoint file written during training
model.load_weights('best_model.keras')

# Score the restored model on the test generator; evaluate() yields the loss
# followed by the compiled metric, which are unpacked here
test_metrics = model.evaluate(test_generator)
test_loss, test_acc = test_metrics
print(f"Test accuracy: {test_acc:.2f}")


> We generate predictions on the test data, round them to obtain binary class labels, print the classification report and confusion matrix, and calculate the ROC curve and AUC score for performance evaluation.

In [None]:
# Predict on test data: keep the raw sigmoid scores, flattened to one value per image
pred_probs = model.predict(test_generator).reshape(-1)

# Round the scores to hard 0/1 labels for the threshold-based metrics
predictions = np.round(pred_probs).astype(int)

# Classification report
print(classification_report(test_generator.classes, predictions))

# Confusion matrix
print(confusion_matrix(test_generator.classes, predictions))

# ROC curve: computed from the continuous scores, not the rounded labels.
# Rounded labels collapse the curve to a single operating point and make the
# AUC meaningless as a ranking metric.
fpr, tpr, thresholds = roc_curve(test_generator.classes, pred_probs)
roc_auc = auc(fpr, tpr)
print(f"ROC AUC: {roc_auc:.3f}")

# Plot the ROC curve against the chance diagonal
fig, ax = plt.subplots(figsize=(6, 5))
ax.plot(fpr, tpr, label=f"ROC curve (AUC = {roc_auc:.3f})")
ax.plot([0, 1], [0, 1], linestyle='--', color='grey', label='Chance')
ax.set_title('Receiver operating characteristic')
ax.set_xlabel('False positive rate')
ax.set_ylabel('True positive rate')
ax.legend(loc='lower right')
plt.show()

In [None]:
# Save the trained model to disk under an '.h5' file name.
# NOTE(review): the training checkpoint above is written as '.keras'; '.h5' is
# presumably Keras' legacy HDF5 format — confirm it is still the wanted export format.
model.save('medical_image_classifier.h5')

In [None]:
# Ground-truth class indices as recorded by the test generator
true_labels = test_generator.classes

# Model outputs for every test image (probabilities from the sigmoid unit)
predictions = model.predict(test_generator)

# Round each probability to the nearest integer (0 or 1) and flatten to 1-D
# so the result lines up with true_labels
predicted_labels = np.rint(predictions).astype(int).ravel()

In [None]:
# Count agreements/disagreements between the true and predicted labels
cm = confusion_matrix(true_labels, predicted_labels)

# Label the matrix axes with the class names known to the test generator
class_names = test_generator.class_indices.keys()
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)

# Draw the matrix on a fresh figure using the blue colour map
fig, ax = plt.subplots()
disp.plot(ax=ax, cmap=plt.cm.Blues)
plt.show()

In [None]:
# Draw the training curves side by side: accuracy on the left, loss on the right.
# Each panel is described as (train key, validation key, title, y-axis label).
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
panels = [
    ('accuracy', 'val_accuracy', 'Model accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model loss', 'Loss'),
]

for ax, (train_key, val_key, title, y_label) in zip(axes, panels):
    # Training curve first, then validation, matching the legend order below
    ax.plot(history.history[train_key])
    ax.plot(history.history[val_key])
    ax.set_title(title)
    ax.set_ylabel(y_label)
    ax.set_xlabel('Epoch')
    ax.legend(['Train', 'Validation'], loc='upper left')

plt.show()