In [None]:
import os

import cv2
import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.layers import Dense, Conv2D , MaxPool2D , Flatten , Dropout , BatchNormalization
from keras.models import Sequential
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Class names; the position in this list is the integer class id
# (PNEUMONIA -> 0, NORMAL -> 1).
labels = ['PNEUMONIA', 'NORMAL']
# Every image is resized to img_size x img_size pixels.
img_size = 150


def get_training_data(data_dir):
    """Load every readable image below ``data_dir`` as a grayscale array.

    ``data_dir`` must contain one sub-directory per entry of ``labels``.

    Args:
        data_dir: Path of the split directory (it holds the class folders).

    Returns:
        Tuple ``(X, y)``: ``X`` has shape ``(n, img_size, img_size, 1)`` and
        ``y`` has shape ``(n,)`` with the class index of each image.

    Files that OpenCV cannot decode are reported on stdout and skipped.
    """
    images = []
    labels_list = []

    for class_num, label in enumerate(labels):
        path = os.path.join(data_dir, label)
        # os.listdir order is arbitrary; sorting keeps the sample order
        # reproducible between runs and machines.
        for img in sorted(os.listdir(path)):
            img_path = os.path.join(path, img)
            img_arr = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img_arr is None:
                # cv2.imread returns None (it does not raise) when the file
                # is not a decodable image.
                print(f"Skipping unreadable file: {img_path}")
                continue
            images.append(cv2.resize(img_arr, (img_size, img_size)))
            labels_list.append(class_num)

    # Stack the images and add the trailing channel dimension.
    X = np.array(images).reshape(-1, img_size, img_size, 1)
    y = np.array(labels_list)

    return X, y

In [None]:
# Load the three dataset splits (train, then test, then val) from the Kaggle
# input directory.
train_dir = '../input/chest-xray-pneumonia/chest_xray/chest_xray/train'
test_dir = '../input/chest-xray-pneumonia/chest_xray/chest_xray/test'
val_dir = '../input/chest-xray-pneumonia/chest_xray/chest_xray/val'

X_train, y_train = get_training_data(train_dir)
X_test, y_test = get_training_data(test_dir)
X_val, y_val = get_training_data(val_dir)

In [None]:
# Human-readable class names, used for plotting only
# (class 0 -> "Pneumonia", anything else -> "Normal").
label_names = ["Pneumonia" if label == 0 else "Normal" for label in y_train]

# seaborn handles a named pandas Series well.
label_series = pd.Series(label_names, name="Diagnosis")

# Bar chart of how many training images belong to each class.
sns.set_style('darkgrid')
sns.countplot(x=label_series)
plt.title("Distribution of Training Data")
plt.xlabel("Class")
plt.ylabel("Count")
plt.show()

In [None]:
# Show the first and the last training image, each in its own figure,
# titled with its class name.
for sample_idx in (0, -1):
    plt.figure(figsize = (5,5))
    # Drop the channel dimension so imshow receives a 2-D array.
    preview = X_train[sample_idx].reshape(img_size, img_size)
    plt.imshow(preview, cmap='gray')
    plt.title(labels[y_train[sample_idx]])

In [None]:
# Scale pixel values by 1/255; the raw X_* arrays are left untouched.
x_train, x_val, x_test = (
    np.array(split) / 255 for split in (X_train, X_val, X_test)
)

In [None]:
# Reshape the data to (n, img_size, img_size, 1) for the network.
# The normalised x_* arrays are reshaped here, not the raw X_* arrays:
# reshaping X_* would overwrite the /255-scaled data with unscaled
# 0-255 pixels and the model would silently train on unnormalised input.
x_train = x_train.reshape(-1, img_size, img_size, 1)
y_train = np.array(y_train)

x_val = x_val.reshape(-1, img_size, img_size, 1)
y_val = np.array(y_val)

x_test = x_test.reshape(-1, img_size, img_size, 1)
y_test = np.array(y_test)

In [None]:
# Random augmentation applied to training batches to reduce overfitting.
augmentation_options = dict(
    # Dataset-wide / per-sample normalisation and whitening are all disabled.
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    # Geometric jitter.
    rotation_range=30,        # random rotation, in degrees
    zoom_range=0.2,           # random zoom
    width_shift_range=0.1,    # horizontal shift, fraction of total width
    height_shift_range=0.1,   # vertical shift, fraction of total height
    horizontal_flip=True,     # random left/right flip
    vertical_flip=False,      # no up/down flip
)
datagen = ImageDataGenerator(**augmentation_options)

datagen.fit(x_train)

In [None]:
# CNN for binary classification: five Conv -> (Dropout) -> BatchNorm -> MaxPool
# stages with 32, 64, 64, 128 and 256 filters, then a 128-unit dense layer
# and a single sigmoid output.
model = Sequential([
    # Stage 1
    Conv2D(32, (3, 3), strides=1, padding='same', activation='relu', input_shape=(150, 150, 1)),
    BatchNormalization(),
    MaxPool2D((2, 2), strides=2, padding='same'),
    # Stage 2
    Conv2D(64, (3, 3), strides=1, padding='same', activation='relu'),
    Dropout(0.1),
    BatchNormalization(),
    MaxPool2D((2, 2), strides=2, padding='same'),
    # Stage 3
    Conv2D(64, (3, 3), strides=1, padding='same', activation='relu'),
    BatchNormalization(),
    MaxPool2D((2, 2), strides=2, padding='same'),
    # Stage 4
    Conv2D(128, (3, 3), strides=1, padding='same', activation='relu'),
    Dropout(0.2),
    BatchNormalization(),
    MaxPool2D((2, 2), strides=2, padding='same'),
    # Stage 5
    Conv2D(256, (3, 3), strides=1, padding='same', activation='relu'),
    Dropout(0.2),
    BatchNormalization(),
    MaxPool2D((2, 2), strides=2, padding='same'),
    # Classifier head
    Flatten(),
    Dense(units=128, activation='relu'),
    Dropout(0.2),
    Dense(units=1, activation='sigmoid'),
])
model.compile(optimizer="rmsprop", loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

In [None]:
# Multiply the learning rate by 0.3 when validation accuracy has not improved
# for 2 epochs, never going below 1e-6.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.3,
    patience=2,
    min_lr=0.000001,
    verbose=1,
)

In [None]:
# Callbacks that are actually passed to fit():
#   * learning_rate_reduction - the ReduceLROnPlateau schedule defined earlier
#   * ModelCheckpoint         - keeps the best model seen so far on disk
#   * EarlyStopping           - patience equals the epoch count, so it cannot
#                               stop this 12-epoch run; NOTE(review): lower the
#                               patience if early stopping is wanted.
callbacks = [
    learning_rate_reduction,
    ModelCheckpoint('best_model.keras', save_best_only=True),
    EarlyStopping(patience=12),
]

# Only the training data goes through the augmenting generator. Validation
# uses the plain arrays so the monitored validation metrics are measured on
# unmodified images and are stable from epoch to epoch.
history = model.fit(
    datagen.flow(x_train, y_train, batch_size=32),
    epochs=12,
    validation_data=(x_val, y_val),
    callbacks=callbacks,
)

In [None]:
# Evaluate once and reuse the result; each evaluate() call is a full pass
# over the test set.
test_loss, test_accuracy = model.evaluate(x_test, y_test)
print("Loss of the model is - " , test_loss)
print("Accuracy of the model is - " , test_accuracy*100 , "%")

In [None]:
# Per-epoch metrics recorded by model.fit.
train_acc = history.history['accuracy']
train_loss = history.history['loss']
val_acc = history.history['val_accuracy']
val_loss = history.history['val_loss']

# Take the x axis from the recorded history rather than a hard-coded 12, so
# the plot still works if training ran for a different number of epochs.
epochs = list(range(len(train_acc)))

fig , ax = plt.subplots(1,2)
fig.set_size_inches(20,10)

# Left panel: accuracy.
ax[0].plot(epochs , train_acc , 'go-' , label = 'Training Accuracy')
ax[0].plot(epochs , val_acc , 'ro-' , label = 'Validation Accuracy')
ax[0].set_title('Training & Validation Accuracy')
ax[0].legend()
ax[0].set_xlabel("Epochs")
ax[0].set_ylabel("Accuracy")

# Right panel: loss. It shows training and validation loss, not test data.
ax[1].plot(epochs , train_loss , 'g-o' , label = 'Training Loss')
ax[1].plot(epochs , val_loss , 'r-o' , label = 'Validation Loss')
ax[1].set_title('Training & Validation Loss')
ax[1].legend()
ax[1].set_xlabel("Epochs")
ax[1].set_ylabel("Loss")
plt.show()

In [None]:
# Raw sigmoid outputs. Predict on x_test, the same preprocessed array that is
# passed to model.evaluate, so the report and confusion matrix describe the
# same inputs as the reported test accuracy.
raw_predictions = model.predict(x_test)

# Threshold at 0.5 and flatten (n, 1) -> (n,) to get one class id per image.
predictions = (raw_predictions > 0.5).astype(int).ravel()

# Display first 15 predictions
print(predictions[:15])

In [None]:
# Per-class precision / recall / F1 on the test set.
report = classification_report(
    y_test,
    predictions,
    target_names=['Pneumonia (Class 0)', 'Normal (Class 1)'],
)
print(report)

In [None]:
# Confusion matrix: rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=predictions)
cm

In [None]:
# Wrap the matrix in a DataFrame whose rows and columns are named by class id.
class_ids = ['0', '1']
cm = pd.DataFrame(cm, index=class_ids, columns=class_ids)

In [None]:
# Annotated heatmap of the confusion matrix, with class names on both axes.
plt.figure(figsize=(10, 10))
sns.heatmap(
    cm,
    annot=True,
    fmt='',
    cmap="Blues",
    linewidth=1,
    linecolor='black',
    xticklabels=labels,
    yticklabels=labels,
)

In [None]:
# Indices of the test samples that were classified correctly / incorrectly.
is_hit = predictions == y_test
correct = np.nonzero(is_hit)[0]
incorrect = np.nonzero(~is_hit)[0]

In [None]:
# Show up to six correctly classified test images in a 3x2 grid.
for i, c in enumerate(correct[:6]):
    plt.subplot(3, 2, i + 1)
    plt.xticks([])
    plt.yticks([])
    sample = x_test[c].reshape(150, 150)
    plt.imshow(sample, cmap="gray", interpolation='none')
    plt.title(f"Predicted Class {predictions[c]},\nActual Class {y_test[c]}")
    plt.tight_layout()

In [None]:
# Save the trained model to the Kaggle working directory.
saved_model_path = '/kaggle/working/pneumonia_detection_model.h5'
model.save(saved_model_path)

In [None]:
from tensorflow.keras.models import load_model
import numpy as np
import cv2

# Reload the saved model from the current working directory for inference.
inference_model_path = './pneumonia_detection_model.h5'
model = load_model(inference_model_path)


def preprocess_image(image_path):
    """Load one image file and turn it into a single-sample model input.

    Args:
        image_path: Path of the image file to read.

    Returns:
        Array of shape ``(1, 150, 150, 1)``: the grayscale image resized to
        150x150 with pixel values divided by 255.

    Raises:
        ValueError: If OpenCV cannot read or decode ``image_path``.
    """
    # Read the image in grayscale
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread returns None instead of raising on a missing or
        # undecodable file; fail here with a message that names the path.
        raise ValueError(f"Could not read image file: {image_path}")

    img = cv2.resize(img, (150, 150))
    # NOTE(review): this scaling must match how the training data was scaled.
    img = img / 255.0

    # Reshape for the model (add batch and channel dimensions)
    img = np.reshape(img, (-1, 150, 150, 1))

    return img

def predict_pneumonia(image_path):
    """Classify one chest X-ray image file as PNEUMONIA or NORMAL.

    The model has a single sigmoid output, i.e. the probability of class 1.
    Training encodes classes by their position in ``['PNEUMONIA', 'NORMAL']``,
    so class 1 is NORMAL and class 0 is PNEUMONIA. An output above 0.5
    therefore means NORMAL.

    Args:
        image_path: Path of the image file to classify.

    Returns:
        Tuple ``(result, confidence)``: ``result`` is ``"PNEUMONIA"`` or
        ``"NORMAL"``, ``confidence`` is the probability of that class as a
        float.
    """
    processed_img = preprocess_image(image_path)

    prediction = model.predict(processed_img)
    print("PRED:",prediction)

    # Probability of class 1 (NORMAL) for the single input sample.
    prob_normal = float(prediction[0][0])

    # Get result (threshold at 0.5)
    if prob_normal > 0.5:
        result, confidence = "NORMAL", prob_normal
    else:
        result, confidence = "PNEUMONIA", 1 - prob_normal

    return result, confidence

# Classify a single image file and print the diagnosis with its confidence.
img_path = "IM-0003-0001.jpeg"
result, confidence = predict_pneumonia(img_path)
summary_line = f"Diagnosis: {result} (Confidence: {confidence:.2f})"
print(summary_line)



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 317ms/step
PRED: [[3.002677e-06]]
Diagnosis: NORMAL (Confidence: 1.00)
