In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from keras.callbacks import ReduceLROnPlateau
import cv2
import os
import numpy as np
import pandas as pd
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
import random
#import gradio


In [2]:
labels = ["PNEUMONIA", "NORMAL"]
img_size = 150

def get_data(data_dir, class_names=tuple(labels), size=img_size):
    """Load every readable image below ``data_dir`` as a resized grayscale array.

    ``data_dir`` is expected to contain one sub-directory per entry of
    ``class_names``; the position of a name in ``class_names`` is used as the
    integer class id of the images found in that sub-directory.

    Args:
        data_dir: Directory that holds one sub-folder per class.
        class_names: Sub-folder names, in class-id order. Defaults to a
            snapshot of the module-level ``labels`` taken when the function is
            defined, so rebinding ``labels`` later in the notebook cannot
            change (or break) what this function loads.
        size: Edge length in pixels; each image is resized to ``size x size``.
            Defaults to the module-level ``img_size`` at definition time.

    Returns:
        A list of ``[image_array, class_id]`` pairs. Files that cannot be read
        or resized are reported on stdout and skipped.

    Raises:
        OSError: If a class sub-directory cannot be listed (for example
            ``FileNotFoundError`` when it does not exist).
    """
    data = []
    for class_num, label in enumerate(class_names):
        path = os.path.join(data_dir, label)
        # os.listdir() returns entries in arbitrary order; sorting makes the
        # loaded order (and therefore any seeded shuffle applied afterwards)
        # reproducible across runs and machines.
        for img in sorted(os.listdir(path)):
            img_path = os.path.join(path, img)
            try:
                img_arr = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

                if img_arr is None:  # cv2.imread signals failure with None, not an exception
                    print(f"Failed to read image: {img_path}")
                    continue

                resized_arr = cv2.resize(img_arr, (size, size))
                data.append([resized_arr, class_num])
            except Exception as e:
                # Deliberate best-effort: one bad file must not abort the whole load.
                print(f"Error processing {img}: {e}")
    return data
        

In [3]:
# Locations of the three splits shipped with the Kaggle chest X-ray dataset.
train_dir = "/kaggle/input/chest-xray-pneumonia/chest_xray/train"
test_dir = "/kaggle/input/chest-xray-pneumonia/chest_xray/test"
val_dir = "/kaggle/input/chest-xray-pneumonia/chest_xray/val"

# Load each split through get_data (train first, then test, then val).
train = get_data(train_dir)
test = get_data(test_dir)
val = get_data(val_dir)

In [4]:
# Pool the three splits into a single list of [image, class_id] entries.
data = [*train, *val, *test]
print(len(data))

# Class ids follow the order of `labels`: 0 = PNEUMONIA, 1 = NORMAL.
class_ids = [entry[1] for entry in data]
pneum = class_ids.count(0)
norml = class_ids.count(1)
print(pneum)
print(norml)

# Size of the class imbalance, shown as the cell's output value.
pneum - norml


5856
4273
1583


2690

In [5]:
# Shuffle a copy so that `data` itself is left untouched. Because the seed is
# reset on every run and the input is never mutated, re-running this cell
# always produces exactly the same order and the same splits.
random.seed(42)  # For reproducibility
shuffled = list(data)
random.shuffle(shuffled)

# Separate images and class ids. The ids go into `targets`, not `labels`:
# `labels` is the list of class-name strings defined next to get_data(), and
# overwriting it with integers would break any later use of the class names.
images = [item[0] for item in shuffled]
targets = [item[1] for item in shuffled]

# Step 1: Split into train (80%) and temp (validation + test, 20%),
# stratified so each part keeps the overall class ratio.
x_train, temp_images, y_train, temp_labels = train_test_split(
    images, targets, test_size=0.2, stratify=targets, random_state=42
)

# Step 2: Split temp evenly into validation and test, again stratified.
x_val, x_test, y_val, y_test = train_test_split(
    temp_images, temp_labels, test_size=0.5, stratify=temp_labels, random_state=42
)

# Sanity check: every sample ended up in exactly one split.
assert len(x_train) + len(x_val) + len(x_test) == len(data)

# Check the sizes of the splits
print(f"Train: {len(x_train)} images")
print(f"Validation: {len(x_val)} images")
print(f"Test: {len(x_test)} images")

Train: 4684 images
Validation: 586 images
Test: 586 images


In [6]:
# Normalize: convert each split to a NumPy array and divide every pixel by 255
# (presumably mapping 8-bit intensities into [0, 1] - confirm the image dtype).
# NOTE: each statement rebinds its own input, so running this cell a second
# time divides by 255 again; run it exactly once after the split cell.
x_train = np.divide(np.array(x_train), 255)
x_val = np.divide(np.array(x_val), 255)
x_test = np.divide(np.array(x_test), 255)

In [7]:
# Reshape (not resize): give every image array an explicit trailing channel
# axis of length 1, i.e. shape (n_samples, img_size, img_size, 1), and turn the
# label lists into NumPy arrays.
image_shape = (-1, img_size, img_size, 1)

x_train = np.reshape(x_train, image_shape)
x_val = np.reshape(x_val, image_shape)
x_test = np.reshape(x_test, image_shape)

y_train = np.array(y_train)
y_val = np.array(y_val)
y_test = np.array(y_test)

In [8]:
# Turn each label vector into a single column of shape (n_samples, 1).
y_train = np.reshape(y_train, (-1, 1))
y_val = np.reshape(y_val, (-1, 1))
y_test = np.reshape(y_test, (-1, 1))

In [9]:
# Binary CNN classifier: five Conv -> (Dropout) -> BatchNorm -> MaxPool stages
# followed by a small dense head with one sigmoid output unit.
#
# The input is declared with an explicit keras.Input built from `img_size`
# rather than a hard-coded `input_shape=(150, 150, 1)` on the first Conv2D:
# this keeps the model in sync with the preprocessing cells and avoids the
# Keras warning about passing `input_shape` to a layer in a Sequential model.
model = Sequential([
    keras.Input(shape=(img_size, img_size, 1)),

    # Stage 1
    Conv2D(32, (3, 3), strides=1, padding='same', activation='relu'),
    BatchNormalization(),
    MaxPool2D((2, 2), strides=2, padding='same'),

    # Stage 2
    Conv2D(64, (3, 3), strides=1, padding='same', activation='relu'),
    Dropout(0.1),
    BatchNormalization(),
    MaxPool2D((2, 2), strides=2, padding='same'),

    # Stage 3
    Conv2D(64, (3, 3), strides=1, padding='same', activation='relu'),
    BatchNormalization(),
    MaxPool2D((2, 2), strides=2, padding='same'),

    # Stage 4
    Conv2D(128, (3, 3), strides=1, padding='same', activation='relu'),
    Dropout(0.2),
    BatchNormalization(),
    MaxPool2D((2, 2), strides=2, padding='same'),

    # Stage 5
    Conv2D(256, (3, 3), strides=1, padding='same', activation='relu'),
    Dropout(0.2),
    BatchNormalization(),
    MaxPool2D((2, 2), strides=2, padding='same'),

    # Classification head
    Flatten(),
    Dense(units=128, activation='relu'),
    Dropout(0.2),
    Dense(units=1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [11]:
# Stop once val_loss has gone 3 epochs without improving, and roll the model
# back to the weights of its best epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# NOTE(review): fit() continues from the model's current weights, so re-running
# this cell without re-running the model-definition cell resumes training
# instead of starting from scratch.
history = model.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_val, y_val),
    epochs=15,
    callbacks=[early_stopping],
)

Epoch 1/15
[1m147/147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m222s[0m 2s/step - accuracy: 0.9761 - loss: 0.0647 - val_accuracy: 0.9471 - val_loss: 0.1194
Epoch 2/15
[1m147/147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m222s[0m 2s/step - accuracy: 0.9821 - loss: 0.0445 - val_accuracy: 0.9522 - val_loss: 0.1384
Epoch 3/15
[1m147/147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m223s[0m 2s/step - accuracy: 0.9783 - loss: 0.0633 - val_accuracy: 0.9317 - val_loss: 0.1494
Epoch 4/15
[1m147/147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m225s[0m 2s/step - accuracy: 0.9775 - loss: 0.0567 - val_accuracy: 0.9505 - val_loss: 0.1298


In [12]:
# Evaluate once on the held-out test set; evaluate() returns [loss, accuracy]
# (accuracy because it is the only metric passed to compile()). Running it a
# second time just to read the other element would repeat the whole pass.
test_loss, test_accuracy = model.evaluate(x_test, y_test)
print("loss", test_loss)
print('accuracy', test_accuracy*100, '%')

[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 257ms/step - accuracy: 0.9593 - loss: 0.1070
loss 0.11018591374158859
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 260ms/step - accuracy: 0.9593 - loss: 0.1070
accuracy 95.90443968772888 %


In [14]:
# Persist the trained model; the .h5 extension selects Keras' HDF5 format.
model_path = 'pneumonie_xray_prediction.h5'
model.save(model_path)
