In [11]:
import os
import zipfile
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import balanced_accuracy_score
from sklearn.model_selection import train_test_split


In [None]:
# Kaggle dataset downloading and extracting
os.environ["KAGGLE_USERNAME"] = "bartoszpa"
os.environ["KAGGLE_KEY"] = "2aa9f256d6b11081fe8bbc01dce8b211"

!kaggle datasets download -d paultimothymooney/chest-xray-pneumonia

with zipfile.ZipFile("chest-xray-pneumonia.zip", "r") as zip_ref:
    zip_ref.extractall("dataset")

In [14]:
# Root of the extracted dataset. Set the CHEST_XRAY_DIR environment variable to run
# the notebook on another machine without editing it; the default keeps the
# original location so existing runs behave exactly as before.
dataset_root = os.environ.get(
    "CHEST_XRAY_DIR",
    "C:/Users/Bartek/Desktop/STUDIA/magister/dataset/chest_xray",
).rstrip("/\\")

train_path = f"{dataset_root}/train"
test_path = f"{dataset_root}/test"

# Function to load and preprocess images
def load_and_preprocess_data(data_path, label, target_size=(128, 128)):
    """Load every image file of one directory and attach the same label to each.

    Args:
        data_path: Directory that directly contains the image files of one class.
        label: Class label assigned to every image found in ``data_path``.
        target_size: ``(height, width)`` each image is resized to while loading.

    Returns:
        A tuple ``(images, labels)``: ``images`` is an array of shape
        ``(n, height, width, 3)`` holding the pixel values produced by
        ``img_to_array`` (not normalised here), and ``labels`` is an array of
        length ``n`` filled with ``label``. When the directory holds no image
        files, ``images`` has shape ``(0, height, width, 3)`` so the result can
        still be concatenated with other classes.
    """
    # Same extension whitelist Keras' own directory iterators use.
    image_extensions = (".png", ".jpg", ".jpeg", ".bmp", ".ppm", ".tif", ".tiff")

    images = []
    # Sorted so the sample order - and therefore any seeded split made later -
    # does not depend on the file-system's listing order.
    for filename in sorted(os.listdir(data_path)):
        img_path = os.path.join(data_path, filename)
        # Skip sub-directories and stray files (.DS_Store, Thumbs.db, ...) that
        # would otherwise make load_img fail.
        if not os.path.isfile(img_path):
            continue
        if not filename.lower().endswith(image_extensions):
            continue
        img = tf.keras.preprocessing.image.load_img(img_path, target_size=target_size)
        images.append(tf.keras.preprocessing.image.img_to_array(img))

    # np.full infers the dtype from `label` and also handles the empty case.
    labels = np.full(len(images), label)
    if not images:
        return np.empty((0, *target_size, 3), dtype="float32"), labels
    return np.array(images), labels

In [15]:
# Load the two classes of the training set (label 1 = pneumonia, 0 = normal)
train_pneumonia_images, train_pneumonia_labels = load_and_preprocess_data(os.path.join(train_path, "PNEUMONIA"), 1)
train_normal_images, train_normal_labels = load_and_preprocess_data(os.path.join(train_path, "NORMAL"), 0)

# Combine pneumonia and normal data
train_images = np.concatenate((train_pneumonia_images, train_normal_images), axis=0)
train_labels = np.concatenate((train_pneumonia_labels, train_normal_labels), axis=0)

# Split the dataset into train and validation sets.
# The classes are imbalanced, so stratify to keep the same pneumonia/normal
# ratio in both parts; otherwise validation metrics depend on the luck of the draw.
train_images, val_images, train_labels, val_labels = train_test_split(
    train_images,
    train_labels,
    test_size=0.2,
    random_state=42,
    stratify=train_labels,
)

# Load the two classes of the test set
test_pneumonia_images, test_pneumonia_labels = load_and_preprocess_data(os.path.join(test_path, "PNEUMONIA"), 1)
test_normal_images, test_normal_labels = load_and_preprocess_data(os.path.join(test_path, "NORMAL"), 0)

# Combine pneumonia and normal test data
test_images = np.concatenate((test_pneumonia_images, test_normal_images), axis=0)
test_labels = np.concatenate((test_pneumonia_labels, test_normal_labels), axis=0)

# Normalize validation/test pixel values to be between 0 and 1.
# train_images deliberately stays unscaled here: the augmentation generator
# applies rescale=1./255 itself, after its brightness transform.
val_images, test_images = val_images / 255.0, test_images / 255.0

In [16]:
# Create a generator to augment data (parameters from previous master thesis).
# rescale is applied last, so the generator receives 0-255 pixels and yields
# batches scaled to [0, 1].
train_datagen = ImageDataGenerator(
    brightness_range=[0.8, 1.2],
    zoom_range=[0.9, 1.1],
    rotation_range=10,
    width_shift_range=0.15,
    height_shift_range=0.15,
    horizontal_flip=True,
    vertical_flip=True,
    rescale=1./255
)

# Seed the shuffling/augmentation so the batch stream is repeatable between
# kernel restarts (same seed value as the train/validation split).
train_generator = train_datagen.flow(train_images, train_labels, batch_size=16, seed=42)

In [None]:
# Custom baseline CNN: three convolution stages followed by a small dense
# classifier that outputs the pneumonia probability.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train the custom CNN on augmented batches; the held-out split is scored after every epoch.
history_own = model.fit(
    train_generator,
    validation_data=(val_images, val_labels),
    epochs=100,
)

In [26]:
import matplotlib.pyplot as plt

# Function to plot loss curves

def plot_loss(history, title='Training and Validation Loss'):
    """Plot the per-epoch training loss (and validation loss, if recorded).

    Args:
        history: Object returned by ``model.fit``; its ``history`` dict must
            contain ``'loss'`` and may contain ``'val_loss'``.
        title: Figure title. Pass the model name when plotting several models
            so the figures can be told apart.
    """
    # Explicit figure/axes instead of pyplot's implicit "current figure" state.
    fig, ax = plt.subplots()
    ax.plot(history.history['loss'], label='Training Loss')
    # 'val_loss' is only recorded when fit() was given validation data.
    if 'val_loss' in history.history:
        ax.plot(history.history['val_loss'], label='Validation Loss')
    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.grid()
    ax.legend()
    plt.show()

In [None]:
# ResNet50 backbone with randomly initialised weights (trained from scratch),
# topped with global pooling and a small binary classification head.
resnet50 = tf.keras.applications.ResNet50(weights=None, include_top=False, input_shape=(128, 128, 3))

inputs = layers.Input(shape=(128, 128, 3))
x = resnet50(inputs)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(512, activation='relu')(x)
outputs = layers.Dense(1, activation='sigmoid')(x)

model_nt = models.Model(inputs, outputs)

model_nt.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

model_nt.summary()

In [None]:
# Train the from-scratch ResNet50 with the same data and schedule as the other models.
history_resnet_nt = model_nt.fit(
    train_generator,
    validation_data=(val_images, val_labels),
    epochs=100,
)

In [None]:
# ResNet50 backbone with ImageNet weights, used as a frozen feature extractor.
resnet50_pt = tf.keras.applications.ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(128, 128, 3)
)

# Freeze the whole backbone (propagates to every layer inside it).
resnet50_pt.trainable = False

inputs = tf.keras.Input(shape=(128, 128, 3))
# The data pipeline delivers RGB pixels scaled to [0, 1], but the ImageNet
# weights were trained on 0-255 images passed through resnet50.preprocess_input
# (RGB -> BGR plus per-channel mean subtraction). Undo the scaling and apply the
# matching preprocessing inside the model so the frozen filters see the input
# distribution they were trained on.
x = tf.keras.layers.Rescaling(255.0)(inputs)
x = tf.keras.applications.resnet50.preprocess_input(x)
# training=False keeps the BatchNormalization layers in inference mode.
x = resnet50_pt(x, training=False)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dense(512, activation='relu')(x)
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)

model_pt = tf.keras.Model(inputs, outputs)

model_pt.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

model_pt.summary()

In [None]:
# Train only the classification head on top of the frozen, pretrained ResNet50.
history_resnet_pt = model_pt.fit(
    train_generator,
    validation_data=(val_images, val_labels),
    epochs=100,
)

In [None]:
# EfficientNetB0 backbone with randomly initialised weights (trained from scratch).
effnetB0_nt = tf.keras.applications.EfficientNetB0(
    include_top=False,
    weights=None,
    input_shape=(128, 128, 3)
)

inputs = tf.keras.Input(shape=(128, 128, 3))
# Keras' EfficientNet contains its own input rescaling and expects raw pixel
# values in [0, 255]. The data pipeline already scales images to [0, 1], so
# restore the 0-255 range here instead of shrinking the input twice.
x = tf.keras.layers.Rescaling(255.0)(inputs)
x = effnetB0_nt(x)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dense(512, activation='relu')(x)
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)

model_eff_nt = tf.keras.Model(inputs, outputs)

model_eff_nt.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

model_eff_nt.summary()

In [None]:
# Train the from-scratch EfficientNetB0 with the same data and schedule as the other models.
history_eff_nt = model_eff_nt.fit(
    train_generator,
    validation_data=(val_images, val_labels),
    epochs=100,
)

In [None]:
# EfficientNetB0 backbone with ImageNet weights, used as a frozen feature extractor.
effnetB0_pt = tf.keras.applications.EfficientNetB0(
    include_top=False,
    weights='imagenet',
    input_shape=(128, 128, 3)
)

# Freeze the whole backbone (propagates to every layer inside it).
effnetB0_pt.trainable = False

inputs = tf.keras.Input(shape=(128, 128, 3))
# Keras' EfficientNet contains its own input rescaling/normalisation and expects
# raw pixel values in [0, 255]. The data pipeline already scales images to
# [0, 1], so restore the 0-255 range; otherwise the pretrained filters receive
# inputs far outside the distribution they were trained on.
x = tf.keras.layers.Rescaling(255.0)(inputs)
# training=False keeps BatchNormalization and the backbone's dropout layers in
# inference mode while only the head is trained (same as the ResNet50 model).
x = effnetB0_pt(x, training=False)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dense(512, activation='relu')(x)
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)

model_eff_pt = tf.keras.Model(inputs, outputs)

model_eff_pt.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

model_eff_pt.summary()

In [None]:
# Train only the classification head on top of the frozen, pretrained EfficientNetB0.
history_eff_pt = model_eff_pt.fit(
    train_generator,
    validation_data=(val_images, val_labels),
    epochs=100,
)

In [None]:
# One loss figure per trained model, in the order: custom CNN, ResNet50
# (pretrained, from scratch), EfficientNetB0 (pretrained, from scratch).
for model_history in (
    history_own,
    history_resnet_pt,
    history_resnet_nt,
    history_eff_pt,
    history_eff_nt,
):
    plot_loss(model_history)

In [None]:
# Predicted pneumonia probabilities on the test set, one array per trained model.
y_pred_own, y_pred_res_pt, y_pred_res_nt, y_pred_eff_pt, y_pred_eff_nt = [
    net.predict(test_images)
    for net in (model, model_pt, model_nt, model_eff_pt, model_eff_nt)
]

for i in range(len(test_labels)):
    if y_pred_own[i] > 0.5: