In [11]:
# Add the directory to sys.path
import sys
sys.path.append('/scratch/project_2010376')


In [12]:
import pandas as pd
import os
import cv2
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Paths
TRAIN_CSV_PATH = '/scratch/project_2010376/vinbigdata-chest-xray-abnormalities-detection/train.csv'
TRAIN_DIR = '/scratch/project_2010376/vinbigdata-chest-xray-abnormalities-detection/processed_train'
PATCH_SIZE = 64
PATCHED_TRAIN_DIR = '/scratch/project_2010376/vinbigdata-chest-xray-abnormalities-detection/patched_train'

# Read the CSV
df = pd.read_csv(TRAIN_CSV_PATH)

# Class mappings
class_mappings = {
    0: "Aortic enlargement",
    1: "Atelectasis",
    2: "Calcification",
    3: "Cardiomegaly",
    4: "Consolidation",
    5: "ILD",
    6: "Infiltration",
    7: "Lung Opacity",
    8: "Nodule/Mass",
    9: "Other lesion",
    10: "Pleural effusion",
    11: "Pleural thickening",
    12: "Pneumothorax",
    13: "Pulmonary fibrosis",
    14: "No finding"
}

# Extract and save patches
def extract_and_save_patches(image_id, bboxes, class_ids, patch_size, output_dir):
    image_path = os.path.join(TRAIN_DIR, f"{image_id}.png")  # Assume the extension is .png
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        return

    if len(bboxes) > 0:
        for idx, (bbox, class_id) in enumerate(zip(bboxes, class_ids)):
            x_min, y_min, x_max, y_max = bbox
            patch = image[int(y_min):int(y_max), int(x_min):int(x_max)]
            if patch.shape[0] >= patch_size and patch.shape[1] >= patch_size:
                patch_resized = cv2.resize(patch, (patch_size, patch_size))
                class_name = class_mappings[class_id]
                class_dir = os.path.join(output_dir, class_name)
                os.makedirs(class_dir, exist_ok=True)
                patch_filename = f"{image_id}_patch_{idx}.png"
                cv2.imwrite(os.path.join(class_dir, patch_filename), patch_resized)
    else:
        # Handle 'No finding' case
        class_name = class_mappings[14]  # "No finding" class
        class_dir = os.path.join(output_dir, class_name)
        os.makedirs(class_dir, exist_ok=True)
        # Save the entire image as a patch for 'No finding'
        if image.shape[0] >= patch_size and image.shape[1] >= patch_size:
            patch_resized = cv2.resize(image, (patch_size, patch_size))
            patch_filename = f"{image_id}_patch_no_finding.png"
            cv2.imwrite(os.path.join(class_dir, patch_filename), patch_resized)

# Create the patched train directory if it does not exist
os.makedirs(PATCHED_TRAIN_DIR, exist_ok=True)

# Group the dataframe by image_id and process each group
for image_id, group in df.groupby('image_id'):
    bboxes = group[['x_min', 'y_min', 'x_max', 'y_max']].dropna().values
    class_ids = group['class_id'].dropna().values
    if len(class_ids) > 0 and 14 in class_ids:
        # Handle 'No finding' case when explicitly marked
        class_name = class_mappings[14]  # "No finding" class
        class_dir = os.path.join(PATCHED_TRAIN_DIR, class_name)
        os.makedirs(class_dir, exist_ok=True)
        image_path = os.path.join(TRAIN_DIR, f"{image_id}.png")
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is not None and image.shape[0] >= PATCH_SIZE and image.shape[1] >= PATCH_SIZE:
            patch_resized = cv2.resize(image, (PATCH_SIZE, PATCH_SIZE))
            patch_filename = f"{image_id}_patch_no_finding.png"
            cv2.imwrite(os.path.join(class_dir, patch_filename), patch_resized)
    else:
        if len(bboxes) > 0:
            extract_and_save_patches(image_id, bboxes, class_ids, PATCH_SIZE, PATCHED_TRAIN_DIR)
        else:
            # Handle 'No finding' case when no bounding boxes are present
            class_name = class_mappings[14]  # "No finding" class
            class_dir = os.path.join(PATCHED_TRAIN_DIR, class_name)
            os.makedirs(class_dir, exist_ok=True)
            image_path = os.path.join(TRAIN_DIR, f"{image_id}.png")
            image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            if image is not None and image.shape[0] >= PATCH_SIZE and image.shape[1] >= PATCH_SIZE:
                patch_resized = cv2.resize(image, (PATCH_SIZE, PATCH_SIZE))
                patch_filename = f"{image_id}_patch_no_finding.png"
                cv2.imwrite(os.path.join(class_dir, patch_filename), patch_resized)


In [13]:
import os
import numpy as np
import cv2
from scipy.ndimage import zoom

def resize_with_ratio(image, dims, interpolation_flag):
    height, width = image.shape
    scale_factor = min(dims[0] / width, dims[1] / height)
    target_height = int(height * scale_factor)
    target_width = int(width * scale_factor)

    if interpolation_flag == "Spline":
        resized_image = zoom(image, scale_factor, order=3)
    else:
        resized_image = cv2.resize(image, (target_width, target_height), interpolation=interpolation_flag)

    pad_top = (dims[1] - target_height) // 2
    pad_bottom = dims[1] - target_height - pad_top
    pad_left = (dims[0] - target_width) // 2
    pad_right = dims[0] - target_width - pad_left

    padded_image = cv2.copyMakeBorder(resized_image, pad_top, pad_bottom, pad_left, pad_right, cv2.BORDER_CONSTANT)
    return padded_image


In [14]:
from tqdm import tqdm

def load_data(directory, image_size=(250, 250)):
    images = []
    labels = []
    category_mapping = {
        "No finding": "NORMAL",
        "Pneumothorax": "PNEUMONIA"
    }

    for original_category, mapped_category in category_mapping.items():
        category_path = os.path.join(directory, original_category)
        if not os.path.exists(category_path):
            print(f"Directory {category_path} does not exist. Skipping this category.")
            continue

        label = 0 if mapped_category == "NORMAL" else 1

        for file_name in tqdm(os.listdir(category_path)):
            image_path = os.path.join(category_path, file_name)
            image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            if image is None:
                print(f"Failed to load image: {image_path}. Skipping this file.")
                continue
            image = resize_with_ratio(image, (600, 600), cv2.INTER_AREA)
            image = image / 255.0
            image = np.expand_dims(image, axis=-1)
            images.append(image)
            labels.append(label)  # 0 for NORMAL, 1 for PNEUMONIA

    if not images or not labels:
        raise ValueError("No images or labels found. Please check the directory structure and file paths.")

    return np.array(images), np.array(labels)


In [15]:
from keras.layers import Input, SeparableConv2D, BatchNormalization, ELU, Add, ReLU, MaxPooling2D, GlobalAveragePooling2D, Dense, Dropout, Concatenate
from keras.models import Model

def residual_block(layer, k, in_filters, out_filters, strides=(1, 1), use_shortcut=False):
    shortcut = layer

    layer = SeparableConv2D(in_filters, kernel_size=k, strides=(1, 1), padding="same")(layer)
    layer = BatchNormalization()(layer)
    layer = ELU()(layer)

    layer = SeparableConv2D(in_filters, kernel_size=k, strides=strides, padding="same")(layer)
    layer = BatchNormalization()(layer)
    layer = ELU()(layer)

    layer = SeparableConv2D(out_filters, kernel_size=k, strides=(1, 1), padding="same")(layer)
    layer = BatchNormalization()(layer)

    if strides != (1, 1) or use_shortcut:
        shortcut = SeparableConv2D(out_filters, kernel_size=k, strides=strides, padding="same")(shortcut)
        shortcut = BatchNormalization()(shortcut)

    layer = Add()([shortcut, layer])
    layer = ReLU()(layer)
    return layer

def cnn_branch(input_tensor, k=(4, 4)):
    layer = SeparableConv2D(16, kernel_size=k, strides=(1, 1), padding="same")(input_tensor)
    layer = BatchNormalization()(layer)
    layer = ReLU()(layer)

    for filters in [16, 32, 64, 128, 256, 384]:
        layer = residual_block(layer, k, filters, filters * 2, use_shortcut=True)
        if filters != 384:
            layer = MaxPooling2D()(layer)
    
    layer = GlobalAveragePooling2D()(layer)
    return layer

def create_cnn_model(input_shape=(75, 75, 1), k=(5, 5)):
    input_tensor = Input(shape=input_shape)
    branch_1 = cnn_branch(input_tensor, k)
    branch_2 = cnn_branch(input_tensor, k)
    merged = Concatenate()([branch_1, branch_2])

    dense_layer = Dense(512, activation="relu")(merged)
    dense_layer = Dropout(0.5)(dense_layer)
    dense_layer = Dense(256, activation="relu")(dense_layer)
    dense_layer = Dropout(0.5)(dense_layer)
    dense_layer = Dense(128, activation="relu")(dense_layer)
    dense_layer = Dropout(0.5)(dense_layer)
    dense_layer = Dense(64, activation="relu")(dense_layer)
    dense_layer = Dropout(0.5)(dense_layer)
    output_layer = Dense(2, activation="softmax")(dense_layer)

    model = Model(inputs=input_tensor, outputs=output_layer)
    return model


In [16]:
from keras.callbacks import Callback, ModelCheckpoint, ReduceLROnPlateau
from sklearn.model_selection import StratifiedShuffleSplit

class MaxAccuracy(Callback):
    def __init__(self):
        super(MaxAccuracy, self).__init__()
        self.max_train_accuracy = 0.0
        self.max_val_accuracy = 0.0

    def on_epoch_end(self, epoch, logs=None):
        train_accuracy = logs['accuracy']
        val_accuracy = logs['val_accuracy']
        if train_accuracy > self.max_train_accuracy:
            self.max_train_accuracy = train_accuracy
        if val_accuracy > self.max_val_accuracy:
            self.max_val_accuracy = val_accuracy
        print(f"Max Train Accuracy: {self.max_train_accuracy:.4f}, Max Validation Accuracy: {self.max_val_accuracy:.4f}")

def train_model(model, model_loc, x, y, n_splits=1, test_size=0.25, random_state=47, n_epochs=30, min_learning_rate=1e-7, lr_decay_factor=0.8):
    splitter = StratifiedShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=random_state)
    aug_set_id = 0
    histories = []
    max_accuracy = MaxAccuracy()
    
    for train_ids, test_ids in splitter.split(x, y):
        checkpoint = ModelCheckpoint(f"model_{model_loc}_{aug_set_id}.keras", monitor="val_accuracy", save_best_only=True, mode="max")
        aug_set_id += 1

        x_train, x_test, y_train, y_test = x[train_ids], x[test_ids], y[train_ids], y[test_ids]
        
        histories.append(model.fit(x_train, y_train, batch_size=32, epochs=n_epochs, validation_data=(x_test, y_test),
                                   callbacks=[ReduceLROnPlateau(monitor="val_accuracy", factor=lr_decay_factor, patience=2, min_lr=min_learning_rate),
                                              checkpoint, max_accuracy]))
    return histories


In [17]:
data_directory = '/scratch/project_2010376/vinbigdata-chest-xray-abnormalities-detection/patched_train/'
images, labels = load_data(data_directory)

from keras.utils import to_categorical
labels = to_categorical(labels)


100%|██████████| 10606/10606 [00:23<00:00, 457.11it/s]
100%|██████████| 124/124 [00:00<00:00, 453.30it/s]


In [18]:
from keras.backend import clear_session
from keras.utils import plot_model
from keras.optimizers import RMSprop

# Ensure the clear_session function is called before starting training
clear_session()

n_splits = 8
res = 600
factor = res // n_splits
n_model = 0
histories = {}

# Loop over the n_splits x n_splits grid
for i in range(n_splits):
    for j in range(n_splits):
        print(f"Training model for split {i}-{j}...")  # Progress indicator

        # Create and compile the model
        model = create_cnn_model()
        plot_model(model, to_file=f'model_plot_{n_model}.png', show_shapes=True, show_layer_names=True)
        model.compile(optimizer=RMSprop(), metrics=["accuracy"], loss="categorical_crossentropy")

        # Select subset of images
        subset_images = images[:, i*factor:min((i+1)*factor, res), j*factor:min((j+1)*factor, res), :]
        
        # Train the model on the subset
        histories[n_model] = train_model(model, n_model, subset_images, labels)
        
        # Increment the model counter
        n_model += 1

        # Clear the session to free up resources
        clear_session()

print("Training completed.")


Training model for split 0-0...


2024-07-19 19:59:44.556577: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:282] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


Epoch 1/30
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 213ms/step - accuracy: 0.9658 - loss: 0.1418Max Train Accuracy: 0.9831, Max Validation Accuracy: 0.9884
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 232ms/step - accuracy: 0.9659 - loss: 0.1417 - val_accuracy: 0.9884 - val_loss: 0.4955 - learning_rate: 0.0010
Epoch 2/30
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 212ms/step - accuracy: 0.9883 - loss: 0.0906Max Train Accuracy: 0.9884, Max Validation Accuracy: 0.9884
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 224ms/step - accuracy: 0.9883 - loss: 0.0907 - val_accuracy: 0.9884 - val_loss: 0.0711 - learning_rate: 0.0010
Epoch 3/30
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 213ms/step - accuracy: 0.9883 - loss: 0.1148Max Train Accuracy: 0.9884, Max Validation Accuracy: 0.9884
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 225ms/step - accuracy: 0.9883 - lo

KeyboardInterrupt: 

In [19]:
import pickle
from keras.models import load_model

with open('histories.pkl', 'wb') as f:
    pickle.dump(histories, f)

model_accs = {}
model = create_cnn_model()
model.compile(optimizer=RMSprop(), metrics=["accuracy"], loss="binary_crossentropy")

for n_model in range(64):
    best_split = 0
    for n_split in range(1):
        model.load_weights(f"model_{n_model}_{n_split}.keras")
        eval = model.evaluate(images[:, int(n_model/n_splits)*factor:min((int(n_model/n_splits)+1)*factor, res), int(n_model%n_splits)*factor:min((int(n_model%n_splits)+1)*factor, res), :], labels)
        best_split = max(best_split, eval[1])
        model_accs[n_model] = best_split
    print(f"Region {n_model} : {best_split:.4f}")

models = {}
for n_model in model_accs:
    if model_accs[n_model] > 0.97:
        model = load_model(f"model_{n_model}_0.keras")
        models[n_model] = model


  saveable.load_own_variables(weights_store.get(inner_path))


[1m336/336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 36ms/step - accuracy: 0.9956 - loss: 0.0125
Region 0 : 0.9944
[1m336/336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 36ms/step - accuracy: 0.9990 - loss: 56.4199
Region 1 : 0.9966
[1m336/336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 36ms/step - accuracy: 0.9977 - loss: 0.0032
Region 2 : 0.9973
[1m336/336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 36ms/step - accuracy: 0.9983 - loss: 0.0031
Region 3 : 0.9977
[1m336/336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 36ms/step - accuracy: 0.9985 - loss: 0.0041
Region 4 : 0.9973
[1m336/336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 36ms/step - accuracy: 0.9987 - loss: 0.0024
Region 5 : 0.9967
[1m336/336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 36ms/step - accuracy: 0.9989 - loss: 0.0021
Region 6 : 0.9982
[1m336/336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 36ms/step - accuracy: 0.9979 

FileNotFoundError: [Errno 2] No such file or directory: 'model_20_0.keras'

In [20]:
import os
import pickle
from keras.models import load_model

# Save the training histories
with open('histories.pkl', 'wb') as f:
    pickle.dump(histories, f)

model_accs = {}
model = create_cnn_model()
model.compile(optimizer=RMSprop(), metrics=["accuracy"], loss="binary_crossentropy")

# Loop through the models that have been trained
for n_model in range(20):  # Adjust the range according to the number of models trained
    model_file = f"model_{n_model}_0.keras"
    if os.path.exists(model_file):
        best_split = 0
        for n_split in range(1):  # Adjust the range if you have multiple splits
            model.load_weights(model_file)
            eval = model.evaluate(images[:, int(n_model/n_splits)*factor:min((int(n_model/n_splits)+1)*factor, res), int(n_model%n_splits)*factor:min((int(n_model%n_splits)+1)*factor, res), :], labels)
            best_split = max(best_split, eval[1])
        model_accs[n_model] = best_split
        print(f"Region {n_model} : {best_split:.4f}")
    else:
        print(f"Model file {model_file} not found. Skipping this model.")

# Save the models that have an accuracy above the threshold
models = {}
for n_model in model_accs:
    if model_accs[n_model] > 0.97:
        model_file = f"model_{n_model}_0.keras"
        if os.path.exists(model_file):
            model = load_model(model_file)
            models[n_model] = model

print("Model evaluation completed.")


[1m336/336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 36ms/step - accuracy: 0.9956 - loss: 0.0125
Region 0 : 0.9944
[1m336/336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 36ms/step - accuracy: 0.9990 - loss: 56.4199
Region 1 : 0.9966
[1m336/336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 36ms/step - accuracy: 0.9977 - loss: 0.0032
Region 2 : 0.9973
[1m336/336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 36ms/step - accuracy: 0.9983 - loss: 0.0031
Region 3 : 0.9977
[1m336/336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 36ms/step - accuracy: 0.9985 - loss: 0.0041
Region 4 : 0.9973
[1m336/336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 36ms/step - accuracy: 0.9987 - loss: 0.0024
Region 5 : 0.9967
[1m336/336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 36ms/step - accuracy: 0.9989 - loss: 0.0021
Region 6 : 0.9982
[1m336/336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 36ms/step - accuracy: 0.9979 

In [28]:
import os
import pickle
import numpy as np
import cv2
from tqdm import tqdm
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import RMSprop

# Function to resize with ratio
def resize_with_ratio(image, dims, interpolation_flag):
    height, width = image.shape
    scale_factor = min(dims[0] / width, dims[1] / height)
    target_height = int(height * scale_factor)
    target_width = int(width * scale_factor)

    if interpolation_flag == "Spline":
        resized_image = zoom(image, scale_factor, order=3)
    else:
        resized_image = cv2.resize(image, (target_width, target_height), interpolation=interpolation_flag)

    pad_top = (dims[1] - target_height) // 2
    pad_bottom = dims[1] - target_height - pad_top
    pad_left = (dims[0] - target_width) // 2
    pad_right = dims[0] - target_width - pad_left

    padded_image = cv2.copyMakeBorder(resized_image, pad_top, pad_bottom, pad_left, pad_right, cv2.BORDER_CONSTANT)
    return padded_image

# Function to preprocess image
def preprocess_image(image_path, target_size=(75, 75)):
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    image = resize_with_ratio(image, target_size, cv2.INTER_AREA)
    image = image / 255.0
    image = np.expand_dims(image, axis=-1)
    return image

# Function to load new data
def load_new_data(directory, target_size=(75, 75)):
    images = []
    image_paths = []
    for file_name in tqdm(os.listdir(directory)):
        image_path = os.path.join(directory, file_name)
        image = preprocess_image(image_path, target_size=target_size)
        images.append(image)
        image_paths.append(image_path)
    return np.array(images), image_paths

# Directory of new dataset
new_data_directory = '/projappl/project_2010376/Pneumonia Analysis Dataset'
normal_directory = os.path.join(new_data_directory, 'normal')
pneumonia_directory = os.path.join(new_data_directory, 'pneumonia')

# Load new data
normal_images, normal_image_paths = load_new_data(normal_directory, target_size=(75, 75))
pneumonia_images, pneumonia_image_paths = load_new_data(pneumonia_directory, target_size=(75, 75))

# Combine the images and labels for evaluation
images = np.concatenate((normal_images, pneumonia_images), axis=0)
labels = np.array([[1, 0]] * len(normal_images) + [[0, 1]] * len(pneumonia_images))

# Function to evaluate a model
def evaluate_model(model_path, images, labels):
    model = load_model(model_path)
    model.compile(optimizer=RMSprop(), metrics=["accuracy"], loss="binary_crossentropy")
    eval = model.evaluate(images, labels, verbose=0)
    return eval[1]  # Return accuracy

# Evaluate all trained models to find the best one
model_accs = {}
for n_model in range(20):  # Adjust the range according to the number of models trained
    model_file = f"model_{n_model}_0.keras"
    if os.path.exists(model_file):
        accuracy = evaluate_model(model_file, images, labels)
        model_accs[model_file] = accuracy
        print(f"Model: {model_file}, Accuracy: {accuracy:.4f}")
    else:
        print(f"Model file {model_file} not found. Skipping this model.")

# Save the evaluation history
with open('model_accs.pkl', 'wb') as f:
    pickle.dump(model_accs, f)

# Identify the best model
best_model_path = max(model_accs, key=model_accs.get)
best_accuracy = model_accs[best_model_path]
print(f"Best Model: {best_model_path}, Accuracy: {best_accuracy:.4f}")

# Load the best model for future predictions
best_model = load_model(best_model_path)


100%|██████████| 1525/1525 [00:17<00:00, 86.38it/s]
100%|██████████| 1525/1525 [00:08<00:00, 176.05it/s]


Model: model_0_0.keras, Accuracy: 0.4607
Model: model_1_0.keras, Accuracy: 0.4748
Model: model_2_0.keras, Accuracy: 0.4443
Model: model_3_0.keras, Accuracy: 0.4931
Model: model_4_0.keras, Accuracy: 0.4993
Model: model_5_0.keras, Accuracy: 0.4711
Model: model_6_0.keras, Accuracy: 0.4826
Model: model_7_0.keras, Accuracy: 0.1757
Model: model_8_0.keras, Accuracy: 0.5000
Model: model_9_0.keras, Accuracy: 0.5052
Model: model_10_0.keras, Accuracy: 0.4626
Model: model_11_0.keras, Accuracy: 0.4787
Model: model_12_0.keras, Accuracy: 0.4934
Model: model_13_0.keras, Accuracy: 0.3816
Model: model_14_0.keras, Accuracy: 0.2990
Model: model_15_0.keras, Accuracy: 0.4452
Model: model_16_0.keras, Accuracy: 0.5469
Model: model_17_0.keras, Accuracy: 0.5000
Model: model_18_0.keras, Accuracy: 0.5000
Model: model_19_0.keras, Accuracy: 0.4885
Best Model: model_16_0.keras, Accuracy: 0.5469


In [33]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Data augmentation generator
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Example of augmenting a batch of images
def augment_images(images, labels):
    augmented_images, augmented_labels = [], []
    for image, label in zip(images, labels):
        image = np.expand_dims(image, 0)  # Add batch dimension
        for _ in range(5):  # Generate 5 augmented images per original image
            augmented = next(datagen.flow(image, batch_size=1))
            augmented_images.append(augmented[0])
            augmented_labels.append(label)
    return np.array(augmented_images), np.array(augmented_labels)

# Augment the data
augmented_images, augmented_labels = augment_images(images, labels)


In [34]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.optimizers import RMSprop

# Fine-tune the best model on the new dataset
best_model = load_model(best_model_path)

# Compile the model
best_model.compile(optimizer=RMSprop(learning_rate=1e-5), metrics=["accuracy"], loss="binary_crossentropy")

# Fine-tune the model
checkpoint = ModelCheckpoint('best_model_finetuned.keras', monitor='val_accuracy', save_best_only=True)
early_stop = EarlyStopping(monitor='val_accuracy', patience=5)

history = best_model.fit(
    augmented_images, augmented_labels,
    validation_split=0.2,
    epochs=50,
    batch_size=32,
    callbacks=[checkpoint, early_stop]
)

# Load the fine-tuned model
best_model_finetuned = load_model('best_model_finetuned.keras')


Epoch 1/50
[1m382/382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 231ms/step - accuracy: 0.6285 - loss: 7.7824 - val_accuracy: 0.7410 - val_loss: 0.9143
Epoch 2/50
[1m382/382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 227ms/step - accuracy: 0.6364 - loss: 0.8635 - val_accuracy: 0.7964 - val_loss: 0.7185
Epoch 3/50
[1m382/382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 224ms/step - accuracy: 0.6487 - loss: 0.6689 - val_accuracy: 0.8033 - val_loss: 0.7338
Epoch 4/50
[1m382/382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 220ms/step - accuracy: 0.6789 - loss: 0.6252 - val_accuracy: 0.7813 - val_loss: 0.7721
Epoch 5/50
[1m382/382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 221ms/step - accuracy: 0.7281 - loss: 0.5862 - val_accuracy: 0.8033 - val_loss: 0.8067
Epoch 6/50
[1m382/382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 225ms/step - accuracy: 0.7774 - loss: 0.5576 - val_accuracy: 0.8351 - val_loss: 0.8256
Epoch 7/5

In [35]:
# Evaluate the fine-tuned model on the new dataset
normal_predictions = best_model_finetuned.predict(normal_images)
pneumonia_predictions = best_model_finetuned.predict(pneumonia_images)

# Function to evaluate predictions
def evaluate_predictions(image_paths, predictions, true_label):
    correct = 0
    incorrect = 0
    for image_path, prediction in zip(image_paths, predictions):
        predicted_label = 'PNEUMONIA' if np.argmax(prediction) == 1 else 'NORMAL'
        if predicted_label == true_label:
            correct += 1
        else:
            incorrect += 1
        print(f"Image: {image_path}, Prediction: {predicted_label}, Actual: {true_label}")
    return correct, incorrect

# Evaluate predictions
normal_correct, normal_incorrect = evaluate_predictions(normal_image_paths, normal_predictions, 'NORMAL')
pneumonia_correct, pneumonia_incorrect = evaluate_predictions(pneumonia_image_paths, pneumonia_predictions, 'PNEUMONIA')

# Summary
total_correct = normal_correct + pneumonia_correct
total_incorrect = normal_incorrect + pneumonia_incorrect
total_images = total_correct + total_incorrect

print(f"Total Correct: {total_correct}")
print(f"Total Incorrect: {total_incorrect}")
print(f"Accuracy: {total_correct / total_images:.4f}")


[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 58ms/step
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 36ms/step
Image: /projappl/project_2010376/Pneumonia Analysis Dataset/normal/00000362_004.png, Prediction: NORMAL, Actual: NORMAL
Image: /projappl/project_2010376/Pneumonia Analysis Dataset/normal/00000054_008.png, Prediction: NORMAL, Actual: NORMAL
Image: /projappl/project_2010376/Pneumonia Analysis Dataset/normal/00000633_001.png, Prediction: NORMAL, Actual: NORMAL
Image: /projappl/project_2010376/Pneumonia Analysis Dataset/normal/00000869_001.png, Prediction: NORMAL, Actual: NORMAL
Image: /projappl/project_2010376/Pneumonia Analysis Dataset/normal/00000180_001.png, Prediction: NORMAL, Actual: NORMAL
Image: /projappl/project_2010376/Pneumonia Analysis Dataset/normal/00000373_001.png, Prediction: PNEUMONIA, Actual: NORMAL
Image: /projappl/project_2010376/Pneumonia Analysis Dataset/normal/00000280_000.png, Prediction: NORMAL, Actual: NORMAL
Image: /p