<a href="https://colab.research.google.com/github/sarashahin/ML_Research/blob/main/Lung_research_ResNet50.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'lung-resnet:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F4494542%2F7699872%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240322%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240322T151008Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3Daa65685ca2f2da00e751c49fc2bb819fea7938d81822b86599d769315be28860f808319b4df8ec8561a825c8b6ab2452b83b8fc30df1be68bce88abce6f3b80e641dd0b9b4dd860b5ea4b806f54dd620171614efbc91f231202e3bcb22ef5cfcdc3d4a91eda6011403884033e54a0ee2b1ed2b56fde4ef5012434702b6a32daad483abad62d3e239c08988661a6060e88aea22c58a29dd8038365a92cf4fcc4d7419d891eeb740c505ba1dd051784d739714bba9c167a72d6664391b9d4c505d411cb4d02ba1f192e68f649ce9dc87053e2f1872aa5b266c8ec43a9b365cb568997734ebf66df8f9da3a4cdd82f5e77095684d11ae5a429d07e56e68c7ea6bc3'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every configured data source and unpack it below KAGGLE_INPUT_PATH.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Each entry has the form "<directory>:<percent-encoded URL>"; split only
    # on the first colon so nothing after it can break the unpacking.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            # The Content-Length header may be absent; in that case the
            # progress bar stays empty and only the byte counter advances.
            content_length = fileres.headers['content-length']
            total_length = int(content_length) if content_length else 0
            print(f'Downloading {directory}, {content_length} bytes compressed')
            dl = 0
            while True:
                data = fileres.read(CHUNK_SIZE)
                if not data:
                    break
                dl += len(data)
                tfile.write(data)
                done = min(50, int(50 * dl / total_length)) if total_length else 0
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
            # Push buffered bytes to disk: the tar branch reopens the file by
            # name, and the zip branch reads through this same handle.
            tfile.flush()
            tfile.seek(0)
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name; reusing the name `tarfile`
                # would replace the module for every later data source.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


In [None]:
#  import important Libraries

from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.densenet import DenseNet121
from tensorflow.keras.layers import GlobalAveragePooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.preprocessing.image import load_img
from sklearn.utils import class_weight
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.metrics import Precision, Recall, AUC
from tensorflow.keras.callbacks import LambdaCallback
from tensorflow.keras.utils import plot_model
from sklearn.model_selection import KFold
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing.image import DirectoryIterator
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import GaussianNoise
from sklearn.metrics import classification_report, accuracy_score


import tensorflow as tf
import numpy as np
import pandas as pd
import random
import os
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Query the visible GPU devices once, report how many there are, and leave
# the device list as the cell's result.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))
gpu_devices

In [None]:
# Seed Python's `random`, NumPy and TensorFlow with the same value so that
# runs are repeatable.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
# ResNet50 backbone initialised with ImageNet weights; include_top=False
# leaves out the network's own classification layers so a custom head can be
# attached later.
base_model = ResNet50(
    include_top=False,
    weights='imagenet',
)


In [None]:
# freeze/Unfreeze the layers of the base model
# for layer in base_model.layers:
#     layer.trainable = False

# for layer in base_model.layers[-2:]:
#     layer.trainable = True

# freeze the initial layers
# Keras layers are trainable by default, so freezing means setting
# `trainable` to False; assigning True here would change nothing.
for layer in base_model.layers[:4]:
    layer.trainable = False

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# Dataset roots inside the Kaggle input directory; each split lives in a
# nested folder named after the split.
dataset_path_train, dataset_path_test = (
    '/kaggle/input/lung-resnet/train 2',
    '/kaggle/input/lung-resnet/test 2',
)
train_path, test_path = (
    os.path.join(dataset_path_train, 'train'),
    os.path.join(dataset_path_test, 'test'),
)



In [None]:
def display_random_images(class_names, train_path, num_images=5):
    """Show a grid of randomly chosen images, one row per class.

    Args:
        class_names: Names of the class sub-directories of ``train_path``.
        train_path: Directory that contains one sub-directory per class.
        num_images: Number of grid columns, i.e. the maximum number of images
            drawn per class. Classes with fewer files show fewer images.
    """
    # squeeze=False keeps `axes` two-dimensional even for a single class or a
    # single column, so axes[row, col] is always a valid lookup.
    fig, axes = plt.subplots(len(class_names), num_images, figsize=(20, 15), squeeze=False)
    for ax in axes.flat:
        ax.axis('off')

    for row, cls in enumerate(class_names):
        path = os.path.join(train_path, cls)
        images = os.listdir(path)
        # random.sample raises ValueError when asked for more items than
        # exist, so cap the request at the number of available files.
        selected_images = random.sample(images, min(num_images, len(images)))
        print(f"Class: {cls}")
        for col, img in enumerate(selected_images):
            image = load_img(os.path.join(path, img), target_size=(240, 240))
            ax = axes[row, col]
            ax.imshow(image)
            ax.set_title(cls)

    plt.tight_layout()
    plt.show()



class_names = ['BENIGN', 'MALIGNANT', 'NORMAL']
display_random_images(class_names, train_path)

In [None]:

# Class balance of the training set.

def plot_class_distribution(train_path, class_names):
    """Draw a bar chart of the number of directory entries per class.

    Args:
        train_path: Directory that contains one sub-directory per class.
        class_names: Names of the class sub-directories to count.
    """
    class_counts = {
        cls: len(os.listdir(os.path.join(train_path, cls)))
        for cls in class_names
    }
    bar_labels = list(class_counts)
    bar_heights = [class_counts[label] for label in bar_labels]

    sns.barplot(x=bar_labels, y=bar_heights)
    plt.title("Class Distribution")
    plt.xlabel("Classes")
    plt.ylabel("Number of images")
    plt.show()

plot_class_distribution(train_path, class_names)


In [None]:
def plot_class_distribution(test_path_path, class_names):
    """Draw a bar chart of the number of directory entries per class.

    Args:
        test_path_path: Directory that contains one sub-directory per class.
        class_names: Names of the class sub-directories to count.
    """
    class_counts = {}
    for cls in class_names:
        # Count inside the directory that was passed in, not the module-level
        # `test_path`, so the function honours its argument.
        path = os.path.join(test_path_path, cls)
        count = len(os.listdir(path))
        class_counts[cls] = count

    sns.barplot(x=list(class_counts.keys()), y=list(class_counts.values()))
    plt.title("Class Distribution")
    plt.xlabel("Classes")
    plt.ylabel("Number of images")
    plt.show()

plot_class_distribution(test_path, class_names)

In [None]:
# SNR (dB) = 10 * log10(signal power / noise power)
#
# signal power: mean of the squared image pixel values
#
# noise power: variance of the noise



def calculate_snr(image, snr_db):
    """Return the noise power that gives ``image`` the requested SNR.

    Despite its name, the function returns a noise power (variance), not an
    SNR: ``signal_power / 10 ** (snr_db / 10)``, where the signal power is the
    mean of the squared pixel values.

    Args:
        image: Array-like of pixel values.
        snr_db: Target signal-to-noise ratio in decibels.

    Returns:
        The noise power as a floating-point scalar.
    """
    # Square in float64: squaring an integer image (e.g. uint8) would wrap
    # around and understate the signal power.
    pixels = np.asarray(image, dtype=np.float64)
    signal_power = np.mean(pixels ** 2)

    # Convert the SNR from dB to a linear ratio and solve for the noise power.
    noise_power = signal_power / (10 ** (snr_db / 10))
    return noise_power


In [None]:
def add_gaussian_noise(image, snr_db, clip_range=(0, 255)):
    """Return ``image`` with zero-mean Gaussian noise at the requested SNR.

    Args:
        image: Array of pixel values of any shape.
        snr_db: Target signal-to-noise ratio in decibels; the noise variance
            is obtained from ``calculate_snr``.
        clip_range: ``(low, high)`` bounds applied to the noisy result, or
            ``None`` to skip clipping. The default matches 0-255 pixel data.

    Returns:
        A new floating-point array with the same shape as ``image``.
    """
    pixels = np.asarray(image, dtype=np.float64)
    noise_power = calculate_snr(image, snr_db)
    sigma = np.sqrt(noise_power)
    # Draw noise with the image's own shape so 2-D (grayscale) input works
    # as well as height x width x channel input.
    gauss = np.random.normal(0, sigma, pixels.shape)
    noisy = pixels + gauss
    if clip_range is None:
        return noisy
    low, high = clip_range
    return np.clip(noisy, low, high)


def add_salt_pepper_noise(image, snr_db, salt_value=None, pepper_value=None):
    """Return a copy of ``image`` corrupted with salt-and-pepper noise.

    The fraction of corrupted elements is ``0.20 / (1 + snr_linear)`` with
    ``snr_linear = 10 ** (snr_db / 10)``; half of them become salt and half
    pepper (both counts are rounded up).

    Args:
        image: Array of pixel values of any shape. It is not modified.
        snr_db: Signal-to-noise ratio in decibels that controls the amount of
            corruption (higher SNR, fewer corrupted elements).
        salt_value: Value written for salt. Defaults to the image maximum so
            the noise matches whatever intensity scale the image uses.
        pepper_value: Value written for pepper. Defaults to the image minimum.

    Returns:
        A new array with the same shape and dtype as ``image``.
    """
    noisy = np.array(image, copy=True)
    if noisy.size == 0:
        return noisy

    s_vs_p = 0.5
    snr_linear = 10 ** (snr_db / 10)  # dB -> linear ratio
    corruption_ratio = 0.20 / (1 + snr_linear)
    amount = corruption_ratio * noisy.size
    num_salt = int(np.ceil(amount * s_vs_p))
    num_pepper = int(np.ceil(amount * (1 - s_vs_p)))

    # Resolve the default extremes before any element is overwritten.
    if salt_value is None:
        salt_value = noisy.max()
    if pepper_value is None:
        pepper_value = noisy.min()

    # randint's upper bound is exclusive, so `dim` (not `dim - 1`) reaches the
    # last index. The coordinates must be a tuple: indexing with a list of
    # arrays is not per-element indexing in current NumPy.
    salt_coords = tuple(np.random.randint(0, dim, num_salt) for dim in noisy.shape)
    noisy[salt_coords] = salt_value
    pepper_coords = tuple(np.random.randint(0, dim, num_pepper) for dim in noisy.shape)
    noisy[pepper_coords] = pepper_value
    return noisy




In [None]:
# define SNR range between 30 - 40 dB (five evenly spaced values)
snr_range = np.linspace(30, 40, num=5)

# global variables to log noise details
global_noise_log = []

def custom_preprocessing_function(image):
    """Add random noise to ``image`` and then apply ResNet50 preprocessing.

    One SNR value is drawn from the module-level ``snr_range`` and one noise
    type from ``['gaussian', 'salt_pepper']``; both choices are appended to
    ``global_noise_log``.

    Args:
        image: Image array as handed over by ``ImageDataGenerator``.
            # assumes raw pixel values in [0, 255] — TODO confirm

    Returns:
        The noisy image after ``resnet50.preprocess_input``.
    """
    snr_db = random.choice(snr_range)
    noise_type = random.choice(['gaussian', 'salt_pepper'])

    # Corrupt the raw pixels first. add_gaussian_noise clips its result to
    # [0, 255]; applied after preprocess_input (which zero-centres each colour
    # channel) that clip would erase every negative value.
    if noise_type == 'gaussian':
        image = add_gaussian_noise(image, snr_db)
    else:
        image = add_salt_pepper_noise(image, snr_db)

    # log noise details
    global_noise_log.append({'noise_type': noise_type, 'snr': snr_db})

    return tf.keras.applications.resnet50.preprocess_input(image)


In [None]:
# function to display images
def plotImages(images_arr, n_images=5):
    """Show up to ``n_images`` images side by side in a single row.

    Args:
        images_arr: Iterable of image arrays; extra images beyond
            ``n_images`` are ignored.
        n_images: Number of columns in the figure.

    NOTE(review): batches from the generators in this notebook have already
    been through ``preprocess_input``, so the displayed colours are presumably
    not the original ones — confirm if faithful previews are needed.
    """
    # squeeze=False always yields a 2-D axes array, so n_images=1 works too.
    fig, axes = plt.subplots(1, n_images, figsize=(30, 35), squeeze=False)
    for ax in axes.flat:
        ax.axis('off')
    for img, ax in zip(images_arr, axes.flat):
        # Clip before the uint8 cast; out-of-range values would otherwise
        # wrap around instead of saturating.
        ax.imshow(np.clip(img, 0, 255).astype('uint8'))
    plt.tight_layout()
    plt.show()

Using K-Fold cross-validation, the data is split into training and validation sets multiple times. For each split, data generators are created to feed both noisy and clean images into the model for training and evaluation.

In [None]:
# get all image paths and their labels
def get_images_and_labels(base_dir, classes=('BENIGN', 'MALIGNANT', 'NORMAL')):
    """Collect the file paths below ``base_dir`` together with their labels.

    Args:
        base_dir: Directory that contains one sub-directory per class.
        classes: Names of the class sub-directories, visited in this order.

    Returns:
        A ``(images, labels)`` tuple of equally long lists: the path of every
        directory entry and the name of the class folder it was found in.
    """
    images = []
    labels = []
    for cls in classes:
        cls_folder = os.path.join(base_dir, cls)
        # os.listdir returns entries in arbitrary order; sorting keeps row
        # order (and therefore any seeded split of the rows) reproducible.
        cls_images = [os.path.join(cls_folder, filename)
                      for filename in sorted(os.listdir(cls_folder))]
        images.extend(cls_images)
        labels.extend([cls] * len(cls_images))
    return images, labels

train_images, train_labels = get_images_and_labels(train_path)

In [None]:
# create a DataFrame with the file paths and labels
df = pd.DataFrame({'filename': train_images, 'class': train_labels})

In [None]:
# define ImageDataGenerators
# is used for data augmentation and preprocessing of training a machine learning model with noisy images
train_datagen_noise = ImageDataGenerator(preprocessing_function=
        custom_preprocessing_function,
        rotation_range=10,
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True,
        # vertical_flip=True,
        shear_range=0.1,  #
        zoom_range=0.1,
        brightness_range=[0.8,1.2])


In [None]:
train_datagen_clean = ImageDataGenerator(preprocessing_function=
        preprocess_input,
        rotation_range=10,
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True,
        # vertical_flip=True,
        shear_range=0.1,
        zoom_range=0.1,
        brightness_range=[0.8,1.2])

In [None]:
val_datagen_clean = ImageDataGenerator(preprocessing_function=preprocess_input)
val_datagen_noise = ImageDataGenerator(preprocessing_function=custom_preprocessing_function)

In [None]:
# perform K-Fold Cross-Validation
kf = KFold(n_splits=3, shuffle=True, random_state=42)

In [None]:
# loop iterates over each fold//  get the indices of the training and validation sets
for fold, (train_idx, val_idx) in enumerate(kf.split(df)):
    print(f"Running fold {fold + 1}")
    train_df = df.iloc[train_idx]
    val_df = df.iloc[val_idx]

    # Create the generators using flow_from_dataframe
    train_generator_noise = train_datagen_noise.flow_from_dataframe(
        dataframe=train_df,
        directory=None,
        x_col='filename',
        y_col='class',
        target_size=(224, 224),
        batch_size=32,
        class_mode='categorical'
    )

    train_generator_clean = train_datagen_clean.flow_from_dataframe(
        dataframe=train_df,
        directory=None,
        x_col='filename',
        y_col='class',
        target_size=(224, 224),
        batch_size=32,
        class_mode='categorical'
    )

    val_generator_clean = val_datagen_clean.flow_from_dataframe(
        dataframe=val_df,
        directory=None,
        x_col='filename',
        y_col='class',
        target_size=(224, 224),
        batch_size=32,
        class_mode='categorical'
    )

    val_generator_noise = val_datagen_noise.flow_from_dataframe(
    dataframe=val_df,
    directory=None,
    x_col='filename',
    y_col='class',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical'
    )

In [None]:
    # Pull one batch from each training generator and show its first five images.
    for preview_title, preview_generator in (
        ("train_generator_noise:", train_generator_noise),
        ("train_generator_clean:", train_generator_clean),
    ):
        print(preview_title)
        images, labels = next(preview_generator)
        plotImages(images[:5])

In [None]:
    # Classification head on top of the base model: global average pooling,
    # then two L2-regularised dense layers (1024 and 512 units), each followed
    # by 50% dropout.
    x = GlobalAveragePooling2D()(base_model.output)
    for hidden_units in (1024, 512):
        x = Dense(hidden_units, activation='relu', kernel_regularizer=l2(0.4))(x)
        x = Dropout(0.5)(x)

    # Softmax output over the 3 classes: benign, malignant, normal.
    predictions = Dense(3, activation='softmax')(x)

    # Final model from the base model's input to the new output layer.
    model = Model(inputs=base_model.input, outputs=predictions)

    model.summary()

In [None]:
    plot_model(model, to_file='model_architecture.png', show_shapes=True, show_layer_names=True)

In [None]:
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
              loss='categorical_crossentropy',
              metrics=['accuracy', Precision(), Recall(), AUC()])

In [None]:
    # Balanced class weights for the current training fold, computed from the
    # class indices of both training generators.
    train_classes = np.concatenate([train_generator_clean.classes, train_generator_noise.classes])
    present_classes = np.unique(train_classes)

    class_weights = class_weight.compute_class_weight(
        class_weight='balanced',
        classes=present_classes,
        y=train_classes)

    # Key each weight by the class index it was computed for. Enumeration
    # position only equals the class index when every class is present.
    class_weights_dict = {
        int(class_index): float(weight)
        for class_index, weight in zip(present_classes, class_weights)
    }

In [None]:
    def plot_class_distribution_with_weights(train_path, class_names, class_weights_dict):
        """Bar-plot each class's file count multiplied by its class weight.

        Args:
            train_path: Directory that contains one sub-directory per class.
            class_names: Names of the class sub-directories.
            class_weights_dict: Weights looked up by the position of each
                class in ``class_names``.
        """
        class_counts = {
            cls: len(os.listdir(os.path.join(train_path, cls)))
            for cls in class_names
        }

        # Scale every count by the weight stored under the class's position.
        weighted_counts = {}
        for position, cls in enumerate(class_counts):
            weighted_counts[cls] = class_counts[cls] * class_weights_dict[position]

        bar_labels = list(weighted_counts)
        bar_heights = [weighted_counts[label] for label in bar_labels]
        sns.barplot(x=bar_labels, y=bar_heights, color='seagreen')
        plt.title("Weighted Class Distribution")
        plt.xlabel("Classes")
        plt.ylabel("Weighted number of images")
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.show()


    plot_class_distribution_with_weights(train_path, class_names, class_weights_dict)

In [None]:
    # define the EarlyStopping callback
    early_stopping = EarlyStopping(
        monitor='val_loss',  # monitor the validation loss
        patience=15, #number of epochs with no improvement the learning rate will be reduced
        verbose=1,    # number of epochs with no improvement after which training will be stopped
        restore_best_weights=True)  # restore model weights from the epoch with the best value of the monitored quantity

In [None]:
    # use a callback for learning rate scheduling //// avoiding overfitting and can be better convergence during training
    lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10,verbose=1, mode='auto',min_delta=0.0001, cooldown=0, min_lr=0)# min_lr: meaning there is no lower limit

In [None]:
    model_checkpoint_clean = ModelCheckpoint('clean_best_model.h5', save_best_only=True)  # Save the best model
    model_checkpoint_noise = ModelCheckpoint('noise_best_model.h5', save_best_only=True)  # Save the best model

In [None]:
    # Records a randomly drawn SNR and noise type for every epoch together
    # with the validation accuracy, and plots all three when training ends.

    class NoiseScheduler(tf.keras.callbacks.Callback):
        """Keras callback that logs a random SNR and noise type per epoch.

        The drawn values are only stored and printed by this class; nothing in
        it forwards them to a data generator.
        NOTE(review): confirm whether the log is meant to describe the noise
        actually applied to the training images.

        Attributes (all lists gain one entry per epoch):
            snr_log: SNR values drawn in ``on_epoch_begin``.
            noise_type_log: Noise types drawn in ``on_epoch_begin``.
            validation_accuracy: ``val_accuracy`` from the epoch logs, or
                ``None`` when the logs do not contain it.
            epoch_log: 1-based epoch numbers.
        """

        def __init__(self, snr_range, noise_types):
            """Store the candidate SNR values and noise types to draw from."""
            super().__init__()
            self.snr_range = snr_range
            self.noise_types = noise_types
            self.snr_log = []
            self.noise_type_log = []
            self.validation_accuracy = []
            self.epoch_log = []

        def on_epoch_begin(self, epoch, logs=None):
            """Draw and record an SNR value and a noise type for this epoch."""
            self.snr_log.append(random.choice(self.snr_range))
            self.noise_type_log.append(random.choice(self.noise_types))

        def on_epoch_end(self, epoch, logs=None):
            """Record the epoch's validation accuracy and print the drawn values."""
            # `logs` defaults to None, so fall back to an empty mapping.
            val_acc = (logs or {}).get('val_accuracy')
            self.validation_accuracy.append(val_acc)
            self.epoch_log.append(epoch + 1)
            print(f"Epoch {epoch+1} - SNR: {self.snr_log[-1]} dB, Noise Type: {self.noise_type_log[-1]}")

        def on_train_end(self, logs=None):
            """Plot SNR, noise type and validation accuracy over the epochs."""
            n_epochs = len(self.validation_accuracy)
            if n_epochs == 0:
                return  # nothing was recorded, so there is nothing to plot

            epochs = list(range(1, n_epochs + 1))
            # Only use entries of epochs that also finished, so every series
            # has the same length as `epochs`.
            snr_values = list(self.snr_log[:n_epochs])
            noise_labels = self.noise_type_log[:n_epochs]
            # Missing accuracies become NaN so they leave a gap in the curve.
            accuracies = [np.nan if acc is None else acc for acc in self.validation_accuracy]

            fig, ax1 = plt.subplots()

            ax1.set_xlabel('Epoch')
            ax1.set_ylabel('SNR (dB)', color='tab:red')
            ax1.plot(epochs, snr_values, color='tab:red', marker='o', label='SNR (dB)')
            ax1.tick_params(axis='y', labelcolor='tab:red')

            # Second y-axis: the categorical noise type, plotted by its
            # position in `noise_types` and labelled with its name.
            ax2 = ax1.twinx()
            known_types = list(self.noise_types)
            noise_type_values = [known_types.index(nt) for nt in noise_labels]
            ax2.set_ylabel('Noise Type', color='tab:green')
            ax2.scatter(epochs, noise_type_values, color='tab:green', label='Noise Type')
            ax2.set_yticks(range(len(known_types)))
            ax2.set_yticklabels(known_types)
            ax2.tick_params(axis='y', labelcolor='tab:green')

            # Third y-axis, shifted outward so it does not overlap the second.
            ax3 = ax1.twinx()
            ax3.spines['right'].set_position(('outward', 60))
            ax3.set_ylabel('Validation Accuracy', color='tab:blue')
            ax3.plot(epochs, accuracies, color='tab:blue', marker='x', label='Validation Accuracy')
            ax3.tick_params(axis='y', labelcolor='tab:blue')

            # Set the title before tight_layout so the layout leaves room for it.
            ax1.set_title('SNR, Noise Type, and Validation Accuracy over Epochs')
            fig.tight_layout()
            plt.show()





In [None]:
    # Clean phase: fit on the clean training generator, validate on the clean
    # validation generator, with class weighting and the shared callbacks.
    clean_callbacks = [early_stopping, lr_scheduler, model_checkpoint_clean]
    history_clean = model.fit(
        train_generator_clean,
        validation_data=val_generator_clean,
        epochs=100,
        class_weight=class_weights_dict,
        callbacks=clean_callbacks,
    )

In [None]:
    # Accuracy curves of the clean training run.
    for history_key, curve_label in (
        ('accuracy', 'Training Accuracy'),
        ('val_accuracy', 'Validation Accuracy'),
    ):
        plt.plot(history_clean.history[history_key], label=curve_label)
    plt.title('Training and Validation Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()


In [None]:

    # Candidate noise types and SNR values (five steps from 30 to 40 dB)
    # handed to the NoiseScheduler callback.
    noise_types = ['gaussian', 'salt_pepper']
    snr_range = np.linspace(30, 40, num=5)
    noise_scheduler = NoiseScheduler(snr_range, noise_types)

    # Noisy phase: keep fitting the same model on the noisy generators.
    noisy_callbacks = [noise_scheduler, early_stopping, lr_scheduler, model_checkpoint_noise]
    history_noisy = model.fit(
        train_generator_noise,
        validation_data=val_generator_noise,
        epochs=100,
        class_weight=class_weights_dict,
        callbacks=noisy_callbacks,
    )


In [None]:
    # Accuracy curves of the noisy training run.
    for history_key, curve_label in (
        ('accuracy', 'Training Accuracy'),
        ('val_accuracy', 'Validation Accuracy'),
    ):
        plt.plot(history_noisy.history[history_key], label=curve_label)
    plt.title('Training and Validation Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()

In [None]:
    # Table of the per-epoch values recorded by the NoiseScheduler callback.
    results_df = pd.DataFrame({
        column: getattr(noise_scheduler, attribute)
        for column, attribute in (
            ('Epoch', 'epoch_log'),
            ('SNR', 'snr_log'),
            ('Noise_Type', 'noise_type_log'),
            ('Validation_Accuracy', 'validation_accuracy'),
        )
    })

    # Display the DataFrame
    print(results_df)

In [None]:
# function to create test data generators

def create_test_generators(batch_size=32):
    """Build the clean and the noisy test generators and preview one batch of each.

    Both generators read ``test_path`` in directory order (``shuffle=False``)
    so predictions line up with the generator's class labels.

    Args:
        batch_size: Number of images per batch.

    Returns:
        A ``(clean_test_generator, noise_test_generator)`` tuple, both
        positioned at their first batch.
    """
    flow_options = dict(
        target_size=(224, 224),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=False,  # keep file order for evaluation
    )

    clean_test_datagen = ImageDataGenerator(preprocessing_function=tf.keras.applications.resnet50.preprocess_input)
    noise_test_datagen = ImageDataGenerator(preprocessing_function=custom_preprocessing_function)

    clean_test_generator = clean_test_datagen.flow_from_directory(test_path, **flow_options)
    noise_test_generator = noise_test_datagen.flow_from_directory(test_path, **flow_options)

    # The noisy preview below appends to global_noise_log; remember its length
    # so the preview entries can be removed again.
    log_length_before_preview = len(global_noise_log)

    # Preview the first five images of one clean batch.
    print("clean_test_generator:")
    images, _ = next(clean_test_generator)
    plotImages(images[:5])

    # Preview the first five images of one noisy batch.
    print("Noise_test_generator:")
    images, _ = next(noise_test_generator)
    plotImages(images[:5])

    # Undo the preview's side effects: drop its log entries and rewind both
    # generators so callers start from the first batch.
    del global_noise_log[log_length_before_preview:]
    clean_test_generator.reset()
    noise_test_generator.reset()

    return clean_test_generator, noise_test_generator

In [None]:
class_names = ['benign', 'malignant', 'normal']

def evaluate_model(model, generator, steps, class_names):
    """Print a classification report and the accuracy of ``model`` on ``generator``.

    Args:
        model: Model whose ``predict`` returns one probability row per sample.
        generator: Data generator exposing ``reset()`` and ``classes``.
            # assumes it was created with shuffle=False so that prediction
            # order matches ``generator.classes`` — TODO confirm for new callers
        steps: Number of batches to predict.
        class_names: Display names of the classes, ordered by class index.
    """
    # Start from the first batch.
    generator.reset()
    predictions = model.predict(generator, steps=steps)
    # The highest-probability class is the predicted class.
    predicted_classes = np.argmax(predictions, axis=1)
    # Compare only against the labels of the samples that were predicted;
    # with fewer steps than batches the two arrays would differ in length.
    true_classes = generator.classes[:len(predicted_classes)]
    # Passing `labels` keeps the report working when a class does not occur.
    print(classification_report(
        true_classes,
        predicted_classes,
        labels=list(range(len(class_names))),
        target_names=class_names,
    ))
    accuracy = accuracy_score(true_classes, predicted_classes)
    print(f'Accuracy: {accuracy * 100:.2f}%')


In [None]:
# Evaluate the model
model.load_weights('clean_best_model.h5')  # Load the best model saved by ModelCheckpoint

In [None]:
# import pandas as pd

# Reset the global noise log before evaluation
global_noise_log = []

clean_test_gen, noise_test_gen = create_test_generators(batch_size=32)




In [None]:
# Evaluate on clean test data; evaluate() returns the loss followed by the
# compiled metrics (accuracy, precision, recall, AUC).

clean_test_loss, clean_test_accuracy, clean_test_precision, clean_test_recall, clean_test_auc = model.evaluate(clean_test_gen, verbose=1)
# A single '%' is enough inside an f-string; '%%' would print two percent signs.
print(f"Clean Test Accuracy: {clean_test_accuracy * 100:.2f}%, Precision: {clean_test_precision}, Recall: {clean_test_recall}, AUC: {clean_test_auc}")

In [None]:
# Clean Test Data
print("Evaluation on Clean Test Data:")
evaluate_model(model, clean_test_gen, len(clean_test_gen), class_names)


In [None]:
# Evaluate the model
model.load_weights('noise_best_model.h5')  # Load the best model saved by ModelCheckpoint

In [None]:
# Evaluate on the noisy test data; evaluate() returns the loss followed by
# the compiled metrics (accuracy, precision, recall, AUC).
noisy_results = model.evaluate(noise_test_gen, verbose=1)
noise_test_loss, noise_test_accuracy, noise_test_precision, noise_test_recall, noise_test_auc = noisy_results
print(f"Noisy Test Accuracy: {noise_test_accuracy * 100:.2f}%, Precision: {noise_test_precision}, Recall: {noise_test_recall}, AUC: {noise_test_auc}")

# One row per logged noise draw, each carrying the same overall metrics.
metrics_df = pd.DataFrame(global_noise_log).assign(
    Accuracy=noise_test_accuracy,
    Precision=noise_test_precision,
    Recall=noise_test_recall,
    AUC=noise_test_auc,
)

# Display the DataFrame
print(metrics_df)

In [None]:
# Per-class report on the noisy test data.
print("Evaluation on Noise Test Data:")
evaluate_model(
    model=model,
    generator=noise_test_gen,
    steps=len(noise_test_gen),
    class_names=class_names,
)

 Precision: the ratio of correctly predicted positive observations to all predicted positives.

 Recall: the ratio of correctly predicted positive observations to all actual positives.

 AUC: the area under the ROC curve; it measures how well the model distinguishes between classes (higher is better).

References:


https://medium.com/@dnyaneshwalwadkar/fix-training-accuracy-fluctuation-over-fitting-problem-in-deep-learning-algorithm-859573090809

\

https://github.com/MedMachine00/Deep-learning-based-coronary-artery-segmentation/tree/main

\

https://github.com/Paperspace/DataAugmentationForObjectDetection/blob/master/quick-start.ipynb


\


https://www.researchgate.net/publication/352806683_Image_De-Noising_with_Machine_Learning_A_Review#:~:text=This%20paper%20explores%20the%20numerous,networks%20and%20generative%20adversarial%20networks

\
https://arxiv.org/pdf/1609.03683.pdf

\

https://keras.io/api/

\

https://www.researchgate.net/publication/371699794_Deep_learning_techniques_on_3D-MRI_lung_images_for_detection_and_segmentation_of_COVID-19_virus

\
https://link.springer.com/article/10.1007/s10462-023-10453-z#Sec21

\
https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7085309/


\
https://mrimaster.com/snr/

\

https://radiopaedia.org/articles/signal-to-noise-ratio-mri?lang=gb

\
https://gist.github.com/Prasad9/28f6a2df8e8d463c6ddd040f4f6a028a?ref=blog.roboflow.com

\
Litjens, Geert, et al. "A survey on deep learning in medical image analysis." Medical image analysis 42 (2017)


Ker, Justin, et al. "Deep learning applications in medical image analysis." IEEE Access 6 (2018).

https://debuggercafe.com/adding-noise-to-image-data-for-deep-learning-data-augmentation/

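The signal-to-noise ratio (SNR) in dB is calculated using the formula:

$$\mathrm{SNR}_{\mathrm{dB}} = 10 \log_{10}\!\left(\frac{P_{\text{signal}}}{P_{\text{noise}}}\right)$$

where $P_{\text{signal}}$ is the power of the signal (the mean of the squared image pixel values) and $P_{\text{noise}}$ is the noise power (the variance of the noise).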