In [3]:
import sys
import tensorflow as tf 

from skimage.restoration import denoise_nl_means, estimate_sigma
from sklearn import metrics
from sklearn.metrics import classification_report

### TO RUN ON KAGGLE, RUN THE NEXT CELL TO IMPORT ALL THE NECESSARY CODE

In [4]:
# Clone the repository
!git clone https://github.com/jpscardoso97/ich-detection.git 

repo_name = "ich-detection"

sys.path.append(repo_name)

# Change to the repository directory
%cd {repo_name}

%pip install -r requirements.txt

from scripts.model import VGG

fatal: destination path 'ich-detection' already exists and is not an empty directory.
/kaggle/working/ich-detection
Collecting matplotlib==3.7.1 (from -r requirements.txt (line 4))
  Downloading matplotlib-3.7.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)
Collecting pandas==2.2.2 (from -r requirements.txt (line 6))
  Downloading pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)
Collecting pywavelets==1.6.0 (from -r requirements.txt (line 8))
  Downloading pywavelets-1.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.0 kB)
Collecting scikit-learn==1.4.2 (from -r requirements.txt (line 9))
  Downloading scikit_learn-1.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting tensorflow==2.16.1 (from -r requirements.txt (line 11))
  Downloading tensorflow-2.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.3 kB)
Collecting ml-dtypes~=

## Define noise level for experiment

In [5]:
NOISE_LEVEL = 0
TEST_NOISE_LEVELS = [0, 0.25, 0.5, 0.75, 1, 1.5]

### Denoiser

In [None]:
def denoise_gaussian_image(image):
    sigma_est = estimate_sigma(image, average_sigmas=True, channel_axis=-1)
    denoised_image = denoise_nl_means(image, h= 9 * sigma_est, fast_mode=True, patch_size=5, patch_distance=3, channel_axis=-1)

    return denoised_image
    

## Load Data

In [31]:
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(preprocessing_function=denoise_gaussian_image)
val_datagen = tf.keras.preprocessing.image.ImageDataGenerator(preprocessing_function=denoise_gaussian_image)

n = str.replace(str(NOISE_LEVEL), '.', '_')
train_file = '/kaggle/input/rsna-bme548-png/RSNA_BME548-sino-png/train_noisy_' + n
test_path = '/kaggle/input/rsna-bme548-png/RSNA_BME548-sino-png/test_noisy_'

# Generators
train_generator = train_datagen.flow_from_directory(
    train_file,
    target_size=(224, 224),
    batch_size=4,
    class_mode='binary'
)

validation_generator = val_datagen.flow_from_directory(
    test_path + n,
    target_size=(224, 224),
    batch_size=4,
    class_mode='binary'
)

Found 3199 images belonging to 2 classes.
Found 800 images belonging to 2 classes.


### Train model

In [7]:
def train_model():
  optimizer = tf.keras.optimizers.Adam(
    learning_rate=0.0005,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-07,
    amsgrad=False
  )

  early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
  )

  model = VGG()
  model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

  model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=50,
    callbacks=[early_stopping]
  )

  return model

### Get predictions

In [8]:
def get_predictions(model, data_gen):
    preds = model.predict(data_gen)
    preds_binary = (preds > 0.5).astype(int)

    return preds_binary

### Classification Report

In [9]:
def get_classification_report(y_test, y_pred):
  accuracy = metrics.accuracy_score(y_test,y_pred)

  print("Accuracy of model=",accuracy)

  return classification_report(y_test, y_pred)

### Run experiments

In [11]:
import warnings
warnings.filterwarnings('ignore')

model = train_model()

Epoch 1/50
[1m 13/800[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 15ms/step - accuracy: 0.5274 - loss: 26.2721

W0000 00:00:1714497933.501057     152 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.5809 - loss: 3.5211

W0000 00:00:1714497946.041158     153 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 19ms/step - accuracy: 0.5810 - loss: 3.5194 - val_accuracy: 0.5288 - val_loss: 1.5877
Epoch 2/50
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 17ms/step - accuracy: 0.6404 - loss: 1.4517 - val_accuracy: 0.7013 - val_loss: 0.6072
Epoch 3/50
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 17ms/step - accuracy: 0.6741 - loss: 0.8614 - val_accuracy: 0.6900 - val_loss: 0.5897
Epoch 4/50
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 17ms/step - accuracy: 0.6768 - loss: 1.0070 - val_accuracy: 0.5587 - val_loss: 1.1431
Epoch 5/50
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 16ms/step - accuracy: 0.7217 - loss: 0.6232 - val_accuracy: 0.7013 - val_loss: 0.5978
Epoch 6/50
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 17ms/step - accuracy: 0.7240 - loss: 0.6403 - val_accuracy: 0.6050 - val_loss: 0.9800
Epoch 7/50
[1m800/800[0m 

In [34]:
def get_test_data():
    test_gens = {}
    for nl in TEST_NOISE_LEVELS:
        test_datagen = tf.keras.preprocessing.image.ImageDataGenerator()
        n = str.replace(str(nl), '.', '_')
            
        test_generator = test_datagen.flow_from_directory(
            test_path+n,
            target_size=(224, 224),
            batch_size=4,
            class_mode='binary'
        )

        test_gens[nl] = test_generator

    return test_gens

In [35]:
noise_data_gens = get_test_data()

Found 800 images belonging to 2 classes.
Found 800 images belonging to 2 classes.
Found 800 images belonging to 2 classes.
Found 800 images belonging to 2 classes.
Found 800 images belonging to 2 classes.


In [36]:
# Define the class names
class_names = ['Hemorrhage', 'No Hemorrhage']

accuracies = []
all_trues = []
all_preds = []

for noise_level in TEST_NOISE_LEVELS:
    y_pred = get_predictions(model, noise_data_gens[noise_level])
    labels = noise_data_gens[noise_level].labels
    accuracy = metrics.accuracy_score(labels, y_pred)
    print(f"Accuracy of model on noise level {noise_level} = {accuracy}")
    accuracies.append(accuracy)
    all_trues.extend(labels)
    all_preds.extend(y_pred)


[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step
Accuracy of model on noise level 0 = 0.48375
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step
Accuracy of model on noise level 0.25 = 0.505
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step
Accuracy of model on noise level 0.75 = 0.4725
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 19ms/step
Accuracy of model on noise level 1 = 0.51875
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step
Accuracy of model on noise level 1.5 = 0.50375


In [None]:
# Get classification report
class_report = get_classification_report(all_trues, all_preds)
# Convert noise level to a valid string for the filename
filename = f"report_{str(NOISE_LEVEL).replace('.', '_')}_level.txt"
# Save classification report
with open(f"data/outputs/classification_reports/{filename}", "w") as file:
    file.write(class_report)

In [None]:
# show confusion matrix
confusion_matrix = metrics.confusion_matrix(all_trues, all_preds)
print(confusion_matrix)