In [4]:
import glob
import json
import os
import sys

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn import metrics
from sklearn.metrics import classification_report


/Users/jc/code/ich-detection


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


### IF YOU ARE RUNNING THIS NOTEBOOK LOCALLY, UNCOMMENT AND RUN THE NEXT CELL

In [None]:
#if os.getcwd().split('/')[-1] == "evaluation":
#    %cd ../..

#from scripts.physical_transformation import get_noisy_image
#from scripts.utils import read_dcm
#from scripts.model import VGG

### IF RUNNING ON GOOGLE COLAB OR KAGGLE, RUN THE NEXT CELL

In [None]:
# Clone the repository
!git clone https://github.com/jpscardoso97/ich-detection.git 

# Directory name created by the clone above; reused for sys.path and %cd.
repo_name = "ich-detection"

# NOTE(review): this appends a *relative* path before the working directory
# changes below, so after `%cd` it no longer points at the clone. The
# `scripts.*` imports further down presumably resolve through the current
# working directory instead — confirm before relying on this entry.
sys.path.append(repo_name)

# Change to the repository directory
%cd {repo_name}

# TODO: Remove once merged to main branch
!git checkout feat/repo-restructure

# Install the project's dependencies into the kernel's environment
# (%pip rather than !pip so the running kernel is the target).
%pip install -r requirements.txt

# Project helpers used by the rest of the notebook; these imports only work
# once the repository has been cloned and entered above.
from scripts.utils import read_dcm
from scripts.model import VGG
from scripts.physical_transformation import get_noisy_image

# Kaggle API credentials.
# Prefer the KAGGLE_USERNAME / KAGGLE_KEY environment variables so that real
# secrets never have to be typed into (and saved with) the notebook. The
# placeholder strings are only used when those variables are not set.
username = os.environ.get('KAGGLE_USERNAME', 'kaggle_username')
key = os.environ.get('KAGGLE_KEY', 'kaggle_key')

# Create kaggle.json
kaggle_dir = os.path.join(os.path.expanduser("~"), '.kaggle')
os.makedirs(kaggle_dir, exist_ok=True)
kaggle_json_path = os.path.join(kaggle_dir, 'kaggle.json')
with open(kaggle_json_path, 'w') as file:
    # json.dump escapes quotes/backslashes in the values, which the previous
    # hand-built '%s' template did not (it could emit invalid JSON).
    json.dump({"username": username, "key": key}, file)

# Set permissions (owner read/write only); os.chmod avoids shelling out.
os.chmod(kaggle_json_path, 0o600)

In [None]:
# Load the dataset
# NOTE(review): `kaggle` is imported here rather than in the top import cell,
# presumably because the package needs ~/.kaggle/kaggle.json (written by the
# setup cell above) to exist at import time — confirm before moving it.
import kaggle
# Download the 'jpscardoso/rsna-bme548' dataset into data/ and unzip it there.
kaggle.api.dataset_download_files('jpscardoso/rsna-bme548', path='data/', unzip=True)

## Define noise level for experiment

In [None]:
# Noise level passed to get_noisy_image for the *training* images.
NOISE_LEVEL = 0

# Noise levels the trained model is evaluated on: 0 to 8 in steps of 0.5.
TEST_NOISE_LEVELS = [
    0, 0.5,
    1, 1.5,
    2, 2.5,
    3, 3.5,
    4, 4.5,
    5, 5.5,
    6, 6.5,
    7, 7.5,
    8,
]

## Load Data

In [9]:
def get_data(filenames, df, noise_level):
  """Load images, apply noise to each, and pair every image with its label.

  Args:
    filenames: Iterable of image file paths; each one is passed to `read_dcm`.
    df: DataFrame with a 'filename' column (file base names) and an 'any'
      column holding the label (has hemorrhage or not).
    noise_level: Forwarded unchanged to `get_noisy_image` for every image.

  Returns:
    Tuple `(images, labels)`: two lists in the same order as `filenames`,
    where `images` holds one NumPy array per file.

  Raises:
    KeyError: If a file's base name has no matching row in `df`.
  """
  # Build the filename -> label lookup once instead of scanning the whole
  # DataFrame for every file. keep='first' mirrors the previous behaviour of
  # taking the first matching row when a filename appears more than once.
  first_rows = df.drop_duplicates(subset='filename', keep='first')
  label_by_name = dict(zip(first_rows['filename'].values, first_rows['any'].values))

  labels=[]
  images=[]
  for path in filenames:
    filename = os.path.basename(path)
    # Fail early, and with a readable message, before doing any image work.
    if filename not in label_by_name:
      raise KeyError(f"No label found for '{filename}' in the labels DataFrame")
    img = read_dcm(path)
    img = get_noisy_image(img, noise_level)
    images.append(np.array(img))
    # Get label of the image (has hemorrhage or not)
    labels.append(label_by_name[filename])

  return images, labels

def get_train_data():
  """Build the training arrays from the files under data/train.

  Reads labels from data/train/data.csv, loads every *.dcm file in
  data/train/images through `get_data` at NOISE_LEVEL, divides the stacked
  images by 255 and inserts a new axis at position 3.

  Returns:
    Tuple `(X_train, y_train)` of NumPy arrays.
  """
  labels_df = pd.read_csv("data/train/data.csv")
  dcm_paths = glob.glob("data/train/images/*.dcm")

  images, labels = get_data(dcm_paths, labels_df, NOISE_LEVEL)

  X_train = np.expand_dims(np.array(images) / 255, axis=3)
  y_train = np.array(labels)
  return X_train, y_train

def get_test_data():
    """Build one test set per entry in TEST_NOISE_LEVELS from data/test.

    Labels come from data/test/data.csv and images from data/test/images/*.dcm.
    For each noise level the files are loaded through `get_data`, the stacked
    images are divided by 255 and a new axis is inserted at position 3.

    Returns:
        Tuple `(X_tests, y_tests)` of dicts keyed by noise level, mapping to
        the image array and the label array for that level.
    """
    labels_df = pd.read_csv("data/test/data.csv")
    dcm_paths = glob.glob("data/test/images/*.dcm")

    # To test the trained model on all the different noise levels
    X_tests, y_tests = {}, {}
    for level in TEST_NOISE_LEVELS:
        images, labels = get_data(dcm_paths, labels_df, level)
        X_tests[level] = np.expand_dims(np.array(images) / 255, axis=3)
        y_tests[level] = np.array(labels)

    return X_tests, y_tests

In [None]:
def train_model(X_train, y_train):
  """Create a VGG model, compile it, and fit it on the given data.

  Training uses Adam (learning rate 0.0005) with binary cross-entropy loss for
  up to 25 epochs, with `validation_split=0.2`. An EarlyStopping callback
  monitors 'val_accuracy' (mode 'max') with a patience of 6.

  Args:
    X_train: Training inputs passed to `model.fit`.
    y_train: Training targets passed to `model.fit`.

  Returns:
    The fitted model.
  """
  stop_on_plateau = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', mode='max', patience=6)
  adam = tf.keras.optimizers.Adam(learning_rate=0.0005)

  model = VGG()
  model.compile(
    loss='binary_crossentropy',
    optimizer=adam,
    metrics=['accuracy'],
  )
  model.fit(
    X_train,
    y_train,
    epochs=25,
    validation_split=0.2,
    callbacks=[stop_on_plateau],
  )

  return model

In [None]:
def get_predictions(model, X_test, threshold=0.5):
    """Return binary (0/1) predictions of `model` for `X_test`.

    Args:
        model: Object exposing `predict(X_test)` that returns numeric scores.
        X_test: Inputs forwarded unchanged to `model.predict`.
        threshold: Scores strictly greater than this value map to 1, all
            others to 0. Defaults to 0.5.

    Returns:
        Integer NumPy array of 0s and 1s with the same shape as the output of
        `model.predict`.
    """
    preds = np.asarray(model.predict(X_test))
    # `np.int` was removed in NumPy 1.24; the builtin `int` is the replacement.
    preds_binary = (preds > threshold).astype(int)
    return preds_binary

In [None]:
def get_classification_report(y_test, y_pred):
  """Print the accuracy and return the classification report as text.

  Args:
    y_test: True labels.
    y_pred: Predicted labels.

  Returns:
    The report produced by `classification_report`, with the two classes
    named '0' and '1'.
  """
  print("Accuracy of model=",metrics.accuracy_score(y_test,y_pred))

  report = classification_report(y_test, y_pred, target_names=['0','1'])
  return report

In [None]:
# Get the training data (images get NOISE_LEVEL noise applied); the test sets
# are loaded separately, after training.
X_train, y_train = get_train_data()

In [None]:
# Train the model on the training arrays loaded above
model = train_model(X_train, y_train)

In [None]:
# Build one test set per entry in TEST_NOISE_LEVELS; both results are dicts
# keyed by noise level.
X_tests, y_tests = get_test_data()

In [None]:
# Get predictions on test set
# `accuracies` collects one score per noise level (in TEST_NOISE_LEVELS order);
# `all_trues` / `all_preds` pool labels and predictions across every level.
accuracies, all_trues, all_preds = [], [], []

for noise_level in TEST_NOISE_LEVELS:
    y_pred = get_predictions(model, X_tests[noise_level])
    accuracy = metrics.accuracy_score(y_tests[noise_level], y_pred)
    print(f"Accuracy of model on noise level {noise_level} = {accuracy}")
    accuracies += [accuracy]
    all_trues += list(y_tests[noise_level])
    all_preds += list(y_pred)


In [None]:
# Get classification report
# Note: all_trues / all_preds pool the predictions from every test noise level,
# while the file name below encodes NOISE_LEVEL (the level used for training).
class_report = get_classification_report(all_trues, all_preds)
# Convert noise level to a valid string for the filename
filename = f"report_{str(NOISE_LEVEL).replace('.', '_')}_level.txt"
# Make sure the output directory exists; open() would otherwise fail with
# FileNotFoundError on a fresh checkout.
reports_dir = "data/outputs/classification_reports"
os.makedirs(reports_dir, exist_ok=True)
# Save classification report
with open(f"{reports_dir}/{filename}", "w") as file:
    file.write(class_report)

In [None]:
# show confusion matrix
# Computed over the pooled labels/predictions from all test noise levels.
# scikit-learn's convention: rows are true classes, columns are predicted classes.
confusion_matrix = metrics.confusion_matrix(all_trues, all_preds)
print(confusion_matrix)