In [None]:
pip install pynrrd

Collecting pynrrd
  Downloading pynrrd-1.0.0-py2.py3-none-any.whl (19 kB)
Collecting nptyping (from pynrrd)
  Downloading nptyping-2.5.0-py3-none-any.whl (37 kB)
Installing collected packages: nptyping, pynrrd
Successfully installed nptyping-2.5.0 pynrrd-1.0.0


In [None]:
# Colab-only: mount Google Drive at /content/drive so the dataset folder used
# later in this notebook (under /content/drive/MyDrive) is reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Import necessary libraries
import os
import nrrd
import numpy as np
from PIL import Image
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split, KFold
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Dropout, Conv2DTranspose, concatenate
from skimage.filters import sobel
import matplotlib.pyplot as plt
from tensorflow.keras.metrics import AUC
from sklearn.metrics import roc_curve
import time
from scipy.spatial.distance import dice

# Import necessary libraries for AUC and ROC
from sklearn.metrics import roc_auc_score


In [None]:
# Function to find NRRD files in a given folder. It records the paths of the segmented and unsegmented files of each sub-folder in a dictionary.
def find_nrrd_files(folder):
    """Walk ``folder`` and collect, per directory, the segmented and unsegmented NRRD paths.

    Args:
        folder: Root directory to walk recursively with ``os.walk``.

    Returns:
        dict mapping each visited directory's base name (including the root
        itself) to ``{'seg': path_or_None, 'unseg': path_or_None}``.
        A file whose name ends with ``'seg.nrrd'`` is stored under ``'seg'``;
        any other file with an NRRD extension (``.nrrd`` / ``.nhdr``) is stored
        under ``'unseg'``. Directories without a matching file keep ``None``.

    Notes:
        * Files that are not NRRD volumes (spreadsheets, ``.DS_Store``, notes,
          ...) are ignored, so they can no longer be mistaken for the
          unsegmented volume.
        * Directories and files are visited in sorted order, which makes the
          result independent of the filesystem's listing order.
        * Directories are keyed by base name only, so two directories with the
          same name in different branches share one entry (last one wins).
    """
    nrrd_extensions = ('.nrrd', '.nhdr')
    nrrd_files = {}
    for root, dirs, files in os.walk(folder):
        # Sorting in place steers os.walk's descent, giving a stable dict order.
        dirs.sort()
        folder_name = os.path.basename(root)
        entry = nrrd_files.setdefault(folder_name, {'seg': None, 'unseg': None})
        for file in sorted(files):
            if file.endswith('seg.nrrd'):
                entry['seg'] = os.path.join(root, file)
            elif file.lower().endswith(nrrd_extensions):
                entry['unseg'] = os.path.join(root, file)
    return nrrd_files


In [None]:
# Function to calculate Dice Coefficient
def dice_coefficient(y_true, y_pred, smooth=1.0):
    """Compute the smoothed Dice coefficient between two masks.

    The score is ``(2 * sum(y_true * y_pred) + smooth) /
    (sum(y_true) + sum(y_pred) + smooth)`` over all elements.

    Args:
        y_true: Ground-truth mask; any array-like (ndarray, nested list, ...).
        y_pred: Predicted mask or probabilities with the same number of elements.
        smooth: Additive smoothing term; with the default of 1.0 two empty
            masks score 1.0 instead of dividing by zero.

    Returns:
        The Dice score as a NumPy float scalar.

    Notes:
        Inputs are converted to float64 before multiplying, so integer or
        boolean masks are handled without wrap-around, and plain Python
        sequences are accepted as well.
        NOTE: a later cell of this notebook defines a function with the same
        name; whichever definition runs last is the one in effect.
    """
    y_true_f = np.asarray(y_true, dtype=np.float64).ravel()
    y_pred_f = np.asarray(y_pred, dtype=np.float64).ravel()
    intersection = np.sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (np.sum(y_true_f) + np.sum(y_pred_f) + smooth)


In [None]:
# Function to load and preprocess NRRD files. It selects informative slices based on Sobel filter scores.
def load_and_preprocess_volume(image_path, mask_path, target_size=(256, 256), num_slices=5):
    """Load an image/segmentation NRRD pair and return resized 2-D training slices.

    The image volume is min-max normalised to [0, 1], the ``num_slices`` most
    informative slices along axis 0 are chosen with
    ``select_informative_slices``, and each chosen image/mask slice is resized
    to ``target_size`` and given a trailing channel axis.

    Args:
        image_path: Path of the unsegmented NRRD volume.
        mask_path: Path of the segmentation NRRD volume.
        target_size: Size passed to ``PIL.Image.resize`` (width, height).
        num_slices: Number of slices to keep per volume.

    Returns:
        Tuple ``(images, masks)`` of float arrays with one entry per selected
        slice and a trailing channel axis of length 1. Image values lie in
        [0, 1]; mask values are exactly 0.0 or 1.0.

    Notes:
        NOTE(review): slices are indexed along axis 0 of the arrays returned by
        ``nrrd.read`` and the same indices are applied to the segmentation.
        Confirm that axis 0 is the intended slicing axis and that the
        segmentation is stored on the same grid as the image volume.
    """
    volume, _ = nrrd.read(image_path)
    segmentation, _ = nrrd.read(mask_path)

    # Work in float64 so `volume - min` cannot overflow a narrow integer dtype,
    # and guard the constant-volume case that would otherwise divide by zero.
    volume = volume.astype(np.float64)
    v_min = np.min(volume)
    value_range = np.max(volume) - v_min
    if value_range > 0:
        volume = (volume - v_min) / value_range
    else:
        volume = np.zeros_like(volume)

    selected_indices = select_informative_slices(volume, num_slices=num_slices)

    processed_images = []
    processed_masks = []
    for i in selected_indices:
        image_slice = volume[i, :, :]
        mask_slice = segmentation[i, :, :]
        image_pil = Image.fromarray((image_slice * 255).astype(np.uint8)).resize(target_size)
        # Binarise first (any non-zero label is foreground) so labels > 1 do not
        # wrap in the uint8 cast, then resize with nearest-neighbour so no
        # interpolated in-between label values are created.
        mask_uint8 = (mask_slice > 0).astype(np.uint8) * 255
        mask_pil = Image.fromarray(mask_uint8).resize(target_size, resample=Image.NEAREST)
        image_np = np.array(image_pil) / 255.0
        mask_np = np.array(mask_pil) / 255.0
        processed_images.append(image_np[..., np.newaxis])
        processed_masks.append(mask_np[..., np.newaxis])

    return np.array(processed_images), np.array(processed_masks)


In [None]:
# Function to select the most informative slices from a volume using Sobel filter scores.
def select_informative_slices(volume, num_slices=5, sobel_threshold=0.9):
    """Rank the axis-0 slices of ``volume`` by mean Sobel response and keep the best.

    Each slice ``volume[k, :, :]`` is passed to ``sobel`` with
    ``slice > sobel_threshold`` as the filter mask, and the mean of the
    filtered slice is its score.

    Args:
        volume: 3-D array indexed as ``volume[k, :, :]``.
        num_slices: Maximum number of slice indices to return.
        sobel_threshold: Intensity threshold used to build the Sobel mask.

    Returns:
        Integer NumPy array of slice indices, highest score first; ties keep
        their original slice order.
    """
    edge_scores = []
    for k in range(volume.shape[0]):
        plane = volume[k, :, :]
        edge_scores.append(np.mean(sobel(plane, mask=plane > sobel_threshold)))

    # A stable descending sort over indices reproduces the ordering of sorting
    # (index, score) pairs by score.
    ranked = sorted(range(len(edge_scores)), key=edge_scores.__getitem__, reverse=True)

    return np.array(ranked[:num_slices], dtype=int)

In [None]:
# U-Net architecture

def unet(input_size=(256, 256, 1)):
    """Build and compile the U-Net used for binary segmentation.

    Encoder: four stages of two 3x3 ReLU convolutions (64, 128, 256, 512
    filters), each followed by 2x2 max pooling, with dropout 0.5 after the
    fourth stage. Bottleneck: two 1024-filter convolutions plus dropout 0.5.
    Decoder: four 2x2 stride-2 transposed convolutions, each concatenated with
    the matching encoder output and followed by two 3x3 ReLU convolutions.
    Head: a 2-filter 3x3 ReLU convolution and a 1x1 sigmoid convolution.

    Args:
        input_size: Shape passed to ``Input``.

    Returns:
        A Keras ``Model`` compiled with Adam (learning rate 1e-4),
        binary cross-entropy loss and the accuracy metric.
    """
    conv_opts = dict(activation='relu', padding='same', kernel_initializer='he_normal')
    up_opts = dict(strides=(2, 2), padding='same', kernel_initializer='he_normal')

    def conv_pair(tensor, filters):
        # Two stacked 3x3 convolutions with the same number of filters.
        first = Conv2D(filters, 3, **conv_opts)(tensor)
        return Conv2D(filters, 3, **conv_opts)(first)

    def downsample(tensor):
        # Halve the spatial resolution.
        return MaxPooling2D(pool_size=(2, 2))(tensor)

    def decode(tensor, skip, filters):
        # Upsample, concatenate with the encoder skip connection, then convolve.
        upsampled = Conv2DTranspose(filters, 2, **up_opts)(tensor)
        merged = concatenate([skip, upsampled], axis=3)
        return conv_pair(merged, filters)

    inputs = Input(input_size)

    # Contracting path.
    enc1 = conv_pair(inputs, 64)
    enc2 = conv_pair(downsample(enc1), 128)
    enc3 = conv_pair(downsample(enc2), 256)
    enc4 = conv_pair(downsample(enc3), 512)
    skip4 = Dropout(0.5)(enc4)

    # Bottleneck.
    bottleneck = conv_pair(downsample(skip4), 1024)
    bottleneck = Dropout(0.5)(bottleneck)

    # Expanding path.
    dec6 = decode(bottleneck, skip4, 512)
    dec7 = decode(dec6, enc3, 256)
    dec8 = decode(dec7, enc2, 128)
    dec9 = decode(dec8, enc1, 64)

    # Output head.
    head = Conv2D(2, 3, **conv_opts)(dec9)
    output = Conv2D(1, 1, activation='sigmoid')(head)

    model = Model(inputs=[inputs], outputs=[output])
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model


In [None]:
# Function to calculate Dice Coefficient
def dice_coefficient(y_true, y_pred, smooth=1.0):
    """Compute the smoothed Dice coefficient between two masks.

    The score is ``(2 * sum(y_true * y_pred) + smooth) /
    (sum(y_true) + sum(y_pred) + smooth)`` over all elements.

    Args:
        y_true: Ground-truth mask; any array-like (ndarray, nested list, ...).
        y_pred: Predicted mask or probabilities with the same number of elements.
        smooth: Additive smoothing term; with the default of 1.0 two empty
            masks score 1.0 instead of dividing by zero.

    Returns:
        The Dice score as a NumPy float scalar.

    Notes:
        Inputs are converted to float64 before multiplying, so integer or
        boolean masks are handled without wrap-around, and plain Python
        sequences are accepted as well.
        NOTE: this re-defines ``dice_coefficient`` from an earlier cell of this
        notebook; being the later definition, it is the one in effect after a
        top-to-bottom run.
    """
    y_true_f = np.asarray(y_true, dtype=np.float64).ravel()
    y_pred_f = np.asarray(y_pred, dtype=np.float64).ravel()
    intersection = np.sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (np.sum(y_true_f) + np.sum(y_pred_f) + smooth)


In [None]:
# Start the timer for the entire script execution
start_time_script = time.time()

# Main code block: load the dataset, run 5-fold cross-validation on the
# training portion, then evaluate on the held-out test portion.
if __name__ == "__main__":
    # Seed NumPy and TensorFlow so weight initialisation and shuffling are
    # repeatable; the same seed is used for the data splits below.
    seed = 42
    np.random.seed(seed)
    tf.random.set_seed(seed)

    # Define file locations and early stopping callback
    file_location = "/content/drive/MyDrive/Dataset"
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    # Load and preprocess the dataset
    nrrd_files_dict = find_nrrd_files(file_location)
    # Drop the dataset root entry and the excluded case; a default avoids a
    # KeyError when either folder is absent.
    nrrd_files_dict.pop('Dataset', None)
    nrrd_files_dict.pop('HCC_017', None)
    images, masks = [], []
    for folder_name, files in nrrd_files_dict.items():
        # Folders lacking either file would otherwise crash inside nrrd.read(None).
        if files['seg'] is None or files['unseg'] is None:
            print(f'Skipping {folder_name}: segmented or unsegmented file not found')
            continue
        img, mask = load_and_preprocess_volume(files['unseg'], files['seg'])
        images.extend(img)
        masks.extend(mask)

    if not images:
        raise RuntimeError(f'No usable image/segmentation pairs found under {file_location}')

    # Convert lists to numpy arrays
    images = np.array(images)
    masks = np.array(masks)

    # Split the dataset into training and testing sets.
    # NOTE(review): the split is made over individual slices, so slices from
    # one volume can land in both the training and the test set.
    X_train_test, X_test, Y_train_test, Y_test = train_test_split(images, masks, test_size=0.3, random_state=seed)

    # Prepare K-Fold Cross Validation
    kf = KFold(n_splits=5, shuffle=True, random_state=seed)
    fold_no = 1
    loss_per_fold = []
    acc_per_fold = []
    dice_per_fold = []  # To store Dice coefficient

    # Training and validation process
    for train, val in kf.split(X_train_test, Y_train_test):
        print(f'Training on fold {fold_no}...')

        # Data partitioning
        X_train, X_val = X_train_test[train], X_train_test[val]
        Y_train, Y_val = Y_train_test[train], Y_train_test[val]

        # Release graph/layer state left over from the previous fold's model
        # before building a fresh one, so memory does not grow fold after fold.
        tf.keras.backend.clear_session()
        model = unet(input_size=(256, 256, 1))

        # Model training
        history = model.fit(X_train, Y_train, batch_size=8, epochs=15, validation_data=(X_val, Y_val), callbacks=[early_stopping])

        # Performance evaluation
        scores = model.evaluate(X_val, Y_val, verbose=0)
        print(f'Score for fold {fold_no}: Accuracy of {scores[1]*100}%')
        acc_per_fold.append(scores[1] * 100)
        loss_per_fold.append(scores[0])

        # Predict on the validation set
        Y_pred = model.predict(X_val)
        # Dice is computed on the raw sigmoid outputs (no thresholding).
        dice_score = dice_coefficient(Y_val, Y_pred)
        dice_per_fold.append(dice_score)

        fold_no += 1

    # Average performance across all folds
    print(f'Average Accuracy: {np.mean(acc_per_fold)}% (+- {np.std(acc_per_fold)})')
    print(f'Average Loss: {np.mean(loss_per_fold)}')
    print(f"Average Dice Coefficient: {np.mean(dice_per_fold)}")

    # Final evaluation on the test set.
    # NOTE(review): `model` here is the model trained in the last fold only.
    test_loss, test_accuracy = model.evaluate(X_test, Y_test)
    print(f"Test Loss: {test_loss}")
    # evaluate() returns accuracy as a fraction; scale it to match the '%' sign
    # and the per-fold reporting above.
    print(f"Test Accuracy: {test_accuracy * 100}%")

    # End the timer for the entire script execution and print the time
    end_time_script = time.time()
    print(f"Total script execution time: {end_time_script - start_time_script} seconds")

Training on fold 1...
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Score for fold 1: Accuracy of 98.13701510429382%
Training on fold 2...
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Score for fold 2: Accuracy of 96.70760035514832%
Training on fold 3...
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Score for fold 3: Accuracy of 97.26991057395935%
Training on fold 4...
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Score for fold 4: Accuracy of 97.2063422203064%
Training on fold 5...
Epoch 1/15
Epoch 2/15
E



Average Accuracy: 97.27704286575317% (+- 0.47203948978225047)
Average Loss: 0.419219084456563
Average Dice Coefficient: 0.164596835821288
Test Loss: 0.6691646575927734
Test Accuracy: 0.9777360558509827%
Total script execution time: 2337.34721660614 seconds
