In [1]:
# Populate the interactive namespace from numpy and matplotlib and draw figures inline.
# NOTE(review): %pylab injects many unqualified names; later cells import numpy
# explicitly as `np`, so confirm whether anything still relies on the injected names.
%pylab inline
# Load the autoreload extension and set it to mode 2 (reload modules before
# executing each cell) so edits to imported .py files are picked up without a restart.
%load_ext autoreload
%autoreload 2

Populating the interactive namespace from numpy and matplotlib


In [2]:
import tensorflow as tf
import numpy as np
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import KFold
from tensorflow.keras.models import Model, load_model
import pickle
from keras_unet.metrics import iou, iou_thresholded
import os
import nrrd
import mahotas as mh
from keras import backend as K

2024-06-14 17:00:50.527137: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0


-----------------------------------------
keras-unet init: TF version is >= 2.0.0 - using `tf.keras` instead of `Keras`
-----------------------------------------


In [3]:
# Directory holding the image volumes and their '.b.seg.nrrd' label volumes
# (both are resolved against DATAPATH in read_nrrd_files).
DATAPATH= '/raid/mpsych/CACTAS/DATA/ESUS'
# Directory holding the '.ca.seg.nrrd' mask volumes.
CAPATH= '/raid/mpsych/CACTAS/DATA/CA24'
# Root of the saved experiment artefacts. The trailing slash matters: later cells
# build file names with plain string concatenation (path + 'unet_27/...').
path = '/raid/mpsych/CACTAS/DATA/HISTORY/'

In [4]:
def read_nrrd_files(file_list, DATAPATH, CAPATH):
    """Load the image, label and CA-mask volume of every patient in ``file_list``.

    For each image file name the companion files are derived by suffix
    replacement: ``<name>.b.seg.nrrd`` (looked up in ``DATAPATH``) and
    ``<name>.ca.seg.nrrd`` (looked up in ``CAPATH``).

    A patient is only returned when BOTH companion files exist. Previously the
    image was appended unconditionally while the label / CA mask were appended
    only when found, so a single missing file shifted the three lists against
    each other and every later patient was paired with the wrong label.

    Args:
        file_list: iterable of image file names, relative to ``DATAPATH``.
        DATAPATH: directory containing the image and label volumes.
        CAPATH: directory containing the CA-mask volumes.

    Returns:
        Tuple ``(img_files, seg_files, ca_files)`` of three equally long lists;
        index ``i`` of each list belongs to the same patient.

    Raises:
        Whatever ``nrrd.read`` raises when a file that exists cannot be read
        (including a missing image file).
    """
    img_files = []
    seg_files = []
    ca_files = []
    for file in file_list:
        img_file = os.path.join(DATAPATH, file)

        # Names ending in '.b.img.nrrd' have that whole suffix swapped; every
        # other name only has '.img.nrrd' swapped.
        suffix = '.b.img.nrrd' if file.endswith('.b.img.nrrd') else '.img.nrrd'
        seg_file = os.path.join(DATAPATH, file.replace(suffix, '.b.seg.nrrd'))
        ca_file = os.path.join(CAPATH, file.replace(suffix, '.ca.seg.nrrd'))

        has_seg = os.path.exists(seg_file)
        has_ca = os.path.exists(ca_file)
        if not has_seg:
            print("cannot find label " + file)
        if not has_ca:
            print("cannot find ca mask " + ca_file)
        if not (has_seg and has_ca):
            # Skip the whole patient so the three lists stay index-aligned.
            continue

        # Headers are not needed downstream; only the voxel arrays are kept.
        img_data, _ = nrrd.read(img_file)
        seg_data, _ = nrrd.read(seg_file)
        ca_data, _ = nrrd.read(ca_file)

        img_files.append(img_data)
        seg_files.append(seg_data)
        ca_files.append(ca_data)

    return img_files, seg_files, ca_files

In [5]:
def normalize_data(data_list):
    """Min-max scale every array in ``data_list`` independently.

    Each array is mapped with ``(data - min) / (max - min)``, so its smallest
    value becomes 0 and its largest becomes 1.

    A constant array (``max == min``, e.g. an all-zero mask) used to produce
    0/0 = NaN for every element; NaN later casts to ``True`` when the result is
    converted to a boolean mask. Such arrays are now returned as all zeros.

    Args:
        data_list: iterable of numeric arrays.

    Returns:
        List with one scaled array per input array, in the same order.
    """
    normalized_data = []
    for data in data_list:
        min_val = np.min(data)
        value_range = np.max(data) - min_val
        if value_range == 0:
            # Constant input: dividing the all-zero numerator by 1 yields zeros
            # with the same result dtype the regular path would produce.
            value_range = 1
        normalized_data.append((data - min_val) / value_range)
    return normalized_data

def slice_into_2d(img_list, seg_list, m_list, dilation_iterations=10):
    """Cut 3D volumes into 2D slices along their third axis and stack them.

    Args:
        img_list: list of 3D image volumes; slices are taken as ``vol[:, :, k]``.
        seg_list: list of 3D label volumes; every non-zero voxel becomes ``True``.
        m_list: list of 3D mask volumes; each slice is cast to bool and then
            passed through ``mh.dilate`` ``dilation_iterations`` times.
        dilation_iterations: number of successive ``mh.dilate`` calls per mask
            slice. Defaults to 10, which matches the previously hard-coded
            "one dilation plus nine more".

    Returns:
        Tuple ``(slices_img, slices_seg, slices_ca)``; each array has a
        trailing channel axis of length 1 appended. The three shapes are
        printed before returning.
    """
    def _axial_slices(volumes):
        # One 2D view per index of the third axis, volume after volume.
        planes = []
        for volume in volumes:
            volume = np.asarray(volume)
            planes.extend(volume[:, :, k] for k in range(volume.shape[2]))
        return planes

    slices_img = np.array(_axial_slices(img_list))

    # Binarise the labels: any non-zero label value counts as foreground.
    slices_seg = np.array([plane != 0 for plane in _axial_slices(seg_list)])

    dilated_masks = []
    for plane in _axial_slices(m_list):
        mask = plane.astype(np.bool_)
        # Repeated dilation grows the mask region a little on every pass.
        for _ in range(dilation_iterations):
            mask = mh.dilate(mask)
        dilated_masks.append(mask)
    slices_ca = np.array(dilated_masks)

    # Append the channel axis: (n, h, w) -> (n, h, w, 1).
    slices_img = np.expand_dims(slices_img, -1)
    slices_seg = np.expand_dims(slices_seg, -1)
    slices_ca = np.expand_dims(slices_ca, -1)

    print(slices_img.shape, slices_seg.shape, slices_ca.shape)

    return slices_img, slices_seg, slices_ca

def prepare_data(file_list, DATAPATH, CAPATH, seed=None):
    """Load, shuffle, normalise and slice the volumes named in ``file_list``.

    Steps: read the volumes with ``read_nrrd_files``, shuffle the patients
    (image, label and mask move together), min-max normalise the images and
    the masks with ``normalize_data``, then cut everything into 2D slices with
    ``slice_into_2d``.

    Args:
        file_list: image file names to load.
        DATAPATH: directory of the image and label volumes.
        CAPATH: directory of the CA-mask volumes.
        seed: optional seed for a reproducible patient order. ``None``
            (default) keeps the previous behaviour of shuffling with NumPy's
            global random state.

    Returns:
        Tuple ``(X_slices, y_slices, m_slices)`` as returned by
        ``slice_into_2d``.

    Raises:
        ValueError: if the three loaded lists differ in length (``zip`` would
            otherwise silently truncate and pair images with labels of other
            patients), or if nothing was loaded at all.
    """
    X, y, z = read_nrrd_files(file_list, DATAPATH, CAPATH)

    if not (len(X) == len(y) == len(z)):
        raise ValueError(
            "images, labels and CA masks are not aligned: "
            f"{len(X)} images, {len(y)} labels, {len(z)} CA masks"
        )
    if len(X) == 0:
        raise ValueError("no volumes were loaded from file_list")

    # Shuffle whole patients so image, label and mask stay paired.
    data = list(zip(X, y, z))
    if seed is None:
        np.random.shuffle(data)
    else:
        np.random.default_rng(seed).shuffle(data)
    X, y, z = zip(*data)

    # Normalize the data
    X = normalize_data(X)
    Z = normalize_data(z)

    # Slice the 3D data into 2D slices
    X_slices, y_slices, m_slices = slice_into_2d(X, y, Z)

    return X_slices, y_slices, m_slices

In [6]:
def masked_image(X_train, m_train):
    """Zero out every image pixel that lies outside its mask.

    The mask is binarised (``> 0`` becomes 1, everything else 0) and multiplied
    onto the images. The whole batch is processed in one vectorised step
    instead of building per-slice Python lists, which avoids holding several
    extra copies of the dataset in memory.

    Args:
        X_train: image slices, indexed by slice along the first axis.
        m_train: mask slices with the same number of slices as ``X_train``.

    Returns:
        ``float32`` array of the masked images, reshaped to
        ``(n, dim1, dim2, 1)``.

    Raises:
        ValueError: if ``X_train`` and ``m_train`` contain a different number
            of slices (previously surplus masks were silently ignored and
            missing ones raised ``IndexError``).
    """
    images = np.asarray(X_train)
    masks = np.asarray(m_train)
    if images.shape[0] != masks.shape[0]:
        raise ValueError(
            f"X_train has {images.shape[0]} slices but m_train has {masks.shape[0]}"
        )

    ## train image data + ca
    binary_masks = (masks > 0).astype(np.uint8)
    masked = (images * binary_masks).astype(np.float32)

    return masked.reshape(masked.shape[0], masked.shape[1], masked.shape[2], 1)

In [7]:
def _load_pickle(relative_name):
    """Unpickle the file ``path + relative_name`` and return its content.

    NOTE: ``pickle.load`` can execute arbitrary code; only use it on files
    produced by this project.
    """
    with open(path + relative_name, 'rb') as f:
        return pickle.load(f)


# Artefacts saved by the 'unet_27' experiment.
train_27 = _load_pickle('unet_27/train_patient_order.pkl')
test_27 = _load_pickle('unet_27/test_patient_order.pkl')
history_27 = _load_pickle('unet_27/training_history.pkl')
data_27 = _load_pickle('unet_27/experiment_data.pkl')

In [8]:
# Saved Keras model (HDF5) of the 'unet_27' experiment; loaded with load_model in the evaluation cells.
model_path = '/raid/mpsych/CACTAS/DATA/HISTORY/unet_27/unet_model.h5'

In [9]:
# Load the patients listed in train_27 and turn them into 2D slices:
# X_train = images, y_train = boolean labels, m_train = dilated masks.
X_train, y_train, m_train = prepare_data(train_27, DATAPATH, CAPATH)

(13328, 512, 512, 1) (13328, 512, 512, 1) (13328, 512, 512, 1)


In [10]:
# Cast images and masks to float32 before they are combined in masked_image.
X_train = X_train.astype(np.float32)
m_train = m_train.astype(np.float32)

In [11]:
# Keep only the image pixels that fall inside the (dilated) mask; everything else becomes 0.
train_images_array = masked_image(X_train, m_train)

In [12]:
# Inputs (masked images) and targets (boolean labels) used by the evaluation loops below.
X_data = train_images_array
y_data = y_train

In [13]:
####### 1: 10-fold evaluation of the saved model

In [14]:
# Ten folds taken in order (no shuffling).
# Shuffled alternative: KFold(n_splits=10, shuffle=True, random_state=42)
kf = KFold(n_splits=10, shuffle=False)

# Per-fold scores, filled by the evaluation loop.
losses, ious, iou_thresholds = [], [], []

In [15]:
# Evaluate the saved model on the validation part of every fold from `kf`.
#
# Nothing is trained in this loop, so the model is loaded and compiled once
# instead of once per fold, and the training part of each split is not
# materialised: fancy-indexing X_data[train_index] would copy most of the
# dataset on every fold, and re-binding X_train / y_train would overwrite the
# arrays created by the earlier cells.
model_27 = load_model(model_path, custom_objects={'iou': iou, 'iou_thresholded': iou_thresholded})
model_27.compile(optimizer='adam', loss='binary_crossentropy', metrics=[iou, iou_thresholded])

# Start from empty lists so that re-running this cell does not append a
# second set of scores to the first one.
losses, ious, iou_thresholds = [], [], []

for _, val_index in kf.split(X_data):
    X_val, y_val = X_data[val_index], y_data[val_index]

    # scores = [loss, iou, iou_thresholded], in the order given to compile().
    scores = model_27.evaluate(X_val, y_val, verbose=0)

    losses.append(scores[0])
    ious.append(scores[1])
    iou_thresholds.append(scores[2])

2024-06-14 17:03:43.103784: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1
2024-06-14 17:03:43.417905: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: 
pciBusID: 0000:47:00.0 name: A100-SXM4-40GB computeCapability: 8.0
coreClock: 1.41GHz coreCount: 108 deviceMemorySize: 39.59GiB deviceMemoryBandwidth: 1.41TiB/s
2024-06-14 17:03:43.420559: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 1 with properties: 
pciBusID: 0000:4e:00.0 name: A100-SXM4-40GB computeCapability: 8.0
coreClock: 1.41GHz coreCount: 108 deviceMemorySize: 39.59GiB deviceMemoryBandwidth: 1.41TiB/s
2024-06-14 17:03:43.420580: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0
2024-06-14 17:03:43.448647: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublas.so.11
2024-06-14 

In [16]:
# Mean / standard deviation of the per-fold scores collected above.
# NOTE(review): the recorded output is on the order of 1e-6, i.e. practically no
# overlap between prediction and label — confirm that the inputs are prepared
# the same way as when the model was trained.
print(f'Mean IoU: {np.mean(ious)}, Std IoU: {np.std(ious)}, Mean IoU Threshold: {np.mean(iou_thresholds)}')

Mean IoU: 6.1240363947945294e-06, Std IoU: 1.8010990658200576e-06, Mean IoU Threshold: 8.453580164768937e-07


In [26]:
###### 2: leave-one-out (per-slice) evaluation of the saved model

In [17]:
# Leave-one-out splitter: every split holds out exactly one sample of X_data.
loo = LeaveOneOut()
# Per-sample IoU scores, filled by the loop in the next cell.
loo_results=[]
# Iteration counter for the optional cap below; the cap is currently commented out.
counter = 0
# max_iterations = 1000

In [19]:
# Score the saved model on every sample of X_data individually.
#
# The model is not re-trained between iterations, so it is loaded and compiled
# once. Previously every iteration reloaded the model from disk and copied the
# N-1 "training" samples (X_data[train_index]) without ever using them, which
# also overwrote the notebook-level X_train / y_train.
K.clear_session()  # drop graph state left over from earlier model loads
model_27 = load_model(model_path, custom_objects={'iou': iou, 'iou_thresholded': iou_thresholded})
model_27.compile(optimizer='adam', loss='binary_crossentropy', metrics=[iou, iou_thresholded])

# Set to an int to stop after that many samples (quick check); None = full run.
max_loo_iterations = None

# Start from an empty list so that re-running this cell does not append duplicates.
loo_results = []

for step, (_, val_index) in enumerate(loo.split(X_data)):
    if max_loo_iterations is not None and step >= max_loo_iterations:
        break

    X_val, y_val = X_data[val_index], y_data[val_index]

    # scores = [loss, iou, iou_thresholded]; only the IoU is recorded here.
    scores = model_27.evaluate(X_val, y_val, verbose=0)
    loo_results.append(scores[1])

In [22]:
# Mean / standard deviation of the per-sample IoU values from the leave-one-out loop.
print(f'Mean IoU: {np.mean(loo_results)}, Std IoU: {np.std(loo_results)}')

Mean IoU: 1.3484958641748264e-05, Std IoU: 4.1917188232228895e-05


In [None]:
# Leave-one-out stopped after 100 iterations: Mean IoU: 7.635893061888056e-06, Std IoU: 5.895904461056821e-09

In [None]:
# Leave-one-out stopped after 500 iterations: Mean IoU: 1.9095010140517843e-05, Std IoU: 6.982788610695563e-05

In [None]:
# Full leave-one-out run (all samples): Mean IoU: 1.3484958641748264e-05, Std IoU: 4.1917188232228895e-05