In [1]:
import matplotlib.pyplot as plt
import os
import zipfile
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
import nibabel as nib
from scipy import ndimage
import glob
import sys,os
from natsort import natsorted
import tensorflow_probability as tfp
import random
from aifnet_utils.preprocess import read_nifti_file, normalize, normalize_aif, process_scan, normalize_zero_one
from aifnet_utils.losses import MaxCorrelation
from aifnet_utils.data_loaders import read_isles_annotations, read_isles_volumes, ISLES18DataGen_aifvof_aug
from aifnet_utils.data_loaders import delay_sequence_padding, anticipate_sequence_padding, late_bolus, early_bolus
from pathlib import Path

%matplotlib inline

In [2]:
# Locations of the ISLES 2018 data and of the experiment folder.
# The annotation file covers all 94 training cases.
# TODO: Annotate the 62 cases from test
root_dir     = '/media/sebastian/data/ASAP/ISLES2018_Training/'
ROOT_EXP = '/home/sebastian/experiments/aifnet_replication'#'/Users/sebastianotalora/work/postdoc/ctp/aifnet_replication'

#At insel: 
#Local: '/Users/sebastianotalora/work/postdoc/data/ISLES/'
# ROOT_EXP has no trailing slash, so the file name must be joined with a path
# separator; plain string concatenation would glue it onto the folder name.
aif_annotations_path = os.path.join(ROOT_EXP, 'annotated_aif_vof_complete_revised.csv')
min_num_volumes_ctp = 43


In [3]:
# Lookup table for the training set: scan label -> case name.
# Keys have the form 'Train_<patient id>_<letter>'; the middle field is used
# later in this notebook as the patient id. Values ('case <n>') become the
# 'case_<n>' folder names that are matched against the volume paths.
# NOTE(review): the A/B suffix presumably distinguishes several scans of the
# same patient - confirm against the dataset description.
dict_patient_cases_training = {'Train_01_A':'case 1',
'Train_01_B':'case 2',
'Train_02_A':'case 3',
'Train_02_B':'case 4',
'Train_03_A':'case 5',
'Train_03_B':'case 6',
'Train_04_A':'case 7',
'Train_04_B':'case 8',
'Train_05_A':'case 9',
'Train_06_A':'case 10',
'Train_07_A':'case 11',
'Train_08_A':'case 12',
'Train_09_A':'case 13',
'Train_10_A':'case 14',
'Train_11_A':'case 15',
'Train_12_A':'case 16',
'Train_13_A':'case 17',
'Train_14_A':'case 18',
'Train_14_B':'case 19',
'Train_15_A':'case 20',
'Train_16_A':'case 21',
'Train_16_B':'case 22',
'Train_17_A':'case 23',
'Train_17_B':'case 24',
'Train_18_A':'case 25',
'Train_19_A':'case 26',
'Train_19_B':'case 27',
'Train_20_A':'case 28',
'Train_20_B':'case 29',
'Train_21_A':'case 30',
'Train_22_A':'case 31',
'Train_23_A':'case 32',
'Train_23_B':'case 33',
'Train_24_A':'case 34',
'Train_24_B':'case 35',
'Train_25_A':'case 36',
'Train_25_B':'case 37',
'Train_26_A':'case 38',
'Train_27_A':'case 39',
'Train_27_B':'case 40',
'Train_28_A':'case 41',
'Train_28_B':'case 42',
'Train_29_A':'case 43',
'Train_29_B':'case 44',
'Train_30_A':'case 45',
'Train_30_B':'case 46',
'Train_31_A':'case 47',
'Train_31_B':'case 48',
'Train_32_A':'case 49',
'Train_32_B':'case 50',
'Train_33_A':'case 51',
'Train_33_B':'case 52',
'Train_34_A':'case 53',
'Train_34_B':'case 54',
'Train_35_A':'case 55',
'Train_35_B':'case 56',
'Train_36_A':'case 57',
'Train_36_B':'case 58',
'Train_37_A':'case 59',
'Train_37_B':'case 60',
'Train_38_A':'case 61',
'Train_39_A':'case 62',
'Train_39_B':'case 63',
'Train_40_A':'case 64',
'Train_40_B':'case 65',
'Train_41_A':'case 66',
'Train_41_B':'case 67',
'Train_42_A':'case 68',
'Train_42_B':'case 69',
'Train_43_A':'case 70',
'Train_43_B':'case 71',
'Train_44_A':'case 72',
'Train_44_B':'case 73',
'Train_45_A':'case 74',
'Train_46_A':'case 75',
'Train_46_B':'case 76',
'Train_47_A':'case 77',
'Train_48_A':'case 78',
'Train_48_B':'case 79',
'Train_49_A':'case 80',
'Train_50_A':'case 81',
'Train_51_A':'case 82',
'Train_52_A':'case 83',
'Train_53_A':'case 84',
'Train_54_A':'case 85',
'Train_55_A':'case 86',
'Train_56_A':'case 87',
'Train_57_A':'case 88',
'Train_58_A':'case 89',
'Train_59_A':'case 90',
'Train_60_A':'case 91',
'Train_61_A':'case 92',
'Train_62_A':'case 93',
'Train_63_A':'case 94',

}

In [4]:
# Lookup table for the testing set: scan label -> case name.
# Keys have the form 'Testing_<patient id>_<letter>'; the middle field is used
# later in this notebook as the patient id. Values ('case <n>') become the
# 'case_<n>' folder names that are matched against the volume paths.
# Case numbers restart at 1, so they overlap with the training case numbers.
dict_patient_cases_testing = {'Testing_01_A':'case 1',
'Testing_02_A':'case 2',
'Testing_03_A':'case 3',
'Testing_04_A':'case 4',
'Testing_05_A':'case 5',
'Testing_06_A':'case 6',
'Testing_07_A':'case 7',
'Testing_08_A':'case 8',
'Testing_09_A':'case 9',
'Testing_10_A':'case 10',
'Testing_10_B':'case 11',
'Testing_11_A':'case 12',
'Testing_12_A':'case 13',
'Testing_12_B':'case 14',
'Testing_13_A':'case 15',
'Testing_13_B':'case 16',
'Testing_14_A':'case 17',
'Testing_14_B':'case 18',
'Testing_15_A':'case 19',
'Testing_15_B':'case 20',
'Testing_16_A':'case 21',
'Testing_16_B':'case 22',
'Testing_17_A':'case 23',
'Testing_17_B':'case 24',
'Testing_18_A':'case 25',
'Testing_18_B':'case 26',
'Testing_19_A':'case 27',
'Testing_19_B':'case 28',
'Testing_20_A':'case 29',
'Testing_20_B':'case 30',
'Testing_21_A':'case 31',
'Testing_21_B':'case 32',
'Testing_22_A':'case 33',
'Testing_22_B':'case 34',
'Testing_23_A':'case 35',
'Testing_23_B':'case 36',
'Testing_24_A':'case 37',
'Testing_24_B':'case 38',
'Testing_25_A':'case 39',
'Testing_25_B':'case 40',
'Testing_26_A':'case 41',
'Testing_26_B':'case 42',
'Testing_27_A':'case 43',
'Testing_27_B':'case 44',
'Testing_28_A':'case 45',
'Testing_28_B':'case 46',
'Testing_29_A':'case 47',
'Testing_30_A':'case 48',
'Testing_31_A':'case 49',
'Testing_32_A':'case 50',
'Testing_33_A':'case 51',
'Testing_34_A':'case 52',
'Testing_35_A':'case 53',
'Testing_35_B':'case 54',
'Testing_36_A':'case 55',
'Testing_37_A':'case 56',
'Testing_37_B':'case 57',
'Testing_38_A':'case 58',
'Testing_38_B':'case 59',
'Testing_39_A':'case 60',
'Testing_39_B':'case 61',
'Testing_40_A':'case 62'}

In [5]:
# Sanity check: number of entries in the testing lookup table (one per testing scan).
len(dict_patient_cases_testing)

62

In [6]:
# Group the training case names by patient id.
# The patient id is the middle field of the scan label ('Train_01_A' -> '01');
# each case name has its spaces replaced by underscores ('case 1' -> 'case_1').
patient_cases_dict_training = {}
for scan_label, case_label in dict_patient_cases_training.items():
    pid = scan_label.split('_')[1]
    patient_cases_dict_training.setdefault(pid, []).append(case_label.replace(' ', '_'))

In [7]:
# Group the testing case names by patient id.
# The patient id is the middle field of the scan label ('Testing_10_B' -> '10');
# each case name has its spaces replaced by underscores ('case 11' -> 'case_11').
patient_cases_dict_test = {}
for scan_label, case_label in dict_patient_cases_testing.items():
    folder_name = case_label.replace(' ', '_')
    patient_cases_dict_test.setdefault(scan_label.split('_')[1], []).append(folder_name)

In [15]:
# Sanity check: number of distinct patient ids in the training and testing sets,
# followed by the case names grouped under training patient '01'.
print(len(patient_cases_dict_training),len(patient_cases_dict_test))
print(patient_cases_dict_training['01'])

63 40
['case_1', 'case_2']


In [17]:
# Locate every 4D volume of the TRAINING and TESTING folders, in natural sort order.
dataset_dir = os.path.join(root_dir, "TRAINING")
filenames_4D = natsorted(glob.glob(os.path.join(dataset_dir, "case_*", "*4D*", "*nii*")))
dataset_dir_test = os.path.join(root_dir, "TESTING")
filenames_4D_test = natsorted(glob.glob(os.path.join(dataset_dir_test, "case_*", "*4D*", "*nii*")))


def _scan_id(path):
    """Return the dot-separated field that precedes the NIfTI extension.

    The glob pattern above also matches compressed '.nii.gz' files; the '.gz'
    suffix is stripped first so that those files do not all map to the key 'nii'.
    """
    file_name = os.path.basename(path)
    if file_name.endswith('.gz'):
        file_name = file_name[:-len('.gz')]
    return file_name.split('.')[-2]


# Index the volume paths by scan id.
cases_paths = {_scan_id(path): path for path in filenames_4D}
cases_paths_test = {_scan_id(path): path for path in filenames_4D_test}

print("Training paths: "+ str(len(cases_paths)) + "; Testing paths " + str(len(cases_paths_test) ))
print(cases_paths.keys())
# Show one example path without depending on a specific scan id being present.
if cases_paths:
    print(next(iter(cases_paths.values())))

Training paths: 94; Testing paths 62
dict_keys(['345561', '345568', '345575', '345582', '345589', '345596', '345603', '345610', '345617', '345624', '345631', '345638', '345645', '345652', '345659', '345666', '345673', '339335', '339343', '345682', '345689', '345696', '345703', '345710', '345717', '345724', '345731', '345738', '345745', '345752', '345759', '345766', '345773', '345780', '345787', '345794', '345801', '345808', '345815', '345822', '345829', '345836', '345843', '345850', '345857', '345864', '345871', '345878', '345885', '345892', '345899', '345906', '345913', '345920', '345927', '345934', '345941', '345948', '345955', '345962', '345969', '345976', '345983', '345990', '345997', '346004', '346011', '346018', '346025', '346032', '346039', '346046', '346053', '346060', '346067', '346074', '346081', '346088', '346095', '346102', '346109', '346116', '346123', '346130', '346137', '346144', '346151', '346158', '346165', '346172', '346179', '346186', '346193', '346200'])
/media/seba

In [18]:
# Sanity check: number of 4D testing volumes matched by the glob pattern.
len(filenames_4D_test)

62

## Mapping from the path of the nii file to the corresponding patient id


In [19]:
# Collect, for every training patient id, the paths of that patient's 4D volumes.
# The case folder is the third path component from the end
# (.../<case folder>/<4D folder>/<file>).
patient_paths = {scan_label.split('_')[1]: [] for scan_label in dict_patient_cases_training}
for scan_path in cases_paths.values():
    case_folder = scan_path.split('/')[-3]
    for pid, case_names in patient_cases_dict_training.items():
        if case_folder in case_names:
            patient_paths[pid].append(scan_path)


In [20]:
# Collect, for every testing patient id, the paths of that patient's 4D volumes.
# The lookup must use the *testing* case table: case numbers restart at 1 in the
# testing set, so looking them up in the training table would file each volume
# under an unrelated patient id and break the patient-level separation of the splits.
patient_paths_test = {key.split('_')[1]: [] for key in  dict_patient_cases_testing}
for key in cases_paths_test:
    cur_path = cases_paths_test[key]
    # The case folder is the third path component from the end.
    cur_case = cur_path.split('/')[-3]
    for patient_id in patient_cases_dict_test.keys():
        if cur_case in patient_cases_dict_test[patient_id]:
            patient_paths_test[patient_id].append(cur_path)


In [21]:
# Total number of patient ids: testing patients plus training patients.
print(len(patient_paths_test) + len(patient_cases_dict_training))

103


## Creating five different sets of patient-separated [train, valid, test] filenames to train AIFNet

In [22]:
def rand_parts(seq, n, l):
    """Pick ``n`` random, non-overlapping slices of length ``l`` from ``seq``.

    The slices are returned in the order in which they occur in ``seq``.
    ``random.sample`` raises ``ValueError`` when ``seq`` is too short to hold
    ``n`` such slices.
    """
    # Draw n distinct start slots from the sequence shortened by the extra
    # (l - 1) elements that each slice needs.
    slots = range(len(seq) - (l - 1) * n)
    starts = sorted(random.sample(slots, n))
    # The k-th slice is shifted right by the room taken by the k slices before it.
    return [
        seq[start + k * (l - 1): start + k * (l - 1) + l]
        for k, start in enumerate(starts)
    ]

In [23]:
def generate_train_val_test_paths_files(patient_paths, fractions_partitions=(0.7, 0.1, 0.2), seed=None):
    """Randomly split patients into train/validation/test and return their file paths.

    The split is made over patients (the keys of ``patient_paths``), so all
    paths of one patient end up in the same partition.

    Parameters
    ----------
    patient_paths : dict
        Maps a patient id to the list of file paths belonging to that patient.
    fractions_partitions : sequence of float, optional
        Fractions of patients for the training and validation partitions
        (first two entries). The test partition receives all remaining
        patients, so a third entry is accepted but not used.
    seed : int or None, optional
        When given, the shuffle uses its own ``np.random.RandomState(seed)`` and
        is reproducible. When ``None`` (default) the global NumPy random state is
        used, as before.

    Returns
    -------
    tuple of three lists
        ``(train_cases_paths, valid_cases_paths, test_cases_paths)``.

    Raises
    ------
    ValueError
        If a fraction is negative or the training and validation fractions
        add up to more than 1.
    """
    train_fraction, valid_fraction = fractions_partitions[0], fractions_partitions[1]
    if train_fraction < 0 or valid_fraction < 0 or train_fraction + valid_fraction > 1 + 1e-9:
        raise ValueError("train and validation fractions must be non-negative and sum to at most 1")

    all_indexes = list(patient_paths.keys())
    # Partition sizes are truncated; the test partition absorbs the remainder.
    num_tr = int(len(all_indexes) * train_fraction)
    num_va = int(len(all_indexes) * valid_fraction)

    if seed is None:
        perm = np.random.permutation(len(all_indexes))
    else:
        perm = np.random.RandomState(seed).permutation(len(all_indexes))
    shuffled_patients = [all_indexes[i] for i in perm]

    train_cases = shuffled_patients[:num_tr]
    val_cases = shuffled_patients[num_tr:num_tr + num_va]
    test_cases = shuffled_patients[num_tr + num_va:]

    # Flatten each group of patients into a single list of paths.
    train_cases_paths = [path for item in train_cases for path in patient_paths[item]]
    valid_cases_paths = [path for item in val_cases for path in patient_paths[item]]
    test_cases_paths = [path for item in test_cases for path in patient_paths[item]]
    return train_cases_paths,valid_cases_paths,test_cases_paths

In [24]:
# Draw one random patient-level train/validation/test split of the training volumes.
train_cases_paths,valid_cases_paths,test_cases_paths = generate_train_val_test_paths_files(patient_paths)

In [25]:
# One print call with a newline separator gives the same three output lines
# without making the cell evaluate to (and echo) a tuple of None values.
print(len(train_cases_paths), len(valid_cases_paths), len(test_cases_paths), sep='\n')

67
8
19


(None, None, None)

In [26]:
# Write the train/valid/test path lists of five partitions to disk.
# Every fold is an independent random split (a new permutation per call), so the
# test lists of different folds may overlap. Each list concatenates the paths
# drawn from the TRAINING volumes with those drawn from the TESTING volumes.
for fold in ['fold_1','fold_2','fold_3','fold_4', 'fold_5']:
    train_cases_paths,valid_cases_paths,test_cases_paths = generate_train_val_test_paths_files(patient_paths)
    train_cases_paths_from_test,valid_cases_paths_from_test,test_cases_paths_from_test = generate_train_val_test_paths_files(patient_paths_test)

    # Create the fold folder on demand so a fresh checkout does not fail on open().
    fold_dir = os.path.join(ROOT_EXP, 'partitions', fold)
    os.makedirs(fold_dir, exist_ok=True)

    partition_files = (
        ('train_v2.txt', train_cases_paths + train_cases_paths_from_test),
        ('valid_v2.txt', valid_cases_paths + valid_cases_paths_from_test),
        ('test_v2.txt', test_cases_paths + test_cases_paths_from_test),
    )
    for file_name, partition_paths in partition_files:
        # The context manager closes the file even if a write fails.
        with open(os.path.join(fold_dir, file_name), 'w') as partition_file:
            partition_file.writelines(element + '\n' for element in partition_paths)

In [29]:
# Partition sizes left over from the last fold of the loop above:
# (paths drawn from the TRAINING volumes, paths drawn from the TESTING volumes).
print(len(train_cases_paths) , len(train_cases_paths_from_test))
print(len(valid_cases_paths) , len(valid_cases_paths_from_test) )
print(len(test_cases_paths), len(test_cases_paths_from_test))

67 45
8 7
19 10


In [28]:
# Scratch check that three partition sizes add up to the 94 training volumes.
# NOTE(review): these numbers do not match the sizes printed elsewhere in this
# notebook; they appear to come from a different run - confirm or remove.
68+9+17

94

In [30]:
# Scratch check: size of the combined training list of the last fold
# (67 paths from the TRAINING volumes + 45 from the TESTING volumes).
67+45

112

40