In [1]:
from tfedlrn.collaborator.pytorchmodels.pytorch2dunet_data_pipeline \
  import PyTorch2DUNetDPipe as UnetWithPipeline

In [2]:
from tfedlrn.collaborator.pytorchmodels.pytorch2dunet \
  import PyTorch2DUNet as Unet

In [3]:
import tfedlrn.datasets as datasets

In [4]:
import numpy as np
import pickle
import os

In [5]:
# Restrict this process to one GPU by setting CUDA_VISIBLE_DEVICES to device index 9.
# NOTE(review): the index "9" is hard-coded and presumably specific to the machine
# this notebook was run on -- change it to a GPU that is available before running.
os.environ["CUDA_VISIBLE_DEVICES"]="9"

In [None]:
model = UnetWithPipeline(device='cpu')

In [None]:
dataset_dir = datasets._get_dataset_dir()

In [None]:
# testing to see that the path to get the appropriate indexed image 
# looks good at first glance

In [None]:
idx_to_train_paths = model.idx_to_train_paths

In [None]:
# Entries found under the HGG training folder; display the first three as a sanity check.
hgg_training_dir = os.path.join(dataset_dir, 'BraTS17/MICCAI_BraTS17_Data_Training/HGG')
directories = os.listdir(hgg_training_dir)
directories[:3]

In [None]:
# The paths below should be consistent with enumerating every image slice,
# taking 155 axial slices from each directory in the list above:
# indices 0-154 should point into one directory, 155-309 into the next,
# and 310 should be the first slice of the one after that.
idx_to_train_paths[0], idx_to_train_paths[1], idx_to_train_paths[2], \
idx_to_train_paths[154], idx_to_train_paths[155], idx_to_train_paths[310]

In [None]:
# each brain (155 slices) given by a directory in 'directories' at an index
# corresponds to the institution number below at the same index

In [None]:
# Load the pickled list that gives, for each brain, the institution it came from.
# NOTE(review): pickle.load can execute arbitrary code -- only read a trusted copy of this file.
institution_pkl_path = os.path.join(dataset_dir, 'BraTS17/brain_number_to_institution.pkl')
with open(institution_pkl_path, 'rb') as pkl_file:
    raw_institution_labels = pickle.load(pkl_file)

# Cast every label to int and shift it down by one so institutions are 0-indexed.
brain_to_inst = [int(raw_label) - 1 for raw_label in raw_institution_labels]
brain_to_inst[:5]

In [None]:
# Build a list of (institution, index offset) tuples, one per brain, in brain order.
# The first entry is the institution the brain belongs to; the second is the
# running count of slices already assigned to that institution, i.e. the offset
# of this brain's first slice among that institution's slices.
# Every brain contributes 155 slices, so an institution's next offset advances
# by 155 each time one of its brains is seen. There are 10 institutions (0-9).
next_idx = np.zeros(10).astype(np.int32)
brain_to_inst_and_idx_offset = []
for inst_num in brain_to_inst:
    # Indexing yields a scalar copy, so the stored offset is not affected by the
    # increment on the following line.
    brain_to_inst_and_idx_offset.append((inst_num, next_idx[inst_num]))
    next_idx[inst_num] += 155

In [None]:
brain_to_inst_and_idx_offset[0:6]

In [None]:
# collect all images and masks at each institution

In [None]:
def load_institution_arrays(inst_num):
    """Load one institution's image and mask arrays from disk.

    Reads ``imgs_inst<inst_num>.npy`` and ``msks_inst<inst_num>.npy`` from
    ``/raid/datasets/BraTS17`` and reorders the axes of each array with
    ``np.transpose(..., [0, 3, 1, 2])``, which moves the last axis to
    position 1 (presumably channels-last -> channels-first; TODO confirm).

    Args:
        inst_num: Institution number; this notebook uses 0 through 9.

    Returns:
        Tuple ``(imgs, msks)`` holding the two transposed arrays.
    """
    axis_order = [0, 3, 1, 2]
    imgs = np.transpose(
        np.load('/raid/datasets/BraTS17/imgs_inst{}.npy'.format(inst_num)), axis_order)
    msks = np.transpose(
        np.load('/raid/datasets/BraTS17/msks_inst{}.npy'.format(inst_num)), axis_order)
    return imgs, msks


# One (imgs, msks) pair per institution, positioned by institution number.
# NOTE(review): the /raid/datasets prefix is hard-coded here rather than taken
# from dataset_dir -- confirm the two agree on the machine running this notebook.
by_institution = [load_institution_arrays(inst_num) for inst_num in range(10)]

In [None]:
# get the data pipeline fetcher
pipeline_fetcher = model.read_train

In [None]:
# Test by randomly selecting an index, then checking using the mapping
# above whether or not the corresponding image properly matches the
# institutional image already stored as a numpy array

In [None]:
def run_test():
    """Spot-check one random training slice against the per-institution arrays.

    Draws a random global slice index, fetches that slice through
    ``pipeline_fetcher``, maps the index to an (institution, position) pair via
    ``brain_to_inst_and_idx_offset``, and compares the fetched image and mask
    element-wise with the matching entries of ``by_institution``.

    Returns:
        Tuple ``(rand_idx, answer)``: the global slice index that was drawn, and
        ``True`` only if both the image and the mask matched everywhere.
    """
    slices_per_brain = 155
    num_slices = slices_per_brain * len(directories)
    # np.random.randint excludes its upper bound, so pass the total slice count
    # itself; subtracting one would make the final slice impossible to draw.
    rand_idx = np.random.randint(0, num_slices)
    pipeline_img, pipeline_msk = pipeline_fetcher(rand_idx)

    # Split the global index into (which brain, which slice within that brain).
    brain_num, slice_in_brain = divmod(rand_idx, slices_per_brain)
    inst_num, idx_offset = brain_to_inst_and_idx_offset[brain_num]
    idx = int(slice_in_brain) + idx_offset
    inst_imgs, inst_msks = by_institution[inst_num]
    img, msk = inst_imgs[idx], inst_msks[idx]

    # Reduce each comparison on its own: stacking the two element-wise results
    # into a single array only works when image and mask share the same shape.
    imgs_match = np.all(pipeline_img == img)
    msks_match = np.all(pipeline_msk == msk)
    return rand_idx, bool(imgs_match and msks_match)

In [None]:
# Repeat the spot check 20 times, keeping the index drawn and the outcome of each run.
outcomes = [run_test() for _ in range(20)]
idxs_tested = [drawn_idx for drawn_idx, _ in outcomes]
all_bool = [matched for _, matched in outcomes]
print("Indices tested were: {}".format(idxs_tested))
print("The test was a success?: {}".format(np.all(np.array(all_bool))))

In [None]:
# Time 5 training batches with `model`; the call returns the loss followed by the
# mean/std of the train times and the mean/std of the data-load times.
(loss,
 mean_train_times, std_train_times,
 mean_data_load_times, std_data_load_times) = model.train_epoch_test_performance(num_batches=5)

In [None]:
mean_train_times, mean_data_load_times, std_train_times

In [None]:
#Now compare to model that pulls data from memory

In [None]:
model_two = Unet(device='cpu')

In [None]:
# Same 5-batch timing run for `model_two`, unpacked in the same order:
# loss, mean/std of the train times, mean/std of the data-load times.
(loss_two,
 mean_train_times_two, std_train_times_two,
 mean_data_load_times_two, std_data_load_times_two) = model_two.train_epoch_test_performance(num_batches=5)

In [None]:
mean_train_times_two, mean_data_load_times_two, std_train_times_two

In [None]:
# portion of model training taken up by data loading and processing
mean_data_load_times/mean_train_times

In [None]:
# portion of model_two training taken up by data loading and processing
mean_data_load_times_two/mean_train_times_two

In [None]:
# by what factor does train time expand when using pipeline
mean_train_times/mean_train_times_two

In [None]:
# Run tests on GPU 

In [6]:
model = UnetWithPipeline(device='cuda')

In [8]:
model.train_epoch_test_performance(num_batches=3)

input x has shape: torch.Size([64, 1, 128, 128])
input x has shape: torch.Size([64, 1, 128, 128])
input x has shape: torch.Size([64, 1, 128, 128])


(4.3159833,
 42.94891369342804,
 0.10941922664642334,
 42.72566342353821,
 0.11082005500793457)

In [9]:
model_two = Unet(device='cuda')

In [10]:
model_two.train_epoch_test_performance(num_batches=3)

(4.0242453,
 0.197165846824646,
 0.006660819053649902,
 0.002887129783630371,
 0.00030481815338134766)

In [11]:
# Using GPU, by what factor does train time expand when using pipeline?
# The operands are the rounded second elements of the tuples printed by the two
# train_epoch_test_performance calls above: 42.949 for the pipeline model and
# 0.197 for the model that pulls data from memory.
# NOTE(review): both numbers are hard-coded, so this cell does not update when
# those cells are re-run.
42.949/0.197


218.0152284263959

In [None]:
############# SCRAPS BELOW #############

In [None]:
Ru