In [1]:
from tfedlrn.collaborator.pytorchmodels.pytorch2dunet_data_pipeline \
  import PyTorch2DUNetDPipe as UnetWithPipeline

In [2]:
from tfedlrn.collaborator.pytorchmodels.pytorch2dunet \
  import PyTorch2DUNet as Unet

In [3]:
import tfedlrn.datasets as datasets

In [4]:
import numpy as np
import pickle
import os

In [5]:
# restrict to an available gpu
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [6]:
model = UnetWithPipeline(device='cpu')

In [7]:
dataset_dir = datasets._get_dataset_dir()

In [8]:
# testing to see that the path to get the appropriate indexed image 
# looks good at first glance

In [9]:
idx_to_train_paths = model.idx_to_train_paths

In [10]:
directories = os.listdir(os.path.join(dataset_dir, \
                  'BraTS17/MICCAI_BraTS17_Data_Training/HGG'))
directories[0:3]

['Brats17_TCIA_401_1', 'Brats17_CBICA_AUN_1', 'Brats17_TCIA_319_1']

In [11]:
# the paths below should match the concept of enumerateing all image slices
# obtained by grabbing 155 axial slice 
# images from each directory in the list above
idx_to_train_paths[0], idx_to_train_paths[1], idx_to_train_paths[2], \
idx_to_train_paths[154], idx_to_train_paths[155], idx_to_train_paths[310]

('/raid/datasets/BraTS17/MICCAI_BraTS17_Data_Training/HGG/Brats17_TCIA_401_1',
 '/raid/datasets/BraTS17/MICCAI_BraTS17_Data_Training/HGG/Brats17_TCIA_401_1',
 '/raid/datasets/BraTS17/MICCAI_BraTS17_Data_Training/HGG/Brats17_TCIA_401_1',
 '/raid/datasets/BraTS17/MICCAI_BraTS17_Data_Training/HGG/Brats17_TCIA_401_1',
 '/raid/datasets/BraTS17/MICCAI_BraTS17_Data_Training/HGG/Brats17_CBICA_AUN_1',
 '/raid/datasets/BraTS17/MICCAI_BraTS17_Data_Training/HGG/Brats17_TCIA_319_1')

In [12]:
# each brain (155 slices) given by a directory in 'directories' at an index
# corresponds to the institution number below at the same index

In [13]:
with open(os.path.join(dataset_dir, 'BraTS17/brain_number_to_institution.pkl') \
          ,'rb') as file:
    brain_to_inst = pickle.load(file)
    
# convert to int and shifting by one to be 0-indexed
brain_to_inst = [int(entry)-1 for entry in brain_to_inst]
brain_to_inst[0:5]

[1, 0, 7, 2, 1]

In [14]:
# create a new list (of tuples now) whose first entry remains the
# correct insitution for that brain and the second is
# the index of the brain at that insitution
next_idx = np.zeros(10).astype(np.int32)
brain_to_inst_and_idx_offset = []
for brain_num, inst_num in enumerate(brain_to_inst):
    this_inst = brain_to_inst[brain_num]
    this_idx_offset = next_idx[this_inst]
    next_idx[this_inst] += 155
    brain_to_inst_and_idx_offset.append((this_inst, this_idx_offset))

In [15]:
brain_to_inst_and_idx_offset[0:6]

[(1, 0), (0, 0), (7, 0), (2, 0), (1, 155), (8, 0)]

In [16]:
# collect all images and masks at each institution

In [17]:
imgs_inst0 = np.transpose(np.load('/raid/datasets/BraTS17/imgs_inst0.npy'), [0,3, 1, 2])
msks_inst0 = np.transpose(np.load('/raid/datasets/BraTS17/msks_inst0.npy'), [0,3, 1, 2])

In [18]:
imgs_inst1 = np.transpose(np.load('/raid/datasets/BraTS17/imgs_inst1.npy'), [0,3, 1, 2])
msks_inst1 = np.transpose(np.load('/raid/datasets/BraTS17/msks_inst1.npy'), [0,3, 1, 2])

In [19]:
imgs_inst2 = np.transpose(np.load('/raid/datasets/BraTS17/imgs_inst2.npy'), [0,3, 1, 2])
msks_inst2 = np.transpose(np.load('/raid/datasets/BraTS17/msks_inst2.npy'), [0,3, 1, 2])

In [20]:
imgs_inst3 = np.transpose(np.load('/raid/datasets/BraTS17/imgs_inst3.npy'), [0,3, 1, 2])
msks_inst3 = np.transpose(np.load('/raid/datasets/BraTS17/msks_inst3.npy'), [0,3, 1, 2])

In [21]:
imgs_inst4 = np.transpose(np.load('/raid/datasets/BraTS17/imgs_inst4.npy'), [0,3, 1, 2])
msks_inst4 = np.transpose(np.load('/raid/datasets/BraTS17/msks_inst4.npy'), [0,3, 1, 2])

In [22]:
imgs_inst5 = np.transpose(np.load('/raid/datasets/BraTS17/imgs_inst5.npy'), [0,3, 1, 2])
msks_inst5 = np.transpose(np.load('/raid/datasets/BraTS17/msks_inst5.npy'), [0,3, 1, 2])

In [23]:
imgs_inst6 = np.transpose(np.load('/raid/datasets/BraTS17/imgs_inst6.npy'), [0,3, 1, 2])
msks_inst6 = np.transpose(np.load('/raid/datasets/BraTS17/msks_inst6.npy'), [0,3, 1, 2])

In [24]:
imgs_inst7 = np.transpose(np.load('/raid/datasets/BraTS17/imgs_inst7.npy'), [0,3, 1, 2])
msks_inst7 = np.transpose(np.load('/raid/datasets/BraTS17/msks_inst7.npy'), [0,3, 1, 2])

In [25]:
imgs_inst8 = np.transpose(np.load('/raid/datasets/BraTS17/imgs_inst8.npy'), [0,3, 1, 2])
msks_inst8 = np.transpose(np.load('/raid/datasets/BraTS17/msks_inst8.npy'), [0,3, 1, 2])

In [26]:
imgs_inst9 = np.transpose(np.load('/raid/datasets/BraTS17/imgs_inst9.npy'), [0,3, 1, 2])
msks_inst9 = np.transpose(np.load('/raid/datasets/BraTS17/msks_inst9.npy'), [0,3, 1, 2])

In [27]:
by_institution = [(imgs_inst0, msks_inst0), 
                  (imgs_inst1, msks_inst1), 
                  (imgs_inst2, msks_inst2), 
                  (imgs_inst3, msks_inst3), 
                  (imgs_inst4, msks_inst4), 
                  (imgs_inst5, msks_inst5), 
                  (imgs_inst6, msks_inst6), 
                  (imgs_inst7, msks_inst7), 
                  (imgs_inst8, msks_inst8), 
                  (imgs_inst9, msks_inst9),]

In [28]:
# get the data pipeline fetcher
pipeline_fetcher = model.read_train

In [29]:
# Test by randomly selecting an index, then checking using the mapping
# above whether or not the corresponding image properly matches the
# institutional image already stored as a numpy array

In [30]:
def run_test():
    max_index = 155 * len(directories) - 1
    rand_idx = np.random.randint(0, max_index)
    pipeline_img, pipeline_msk = pipeline_fetcher(rand_idx)
    brain_num = rand_idx // 155
    inst_num, idx_offset = brain_to_inst_and_idx_offset[brain_num]
    idx = int(rand_idx % 155) + idx_offset
    inst_imgs, inst_msks = by_institution[inst_num]
    img, msk = inst_imgs[idx], inst_msks[idx]
    imgs_eq = pipeline_img == img
    msks_eq = pipeline_msk == msk
    answer = np.all(np.array([imgs_eq, msks_eq]))
    return rand_idx, answer
    
    
    

In [31]:
all_bool = []
idxs_tested = []
for _ in range(10):
    this_idx, this_answer = run_test()
    idxs_tested.append(this_idx)
    all_bool.append(this_answer)    
print("Indices tested were: {}".format(idxs_tested))    
print("The test was a success?: {}".format(np.all(np.array(all_bool))))

Indices tested were: [5892, 20676, 31711, 10999, 23633, 27474, 8211, 18020, 1768, 30852]
The test was a success?: True


In [32]:
loss, mean_train_times, std_train_times, \
  mean_data_load_times, std_data_load_times = \
    model.train_epoch_test_performance(num_batches=5)

input x has shape: torch.Size([64, 1, 128, 128])
input x has shape: torch.Size([64, 1, 128, 128])
input x has shape: torch.Size([64, 1, 128, 128])
input x has shape: torch.Size([64, 1, 128, 128])
input x has shape: torch.Size([64, 1, 128, 128])


In [33]:
mean_train_times, mean_data_load_times, std_train_times

(52.043133199214935, 44.28689420223236, 3.7586319103531824)

In [34]:
#Now compare to model that pulls data from memory

In [35]:
model_two = Unet(device='cpu')

In [36]:
loss_two, mean_train_times_two, std_train_times_two, \
  mean_data_load_times_two, std_data_load_times_two = \
    model_two.train_epoch_test_performance(num_batches=5)

In [37]:
mean_train_times_two, mean_data_load_times_two, std_train_times_two

(7.532769978046417, 0.005398869514465332, 0.11272035446925488)

In [40]:
# portion of model training taken up by data loading and processing
mean_data_load_times/mean_train_times

0.8509651798385656

In [41]:
# portion of model_two training taken up by data loading and processing
mean_data_load_times_two/mean_train_times_two

0.0007167176921902372

In [42]:
# by what factor does train time expand when using pipeline
mean_train_times/mean_train_times_two

6.908897172074812

In [38]:
############# SCRAPS BELOW #############

In [39]:
batch_size = 64
pipeline_fetcher = model_w_pipe.read_train
images_in_batch = [pipeline_fetcher(i) for i in range(batch_size)]
features_in_batch = [np.expand_dims(features, axis=0) for features, labels in images_in_batch]
labels_in_batch = [np.expand_dims(labels, axis=0) for features, labels in images_in_batch]

NameError: name 'model_w_pipe' is not defined

In [None]:
features_in_batch[0].shape

In [None]:
batch_features_pipe = np.concatenate(features_in_batch, axis=0)
batch_labels_pipe = np.concatenate(labels_in_batch, axis=0)

In [None]:
batch_features_pipe.shape

In [None]:
print(batch_features_pipe.dtype)
print(batch_features.dtype)

In [None]:
# adjust above to account for our 0 first ordering
brain_to_inst = [str(int(inst) - 1) for inst in brain_to_inst]
brain_to_inst


In [None]:
# determine how the files show up when walking the directory
# as we do in the new_dataset_converter.py
root_dir = '/raid/datasets/BraTS17/MICCAI_BraTS17_Data_Training/HGG'
dir_dict = {}
counter = 0
for subdir, dir, files in os.walk(root_dir):
    if subdir == root_dir:
        continue
    else:
        dir_dict[subdir] = counter
        print(subdir)
    counter += 1
    
    

In [None]:
# find when the file we see first (using ls) comes up in the list
# I am working off of the assumption that the map of brain to inst uses
# the order observed using ls (rather than the walk above)
location_of_first = \
  dir_dict['/raid/datasets/BraTS17/MICCAI_BraTS17_Data_Training/HGG/Brats17_2013_10_1']
print("Our file walk will put the first brain we see at index: {}".format(location_of_first))

In [None]:
# lets grab the first slice of the brain at that index


In [None]:
# find which insitution holds this ls_first brain
# recall we are guessing the brain_to_inst follows ls order
institution_focus = brain_to_inst[0]  
print("The first brain comes from inst: {}".format(institution_focus))
# inst 1 was the answer above (which means imgs_inst1.npy)
# we assume for now that the imgs_inst1.npy file was filled with brains
# using the ls_first order
which_brain_at_nine = 0
print("The brain given by file Brats17_2013_10_1 is brain number "\
  "{} at insitution {}.".format(which_brain_at_nine, institution_focus))

In [None]:
# double check above

In [None]:
---------------------

In [None]:
---------------------

In [None]:
imgs_inst1.shape

In [None]:
# test to see if this all adds up
np.all(imgs_inst1[0]== batch_features_pipe.numpy()[0])

In [None]:
imgs_train.shape, imgs_inst1.shape

In [None]:
gen = [np.all(imgs_train[70] == imgs_inst1[70+155*num]) for num in range(int(len(imgs_inst1)/155))]
for thing in gen:
    print(thing)

In [None]:
gen = [np.all(imgs_train[70] == imgs_inst0[70+155*num]) for num in range(int(len(imgs_inst0)/155))]
for thing in gen:
    print(thing)

In [None]:
gen = [np.all(imgs_train[70] == imgs_inst2[70+155*num]) for num in range(int(len(imgs_inst2)/155))]
for thing in gen:
    print(thing)

In [None]:
imgs_train = np.transpose(np.load('/raid/datasets/BraTS17/imgs_train.npy'), [0,3, 1, 2])
msks_train = np.transpose(np.load('/raid/datasets/BraTS17/msks_train.npy'), [0,3, 1, 2])

In [None]:
# manually update channels on features
imgs_train = imgs_train[:,2,:,:]
imgs_train = np.expand_dims(imgs_train, axis=1)
imgs_train.shape

In [None]:
batch_features_pipe.numpy()[0].shape, imgs_inst8[8*155].shape

In [None]:
np.all(batch_features_pipe.numpy()[0] == imgs_inst8[8*155])

In [None]:
# manually update channels on features
first_brain = first_brain[:,2,:,:]
first_brain = np.expand_dims(first_brain, axis=1)
first_brain.shape

In [None]:
np.all(first_brain[0] == imgs_inst1[0])

In [None]:
first_brain[0].shape, imgs_inst1[0].shape

In [None]:
np.all(imgs_inst2[0] == batch_features_pipe[0].numpy())


In [None]:
np.all(batch_features.numpy()==batch_features_pipe)

In [None]:
model_w_pipe.train_epoch()

In [None]:
model.train_epoch()

In [None]:
# unit testing

In [None]:
slice_num = 71
one_img, one_msk = pipeline_fetcher(71)
one_img.shape

In [None]:
institutional_sets = [imgs_inst0, imgs_inst1, imgs_inst2, imgs_inst3, imgs_inst4, imgs_inst5, imgs_inst6, imgs_inst7, imgs_inst8, imgs_inst9]

In [None]:
def compare(target_img, slice_num, institutional_sets):
    slice_num = slice_num % 155
    for idx, inst_imgs in enumerate(institutional_sets):
        if target_img.shape != inst_imgs[idx].shape:
            print("Shapes are: {} and {}.".format(target_img.shape, inst_imgs[idx].shape))
            raise ValueError("Shapes do not match for at least one insitution!!")
        gen = [(num, np.all(target_img == inst_imgs[slice_num+155*num])) \
               for num in range(int(len(inst_imgs)/155))]
        for num, answer in gen:
            if answer == True:
                print("Equality at brain_number: {}, inst idx: {}".
                      format(num, idx))

In [None]:
for slice_num in range(160, 169, 1):
    print("For slice number: {}".format(slice_num))
    target_img, target_msk = pipeline_fetcher(slice_num)
    compare(target_img, slice_num, institutional_sets)

In [None]:
for slice_num in range(369, 379, 1):
    batch_wanted = slice_num // batch_size
    idx_within_batch = slice_num % batch_size
    for batch_num, (target_img, target_label) in enumerate(model.train_loader):
        if batch_num == batch_wanted:
            img = target_img[idx_within_batch]
            break
    print("For slice number: {}".format(slice_num))
    compare(img.numpy(), slice_num, institutional_sets)

In [None]:
print(batch_features_pipe.shape, imgs_inst1[0:64].shape)
np.all(batch_features_pipe == imgs_inst1[0:64])

In [None]:
compare(one_img, 70, institutional_sets)

In [None]:
compare(one_img, 50, institutional_sets)

In [None]:
for i in range(50, 60, 1):
    print(i)
    compare(one_img, i, institutional_sets)

In [None]:
gen = [(num, np.all(one_img == imgs_inst1[70+155*num])) for num in range(int(len(imgs_inst1)/155))]
for num, thing in gen:
    if thing == True:
        print("We got equality at a num of: {}".format(num))

In [None]:
gen = [(num, np.all(one_img == imgs_inst2[70+155*num])) for num in range(int(len(imgs_inst2)/155))]
for num, thing in gen:
    if thing == True:
        print("We got equality at a num of: {}".format(num))

In [None]:
gen = [(num, np.all(one_img == imgs_inst3[70+155*num])) for num in range(int(len(imgs_inst3)/155))]
for num, thing in gen:
    if thing == True:
        print("We got equality at a num of: {}".format(num))

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
%matplotlib inline

fig = plt.figure()


def f(x, y):
    return np.sin(x) + np.cos(y)

x = np.linspace(0, 2 * np.pi, 120)
y = np.linspace(0, 2 * np.pi, 100).reshape(-1, 1)
# ims is a list of lists, each row is a list of artists to draw in the
# current frame; here we are just animating one artist, the image, in
# each frame
ims = []
for i in range(60):
    x += np.pi / 15.
    y += np.pi / 20.
    im = plt.imshow(f(x, y), animated=True)
    ims.append([im])

ani = animation.ArtistAnimation(fig, ims, interval=50, blit=True,
                                repeat_delay=1000)

# To save the animation, use e.g.
#
# ani.save("movie.mp4")
#
# or
#
# from matplotlib.animation import FFMpegWriter
# writer = FFMpegWriter(fps=15, metadata=dict(artist='Me'), bitrate=1800)
# ani.save("movie.mp4", writer=writer)

plt.show()