In [86]:
import numpy as np
import os
import nrrd
import sys
import copy
import matplotlib.pyplot as plt
import pandas as pd
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-<name>' and
# later dropped the old names; prefer the new name when it exists and fall back
# to the legacy one on older installations.
plt.style.use('seaborn-v0_8-dark' if 'seaborn-v0_8-dark' in plt.style.available else 'seaborn-dark')

In [87]:
def load_img(data_dir, filename, filetype):
    """Read ``<data_dir>/<filename>_<filetype>.nrrd`` and return its data array.

    Args:
        data_dir: Directory that contains the NRRD file.
        filename: First part of the file name (placed before the underscore).
        filetype: Second part of the file name (placed after the underscore).

    Returns:
        The array returned by ``nrrd.read`` with its axes permuted by
        ``transpose(1, 2, 0)``, i.e. the file's first axis becomes the last.
        The NRRD header is discarded.
    """
    nrrd_path = os.path.join(data_dir, '{}_{}.nrrd'.format(filename, filetype))
    volume, _header = nrrd.read(nrrd_path)
    # The dtype stored in the file is kept as-is (no float cast is applied here).
    return volume.transpose(1, 2, 0)

def normalize(img, minimum=-1000, maximum=1000):
    """Clip ``img`` to ``[minimum, maximum]`` and rescale it to ``[0, 1]``.

    The input is never modified; a new array is returned.

    Args:
        img: Array-like of intensities.
        minimum: Lower bound of the intensity window (maps to 0).
        maximum: Upper bound of the intensity window (maps to 1).

    Returns:
        ``(clipped - minimum) / max(1, maximum - minimum)`` as a floating-point
        array. Floating-point inputs keep their precision; integer and boolean
        inputs are promoted to float64.
    """
    values = np.asarray(img)
    if values.dtype.kind in 'iub':
        # Promote before any arithmetic: with a narrow integer dtype such as
        # int16, ``values - minimum`` is evaluated in that dtype and silently
        # wraps around once the window is wider than the dtype's range.
        values = values.astype(np.float64)

    # Cap at ``maximum`` first, then floor at ``minimum`` (same order as two
    # successive masked assignments); both calls return new arrays.
    clipped = np.maximum(np.minimum(values, maximum), minimum)

    # max(1, ...) guards against dividing by zero for a degenerate window.
    return (clipped - minimum) / max(1, (maximum - minimum))

def _tile_bounds(length, size):
    """Yield ``(start, end)`` index pairs that tile ``range(length)`` in steps of ``size``."""
    for offset in range(0, length, size):
        end = min(offset + size, length)
        # The last tile is shifted back so it keeps the full ``size``. When the
        # axis itself is shorter than ``size`` the start is clamped to 0;
        # otherwise it would be negative and the slice would wrap around,
        # silently dropping the leading voxels.
        yield max(0, end - size), end


def save_subvolumes(img, masks, save_dir, filename, h=64, w=64, d=64):
    """Cut a 3-D volume and its masks into patches and save each patch to disk.

    Patches are written to ``<save_dir>/<filename>/`` as
    ``<filename>_<start_x>_<start_y>_<start_z>.npy``. Each file holds a dict
    ``{'image': ..., 'label': ...}`` passed to ``np.save``; because the payload
    is a Python object it has to be read back with
    ``np.load(path, allow_pickle=True).item()``.

    Args:
        img: 3-D array; its shape is unpacked as ``(H, W, D)``.
        masks: Sequence of arrays sliced with the same indices as ``img``
            (assumed to share ``img``'s shape - not checked here).
        save_dir: Root output directory.
        filename: Case identifier, used as sub-directory name and file prefix.
        h, w, d: Patch size along the first, second and third axis.

    Notes:
        Along each axis the final patch is shifted back to end at the volume
        border, so it may overlap the previous patch. If an axis is shorter
        than the requested patch size, the patch spans that whole axis and is
        therefore smaller than ``(h, w, d)``.
    """
    save_path = os.path.join(save_dir, filename)
    os.makedirs(save_path, exist_ok=True)

    H, W, D = img.shape
    for start_x, end_x in _tile_bounds(H, h):
        for start_y, end_y in _tile_bounds(W, w):
            for start_z, end_z in _tile_bounds(D, d):
                # A leading axis of length 1 is added to the image; the masks
                # are stacked along that same leading axis, one entry per mask.
                subvolume = img[np.newaxis, start_x:end_x, start_y:end_y, start_z:end_z]
                mask_subvolume = np.concatenate(
                    [arr[np.newaxis, start_x:end_x, start_y:end_y, start_z:end_z] for arr in masks],
                    axis=0)
                out_file = os.path.join(
                    save_path, '{}_{}_{}_{}.npy'.format(filename, start_x, start_y, start_z))
                np.save(out_file, {'image': subvolume, 'label': mask_subvolume})

In [88]:
# Directory holding '<eid>_clean.nrrd' images and '<eid>_<ROI>.nrrd' masks.
finetune_dir = '/media/hdd10tb/xiangyiy/data/preprocessed'

# Structures exported as label channels; sorted so the channel order is
# alphabetical and does not depend on the order written here.
roi_names = ['Large Bowel',
            'Duodenum', 
            'Spinal Cord',
            'Liver',
            'Spleen',
            'Small Bowel',
            'Pancreas',
            'Kidney L',
            'Kidney R',
            'Stomach',
            'Gallbladder']
roi_names.sort()

# Output root and the case ids ('eid' column) of the train and validation splits.
save_dir = '/media/hdd10tb/junayedn/private_abdomen/3d_preprocessed'
train_filenames = pd.read_csv('/home/junayedn/ModelsGenesis/pytorch/split/abdomen_train_superpixel_10.csv')['eid'].values
val_filenames = pd.read_csv('/home/junayedn/ModelsGenesis/pytorch/split/abdomen_val.csv')['eid'].values
filenames = list(train_filenames) + list(val_filenames)

for f in filenames:
    img = normalize(load_img(finetune_dir, f, 'clean'))
    masks = []
    for roi in roi_names:
        roi_path = os.path.join(finetune_dir, '{}_{}.nrrd'.format(f, roi))
        if os.path.exists(roi_path):
            masks.append(load_img(finetune_dir, f, roi))
        else:
            # No file for this structure: use an all-zero channel. Allocate it
            # directly as uint8 instead of creating a float64 volume and
            # converting it afterwards.
            masks.append(np.zeros(img.shape, dtype=np.uint8))
    save_subvolumes(img, masks, save_dir, f, h=160, w=160, d=64)

In [75]:
# Survey the shapes of all '*_clean*' volumes and print the smallest and the
# largest extent found along each of the three axes.
clean_dir = '/media/hdd10tb/xiangyiy/data/preprocessed'
img_nums = [entry.split('_')[0] for entry in os.listdir(clean_dir) if '_clean' in entry]

heights, widths, depths = [], [], []
for f in img_nums:
    img = load_img(clean_dir, f, 'clean')
    h, w, d = img.shape
    heights.append(h)
    widths.append(w)
    depths.append(d)

# The sentinels 100000 / 0 take part in the comparison, so they are the values
# reported when no volume is found.
min_h, min_w, min_d = (min([100000] + dims) for dims in (heights, widths, depths))
max_h, max_w, max_d = (max([0] + dims) for dims in (heights, widths, depths))

print(min_h, min_w, min_d)
print(max_h, max_w, max_d)

172 268 100
316 508 333


In [79]:
# Total number of entries inside the per-case sub-directories of save_dir.
num_files = sum(
    len(os.listdir(os.path.join(save_dir, eid)))
    for eid in os.listdir(save_dir)
)
print(num_files)

665
