In [1]:
import os
# import pydicom
import numpy as np
# import dicom_numpy
from os import listdir
from scipy.io import loadmat
#from scipy.misc import imread, imresize, imsave
from sklearn.model_selection import train_test_split
import h5py


In [2]:
def get_dataset(dataset_path, section_size = (128, 256), validation_size = 0.2, save_npy = False, dataset_save_path = 'Data_v4/npy_dataset', batch_size = 1, augmentation = False, normalization=False, lung_channel = 7, target_lung_mean_dose = 8.0):
    """Build structure/dose arrays from every ``.mat`` file in a folder and write them as ``.npy`` batches.

    Every non-directory entry of ``dataset_path`` is read with ``scipy.io.loadmat``.
    Each file must contain the keys ``'structset_2d_channel'`` (structure channels,
    indexed here as a 4-D array) and ``'doseset_2d'`` (dose; a trailing axis is added
    at position 3 so it can be concatenated with the structures along that axis).
    Both are converted to ``float32`` and stacked along axis 0 across files.

    Parameters
    ----------
    dataset_path : str
        Folder holding the per-patient ``.mat`` files. Sub-directories are skipped.
    section_size, validation_size :
        Accepted for backward compatibility; not used by this function.
    save_npy : bool
        When true, also write the full arrays as ``structures.npy`` and ``dose.npy``.
    dataset_save_path : str
        Output folder (created if missing). One ``batch_<start>.npy`` file is written
        per batch, where ``<start>`` is the index of the batch's first sample.
    batch_size : int
        Number of samples per saved batch file.
    augmentation : bool
        When true, a copy of each patient flipped along axis 2 is appended right
        after that patient's data.
    normalization : bool
        When true, each patient's dose is rescaled so that the mean dose over the
        voxels where the lung channel is positive equals ``target_lung_mean_dose``.
    lung_channel : int
        Index (last axis of the structure array) of the lung mask. Default 7.
    target_lung_mean_dose : float
        Mean lung dose after normalization. Default 8.0.

    Returns
    -------
    (scans, dose_imgs) : tuple of numpy.ndarray
        Stacked structure and dose arrays.

    Raises
    ------
    ValueError
        If the folder contains no data files, or if normalization is requested for a
        patient whose lung mask is empty or whose mean lung dose is zero (the
        rescaling would otherwise fill the dose with NaN/inf).
    """
    scan_parts = []
    dose_parts = []
    # Sorted so that the sample order (and therefore the content of each
    # batch_<i>.npy file) does not depend on the file system's listing order.
    for entry in sorted(listdir(dataset_path)):
        mat_path = os.path.join(dataset_path, entry)
        if os.path.isdir(mat_path):
            continue

        mat = loadmat(mat_path)
        scan = np.array(mat['structset_2d_channel'], dtype='float32')
        dose_img = np.expand_dims(mat['doseset_2d'], axis=3).astype('float32')

        if normalization:
            print('start normalization')
            # Voxels where the lung structure channel is set.
            lung_mask = (scan[:, :, :, lung_channel] > 0).reshape(-1)
            lung_dose = dose_img.reshape(-1)[lung_mask]
            if lung_dose.size == 0:
                raise ValueError('Cannot normalize {0}: lung mask (channel {1}) is empty'.format(mat_path, lung_channel))
            lung_mean = lung_dose.mean()
            print('orignal lung mean dose', lung_mean)
            if lung_mean == 0:
                raise ValueError('Cannot normalize {0}: mean lung dose is zero'.format(mat_path))
            dose_img = dose_img/lung_mean*target_lung_mean_dose

        print('scan shape and dose shape=',scan.shape, dose_img.shape)
        if scan_parts:
            print('add new patient data')
        # Collect the pieces and concatenate once after the loop; concatenating
        # inside the loop re-copies everything gathered so far for every patient.
        scan_parts.append(scan)
        dose_parts.append(dose_img)
        if augmentation:
            print('start augmentation')
            scan_parts.append(np.flip(scan, 2))
            dose_parts.append(np.flip(dose_img, 2))

    if not scan_parts:
        raise ValueError('No data files found in {0}'.format(dataset_path))

    scans = np.concatenate(scan_parts, axis=0)
    dose_imgs = np.concatenate(dose_parts, axis=0)

    print('Scan Data Shape: ' + str(scans.shape))
    print('Segmantation Data Shape: ' + str(dose_imgs.shape))
    os.makedirs(dataset_save_path, exist_ok=True)
    if save_npy:
        np.save(os.path.join(dataset_save_path, 'structures.npy'), scans)
        np.save(os.path.join(dataset_save_path, 'dose.npy'), dose_imgs)
        print('NPY dataset saved!')

    # Each batch file stores structures and dose side by side along axis 3.
    for batch_i in range(0, dose_imgs.shape[0], batch_size):
        batch_end = batch_i + batch_size
        batch_npy = np.concatenate((scans[batch_i:batch_end], dose_imgs[batch_i:batch_end]), axis=3)
        np.save(os.path.join(dataset_save_path, 'batch_{0}.npy'.format(batch_i)), batch_npy)
    return scans, dose_imgs

In [3]:
def split_npy_dataset(npy_dataset_path, split_npy_dataset_path, validation_path, batch_size, test_size):
    """Split the saved full dataset into training and validation batch files.

    Loads ``structures.npy`` and ``dose.npy`` from ``npy_dataset_path``, splits them
    with ``train_test_split`` (fixed ``random_state=42``), and writes each subset as
    ``batch_<start>.npy`` files in which structures and dose are concatenated along
    axis 3. ``<start>`` is the index of the batch's first sample within its subset.

    Parameters
    ----------
    npy_dataset_path : str
        Folder containing ``structures.npy`` and ``dose.npy``.
    split_npy_dataset_path : str
        Output folder for the training batches (created if missing).
    validation_path : str
        Output folder for the held-out batches (created if missing).
    batch_size : int
        Number of samples per batch file.
    test_size :
        Passed through to ``train_test_split`` as ``test_size``.
    """
    X = np.load(os.path.join(npy_dataset_path, 'structures.npy'))
    Y = np.load(os.path.join(npy_dataset_path, 'dose.npy'))

    os.makedirs(split_npy_dataset_path, exist_ok=True)
    os.makedirs(validation_path, exist_ok=True)

    X, X_test, Y, Y_test = train_test_split(X, Y, test_size=test_size, random_state=42)
    print('X_test size',X_test.shape,'Y_test size=',Y_test.shape)

    # The held-out subset is written batch by batch; the previous version also
    # built one concatenated copy of the entire subset that was never used.
    _save_batches(X_test, Y_test, validation_path, batch_size)
    _save_batches(X, Y, split_npy_dataset_path, batch_size)

    print('Splitted NPY Dataset saved!')


def _save_batches(inputs, targets, out_dir, batch_size):
    """Write consecutive ``batch_size`` slices of inputs+targets (joined on axis 3) to ``out_dir``."""
    for batch_i in range(0, targets.shape[0], batch_size):
        batch_end = batch_i + batch_size
        batch_npy = np.concatenate((inputs[batch_i:batch_end], targets[batch_i:batch_end]), axis=3)
        np.save(os.path.join(out_dir, 'batch_{0}.npy'.format(batch_i)), batch_npy)

In [4]:
if __name__ == '__main__':
    # Dataset root; both names are kept for later/interactive use.
    dataset_path = 'Data_v4'
    npy_dataset_path = 'Data_v4'

    # Folders holding the raw per-patient .mat files.
    training_path = 'Data_v4/training'
    validation_path = 'Data_v4/validation'

    # Folders that receive the generated batch_<i>.npy files.
    npy_training_path = 'Data_v4/npy_dataset/training_npy_dataset'
    npy_validation_path = 'Data_v4/npy_dataset/validation_npy'

    # Training set: normalized, no augmentation, one sample per batch file.
    scans, dose_imgs = get_dataset(
        training_path,
        section_size=(128, 256, 1),
        validation_size=0.2,
        save_npy=False,
        dataset_save_path=npy_training_path,
        batch_size=1,
        augmentation=False,
        normalization=True,
    )

    # Validation set, same settings. This overwrites scans/dose_imgs, so after
    # the cell finishes they hold the validation arrays.
    scans, dose_imgs = get_dataset(
        validation_path,
        section_size=(128, 256, 1),
        validation_size=0.2,
        save_npy=False,
        dataset_save_path=npy_validation_path,
        batch_size=1,
        augmentation=False,
        normalization=True,
    )
    # Disabled alternative (random split of one saved dataset):
    #   split_npy_dataset(npy_dataset_path, splitted_npy_dataset_path, validation_path, batch_size = 1, test_size = 0.2)

start normalization
orignal lung mean dose 7.8809934
scan shape and dose shape= (28, 128, 256, 13) (28, 128, 256, 1)
start normalization
orignal lung mean dose 7.765043
scan shape and dose shape= (29, 128, 256, 13) (29, 128, 256, 1)
add new patient data
start normalization
orignal lung mean dose 7.938748
scan shape and dose shape= (30, 128, 256, 13) (30, 128, 256, 1)
add new patient data
start normalization
orignal lung mean dose 8.091753
scan shape and dose shape= (29, 128, 256, 13) (29, 128, 256, 1)
add new patient data
start normalization
orignal lung mean dose 7.9635015
scan shape and dose shape= (29, 128, 256, 13) (29, 128, 256, 1)
add new patient data
start normalization
orignal lung mean dose 7.905461
scan shape and dose shape= (27, 128, 256, 13) (27, 128, 256, 1)
add new patient data
start normalization
orignal lung mean dose 8.273853
scan shape and dose shape= (30, 128, 256, 13) (30, 128, 256, 1)
add new patient data
start normalization
orignal lung mean dose 7.794336
scan sha

In [None]:
# Scratch cell: load one structure set and one dose set through the helper functions.
# NOTE(review): get_scan and get_dose_img are defined in cells further down this
# notebook, so on a fresh kernel this cell raises NameError unless those cells
# are run first.
test1 = get_scan('Data/structset.mat')
test2  = get_dose_img('Data/doseset.mat')


In [None]:
# Hold out 20% of the samples with a fixed seed so the split is repeatable.
# Relies on test1/test2 already existing in the kernel.
X, X_test, Y, Y_test = train_test_split(test1, test2, test_size=0.2, random_state=42)

In [None]:
# Display the shape of the held-out inputs (X_test must already exist in the kernel).
X_test.shape

In [None]:
# Scratch check that structures and dose can be stacked along axis 3.
# NOTE(review): voxel_array and dose_array are not assigned at notebook level in
# any visible cell, so they must be created in the kernel before running this.
if np.ndim(dose_array) == 3:
    # Add the trailing axis only when it is missing, so re-running this cell
    # does not keep appending axes and break the concatenation.
    dose_array = np.expand_dims(dose_array, axis=3)
print(voxel_array.shape)
print(dose_array.shape)
test = np.concatenate((voxel_array, dose_array), axis=3)
print(test.shape)

In [None]:
def get_scan(mat_path):
    """Load the structure-set array from a MATLAB ``.mat`` file.

    Parameters
    ----------
    mat_path : str
        Path of the ``.mat`` file to read. It must contain the key
        ``'structset_2d_channel'``.

    Returns
    -------
    numpy.ndarray or None
        The array stored under ``'structset_2d_channel'``, or ``None`` (after
        printing a message) when ``mat_path`` does not exist.
    """
    if not os.path.exists(mat_path):
        print('MAT files not exists!')
        return None

    # Read the file the caller asked for; previously a fixed path was loaded
    # regardless of the argument that had just been validated.
    mat = loadmat(mat_path)
    return mat['structset_2d_channel']


In [None]:
def get_dose_img(images_path):
    """Load the dose array from a MATLAB ``.mat`` file and append a trailing axis.

    Parameters
    ----------
    images_path : str
        Path of the ``.mat`` file to read. It must contain the key ``'doseset_2d'``.

    Returns
    -------
    numpy.ndarray or None
        The array stored under ``'doseset_2d'`` with a new axis inserted at
        position 3, or ``None`` (after printing a message) when ``images_path``
        does not exist.
    """
    if not os.path.exists(images_path):
        print('Dose images not exists!')
        return None

    # Read the file the caller asked for; previously a fixed path was loaded
    # regardless of the argument that had just been validated.
    mat = loadmat(images_path)
    return np.expand_dims(mat['doseset_2d'], axis=3)

In [None]:
def scan_pading(scan, seg_img, section_size):
    """Zero-pad the last axis of a 3-D scan (and its segmentation) up to a multiple of ``section_size``.

    Parameters
    ----------
    scan : numpy.ndarray
        3-D array; padding is appended at the end of its last axis.
    seg_img : numpy.ndarray or None
        Matching segmentation, padded the same way. May be ``None`` when no
        segmentation is available.
    section_size : int
        The last-axis length of the result is a multiple of this value.

    Returns
    -------
    (padded_scan, padded_seg_img) : tuple
        When the last axis of ``scan`` is already a multiple of ``section_size``
        both inputs are returned unchanged. Otherwise the padded arrays are
        returned; ``padded_seg_img`` is ``None`` when ``seg_img`` is ``None`` or
        cannot be padded with the 3-axis padding spec.
    """
    remainder = scan.shape[-1] % section_size
    if remainder == 0:
        # Already aligned: nothing to pad.
        return scan, seg_img

    pad_size = section_size - remainder
    pad_spec = ((0, 0), (0, 0), (0, pad_size))
    padded_scan = np.pad(scan, pad_spec, 'constant')

    if seg_img is None:
        return padded_scan, None
    try:
        padded_seg_img = np.pad(seg_img, pad_spec, 'constant')
    except (ValueError, TypeError):
        # Best effort, as before: a segmentation that does not fit the padding
        # spec yields None. Only padding errors are caught, so interrupts and
        # other unrelated exceptions are no longer swallowed.
        padded_seg_img = None
    return padded_scan, padded_seg_img


In [None]:
def split_scans_imgs(scans, seg_img, section_size):
    """Cut a scan and its segmentation into overlapping sections along the last axis.

    A window of ``section_size`` slices is moved one slice at a time along the
    last axis; every window position yields one section.

    Parameters
    ----------
    scans : numpy.ndarray
        Array indexed as ``scans[:, :, i:j]``.
    seg_img : numpy.ndarray or None
        Matching segmentation, split the same way. ``None`` is accepted (for
        example when padding produced no segmentation) and passed through.
    section_size : int
        Number of slices per section.

    Returns
    -------
    (splitted_scans, splitted_seg_img) : tuple
        Arrays whose first axis enumerates the window positions.
        ``splitted_seg_img`` is ``None`` when ``seg_img`` is ``None``.
    """
    def _windows(volume):
        # Number of valid window start positions along the last axis.
        n_starts = volume.shape[-1] - (section_size - 1)
        return np.array([volume[:, :, start:start + section_size] for start in range(0, n_starts)])

    splitted_scans = _windows(scans)
    splitted_seg_img = None if seg_img is None else _windows(seg_img)
    return splitted_scans, splitted_seg_img

In [None]:
def read_npy_dataset(npy_dataset_path, test_size = 0.2, random_state = None):
    """Load the saved structure/dose arrays and split them into train and test parts.

    Parameters
    ----------
    npy_dataset_path : str
        Folder containing ``structures.npy`` and ``dose.npy``.
    test_size :
        Passed through to ``train_test_split`` as ``test_size``.
    random_state :
        Passed through to ``train_test_split``. The default ``None`` keeps the
        previous behavior (a different split on every call); pass a fixed value
        such as 42 to get a repeatable split.

    Returns
    -------
    (X, X_test, Y, Y_test) : tuple of numpy.ndarray
        Training inputs, test inputs, training targets, test targets.
    """
    structures = np.load(os.path.join(npy_dataset_path, 'structures.npy'))
    dose = np.load(os.path.join(npy_dataset_path, 'dose.npy'))
    X, X_test, Y, Y_test = train_test_split(structures, dose, test_size=test_size, random_state=random_state)
    print('Train Data Shape: ' + str(X.shape[0]))
    print('Test Data Shape: ' + str(X_test.shape[0]))
    return X, X_test, Y, Y_test
