In [1]:
import numpy as np 
import pandas as pd 
import os
from joblib import Parallel, delayed
import SimpleITK as sitk
from scipy import ndimage

In [2]:
def load_itk_image(filename):
    """Read an image file with SimpleITK and return it as NumPy data.

    Parameters
    ----------
    filename : str
        Path handed to ``sitk.ReadImage``.

    Returns
    -------
    tuple
        ``(voxels, origin, spacing)``: the array produced by
        ``sitk.GetArrayFromImage`` plus the image's origin and spacing, each
        converted to a NumPy array with its component order reversed.
    """
    image = sitk.ReadImage(filename)
    voxels = sitk.GetArrayFromImage(image)
    # Flip the metadata tuples end-to-end; callers in this file index the
    # results as (z, y, x), i.e. spacing[0] is treated as the Z spacing.
    origin_reversed = np.array(image.GetOrigin()[::-1])
    spacing_reversed = np.array(image.GetSpacing()[::-1])
    return voxels, origin_reversed, spacing_reversed

In [3]:
def worldToVoxelCoord(worldCoord, origin, spacing):
    """Express a world-space coordinate in (fractional) voxel units.

    The offset of ``worldCoord`` from ``origin`` is taken as an absolute
    value, so the result is never negative, and is then divided element-wise
    by ``spacing``.

    Parameters
    ----------
    worldCoord : array-like
        Coordinate in world space.
    origin : numpy.ndarray
        World-space origin, in the same component order as ``worldCoord``.
    spacing : numpy.ndarray
        Per-axis spacing, in the same component order.

    Returns
    -------
    numpy.ndarray
        ``|worldCoord - origin| / spacing``.
    """
    offset_from_origin = worldCoord - origin
    return np.abs(offset_from_origin) / spacing

In [4]:
def resize_voxel(x, desired_shape):
    """Resample a volume to ``desired_shape`` using linear interpolation.

    Parameters
    ----------
    x : numpy.ndarray
        Input volume; every axis must be longer than one element.
    desired_shape : sequence of int
        Target shape with one entry per axis of ``x``. Tuples and lists are
        both accepted.

    Returns
    -------
    numpy.ndarray
        ``x`` zoomed (``order=1``) so that its shape equals
        ``tuple(desired_shape)``.

    Raises
    ------
    AssertionError
        If any axis of ``x`` has length <= 1, or if the zoomed output does not
        end up with the requested shape.
    """
    # Normalise to a tuple so the shape check below also works for list input.
    desired_shape = tuple(int(s) for s in desired_shape)
    assert all(s > 1 for s in x.shape)
    # float64 keeps the per-axis factors as exact as possible; zoom() rounds
    # ``input_length * factor`` to obtain each output length.
    zoom_factors = np.asarray(desired_shape, dtype='float64') / np.asarray(x.shape, dtype='float64')
    # ndimage.zoom is the public entry point; the ndimage.interpolation
    # namespace used previously is deprecated.
    output = ndimage.zoom(x, zoom_factors, order=1)
    assert output.shape == desired_shape,'resize error'
    return output

In [5]:
def sample_random_voxels(patient, df, n_sample,VOXEL_SIZE,PATH):
    """Cut randomly placed cubes out of one scan and label each with a nodule index.

    The scan ``PATH + patient + '.mhd'`` is loaded, ``n_sample`` boxes of
    roughly ``VOXEL_SIZE`` physical units per side (jittered by +/-1% per
    axis) are cropped at random positions, resized to
    ``(VOXEL_SIZE, VOXEL_SIZE, VOXEL_SIZE)`` and clipped to ``[-1000, 400]``.

    Parameters
    ----------
    patient : str
        Value matched against ``df['seriesuid']``; also the file stem of the scan.
    df : pandas.DataFrame
        Annotation table with columns ``seriesuid``, ``coordX``, ``coordY``,
        ``coordZ`` and ``diameter_mm``. It is not modified.
    n_sample : int or float
        Number of boxes to draw (converted with ``int``).
    VOXEL_SIZE : int
        Edge length of the returned cubes; also the physical edge length of
        the crop before resizing.
    PATH : str
        Directory prefix (including trailing separator) of the ``.mhd`` file.

    Returns
    -------
    tuple of numpy.ndarray
        ``(voxels, ixs)``. ``voxels`` stacks the cubes, each transposed with
        axes ``(2, 1, 0)``. ``ixs[k]`` is the positional row number in ``df``
        of a nodule lying inside cube ``k`` with a margin of 1.5 diameters on
        every side, or ``-1`` if there is none. When several nodules qualify,
        the last one in ``df`` order wins.

    Raises
    ------
    ValueError
        If the scan is too small along some axis to place the crop box.
    """
    #TODO: TRIM IMAGE

    # Positional row numbers of this patient's annotations inside the full
    # table. Deriving them directly avoids writing an 'ix' column into the
    # caller's dataframe.
    row_positions = np.flatnonzero(np.asarray(df['seriesuid'] == patient, dtype=bool))
    dfsub = df.iloc[row_positions]
    nodule_coords = list(zip(dfsub['coordZ'], dfsub['coordY'], dfsub['coordX']))
    nodule_sizes = list(dfsub['diameter_mm'])

    img,origin,spacing = load_itk_image(PATH + patient + '.mhd')
    voxel_coords = [worldToVoxelCoord(c,origin,spacing) for c in nodule_coords]

    # Crop extent in voxels along (z, y, x). One independent jitter draw per
    # axis, in Z, Y, X order.
    numZpix = np.round(np.random.uniform(.99,1.01) *float(VOXEL_SIZE) / spacing[0])
    numYpix = np.round(np.random.uniform(.99,1.01) *float(VOXEL_SIZE) / spacing[1])
    numXpix = np.round(np.random.uniform(.99,1.01) *float(VOXEL_SIZE) / spacing[2])
    half_extent = np.array([numZpix, numYpix, numXpix]) / 2.0
    img_shape = np.array([img.shape[0], img.shape[1], img.shape[2]])

    # Integer bounds for the box centre. randint truncates float bounds the
    # same way, but explicit ints make that visible and allow a readable error.
    center_low = [int(h) for h in half_extent]
    center_high = [int(s - h) for s, h in zip(img_shape, half_extent)]
    for axis_name, low, high in zip('zyx', center_low, center_high):
        if low >= high:
            raise ValueError(
                'scan %s is too small along %s for a %s-sized crop (valid centre range [%d, %d) is empty)'
                % (patient, axis_name, VOXEL_SIZE, low, high))

    voxels = []
    ixs = []
    for _ in range(int(n_sample)):
        # Draw order (x, y, z) is kept so a seeded RNG yields the same boxes.
        x_center = np.random.randint(low=center_low[2],high=center_high[2])
        y_center = np.random.randint(low=center_low[1],high=center_high[1])
        z_center = np.random.randint(low=center_low[0],high=center_high[0])
        center = np.array([z_center, y_center, x_center], dtype=float)

        # Fractional box bounds in (z, y, x), clamped to the image.
        box_start = np.clip(center - half_extent, 0, img_shape)
        box_end = np.clip(center + half_extent, 0, img_shape)
        z0, y0, x0 = (int(v) for v in box_start)
        z1, y1, x1 = (int(v) for v in box_end)

        voxel = img[z0:z1, y0:y1, x0:x1]
        voxel_norm = resize_voxel(voxel, (VOXEL_SIZE, VOXEL_SIZE, VOXEL_SIZE))
        voxel_norm = np.clip(voxel_norm, -1000, 400)

        #determine index (if applicable)
        #if no match put -1.
        ix = -1
        for nodule_pos,(coord,size) in enumerate(zip(voxel_coords,nodule_sizes)):
            # The box bounds and ``coord`` are in voxel units, while ``size``
            # comes from the diameter_mm column (presumably millimetres), so
            # the 1.5-diameter margin is converted to voxels per axis first.
            margin = 1.5 * size / spacing
            if np.all((box_start + margin <= coord) & (coord <= box_end - margin)):
                #we got one
                ix = int(row_positions[nodule_pos])
        voxels.append(np.transpose(voxel_norm, (2,1,0)))
        ixs.append(ix)

    return np.stack(voxels),np.stack(ixs)

In [6]:
def _sample_with_seed(seed, patient, df, n_sample, VOXEL_SIZE, PATH):
    """Seed NumPy's global RNG inside the worker, then sample one patient's voxels."""
    np.random.seed(seed)
    return sample_random_voxels(patient, df, n_sample, VOXEL_SIZE, PATH)


def get_Xrandom_new(VOXEL_SIZE):
    """Sample random voxel cubes for every scan under ``../data/subset*`` and save them.

    For each subset directory the per-patient results of
    ``sample_random_voxels`` are concatenated and written to
    ``../voxels_random64/<subset>Xrandom.npy`` (cubes) and
    ``../voxels_random64/<subset>IXrandom.npy`` (nodule indices).

    Parameters
    ----------
    VOXEL_SIZE : int
        Edge length of the sampled cubes. The number of cubes per patient is
        ``around(6 * 64**3 / VOXEL_SIZE**3)``, i.e. 6 when ``VOXEL_SIZE`` is 64.

    Notes
    -----
    The sampling runs in joblib workers. With joblib's process-based
    backends, seeding the global NumPy RNG in the calling process does not
    control the workers' draws, so one seed per patient is drawn here from the
    caller's RNG and applied inside the worker. Together with the sorted
    directory and file order this makes the output repeatable for a given
    caller-side seed.
    """
    path_raw = '../data/'
    # NOTE: the output folder name is fixed and does not follow VOXEL_SIZE.
    path_save = '../voxels_random64/'

    # Create the output folder up front so np.save cannot fail on a missing
    # directory after the expensive sampling has already run.
    os.makedirs(path_save, exist_ok=True)

    df = pd.read_csv(path_raw+'CSVFILES/annotations_enhanced.csv')
    n_views = int(np.around(6*(64**3)/(VOXEL_SIZE**3)))

    # Only real directories: the name filter alone would also pick up files
    # such as archives whose names contain 'subset'.
    subset_dirs = sorted(
        d for d in os.listdir(path_raw)
        if 'subset' in d and os.path.isdir(os.path.join(path_raw, d))
    )

    for directory in subset_dirs:
        path_directory = path_raw+directory+'/'
        # Match on the extension proper so names that merely contain '.mhd'
        # are not turned into bogus patient ids.
        luna_ids = sorted(
            f[:-len('.mhd')] for f in os.listdir(path_directory) if f.endswith('.mhd')
        )
        print (directory,'contains',len(luna_ids),'patients')
        if not luna_ids:
            # Nothing to sample; np.concatenate would raise on an empty list.
            continue

        seeds = np.random.randint(0, 2**31 - 1, size=len(luna_ids))
        results = Parallel(n_jobs=2,verbose=0)(
            delayed(_sample_with_seed)(int(seed), luna_id, df, n_views, VOXEL_SIZE, path_directory)
            for seed, luna_id in zip(seeds, luna_ids)
        )

        #results is a list of (voxels, ixs)
        voxels = np.concatenate([r[0] for r in results if r is not None])
        ixs = np.concatenate([r[1] for r in results if r is not None])
        np.save(path_save+directory+'Xrandom.npy', voxels)
        np.save(path_save+directory+'IXrandom.npy', ixs)

In [7]:
def main(stage,rs):
    """Seed NumPy's global RNG and build the random-voxel dataset.

    Parameters
    ----------
    stage : int
        Forwarded to ``get_Xrandom_new`` as its ``VOXEL_SIZE`` argument.
    rs : int
        Seed passed to ``np.random.seed``.
    """
    np.random.seed(rs)
    # get_Xrandom_new is the dataset builder this notebook defines; no
    # get_Xpositive_new exists here, so calling it raises NameError on a
    # freshly started kernel.
    get_Xrandom_new(stage)
    # Deliberately no exit(): inside a notebook it asks the kernel to shut
    # down, which discards all state and blocks any cell that follows.

In [8]:
# rs=42 is the seed main() hands to np.random.seed; stage=64 is forwarded to
# the dataset builder that main() calls.
main(stage=64, rs=42)

subset2 contains 89 patients




subset9 contains 88 patients
subset5 contains 89 patients
subset3 contains 89 patients
subset4 contains 89 patients
subset6 contains 89 patients
subset0 contains 89 patients
subset8 contains 88 patients
subset1 contains 89 patients
subset7 contains 89 patients
