# Modification

**v3.6 complement of the preprocessing that should be run after inpainting**
1. We resample along the slices
1. We reproduce the inpainting steps (where the small cubes were obtained) to obtain the right coordinates of the nodules
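1. These coordinates are obtained from the pylidc annotations (`annotations.csv`), adjusted with the resampling ratio and the crop offsets computed while reproducing the inpainting steps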

**dicom full-preprocessing (misc1) v3 - interpolate only 2 axes to inpaint**
1. We resample along the vertical and horizontal axes but we don't resample along the slices. This creates a smaller volume to apply inpainting. The slices axis has to be resampled later
1. We dilate the union of the segmentations
1. We dilate the lungs mask with kernel=1 (it was 5)

In [None]:
# Compared to the previous version (v2), this script removes the scans with bad
# slices (>2.5mm or inconsistency between spacing and thickness)

import os # module for interfacing with the os
import numpy as np # numpy for arrays etc
import pandas as pd # module for creating and querying data tables (databases) efficiently
import pylidc as pl # module for handling the LIDC dataset
import matplotlib.pyplot as plt # plotting utilities
import matplotlib.patches as patches
import scipy.ndimage # ~
import scipy.sparse
import scipy
from preprocessing.preprocess_functions import *
from utils_LIDC.utils_LIDC import *
from pylidc.utils import consensus
from skimage.morphology import ball, dilation

In [None]:
from inpainting_nodules_functions import *

In [None]:
from scipy import sparse
from tqdm import tqdm_notebook

In [None]:
# Root folder of the original LIDC-IDRI download.
LIDC_PATH = '/data/datasets/LIDC-IDRI/'
# annotations = pd.read_csv('/data/datasets/LIDC-IDRI/annotations.csv')

# Sorted names of the entries under <LIDC_PATH>/LIDC-IDRI whose name contains 'LIDC'.
LIDC_IDs = np.sort([entry for entry in os.listdir(f'{LIDC_PATH}LIDC-IDRI') if 'LIDC' in entry])

# Folder that receives everything this notebook writes (created here when missing).
path_dest = '/data/OMM/Datasets/LIDC_other_formats/LIDC_preprocessed_3D v5 - save pylidc chars only/'
# Folder handed to read_slices3D_v3 in the processing loop.
path_data_alreadyprocessed = '/data/OMM/Datasets/LIDC_other_formats/LIDC_preprocessed_3D v4 - inpaint before preprocess/'
# Folder holding the inpainting results (last / orig / masks) loaded in the processing loop.
path_already_inpainted = '/data/OMM/project results/Feb 5 19 - Deep image prior/dip results all 17/arrays/'
if not os.path.exists(path_dest):
    os.makedirs(path_dest)

In [None]:
def nodule_coords_in_small_resampled_versions(df, resampling_ratio, min_box_x, min_box_y, min_box_channels):
    '''
    Add the nodule coordinates expressed in the smaller resampled volume.

    For each axis the pylidc coordinate column is scaled by the matching
    entry of `resampling_ratio` (order: Z, X, Y) and shifted by the minimum
    of the corresponding `min_box_*` array, i.e. the offset introduced by the
    "Find the minimum box that contain the lungs" step of "read_slices3D_v3".
    The results are written to the columns small_coordsZ / small_coordsX /
    small_coordsY so that each nodule can be linked to its pylidc labels.

    `df` is modified in place and also returned. As a side effect the pandas
    chained-assignment warning is switched off globally.
    '''
    pd.options.mode.chained_assignment = None
    axis_specs = (
        ('lidc_coordZ', 'small_coordsZ', 0, min_box_channels),
        ('lidc_coordX', 'small_coordsX', 1, min_box_x),
        ('lidc_coordY', 'small_coordsY', 2, min_box_y),
    )
    for source_col, target_col, ratio_idx, box in axis_specs:
        scaled = df[source_col].values * resampling_ratio[ratio_idx]
        df[target_col] = scaled - np.min(box)
    return df

In [None]:
def nodule_coords_in_small_resampled_versions2(df, resampling_ratio, min_box_x, min_box_y, min_box_z,
                                              slice_middle, xmed_1, ymed_1, xmed_2, ymed_2):
    '''
    Express the nodule coordinates relative to the lung block that contains the nodule.

    The pylidc coordinates are first mapped into the smaller resampled volume
    (scaled by `resampling_ratio`, order Z, X, Y, and shifted by the minimum of
    the matching `min_box_*` array, i.e. the offset of the "Find the minimum box
    that contain the lungs" step of "read_slices3D_v3"). The nodule is then
    assigned to block 1 or block 2 by comparing ONLY the Y coordinate of its mean
    position against `ymed_1` and `ymed_2`; ties go to block 2. Finally the
    origin of the selected block is subtracted.

    `slice_middle`, `xmed_1` and `xmed_2` are accepted for interface
    compatibility but are not used by the current Y-only criterion.

    NOTE: the block origins are read from the module-level names
    c_zmin1, c_xmin1, c_ymin1 (block 1) and c_zmin2, c_xmin2, c_ymin2 (block 2);
    they must be defined before this function is called.

    `df` is modified in place: the columns small_coordsZ, small_coordsX,
    small_coordsY and nodule_in_block are added. As a side effect the pandas
    chained-assignment warning is switched off globally.

    Returns (df, coord_adj_Z, coord_adj_X, coord_adj_Y), the last three being
    the per-row adjusted coordinates.
    '''
    pd.options.mode.chained_assignment = None

    # Y position of the nodule centre in the small volume. The original code also
    # built a 3-D centre with np.asarray(COORDZ, COORDX, COORDY), which passes the
    # coordinates as dtype/order and fails on current NumPy; it was never used.
    COORDY = (np.mean(df['lidc_coordY'].values) * resampling_ratio[2]) - np.min(min_box_y)

    # COMPARE ONLY THE Y DIRECTION
    dist1 = np.abs(COORDY - ymed_1)
    dist2 = np.abs(COORDY - ymed_2)
    if dist1 < dist2:
        nodule_in_block = 1
        origin_z, origin_x, origin_y = c_zmin1, c_xmin1, c_ymin1
    else:
        nodule_in_block = 2
        origin_z, origin_x, origin_y = c_zmin2, c_xmin2, c_ymin2

    # Per-annotation coordinates: small-volume coordinates minus the block origin.
    coord_adj_Z = ((df['lidc_coordZ'].values * resampling_ratio[0]) - np.min(min_box_z)) - origin_z
    coord_adj_X = ((df['lidc_coordX'].values * resampling_ratio[1]) - np.min(min_box_x)) - origin_x
    coord_adj_Y = ((df['lidc_coordY'].values * resampling_ratio[2]) - np.min(min_box_y)) - origin_y

    df['small_coordsZ'] = coord_adj_Z
    df['small_coordsX'] = coord_adj_X
    df['small_coordsY'] = coord_adj_Y
    df['nodule_in_block'] = nodule_in_block
    return df, coord_adj_Z, coord_adj_X, coord_adj_Y

In [None]:
def resample_grid_except_slices(image, spacing, new_spacing=None, method='linear'):
    '''Resample along the vertical and horizontal axes but not along the slices.

    This creates a smaller volume to apply inpainting. The slices axis has to be
    resampled later (see resample_grid_slices).

    Parameters
    ----------
    image : 3-D array, first axis = slices.
    spacing : sequence of 3 numbers, original voxel size per axis. Only
        spacing[1] and spacing[2] are used; axis 0 is handled in slice-index units.
    new_spacing : sequence of 3 numbers, step of the output grid per axis
        (default [1, 1, 1]). The first entry is a step in slice-index units.
    method : interpolation method passed to RegularGridInterpolator
        (e.g. 'linear' or 'nearest').

    Returns
    -------
    (imageOut, new_spacing) : the resampled volume, cast back to the dtype of
        `image` (rounded first for integer dtypes), and the grid step used.

    Note: along axes 1 and 2 the output grid stops before the last original
    sample (np.mgrid excludes the stop value), as in the original implementation.
    '''
    # Imported here because the notebook never imports scipy.interpolate explicitly.
    from scipy.interpolate import RegularGridInterpolator

    if new_spacing is None:
        # fresh list per call: the value is returned, so it must not be shared
        new_spacing = [1, 1, 1]

    x = np.arange(np.shape(image)[0])              # slice axis: index units, not resampled
    y = spacing[1] * np.arange(image.shape[1])     # original grid along axis 1
    z = spacing[2] * np.arange(image.shape[2])     # original grid along axis 2
    f = RegularGridInterpolator((x, y, z), image, method=method)

    dx, dy, dz = new_spacing    # new step sizes
    new_grid = np.mgrid[0:x[-1]+1:dx, 0:y[-1]:dy, 0:z[-1]:dz]  # '+1' keeps the last slice
    new_grid = np.moveaxis(new_grid, 0, -1)  # coordinates last, as the interpolator expects
    imageOut = f(new_grid)

    # convert back to the same type as input (if it was an int, round first!)
    if np.issubdtype(image.dtype, np.integer):
        imageOut = np.round(imageOut)

    imageOut = imageOut.astype(image.dtype)

    return imageOut, new_spacing

In [None]:
def resample_grid_slices(image, spacing, new_spacing=None, method='linear'):
    '''Resample ONLY along the slices; the vertical and horizontal axes are kept as they are.

    This is done because before inpainting the resampling was applied only to
    the other two dimensions (check resample_grid_except_slices).

    Parameters
    ----------
    image : 3-D array, first axis = slices.
    spacing : sequence of 3 numbers, original voxel size per axis. Only
        spacing[0] is used; axes 1 and 2 are handled in index units.
    new_spacing : sequence of 3 numbers, step of the output grid per axis
        (default [1, 1, 1]). Entries 1 and 2 are steps in index units.
    method : interpolation method passed to RegularGridInterpolator
        (e.g. 'linear' or 'nearest').

    Returns
    -------
    (imageOut, new_spacing) : the resampled volume, cast back to the dtype of
        `image` (rounded first for integer dtypes), and the grid step used.

    Note: along axis 0 the output grid stops before the last original slice
    position (np.mgrid excludes the stop value), as in the original implementation.
    '''
    # Imported here because the notebook never imports scipy.interpolate explicitly.
    from scipy.interpolate import RegularGridInterpolator

    if new_spacing is None:
        # fresh list per call: the value is returned, so it must not be shared
        new_spacing = [1, 1, 1]

    x = spacing[0] * np.arange(image.shape[0])     # original grid along the slices
    # we only interpolate in x direction (slices): axes 1 and 2 stay in index units
    y = np.arange(np.shape(image)[1])
    z = np.arange(np.shape(image)[2])
    f = RegularGridInterpolator((x, y, z), image, method=method)

    dx, dy, dz = new_spacing    # new step sizes
    new_grid = np.mgrid[0:x[-1]:dx, 0:y[-1]+1:dy, 0:z[-1]+1:dz]  # '+1' keeps the last row/column
    new_grid = np.moveaxis(new_grid, 0, -1)  # coordinates last, as the interpolator expects
    imageOut = f(new_grid)

    # convert back to the same type as input (if it was an int, round first!)
    if np.issubdtype(image.dtype, np.integer):
        imageOut = np.round(imageOut)

    imageOut = imageOut.astype(image.dtype)

    return imageOut, new_spacing

In [None]:
def plot_resampled_block(df_coords_adjusted_, mask_resampled_, block_name_, pid_):
    '''Show, for every nodule assigned to a block, the mask slice at the nodule centre.

    The block number is taken from the last character of `block_name_` and
    matched against the `nodule_in_block` column. For each cluster_id found in
    that block a new figure is opened showing `mask_resampled_` at the mean
    small_coordsZ_resampled of the nodule, with a 40x40 red square whose
    lower corner is 20 pixels before the mean (small_coordsY, small_coordsX)
    position, clipped at 0.
    '''
    block_number = int(block_name_[-1])
    in_block = df_coords_adjusted_.loc[df_coords_adjusted_['nodule_in_block'] == block_number]
    for cluster in np.unique(in_block.cluster_id.values):
        rows = df_coords_adjusted_.loc[df_coords_adjusted_['cluster_id'] == cluster]
        # truncated mean position of all annotations of this nodule
        zz, xx, yy = (
            int(np.mean(rows[column]))
            for column in ('small_coordsZ_resampled', 'small_coordsX', 'small_coordsY')
        )
        corner = (np.maximum(yy - 20, 0), np.maximum(xx - 20, 0))
        rect = patches.Rectangle(corner, 40, 40, linewidth=1, edgecolor='r', facecolor='none')
        fig, ax = plt.subplots(1, 1)
        ax.set_title(f'{pid_}_{block_name_}\n{(zz, xx, yy)}')
        ax.imshow(mask_resampled_[zz])
        ax.axis('off')
        ax.add_patch(rect)

In [None]:
# Positional index and folder name of every LIDC patient, as two parallel lists.
k_all = list(LIDC_IDs)
idx_all = list(range(len(k_all)))

In [None]:
# Display the last five indices and patient folder names.
idx_all[-5:], k_all[-5:]

In [None]:
# Annotation table; the processing loop filters it per patient and per scan.
df = pd.read_csv('/data/datasets/LIDC-IDRI/annotations.csv')

# Bookkeeping lists filled (or meant to be filled) by the processing loop.
scans_with_errors, numVoxelsPerLungSeg, requiredSelemWidth = [], [], []
listOfRejectedPatients, listOfUsedPatients = [], []

# Summary files, opened for writing (existing content is truncated).
# NOTE(review): no active code in this notebook writes to or closes these
# handles; the matching savetxt/close calls further down are commented out.
errorScansFile = open(f'{path_dest}scans_with_errors.txt', 'w')
rejectListFile = open(f'{path_dest}rejectedPatients.txt', 'w')
useListFile = open(f'{path_dest}usedPatients.txt', 'w')
selemZWidthFile = open(f'{path_dest}segmentationSelemZWidths.txt', 'w')

# Index of the first scan to process; every scan before it is skipped.
FIRST_SCAN_IDX = 932

# np.save / np.savez_compressed / to_csv do not create missing folders.
for sub_dir in ('arrays/last', 'arrays/orig', 'arrays/masks nodules', 'arrays/masks lungs',
                'pylidc_characteristics'):
    os.makedirs(f'{path_dest}{sub_dir}', exist_ok=True)

for idx, k in enumerate(LIDC_IDs):
    # Scans previously reported as problematic: idx 41 and 61;
    # LIDC-IDRI-0011, LIDC-IDRI-0344, LIDC-IDRI-0361.
    if idx < FIRST_SCAN_IDX:
        continue

    print(f'preprocessing: {idx}, {k}')

    # annotations of this patient (the last four characters of the folder name are the id)
    df_patient = df.loc[df['patientid'] == int(k[-4:])]
    pid = k

    # query the LIDC images with patient_id = pid
    # HERE WE JUST USE THE FIRST ONE!!
    idx_scan = 0
    scan = pl.query(pl.Scan).filter(pl.Scan.patient_id == pid)[idx_scan]

    # Reject scans with thick slices or with non-uniform spacing between slices.
    # NOTE(review): the header comment of this notebook mentions 2.5 mm, the check uses 3.
    thickSlice = (scan.slice_thickness > 3) or (scan.slice_spacing > 3)
    missingSlices = len(np.unique(np.round(100*np.diff(scan.slice_zvals)))) != 1
    if thickSlice:
        print('Undesirable slice characteristics, rejecting')
        listOfRejectedPatients.append(pid)
        continue
    if missingSlices:
        print('Missing slices, rejecting')
        listOfRejectedPatients.append(pid)
        continue
    listOfUsedPatients.append(pid)

    # V3.6 REPEAT THE STEPS FROM INPAINTING TO GET THE TRANSFORMED COORDINATES
    path_data = path_data_alreadyprocessed
    try:
        (vol_small, mask_maxvol_small, mask_maxvol_and_lungs_small, mask_lungs_small,
         min_box_channels, min_box_x, min_box_y) = read_slices3D_v3(path_data, pid)
    except FileNotFoundError:
        # this patient was not part of the already-processed data
        continue
    slice_middle = np.shape(vol_small)[0] // 2
    xmed_1, ymed_1, xmed_2, ymed_2 = erode_and_split_mask(mask_lungs_small, slice_middle)
    coord_min_side1, coord_max_side1, coord_min_side2, coord_max_side2 = nodule_right_or_left_lung(
        mask_maxvol_small, slice_middle, xmed_1, ymed_1, xmed_2, ymed_2)
    try:
        # The c_* names must stay module-level: nodule_coords_in_small_resampled_versions2 reads them.
        c_zmin2, c_zmax2, c_xmin2, c_xmax2, c_ymin2, c_ymax2 = box_coords_contain_masks_right_size_search(
            coord_max_side2, coord_min_side2, 2, slice_middle, xmed_1, ymed_1, xmed_2, ymed_2, mask_lungs_small)
        c_zmin1, c_zmax1, c_xmin1, c_xmax1, c_ymin1, c_ymax1 = box_coords_contain_masks_right_size_search(
            coord_max_side1, coord_min_side1, 1, slice_middle, xmed_1, ymed_1, xmed_2, ymed_2, mask_lungs_small)
    except ValueError:
        continue
    block1, block1_mask, block1_mask_maxvol_and_lungs, block1_mask_lungs = get_four_blocks(
        vol_small, mask_maxvol_small, mask_maxvol_and_lungs_small, mask_lungs_small,
        c_zmin1, c_zmax1, c_xmin1, c_xmax1, c_ymin1, c_ymax1)
    block2, block2_mask, block2_mask_maxvol_and_lungs, block2_mask_lungs = get_four_blocks(
        vol_small, mask_maxvol_small, mask_maxvol_and_lungs_small, mask_lungs_small,
        c_zmin2, c_zmax2, c_xmin2, c_xmax2, c_ymin2, c_ymax2)
    (blocks_ndl, blocks_ndl_mask, block_mask_maxvol_and_lungs, blocks_ndl_lungs,
     blocks_ndl_names, slice1, slice2) = get_block_if_ndl(
        block1, block2, block1_mask, block2_mask,
        block1_mask_maxvol_and_lungs, block2_mask_maxvol_and_lungs,
        block1_mask_lungs, block2_mask_lungs)

    print('Loading and converting to HU')
    curr_patient_pixels, spacing_orig = custom_load_scan_to_HU(scan)

    print('Resampling to isotropic resolution')
    pix_resampled, spacing = resample_grid_except_slices(curr_patient_pixels, spacing_orig, [1,1,1])

    # (The lung segmentation / nodule-mask steps of earlier versions are disabled in v3.6.)

    # annotation IDs of this scan (note that they are not in order in the annotations.csv)
    ids = [i.id for i in scan.annotations]

    # we split the df for patient pid into the part for just this scan
    df_patient_partX = df_patient.loc[df_patient.annotation_id.isin(ids)]
    unique_nodules = np.unique(df_patient_partX['cluster_id'].values)

    # FIND THE TRANSFORMED COORDINATES
    # The ratio between resampled and original shape is the same for every nodule of the scan.
    resampling_ratio = [j/i for i,j in zip(np.shape(curr_patient_pixels), np.shape(pix_resampled))]
    nodule_frames = []
    for unique_nodule in unique_nodules:
        # all annotations for this nodule (cluster)
        df_nodule = df_patient_partX.loc[df_patient_partX['cluster_id']==unique_nodule]
        df_coords_adj_temp, coord_adj_Z, coord_adj_X, coord_adj_Y = nodule_coords_in_small_resampled_versions2(
            df_nodule, resampling_ratio, min_box_x, min_box_y, min_box_channels,
            slice_middle, xmed_1, ymed_1, xmed_2, ymed_2)
        nodule_frames.append(df_coords_adj_temp)
    # Rebuilt from scratch for every scan so that a scan without nodules can never
    # inherit the table of the previous patient. DataFrame.append no longer exists
    # in pandas >= 2.0, hence pd.concat.
    df_coords_adjusted = pd.concat(nodule_frames) if nodule_frames else pd.DataFrame()
    if not df_coords_adjusted.empty:
        # Z coordinate after the slice axis is resampled (scaled by the original slice spacing)
        df_coords_adjusted['small_coordsZ_resampled'] = df_coords_adjusted.small_coordsZ.values * spacing_orig[0]

    # RESAMPLE ALONG THE SLICES THE ALREADY INPAINTED IMAGES
    # Only block_name is used below; the other zipped items are kept so the loop
    # still stops at the shortest of the five lists.
    for block, block_mask, block_maxvol_and_lungs, block_lungs, block_name in zip(
            blocks_ndl, blocks_ndl_mask, block_mask_maxvol_and_lungs, blocks_ndl_lungs, blocks_ndl_names):
        print(block_name)
        # Get the inpainted and original image and the masks
        try:
            last = np.squeeze(np.load(f'{path_already_inpainted}last/{pid}_{block_name}.npy'))
            orig = np.squeeze(np.load(f'{path_already_inpainted}orig/{pid}_{block_name}.npy'))
            # context managers close the .npz archives once the arrays are read
            with np.load(f'{path_already_inpainted}masks nodules/{pid}_{block_name}.npz') as npz_file:
                mask = npz_file['arr_0']
            with np.load(f'{path_already_inpainted}masks lungs/{pid}_{block_name}.npz') as npz_file:
                mask_lungs = npz_file['arr_0']
        except FileNotFoundError:
            # no inpainting result for this block
            continue

        last_resampled, spacing = resample_grid_slices(last, spacing_orig, [1,1,1])
        orig_resampled, spacing = resample_grid_slices(orig, spacing_orig, [1,1,1])
        mask_resampled, spacing = resample_grid_slices(mask, spacing_orig, [1,1,1])
        mask_lungs_resampled, spacing = resample_grid_slices(mask_lungs, spacing_orig, [1,1,1])

        np.save(f'{path_dest}arrays/last/{pid}_{block_name}.npy',last_resampled)
        np.save(f'{path_dest}arrays/orig/{pid}_{block_name}.npy',orig_resampled)
        np.savez_compressed(f'{path_dest}arrays/masks nodules/{pid}_{block_name}',mask_resampled)
        np.savez_compressed(f'{path_dest}arrays/masks lungs/{pid}_{block_name}',mask_lungs_resampled)

        # the characteristics are written only when at least one block was saved
        if not df_coords_adjusted.empty:
            df_coords_adjusted.to_csv(f'{path_dest}pylidc_characteristics/{pid}.csv', index=False)
#         plot_resampled_block(df_coords_adjusted, mask_resampled, block_name, pid)
        
#         np.save(f'{path_dest}arrays/last/{name}_{block_name}.npy',image_last)
#         np.savez_compressed(f'{path_dest}arrays/masks nodules/{name}_{block_name}',block_mask)
#         np.savez_compressed(f'{path_dest}arrays/masks lungs/{name}_{block_name}',block_lungs)


#     print('Saving...')
    # now we save the results, saving each slice as a sparse array to cut down on size!
    # (currently just saving the last nodule per scan?)
#     if not os.path.exists(f'{path_dest}{k}/scans'): os.makedirs(f'{path_dest}{k}/scans')
#     if not os.path.exists(f'{path_dest}{k}/consensus_masks'): os.makedirs(f'{path_dest}{k}/consensus_masks')
#     if not os.path.exists(f'{path_dest}{k}/maxvol_masks'): os.makedirs(f'{path_dest}{k}/maxvol_masks')
#     if not os.path.exists(f'{path_dest}{k}/lung_masks'): os.makedirs(f'{path_dest}{k}/lung_masks')
#     if not os.path.exists(f'{path_dest}{k}/cluster_id_images'): os.makedirs(f'{path_dest}{k}/cluster_id_images')
#     if not os.path.exists(f'{path_dest}{k}/characteristics'): os.makedirs(f'{path_dest}{k}/characteristics')


#     for idj,(slice_pix, slice_mask_consensus, slice_mask_maxvol,slice_lungseg, slice_cluster_id_image) in enumerate(zip(pix_resampled_to_use, mask_consensus_resampled, mask_maxvol_resampled,dilated,labelledNods_resampled)):
#         sparse_matrix = scipy.sparse.csc_matrix(slice_pix)
#         sparse_matrix2 = scipy.sparse.csc_matrix(slice_mask_consensus)
#         sparse_matrix3 = scipy.sparse.csc_matrix(slice_mask_maxvol)
#         sparse_matrix4 = scipy.sparse.csc_matrix(slice_lungseg)
#         sparse_matrix5 = scipy.sparse.csc_matrix(slice_cluster_id_image)

#         scipy.sparse.save_npz(f'{path_dest}{k}/scans/slice_{idj:04d}.npz', sparse_matrix, compressed=True)
#         scipy.sparse.save_npz(f'{path_dest}{k}/consensus_masks/slice_m_{idj:04d}.npz', sparse_matrix2, compressed=True)
#         scipy.sparse.save_npz(f'{path_dest}{k}/maxvol_masks/slice_m_{idj:04d}.npz', sparse_matrix3, compressed=True)
#         scipy.sparse.save_npz(f'{path_dest}{k}/lung_masks/slice_m_{idj:04d}.npz', sparse_matrix4, compressed=True)
#         scipy.sparse.save_npz(f'{path_dest}{k}/cluster_id_images/slice_m_{idj:04d}.npz', sparse_matrix5, compressed=True)
        
    #v4 
    #resampling_ratio = [j/i for i,j in zip(np.shape(one_segmentation_maxvol), np.shape(mask_maxvol_resampled))]
    #df_patient = nodule_coords_in_small_resampled_versions(df_patient, resampling_ratio, min_box_x, min_box_y, min_box_channels)
    # Now we have the coordinates of the nodules in the smaller resampled volumes.
    # But these volumes are further cropped into smaller regions
    # df_patient.to_csv(f'{path_dest}pylidc_characteristics/{pid}.csv')

# #%% save some summary output
# np.savetxt(path_dest + 'segmentation_results.dat', numVoxelsPerLungSeg)

# np.savetxt(rejectListFile,listOfRejectedPatients,'%10s')
# rejectListFile.close()

# np.savetxt(useListFile,listOfUsedPatients,'%10s')
# useListFile.close()

# np.savetxt(selemZWidthFile,requiredSelemWidth,'%u')
# selemZWidthFile.close()

# np.savetxt(errorScansFile,scans_with_errors,'%10s')
# errorScansFile.close()

# #%% plot segmentation results
# # ax = plt.hist(numVoxelsPerLungSeg,100)
# # plt.xlabel('Number of voxels in segmentation')

In [None]:
# Output folder of the resampled inpainted ('last') arrays:
# /data/OMM/Datasets/LIDC_other_formats/LIDC_preprocessed_3D v5 - save pylidc chars only/arrays/last

In [None]:
# Display the adjusted-coordinates table left over from the last processed scan.
df_coords_adjusted

In [None]:
# Shape of the nodule mask of the last processed block after resampling along the slices.
np.shape(mask_resampled)

In [None]:
import matplotlib.patches as patches

# Mean position (truncated to int) of the last cluster listed in the adjusted-coordinates table.
df_number_of_nodules = np.unique(df_coords_adjusted.cluster_id.values)
len(df_number_of_nodules)
df_nodule_freeze = df_coords_adjusted.loc[df_coords_adjusted['cluster_id'] == df_number_of_nodules[-1]]
zz, xx, yy = (
    int(np.mean(df_nodule_freeze[column]))
    for column in ('small_coordsZ_resampled', 'small_coordsX', 'small_coordsY')
)

# Show that slice with a 40x40 red square around the position.
# NOTE: `labeled` is created by a later cell of this notebook; that cell has to be run first.
rect = patches.Rectangle(xy=(np.maximum(yy - 20, 0), np.maximum(xx - 20, 0)), width=40, height=40,
                         linewidth=1, edgecolor='r', facecolor='none')
fig, ax = plt.subplots(1, 1)
ax.imshow(labeled[zz])
ax.add_patch(rect)

In [None]:
# Block-relative Z coordinates before the slice axis is resampled.
df_coords_adjusted.small_coordsZ.values

In [None]:
import matplotlib.patches as patches

# Connected components of the resampled nodule mask.
# NOTE(review): `ndimage` is not imported by name in this notebook; presumably it
# comes from one of the wildcard imports.
labeled, n_items = ndimage.label(mask_resampled)
print(np.unique(labeled))
print(n_items)

# Median voxel position of component 1.
z, x, y = np.nonzero(labeled == 1)
zz, xx, yy = (int(np.median(axis_idx)) for axis_idx in (z, x, y))
print(zz, xx, yy)

# Slice through that position with a 40x40 red square around it.
rect = patches.Rectangle(xy=(yy - 20, xx - 20), width=40, height=40,
                         linewidth=1, edgecolor='r', facecolor='none')
fig, ax = plt.subplots(1, 1)
ax.imshow(labeled[zz])
ax.add_patch(rect)

In [None]:
import matplotlib.patches as patches

# Connected components of the consensus segmentation.
# NOTE(review): `one_segmentation_consensus` is only assigned in commented-out code
# of the processing loop, so it must come from an earlier session.
labeled, n_items = ndimage.label(one_segmentation_consensus)
print(np.unique(labeled))
print(n_items)

# Median voxel position of component 2.
z, x, y = np.nonzero(labeled == 2)
zz, xx, yy = (int(np.median(axis_idx)) for axis_idx in (z, x, y))
print(zz, xx, yy)

rect = patches.Rectangle(xy=(yy - 20, xx - 20), width=40, height=40,
                         linewidth=1, edgecolor='r', facecolor='none')
fig, ax = plt.subplots(1, 1)
# NOTE(review): slice 72 is hard-coded here although zz was just computed - confirm which one is intended.
ax.imshow(labeled[72])
ax.add_patch(rect)

In [None]:
# Connected components of the maximum-volume segmentation.
# NOTE(review): `one_segmentation_maxvol` is only assigned in commented-out code
# of the processing loop, so it must come from an earlier session.
labeled, n_items = ndimage.label(one_segmentation_maxvol)
print(np.unique(labeled))
print(n_items)

# Median voxel position of component 3 and the slice through it.
z, x, y = np.nonzero(labeled == 3)
zz, xx, yy = (int(np.median(axis_idx)) for axis_idx in (z, x, y))
print(zz, xx, yy)
plt.imshow(labeled[zz])

In [None]:
# NOTE(review): called without an image argument, which raises a TypeError - the array to show is missing.
plt.imshow()

In [None]:
# Shape of the cropped volume and the index of its middle slice (shape[0] // 2 in the loop).
print(np.shape(vol_small))
slice_middle

In [None]:
# Display the values returned by erode_and_split_mask for the last scan, plus the middle slice.
xmed_1, xmed_2, ymed_1, ymed_2, slice_middle

In [None]:
# Y coordinates of the last nodule's annotations: raw pylidc values first, then
# scaled by the resampling ratio and shifted by the minimum of min_box_y.
print(df_nodule.lidc_coordY.values)
(df_nodule.lidc_coordY.values* resampling_ratio[2]) - np.min(min_box_y)

In [None]:
# Display the annotation rows of the last patient.
df_patient

In [None]:
# Display the annotation rows of the last nodule (cluster) handled by the loop.
df_nodule

In [None]:
# Median position of the voxels equal to 1 in the block-2 nodule mask, and the slice through it.
z, x, y = np.nonzero(block2_mask == 1)
zz, xx, yy = (int(np.median(axis_idx)) for axis_idx in (z, x, y))
print(zz, xx, yy)
plt.imshow(block2_mask[zz])

In [None]:
# Median position of the voxels equal to 1 in the block-1 nodule mask, and the slice through it.
z, x, y = np.nonzero(block1_mask == 1)
zz, xx, yy = (int(np.median(axis_idx)) for axis_idx in (z, x, y))
print(zz, xx, yy)
plt.imshow(block1_mask[zz])

In [None]:
# Label the connected components of the block-2 nodule mask and show how many were found.
labeled2, n_objects = ndimage.label(block2_mask)
n_objects

In [None]:
# Label the connected components of the block-1 nodule mask and show how many were found
# (n_objects is shared with the block-2 cell and is overwritten here).
labeled1, n_objects = ndimage.label(block1_mask)
n_objects

In [None]:
# Median position of the voxels labelled 0 in block 2, and the mask slice through it.
# NOTE(review): label 0 is the background in scipy.ndimage.label output - confirm this is intended.
z, x, y = np.nonzero(labeled2 == 0)
zz, xx, yy = (int(np.median(axis_idx)) for axis_idx in (z, x, y))
print(zz, xx, yy)
plt.imshow(block2_mask[zz])

In [None]:
# Median position of the voxels labelled 0 in block 1, and the mask slice through it.
# NOTE(review): label 0 is the background in scipy.ndimage.label output - confirm this is intended.
z, x, y = np.nonzero(labeled1 == 0)
zz, xx, yy = (int(np.median(axis_idx)) for axis_idx in (z, x, y))
print(zz, xx, yy)
plt.imshow(block1_mask[zz])

In [None]:
# Display the annotation rows of the last patient.
df_patient

In [None]:
# Number of voxels labelled 0 in the block-2 labelling.
z,x,y=np.where(labeled2==0)
len(z)

In [None]:
# Distinct label values present in block 2.
np.unique(labeled2)

In [None]:
# Distinct label values present in block 1.
np.unique(labeled1)

In [None]:
# Shape of the block-1 nodule mask.
np.shape(block1_mask)

In [None]:
# IPython help lookup (the trailing '?' only works inside IPython/Jupyter, not in plain Python).
ndimage.label?

In [None]:
# Shape of the list of blocks returned by get_block_if_ndl, and their names.
np.shape(blocks_ndl), blocks_ndl_names

In [None]:
# Display the per-axis resampling ratio currently in memory.
resampling_ratio

### tests to get the pylidc characteristics

In [None]:
# Per-axis ratio between the resampled and the original mask shape.
# NOTE(review): neither `one_segmentation_maxvol` nor `mask_maxvol_resampled` is assigned by
# active code in this notebook (only in commented-out lines); they must come from an earlier session.
resampling_ratio = [j/i for i,j in zip(np.shape(one_segmentation_maxvol), np.shape(mask_maxvol_resampled))]
resampling_ratio

In [None]:
# Reload the cropped volume, its masks and the crop offsets for the current `pid`.
path_data = path_data_alreadyprocessed
vol_small, mask_maxvol_small, mask_maxvol_and_lungs_small, mask_lungs_small, min_box_channels, min_box_x, min_box_y = read_slices3D_v3(path_data, pid);

In [None]:
# Add the small_coordsZ/X/Y columns to the patient table (first, block-independent version).
df_patient = nodule_coords_in_small_resampled_versions(df_patient, resampling_ratio, min_box_x, min_box_y, min_box_channels)

In [None]:
# Display the patient table, now including the small_coords columns.
df_patient

In [None]:
# Shapes of the mask before and after resampling.
print(np.shape(one_segmentation_maxvol))
print(np.shape(mask_maxvol_resampled))

In [None]:
# Median position of the non-zero voxels of the original-resolution mask, and the slice through it.
z, x, y = np.nonzero(one_segmentation_maxvol)
zz, xx, yy = (int(np.median(axis_idx)) for axis_idx in (z, x, y))
print(f'medians = {(zz, xx, yy)}')
print(np.shape(one_segmentation_maxvol))
plt.imshow(one_segmentation_maxvol[zz])

In [None]:
# Median position of the non-zero voxels of the resampled mask, and the slice through it.
z, x, y = np.nonzero(mask_maxvol_resampled)
zz, xx, yy = (int(np.median(axis_idx)) for axis_idx in (z, x, y))
print(f'medians = {(zz, xx, yy)}')
print(np.shape(mask_maxvol_resampled))
plt.imshow(mask_maxvol_resampled[zz])

In [None]:
# Shape of the scan before and after resampling.
# NOTE(review): `pix_resampled_to_use` is only assigned in commented-out code of the processing loop.
np.shape(curr_patient_pixels), '->', np.shape(pix_resampled_to_use), 

In [None]:
# Per-axis ratio between the resampled and the original scan shape (axes 0, 1, 2 -> Z, X, Y).
RESAMPLING_RATIO_Z, RESAMPLING_RATIO_X, RESAMPLING_RATIO_Y = (
    new_size / old_size
    for new_size, old_size in zip(np.shape(pix_resampled_to_use)[:3], np.shape(curr_patient_pixels)[:3])
)
RESAMPLING_RATIO_Z, RESAMPLING_RATIO_X, RESAMPLING_RATIO_Y

In [None]:
# Extent of the lung bounding box along each axis, then its lower corner.
for _box in (min_box_channels, min_box_x, min_box_y):
    print(np.min(_box), np.max(_box))
MIN_BOX_Z, MIN_BOX_X, MIN_BOX_Y = (np.min(_box) for _box in (min_box_channels, min_box_x, min_box_y))
MIN_BOX_Z, MIN_BOX_X, MIN_BOX_Y

In [None]:
# Centre of the nodule bounding box: floor of the mean of start/stop for each slice object.
# Index 2 of cbbox is used for the channel (slice) axis, 0 for x and 1 for y.
# NOTE(review): `cbbox` is not assigned anywhere in the visible part of this notebook.
print(cbbox)
center_nodule_channels, center_nodule_x, center_nodule_y = (
    int(np.floor(np.mean([cbbox[axis].start, cbbox[axis].stop])))
    for axis in (2, 0, 1)
)
print(center_nodule_channels, center_nodule_x, center_nodule_y)

In [None]:
# Display the nodule bounding box (indexed as slice objects with .start/.stop in the neighbouring cells).
cbbox

In [None]:
# Use the RESAMPLING_RATIO_* and MIN_BOX_* values to get the coordinates in the small volume:
# bounding-box centre scaled by the ratio, floored, then shifted by the box offset.
COORDZ, COORDX, COORDY = (
    int(np.floor(np.mean([cbbox[axis].start, cbbox[axis].stop]) * ratio)) - offset
    for axis, ratio, offset in (
        (2, RESAMPLING_RATIO_Z, MIN_BOX_Z),
        (0, RESAMPLING_RATIO_X, MIN_BOX_X),
        (1, RESAMPLING_RATIO_Y, MIN_BOX_Y),
    )
)
# These values should be very close to the ones obtained with the mask
print(COORDZ, COORDX, COORDY)

In [None]:
# VALUES OBTAINED WITH THE MASK
# Median position of the non-zero voxels of the small mask, and the slice through it.
z, x, y = np.nonzero(mask_maxvol_small)
zz, xx, yy = (int(np.median(axis_idx)) for axis_idx in (z, x, y))
print(f'medians = {(zz, xx, yy)}')
plt.imshow(mask_maxvol_small[zz])

In [None]:
for block_name in blocks_ndl_names:
    if block_name != 'block2':
        continue
    # Use the RESAMPLING_RATIO_* and MIN_BOX_* values to get the coordinates in the small volume
    COORDZ, COORDX, COORDY = (
        int(np.floor(np.mean([cbbox[axis].start, cbbox[axis].stop]) * ratio)) - offset
        for axis, ratio, offset in (
            (2, RESAMPLING_RATIO_Z, MIN_BOX_Z),
            (0, RESAMPLING_RATIO_X, MIN_BOX_X),
            (1, RESAMPLING_RATIO_Y, MIN_BOX_Y),
        )
    )
    # These values should be very close to the ones obtained with the mask
    print(COORDZ, COORDX, COORDY)

In [None]:
# Coordinates relative to the origin of block 2, one value per line (Z, X, Y).
print(COORDZ - c_zmin2, COORDX - c_xmin2, COORDY - c_ymin2, sep='\n')

In [None]:
# Decide which block centre the nodule is closer to (Euclidean distance in Z, X, Y).
# The three coordinates must be passed to np.asarray as ONE sequence: as separate
# positional arguments the 2nd and 3rd are taken as dtype and order.
coords_in_small_cube = np.asarray([COORDZ, COORDX, COORDY])
coords_center_cube1 = np.asarray([slice_middle, xmed_1, ymed_1])
coords_center_cube2 = np.asarray([slice_middle, xmed_2, ymed_2])
dist1 = np.linalg.norm(coords_in_small_cube - coords_center_cube1)
dist2 = np.linalg.norm(coords_in_small_cube - coords_center_cube2)
if dist1 < dist2:
    print('1')
else:
    print('2')

In [None]:
# Display the annotation rows of the last nodule (cluster) handled by the loop.
df_nodule

In [None]:
# lidc_coordX of every annotation of the patient.
df_patient['lidc_coordX'].values

In [None]:
# lidc_coordX of the annotations of the last nodule only.
df_nodule['lidc_coordX'].values

In [None]:
def nodule_coords_in_small_resampled_versions2(df, resampling_ratio, min_box_x, min_box_y, min_box_z,
                                              COORDZ, COORDX, COORDY, slice_middle, xmed_1, ymed_1, xmed_2, ymed_2,
                                              coords_in_small_cube, coords_center_cube1, coords_center_cube2,
                                              cube1_min=None, cube2_min=None):
    '''
    Get the coordinates of the nodules in the smaller resampled volumes.
    This is done to be able to link each nodule to their pylidc labels
    We need to get into account the resampling ratio and the number of voxels used during the
    "Find the minimum box that contain the lungs" of the "read_slices3D_v3" function

    The nodule position (COORDZ, COORDX, COORDY) is compared with the two cube centres
    (slice_middle, xmed_1, ymed_1) and (slice_middle, xmed_2, ymed_2); the origin of the
    closer cube is subtracted from the mean pylidc coordinates after they have been scaled
    and shifted into the small volume.

    Parameters
    ----------
    df : DataFrame with the columns 'lidc_coordZ', 'lidc_coordX' and 'lidc_coordY';
        the mean of each column is used.
    resampling_ratio : sequence indexed as [0] -> Z, [1] -> X, [2] -> Y.
    min_box_x, min_box_y, min_box_z : array-likes; only their minimum is used.
    COORDZ, COORDX, COORDY : nodule position used for the nearest-cube decision.
    slice_middle, xmed_1, ymed_1, xmed_2, ymed_2 : centres of cube 1 and cube 2.
    coords_in_small_cube, coords_center_cube1, coords_center_cube2 : ignored, they are
        recomputed from the arguments above; kept so existing calls keep working.
    cube1_min, cube2_min : optional (z, x, y) origin of each cube. When None the
        module-level c_zmin1/c_xmin1/c_ymin1 (resp. c_zmin2/c_xmin2/c_ymin2) are used,
        as before.

    Returns
    -------
    (coord_adj_Z, coord_adj_X, coord_adj_Y) relative to the origin of the closer cube.
    '''
    # Kept from the original: switches off pandas' chained-assignment warning globally.
    pd.options.mode.chained_assignment = None
    # np.asarray takes ONE array-like; passing the three values positionally makes numpy
    # read the 2nd and 3rd as `dtype` and `order`, which raises. Wrap them in a list.
    nodule_zxy = np.asarray([COORDZ, COORDX, COORDY])
    centre_cube1 = np.asarray([slice_middle, xmed_1, ymed_1])
    centre_cube2 = np.asarray([slice_middle, xmed_2, ymed_2])
    dist1 = np.linalg.norm(nodule_zxy - centre_cube1)
    dist2 = np.linalg.norm(nodule_zxy - centre_cube2)
    # Pick the origin of the closer cube; the globals are only read for the chosen side
    # and only when no explicit origin was supplied.
    if dist1 < dist2:
        label = '1'
        z0, x0, y0 = (c_zmin1, c_xmin1, c_ymin1) if cube1_min is None else cube1_min
    else:
        label = '2'
        z0, x0, y0 = (c_zmin2, c_xmin2, c_ymin2) if cube2_min is None else cube2_min
    # mean pylidc coordinate -> resampled space -> minimum-box space -> cube space
    coord_adj_Z = ((np.mean(df['lidc_coordZ'].values) * resampling_ratio[0]) - np.min(min_box_z)) - z0
    coord_adj_X = ((np.mean(df['lidc_coordX'].values) * resampling_ratio[1]) - np.min(min_box_x)) - x0
    coord_adj_Y = ((np.mean(df['lidc_coordY'].values) * resampling_ratio[2]) - np.min(min_box_y)) - y0
    print(label, coord_adj_Z, coord_adj_X, coord_adj_Y)
    # TODO: store the result in df as 'small_coordsZ' / 'small_coordsX' / 'small_coordsY'
    return coord_adj_Z, coord_adj_X, coord_adj_Y


In [None]:
# Adjusted (z, x, y) coordinates computed from the rows of df_nodule.
# Note that min_box_channels is what gets passed as the min_box_z argument.
nodule_coords_in_small_resampled_versions2(df_nodule, resampling_ratio, min_box_x, min_box_y, min_box_channels,
                                           COORDZ, COORDX, COORDY, slice_middle, xmed_1, ymed_1, xmed_2, ymed_2,
                                           coords_in_small_cube, coords_center_cube1, coords_center_cube2)

In [None]:
# Return the center of each lung (from the middle slice)
# slice_middle is the middle index along the first axis of vol_small
slice_middle = np.shape(vol_small)[0] // 2
xmed_1, ymed_1, xmed_2, ymed_2 = erode_and_split_mask(mask_lungs_small,slice_middle)
# Display the four centre coordinates together with the slice index
xmed_1, ymed_1, xmed_2, ymed_2, slice_middle

In [None]:
# For each nodule determine if it is closer to the right or left cube center.
# Then return, for each side, the min and max coordinates of each nodule
coord_min_side1, coord_max_side1, coord_min_side2, coord_max_side2 = nodule_right_or_left_lung(mask_maxvol_small, slice_middle, xmed_1, ymed_1, xmed_2, ymed_2)
# Display the four results
coord_min_side1, coord_max_side1, coord_min_side2, coord_max_side2

In [None]:
# Box limits for side 2 (third argument = 2), searched from the max/min nodule coordinates of that side.
# The six names are first reset to empty lists and then overwritten by the search result.
c_zmin2, c_zmax2, c_xmin2, c_xmax2, c_ymin2, c_ymax2 = [], [], [], [], [], []
c_zmin2, c_zmax2, c_xmin2, c_xmax2, c_ymin2, c_ymax2 = box_coords_contain_masks_right_size_search(coord_max_side2, coord_min_side2, 2, slice_middle, xmed_1, ymed_1, xmed_2, ymed_2, mask_lungs_small)
print(c_zmin2, c_zmax2, c_xmin2, c_xmax2, c_ymin2, c_ymax2)
# Extent of the box along each axis
print(c_zmax2 - c_zmin2, c_xmax2 - c_xmin2, c_ymax2 - c_ymin2)

In [None]:
# Box limits for side 1 (third argument = 1), searched from the max/min nodule coordinates of that side.
# The six names are first reset to empty lists and then overwritten by the search result.
c_zmin1, c_zmax1, c_xmin1, c_xmax1, c_ymin1, c_ymax1 = [], [], [], [], [], []
c_zmin1, c_zmax1, c_xmin1, c_xmax1, c_ymin1, c_ymax1 = box_coords_contain_masks_right_size_search(coord_max_side1, coord_min_side1, 1,  slice_middle, xmed_1, ymed_1, xmed_2, ymed_2, mask_lungs_small)
print(c_zmin1, c_zmax1, c_xmin1, c_xmax1, c_ymin1, c_ymax1)
# Extent of the box along each axis
print(c_zmax1 - c_zmin1, c_xmax1 - c_xmin1, c_ymax1 - c_ymin1)

In [None]:
# Block1 and Block2, in assignment order: lungs, ndl mask, ndl&lungs mask, lungs mask
# The same four volumes are passed for both sides; only the box limits differ (c_*1 vs c_*2).
block1, block1_mask, block1_mask_maxvol_and_lungs, block1_mask_lungs = get_four_blocks(vol_small, mask_maxvol_small, mask_maxvol_and_lungs_small, mask_lungs_small, c_zmin1, c_zmax1, c_xmin1, c_xmax1, c_ymin1, c_ymax1)
block2, block2_mask, block2_mask_maxvol_and_lungs, block2_mask_lungs = get_four_blocks(vol_small, mask_maxvol_small, mask_maxvol_and_lungs_small, mask_lungs_small, c_zmin2, c_zmax2, c_xmin2, c_xmax2, c_ymin2, c_ymax2)

In [None]:
# Median index, per axis, of the voxels equal to 1 in block2_mask
z,x,y=np.where(block2_mask==1)
zz = int(np.median(z))
xx = int(np.median(x))
yy = int(np.median(y))
print(zz,xx,yy)
# Show block2_mask at the median index of the first axis
plt.imshow(block2_mask[zz])

In [None]:
# Collect the blocks through get_block_if_ndl (presumably only the blocks that contain a nodule - TODO confirm).
# Besides the block lists it returns the block names and one slice index per block (slice1, slice2).
blocks_ndl, blocks_ndl_mask, block_mask_maxvol_and_lungs, blocks_ndl_lungs, blocks_ndl_names, slice1, slice2 =  get_block_if_ndl(block1, block2, block1_mask, block2_mask, block1_mask_maxvol_and_lungs, block2_mask_maxvol_and_lungs, block1_mask_lungs, block2_mask_lungs)

In [None]:
# Median index, per axis, of the voxels equal to 1 in the first returned block mask
z,x,y=np.where(blocks_ndl_mask[0]==1)
zz = int(np.median(z))
xx = int(np.median(x))
yy = int(np.median(y))
print(zz,xx,yy)
# Show that mask at the median index of its first axis
plt.imshow(blocks_ndl_mask[0][zz])

In [None]:
# Names of the blocks returned by get_block_if_ndl
blocks_ndl_names

In [None]:
# Display the limits of box 2
c_zmin2, c_zmax2, c_xmin2, c_xmax2, c_ymin2, c_ymax2

In [None]:
# Nodule coordinates relative to c_zmin2 / c_xmin2 / c_ymin2, the minimum limits returned for box 2
print(COORDZ-c_zmin2)
print(COORDX-c_xmin2)
print(COORDY-c_ymin2)

In [None]:
# Median index, per axis, of the non-zero voxels of mask_maxvol_small
z,x,y=np.where(mask_maxvol_small)
zz = int(np.median(z))
xx = int(np.median(x))
yy = int(np.median(y))
print(zz,xx,yy)
# Show the mask at the median index of the first axis
plt.imshow(mask_maxvol_small[zz])

In [None]:
# Centre of the pylidc bounding box along each axis: midpoint of start/stop, floored to an int.
print(cbbox[2].start, cbbox[2].stop)
center_nodule_channels = int(np.floor(np.mean([cbbox[2].start, cbbox[2].stop])))
center_nodule_x = int(np.floor(np.mean([cbbox[0].start, cbbox[0].stop])))
center_nodule_y = int(np.floor(np.mean([cbbox[1].start, cbbox[1].stop])))
# Print the values computed above; `center_nodule` is never assigned in this cell,
# the slice-axis centre is stored in center_nodule_channels.
print(center_nodule_channels, center_nodule_x, center_nodule_y)

In [None]:
print(np.shape(mask_maxvol_small))
# To check that we are getting the right data 
# we get the center coords of the nodule and we subtract the minimum-box offset
# NOTE(review): center_nodule is not assigned in the nearby cells (only center_nodule_channels is) - confirm it is defined.
fig, ax = plt.subplots(1,2,figsize=(10,5))
ax[0].imshow(vol_small[center_nodule-np.min(min_box_channels)])
ax[1].imshow(mask_maxvol_small[center_nodule-np.min(min_box_channels)])
# In-plane centre minus the box offset and minus half the axis-1 size difference between
# curr_patient_pixels and pix_resampled_to_use.
# NOTE(review): the y line also uses axis 1 of both shapes - confirm axis 2 was not intended.
print((center_nodule_x-np.min(min_box_x))-((np.shape(curr_patient_pixels)[1] - np.shape(pix_resampled_to_use)[1])//2))
print((center_nodule_y-np.min(min_box_y))-((np.shape(curr_patient_pixels)[1] - np.shape(pix_resampled_to_use)[1])//2))
print(center_nodule_x)
print(center_nodule_y)
# Axis-1 size difference between the two volumes (cell output)
np.shape(curr_patient_pixels)[1] - np.shape(pix_resampled_to_use)[1]

In [None]:
# 'malignancy' column of df_patient_partX as an array
df_patient_partX['malignancy'].values

In [None]:
# Inspect the nodule bounding box; other cells read cbbox[0], cbbox[1] and cbbox[2] as slices (.start/.stop).
cbbox

In [None]:
# Shape of pix_resampled_to_use and a look at index 90 of its first axis (hard-coded)
print(np.shape(pix_resampled_to_use))
plt.imshow(pix_resampled_to_use[90])

In [None]:
# Inspect df_patient
df_patient

In [None]:
# Inspect df_patient_partX
df_patient_partX

In [None]:
# Middle index along the first axis of vol_small, then the two centres that
# erode_and_split_mask returns for the lungs mask at that slice
slice_middle = np.shape(vol_small)[0] // 2
xmed_1, ymed_1, xmed_2, ymed_2 = erode_and_split_mask(mask_lungs_small,slice_middle)
# Display the four centre coordinates together with the slice index
xmed_1, ymed_1, xmed_2, ymed_2, slice_middle

In [None]:
# Middle index along the first axis of vol_small, shown next to the shapes of the lungs mask and the volume
slice_middle = np.shape(vol_small)[0] // 2
np.shape(mask_lungs_small), np.shape(vol_small), slice_middle

In [None]:
# Per side (1 and 2): min and max nodule coordinates returned by nodule_right_or_left_lung
# for the nodule mask and the two lung centres
coord_min_side1, coord_max_side1, coord_min_side2, coord_max_side2 = nodule_right_or_left_lung(mask_maxvol_small, slice_middle, xmed_1, ymed_1, xmed_2, ymed_2)
# Display the four results
coord_min_side1, coord_max_side1, coord_min_side2, coord_max_side2

In [None]:
# Box limits for side 2 (third argument = 2), searched from the max/min nodule coordinates of that side.
# The six names are first reset to empty lists and then overwritten by the search result.
c_zmin2, c_zmax2, c_xmin2, c_xmax2, c_ymin2, c_ymax2 = [], [], [], [], [], []
c_zmin2, c_zmax2, c_xmin2, c_xmax2, c_ymin2, c_ymax2 = box_coords_contain_masks_right_size_search(coord_max_side2, coord_min_side2, 2, slice_middle, xmed_1, ymed_1, xmed_2, ymed_2, mask_lungs_small)
print(c_zmin2, c_zmax2, c_xmin2, c_xmax2, c_ymin2, c_ymax2)
# Extent of the box along each axis
print(c_zmax2 - c_zmin2, c_xmax2 - c_xmin2, c_ymax2 - c_ymin2)

In [None]:
# Box limits for side 1 (third argument = 1), searched from the max/min nodule coordinates of that side.
# The six names are first reset to empty lists and then overwritten by the search result.
c_zmin1, c_zmax1, c_xmin1, c_xmax1, c_ymin1, c_ymax1 = [], [], [], [], [], []
c_zmin1, c_zmax1, c_xmin1, c_xmax1, c_ymin1, c_ymax1 = box_coords_contain_masks_right_size_search(coord_max_side1, coord_min_side1, 1,  slice_middle, xmed_1, ymed_1, xmed_2, ymed_2, mask_lungs_small)
print(c_zmin1, c_zmax1, c_xmin1, c_xmax1, c_ymin1, c_ymax1)
# Extent of the box along each axis
print(c_zmax1 - c_zmin1, c_xmax1 - c_xmin1, c_ymax1 - c_ymin1)

In [None]:
# Block1 and Block2, in assignment order: lungs, ndl mask, ndl&lungs mask, lungs mask
# The same four volumes are passed for both sides; only the box limits differ (c_*1 vs c_*2).
block1, block1_mask, block1_mask_maxvol_and_lungs, block1_mask_lungs = get_four_blocks(vol_small, mask_maxvol_small, mask_maxvol_and_lungs_small, mask_lungs_small, c_zmin1, c_zmax1, c_xmin1, c_xmax1, c_ymin1, c_ymax1)
block2, block2_mask, block2_mask_maxvol_and_lungs, block2_mask_lungs = get_four_blocks(vol_small, mask_maxvol_small, mask_maxvol_and_lungs_small, mask_lungs_small, c_zmin2, c_zmax2, c_xmin2, c_xmax2, c_ymin2, c_ymax2)

In [None]:
# Min-max normalisation of both blocks with the fixed bounds -1018.0 and 1171.0
# (taken as the min and max of all images), followed by clipping to [0, 1].
block1 = np.clip((block1 - (-1018.0))/(1171.0-(-1018.0)), 0, 1)
block2 = np.clip((block2 - (-1018.0))/(1171.0-(-1018.0)), 0, 1)

In [None]:
# Apply lungs' mask
# Element-wise product: voxels where the lungs mask is 0 become 0 in the block
block1 = block1*block1_mask_lungs
block2 = block2*block2_mask_lungs

In [None]:
# Collect the blocks through get_block_if_ndl (presumably only the blocks that contain a nodule - TODO confirm).
# Besides the block lists it returns the block names and one slice index per block (slice1, slice2).
blocks_ndl, blocks_ndl_mask, block_mask_maxvol_and_lungs, blocks_ndl_lungs, blocks_ndl_names, slice1, slice2 =  get_block_if_ndl(block1, block2, block1_mask, block2_mask, block1_mask_maxvol_and_lungs, block2_mask_maxvol_and_lungs, block1_mask_lungs, block2_mask_lungs)

In [None]:
set_all_rcParams(False)
fig, ax = plt.subplots(2,5,figsize=(14,7))
# One row per block: scan, nodule mask, nodule-and-lungs mask, lungs mask
# (each shown at that block's slice index) and a histogram of the scan intensities.
_rows = (
    (block1, block1_mask, block1_mask_maxvol_and_lungs, block1_mask_lungs, slice1),
    (block2, block2_mask, block2_mask_maxvol_and_lungs, block2_mask_lungs, slice2),
)
for _r, (_blk, _ndl, _ndl_lungs, _lungs, _sl) in enumerate(_rows):
    for _c, _vol in enumerate((_blk, _ndl, _ndl_lungs, _lungs)):
        ax[_r, _c].imshow(_vol[_sl])
    ax[_r, 4].hist(_blk.flatten(), bins=80)
fig.tight_layout()

##

In [None]:
# Print the shapes of the original, resampled and to-use volumes (plus `spacing`),
# then show index slice_n of the original scan, the volume to use and the resampled nodule mask.
slice_n = 90
print(f'original shape {np.shape(curr_patient_pixels), spacing}')
print(f'resampled shape {np.shape(pix_resampled)}')
print(np.shape(pix_resampled_to_use),np.shape(mask_maxvol_resampled))
fig, ax = plt.subplots(1,3, figsize=(14,5))
ax[0].imshow(curr_patient_pixels[slice_n])
ax[1].imshow(pix_resampled_to_use[slice_n])
ax[2].imshow(mask_maxvol_resampled[slice_n])

---

---

In [None]:
def make3d_from_sparse(path):
    """Stack the sparse 2-D slices stored in ``path`` into one dense 3-D array.

    Every entry of ``path`` is loaded with ``scipy.sparse.load_npz`` in sorted
    file-name order, converted to a dense array and stacked along a new last axis.

    Args:
        path: Folder holding one sparse ``.npz`` matrix per slice. A trailing
            path separator is accepted but no longer required.

    Returns:
        numpy.ndarray with the slices along the third axis. A folder with a
        single slice yields a 3-D array whose last axis has length 1.

    Raises:
        ValueError: if ``path`` contains no files.
    """
    slice_names = sorted(os.listdir(path))
    if not slice_names:
        raise ValueError(f'no slices found in {path}')
    # Densify every slice first and stack once: calling np.dstack inside the loop
    # copies the whole partial volume again for each new slice.
    slices2d = [sparse.load_npz(os.path.join(path, fname)).toarray() for fname in slice_names]
    return np.dstack(slices2d)

In [None]:
# Scan identifier used to build folder paths
name = 'LIDC-IDRI-0001'

In [None]:
# Preview of the scans folder path for `name`.
# NOTE(review): path_data is not assigned in the nearby cells - confirm it is defined before running.
f'{path_data}{name}/scans/'

In [None]:
def read_slices3D_v2(path_data, ii_ids):
    """Read VOLUMES of lung, mask outside lungs and nodule, mask nodule, mask outside

    The scan, the max-volume nodule mask and the lungs mask are rebuilt with
    ``make3d_from_sparse`` from the sub-folders ``scans/``, ``maxvol_masks/`` and
    ``lung_masks/`` of ``{path_data}{ii_ids}/``, reordered so the slice axis comes
    first, and cropped to the bounding box of the non-zero voxels of the scan.
    The consensus masks are no longer loaded: they were read and cropped but never
    contributed to the returned values.

    Args:
        path_data: Root folder of the data; it is concatenated directly with
            ``ii_ids``, so it must end with a path separator.
        ii_ids: Name of the scan folder, e.g. ``'LIDC-IDRI-0001'``.

    Returns:
        Tuple ``(vol_small, mask_maxvol_small, mask_maxvol_and_lungs, mask_lungs_small2)``:
        the cropped scan, the cropped nodule mask,
        ``1 - ((1 - lungs) | nodule)`` and ``lungs | nodule``.

    Raises:
        ValueError: if the scan has no non-zero voxel, so no box can be found.
    """
    #ii_ids = f'LIDC-IDRI-{idnumber:04d}'
    print(f'reading scan {ii_ids}')
    scan_dir = f'{path_data}{ii_ids}/'
    # rearrange axes to slices first: (a, b, slices) -> (slices, a, b)
    vol = np.transpose(make3d_from_sparse(f'{scan_dir}scans/'), (2, 0, 1))
    mask_maxvol = np.transpose(make3d_from_sparse(f'{scan_dir}maxvol_masks/'), (2, 0, 1))
    mask_lungs = np.transpose(make3d_from_sparse(f'{scan_dir}lung_masks/'), (2, 0, 1))
    # Find the minimum box that contain the lungs
    nonzero_idx = np.where(vol != 0)
    if nonzero_idx[0].size == 0:
        raise ValueError(f'scan {ii_ids} contains no non-zero voxels')
    # The box is computed once and reused for every volume. The upper limit is the
    # largest non-zero index used as an exclusive slice bound, exactly as before,
    # so the last non-zero index of each axis is left out of the crop.
    # NOTE(review): confirm whether the last index should be included; changing it
    # alters the output shapes.
    box = tuple(slice(np.min(idx), np.max(idx)) for idx in nonzero_idx)
    # Apply the minimum box to the vol and masks
    vol_small = vol[box]
    mask_maxvol_small = mask_maxvol[box]
    mask_lungs_small = mask_lungs[box]
    # Get the mask_maxvol_small and the mask_lungs_small together
    mask_maxvol_and_lungs = 1 - ((1 - mask_lungs_small) | mask_maxvol_small)
    mask_lungs_small2 = mask_lungs_small | mask_maxvol_small
    return vol_small, mask_maxvol_small, mask_maxvol_and_lungs, mask_lungs_small2

In [None]:
# Load the cropped scan and masks of LIDC-IDRI-0001 from path_dest
vol_small, mask_maxvol_small, mask_maxvol_and_lungs_small, mask_lungs_small = read_slices3D_v2(path_dest, 'LIDC-IDRI-0001')

In [None]:
# Show index ii (= 71) of the scan, the nodule mask, the nodule-and-lungs mask and the lungs mask side by side
ii = 59+12
fig, ax = plt.subplots(1,4,figsize=(14,4))
ax[0].imshow(vol_small[ii])
ax[1].imshow(mask_maxvol_small[ii])
ax[2].imshow(mask_maxvol_and_lungs_small[ii])
ax[3].imshow(mask_lungs_small[ii])

In [None]:
# Show index ii (= 71) of the scan, the nodule mask, the nodule-and-lungs mask and the lungs mask side by side
ii = 59+12
fig, ax = plt.subplots(1,4,figsize=(14,4))
ax[0].imshow(vol_small[ii])
ax[1].imshow(mask_maxvol_small[ii])
ax[2].imshow(mask_maxvol_and_lungs_small[ii])
ax[3].imshow(mask_lungs_small[ii])

In [None]:
# For scans LIDC-IDRI-0001 .. LIDC-IDRI-0009: load the volumes and plot them at the
# median index (first axis) of the voxels equal to 1 in the nodule mask.
# NOTE(review): read_slices3D is called with the name only and is not defined in the
# nearby cells (read_slices3D_v2 takes a path and a name) - confirm which reader is meant.
for i in tqdm_notebook(np.arange(1,10)):
    name = f'LIDC-IDRI-{i:04d}'
    print(name)
    vol_small, mask_maxvol_small, mask_maxvol_and_lungs, mask_lungs_small = read_slices3D(name)
    z,x,y = np.where(mask_maxvol_small==1)
    z_median = np.median(z)
    slice_n = int(z_median)
    fig, ax = plt.subplots(1,4, figsize=(14,5))
    # The scan is displayed with a fixed colour range of 0..1
    ax[0].imshow(vol_small[slice_n], vmin=0, vmax=1)
    ax[1].imshow(mask_maxvol_small[slice_n])
    ax[2].imshow(mask_maxvol_and_lungs[slice_n])
    ax[3].imshow(mask_lungs_small[slice_n])

In [None]:
# Rebuild the dense scan, max-volume mask and lungs mask of LIDC-IDRI-0001 from the sparse slices under path_dest
i = 'LIDC-IDRI-0001'
lungs = make3d_from_sparse(f'{path_dest}{i}/scans/')
mask = make3d_from_sparse(f'{path_dest}{i}/maxvol_masks/')
mask_lungs = make3d_from_sparse(f'{path_dest}{i}/lung_masks/')

In [None]:
# Indices of the voxels equal to 1 in mask
np.where(mask==1)

In [None]:
# rearrange axes to slices first
# (the two swapaxes calls move the last axis to the front: (a, b, slices) -> (slices, a, b))
vol = lungs
vol = np.swapaxes(vol,1,2)
vol = np.swapaxes(vol,0,1)
mask = np.swapaxes(mask,1,2)
mask = np.swapaxes(mask,0,1)
mask_lungs = np.swapaxes(mask_lungs,1,2)
mask_lungs = np.swapaxes(mask_lungs,0,1)
# Find the minimum box that contain the lungs 
# (bounding box of the non-zero voxels of vol, one index array per axis)
min_box = np.where(vol!=0)
min_box_c = min_box[0]
min_box_x = min_box[1]
min_box_y = min_box[2]
# np.max is used as an exclusive slice bound, so the last non-zero index of each axis is not part of the crop
vol_small = vol[np.min(min_box_c):np.max(min_box_c),np.min(min_box_x):np.max(min_box_x),np.min(min_box_y):np.max(min_box_y)]
# Apply the same minimum box to the mask
mask_small = mask[np.min(min_box_c):np.max(min_box_c),np.min(min_box_x):np.max(min_box_x),np.min(min_box_y):np.max(min_box_y)]
mask_lungs_small = mask_lungs[np.min(min_box_c):np.max(min_box_c),np.min(min_box_x):np.max(min_box_x),np.min(min_box_y):np.max(min_box_y)]
# Rebind the working names to the cropped versions
lungs = vol_small
mask = mask_small
mask_lungs = mask_lungs_small
# Shapes of the three cropped volumes (cell output)
np.shape(lungs), np.shape(mask), np.shape(mask_lungs)

In [None]:
# Show index 70 (first axis) of the cropped scan, the cropped mask and the cropped lungs mask
slice_n=70
fig, ax = plt.subplots(1,3, figsize=(14,5))
ax[0].imshow(lungs[slice_n])
ax[1].imshow(mask[slice_n])
ax[2].imshow(mask_lungs_small[slice_n])