# Overview
Generate 2D slices of the nodules and their masks   

**v1**
1. Input: Reads a scan 3D volume (e.g. LIDC 3D reconstruction 80px v3 - with coords/3Dvolumes/)   
1. Output: For each nodule, it saves (as compressed `.npz` files) the slice located at the middle of the nodule, the corresponding mask (which includes all nodules in the slice and the area outside the lungs), the mask of only the nodule being evaluated, and the mask of only the area outside the lungs  
1. To do: Maybe dilate the nodule mask so that it also covers nodule pixels that the mask currently misses

In [None]:
import numpy as np
import scipy.sparse as sparse
import os
import matplotlib.pyplot as plt
import pandas as pd
from scipy import ndimage
from copy import copy
import time
from skimage import measure, morphology
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
from matplotlib import rcParams
from tqdm import tqdm_notebook
from scipy.ndimage.morphology import binary_dilation
%matplotlib inline

# Functions

In [None]:
# Shorthand alias for np.shape
nsh = np.shape

In [None]:
def plot_3d(image, threshold=-300, alpha=.70, fig_size=10):
    """Render the iso-surface of a 3D volume as a triangle mesh.

    Args:
        image: 3D array; its axes are reversed before meshing.
        threshold: iso-value passed to marching cubes as the surface level.
        alpha: opacity of the mesh faces.
        fig_size: width and height of the (square) figure.
    """
    # Position the scan upright,
    # so the head of the patient would be at the top facing the camera
    p = image.transpose(2,1,0)

    # marching_cubes_lewiner was deprecated and later removed from
    # scikit-image; use it where it still exists (unchanged behaviour) and
    # fall back to measure.marching_cubes otherwise.
    marching_cubes = getattr(measure, 'marching_cubes_lewiner', None)
    if marching_cubes is None:
        marching_cubes = measure.marching_cubes
    # Only the vertices and faces are needed to build the mesh
    verts, faces, _normals, _values = marching_cubes(p, threshold)

    fig = plt.figure(figsize=(fig_size, fig_size))
    ax = fig.add_subplot(111, projection='3d')

    # Fancy indexing: `verts[faces]` to generate a collection of triangles
    mesh = Poly3DCollection(verts[faces], alpha=alpha)
    face_color = [0.45, 0.45, 0.75]
    mesh.set_facecolor(face_color)
    ax.add_collection3d(mesh)

    # Axis limits span the full extent of the transposed volume
    ax.set_xlim(0, p.shape[0])
    ax.set_ylim(0, p.shape[1])
    ax.set_zlim(0, p.shape[2])

    plt.show()

In [None]:
def set_all_rcParams(true_or_false):
    """Assign one value to the left/bottom tick and tick-label rcParams.

    Args:
        true_or_false: value stored under each of the four rcParams keys.
    """
    tick_keys = (
        'ytick.left',
        'xtick.bottom',
        'ytick.labelleft',
        'xtick.labelbottom',
    )
    for key in tick_keys:
        rcParams[key] = true_or_false

In [None]:
def make3d_from_sparse(path):
    """Load every sparse ``.npz`` slice in ``path`` and stack them into a 3D array.

    Files are read in sorted filename order; each one becomes one index along
    the last axis of the result.

    Args:
        path: directory whose entries are all matrices saved with
            ``scipy.sparse.save_npz`` (every entry is loaded). A trailing
            path separator is optional.

    Returns:
        Array of shape (rows, cols, n_slices). A directory with a single
        slice yields a 3D array of depth 1.

    Raises:
        ValueError: if ``path`` contains no files.
    """
    slice_names = np.sort(os.listdir(path))
    if len(slice_names) == 0:
        raise ValueError(f'no slices found in {path!r}')
    # Densify every slice first and stack once: calling np.dstack inside the
    # loop would copy the growing volume on every iteration.
    slices_2d = [
        np.asarray(sparse.load_npz(os.path.join(path, name)).todense())
        for name in slice_names
    ]
    return np.dstack(slices_2d)

In [None]:
# NOTE: this cell redefines plot_3d; an identical definition appears earlier
# in the file and this later one is the definition in effect afterwards.
def plot_3d(image, threshold=-300, alpha=.70, fig_size=10):
    """Draw the iso-surface of a 3D volume as a triangle mesh in a 3D plot.

    Args:
        image: 3D array; its axes are reversed before meshing.
        threshold: iso-value passed to marching cubes as the surface level.
        alpha: opacity of the mesh faces.
        fig_size: width and height of the (square) figure.
    """
    # Position the scan upright,
    # so the head of the patient would be at the top facing the camera
    p = image.transpose(2,1,0)

    # marching_cubes_lewiner was deprecated and later removed from
    # scikit-image; use it where it still exists (unchanged behaviour) and
    # fall back to measure.marching_cubes otherwise.
    marching_cubes = getattr(measure, 'marching_cubes_lewiner', None)
    if marching_cubes is None:
        marching_cubes = measure.marching_cubes
    # Only the vertices and faces are needed to build the mesh
    verts, faces, _normals, _values = marching_cubes(p, threshold)

    fig = plt.figure(figsize=(fig_size, fig_size))
    ax = fig.add_subplot(111, projection='3d')

    # Fancy indexing: `verts[faces]` to generate a collection of triangles
    mesh = Poly3DCollection(verts[faces], alpha=alpha)
    face_color = [0.45, 0.45, 0.75]
    mesh.set_facecolor(face_color)
    ax.add_collection3d(mesh)

    # Axis limits span the full extent of the transposed volume
    ax.set_xlim(0, p.shape[0])
    ax.set_ylim(0, p.shape[1])
    ax.set_zlim(0, p.shape[2])

    plt.show()

In [None]:
# Patient IDs flagged (per the variable name) as having segmentation errors.
# NOTE(review): not referenced in the visible part of this notebook - confirm
# whether these volumes are meant to be skipped.
vols_with_segmentation_errors = [
    'LIDC-IDRI-0116',
    'LIDC-IDRI-0136',
    'LIDC-IDRI-0146',
    'LIDC-IDRI-0231',
    'LIDC-IDRI-0304',
    'LIDC-IDRI-0309',
    'LIDC-IDRI-0332',
    'LIDC-IDRI-0344',
    'LIDC-IDRI-0391',
    'LIDC-IDRI-0473',
    'LIDC-IDRI-0478',
    'LIDC-IDRI-0537',
    'LIDC-IDRI-0582',
    'LIDC-IDRI-0612',
    'LIDC-IDRI-0621',
    'LIDC-IDRI-0652',
    'LIDC-IDRI-0655',
    'LIDC-IDRI-0797',
    'LIDC-IDRI-0807',
    'LIDC-IDRI-0864',
    'LIDC-IDRI-0908',
    'LIDC-IDRI-0918',
]

# Loop over all patients and all nodules

In [None]:
# Source root (one sub-directory per patient) and destination root for the
# saved slices
path_data = '/data/OMM/Datasets/LIDC_other_formats/LIDC_preprocessed_3D v2/'
path_dest = '/data/OMM/Datasets/LIDC_other_formats/LIDC slices inpainting v2 - maxvol dilated XX delete/'
# Sorted listing of the source root, so patients are visited in a fixed order
files = np.sort(os.listdir(path_data))

In [None]:
# 3x3x3 structuring element filled with ones: when passed to ndimage.label,
# voxels that touch by a face, an edge or a corner belong to the same object
str_3D = np.ones((3, 3, 3), dtype='uint8')

In [None]:
# For each patient, if there are nodules in the scan then get each nodule
# For each nodule get the slice that is in the middle of the nodule
# Save the slice,
# Save the mask (it includes all nodules in the slice and the area outside lungs)
# Save the coords of the nodule we are focusing on (for the cases where there are nodules>1)

# The annotations table does not depend on the patient, so read it once
# instead of once per loop iteration.
df_annotations = pd.read_csv('/data/datasets/LIDC-IDRI/annotations.csv')

# np.savez_compressed does not create missing directories.
for subdir in ('lungs', 'masks', 'nodule to focus on', 'outside lungs mask'):
    os.makedirs(f'{path_dest}{subdir}', exist_ok=True)

# 3x3 structuring element: 2D regions touching by a side or a corner are merged
str_2D = np.ones((3,3))
# A zero-valued region must cover more than this many pixels to be kept in
# the "outside lungs" mask
min_outside_area = 3000

for idx, i in enumerate(files):
    # if idx==10: break
    name_main = i.split('_')[0]
    # The last four characters of the name before the first '_' are the patient id
    idnumber = int(name_main[-4:])
    df_patient = df_annotations.loc[df_annotations['patientid']==idnumber]
    nodules_per_patient = len(np.unique(df_patient['cluster_id'].values))
    print(f'nodules_in_patient {i}: {nodules_per_patient}')
    if nodules_per_patient < 1:
        continue

    lungs = make3d_from_sparse(f'{path_data}{i}/scans/')
    mask = make3d_from_sparse(f'{path_data}{i}/maxvol_masks/')

    # rearrange axes to slices first: the last axis becomes the first one
    vol = lungs.transpose(2, 0, 1)
    mask = mask.transpose(2, 0, 1)

    # Find the minimum box that contains the lungs (the non-zero voxels)
    nonzero_c, nonzero_x, nonzero_y = np.where(vol != 0)
    if nonzero_c.size == 0:
        # Nothing to crop to; skip this patient instead of aborting the run
        print(f'empty scan volume for {i}, skipped')
        continue
    # The end of a slice is exclusive, so add 1 to keep the last non-zero
    # index of every axis inside the box.
    min_box = (slice(np.min(nonzero_c), np.max(nonzero_c) + 1),
               slice(np.min(nonzero_x), np.max(nonzero_x) + 1),
               slice(np.min(nonzero_y), np.max(nonzero_y) + 1))
    lungs = vol[min_box]
    # Apply the same minimum box to the mask
    mask = mask[min_box]

    # Every connected object in the mask volume is treated as one nodule
    labeled, nr_objects = ndimage.label(mask, structure=str_3D)
    for idj, j in enumerate(range(1, nr_objects + 1)):
        z, y, x = np.where(labeled == j)
        # Slice at the median z coordinate of the nodule's voxels
        take_one_slice = int(np.median(z))
        lungs_slice = lungs[take_one_slice]
        outside_lungs_tmp = lungs_slice==0

        # Dilated mask of every nodule in the slice ...
        mask_slice = binary_dilation(mask[take_one_slice])
        # ... that also masks the region outside the lungs
        mask_slice[outside_lungs_tmp] = 1

        # Sometimes one scan has two nodules but we focus on one at a time:
        # 1 marks zero-valued pixels, 2 marks the nodule being evaluated.
        # NOTE(review): y, x come from all slices of the nodule, not only
        # take_one_slice - confirm that this projection is intended.
        focus_one_nodule = outside_lungs_tmp * 1
        focus_one_nodule[y,x]=2

        # Zero-valued pixels alone do not identify the area outside the lungs
        # (some nodule pixels are 0 too). The nodule pixels were overwritten
        # with 2 above, and of the remaining zero-valued regions only the
        # large connected ones are kept.
        outside_lungs = np.zeros_like(lungs_slice)
        labeled_outside, nr_objects_outside = ndimage.label(focus_one_nodule==1, structure=str_2D)
        for label_outside in range(1, nr_objects_outside + 1):
            region = labeled_outside==label_outside
            if np.sum(region) > min_outside_area:
                outside_lungs[region]=1

        # Mask that contains only the nodule being evaluated
        focus_one_nodule_with_zeros = np.zeros_like(outside_lungs)
        focus_one_nodule_with_zeros[y,x]=1

        new_name = f'{name_main}_nodule_{idj:03d}.npz'
        np.savez_compressed(f'{path_dest}lungs/{new_name}', lungs_slice)
        np.savez_compressed(f'{path_dest}masks/{new_name}', mask_slice)
        np.savez_compressed(f'{path_dest}nodule to focus on/{new_name}',
                            focus_one_nodule_with_zeros)
        np.savez_compressed(f'{path_dest}outside lungs mask/{new_name}',outside_lungs)

# Load saved files

In [None]:
def _load_first_array(path):
    """Return the array stored under the key ``arr_0`` of the ``.npz`` file at ``path``."""
    # The context manager closes the archive as soon as the array has been read
    with np.load(path) as archive:
        return archive['arr_0']


new_name = 'LIDC-IDRI-0010_nodule_002.npz'
ll = _load_first_array(f'{path_dest}lungs/{new_name}')
mm = _load_first_array(f'{path_dest}masks/{new_name}')
nn = _load_first_array(f'{path_dest}nodule to focus on/{new_name}')
mo = _load_first_array(f'{path_dest}outside lungs mask/{new_name}')
# Adding the focused-nodule array to the full mask raises the focused
# nodule's pixels by 1 relative to the rest of the mask
nn_focus = nn+mm

In [None]:
# Show the four loaded arrays side by side, with ticks and tick labels hidden
set_all_rcParams(False)
fig, ax = plt.subplots(1,4,figsize=(14,14))
for axis, image in zip(ax, (ll, mm, nn_focus, mo)):
    axis.imshow(image)
fig.tight_layout()