## Preprocessing
---
**Import Statements and Settings**

In [None]:
# Resamples CT scans and their masks to a standardized target voxel spacing

In [None]:
import os
import random
import numpy as np
from numpy import load
from numpy import savez_compressed
from scipy.ndimage import zoom
import SimpleITK as sitk
from tqdm import tnrange, tqdm_notebook
from multiprocessing import Pool

In [None]:
# Input locations (placeholders; fill in before running the notebook).
# CT scans are read from <ct_dir>/<patient>/<scan>/<scan>.mhd, and each mask is a
# file <patient>_<scan>.npz located directly inside mask_dir.
ct_dir = "" #path to directory with CT scans
mask_dir = "" #path to directory with masks 

In [None]:
# Collect the mask file names to process. Only regular ``.npz`` files directly
# inside mask_dir are kept (any other file would fail when loaded as a mask), and
# the list is sorted so that batches are composed identically on every run.
# A missing/unset mask_dir raises FileNotFoundError here.
with os.scandir(mask_dir) as _entries:
    ids = sorted(
        entry.name
        for entry in _entries
        if entry.is_file() and entry.name.endswith(".npz")
    )

In [None]:
# Target voxel spacing for CT/mask re-sampling.
# Low-resolution setting (for 192x160x80): a base spacing scaled by a common factor.
_base_spacing = (0.9765625, 0.9765625, 1.5)
_spacing_scale = 2.08
resample_dims = tuple(axis_spacing * _spacing_scale for axis_spacing in _base_spacing)
#resample_dims = (1,1,2) #high-resolution (for 192x192x96 model)

# Number of worker processes; the ids are also chunked into batches of this size.
workers = 38

In [None]:
def preprocess(_id):
    """Resample one CT scan and its mask to the target voxel spacing and save both.

    Parameters
    ----------
    _id : str
        Mask file name of the form ``<patient>_<scan>.npz``. The CT is read from
        ``ct_dir/<patient>/<scan>/<scan>.mhd`` and the mask from ``mask_dir/<_id>``
        (array stored under the key ``arr_0``).

    Notes
    -----
    Relies on the notebook-level ``ct_dir``, ``mask_dir`` and ``resample_dims``.
    ``resample_dims`` is taken to be in the same (x, y, z) order as the spacing
    reported by SimpleITK, since the two are paired element-wise.
    The mask is assumed to share the CT array's (y, x, z) layout, because the same
    zoom factors are applied to both -- TODO confirm against the mask export code.
    """
    print(_id)
    pt_info = _id.replace(".npz","").split("_")
    ct_path = os.path.join(ct_dir, pt_info[0], pt_info[1], pt_info[1] + ".mhd")
    mask_path = os.path.join(mask_dir, _id)

    # Close the .npz archive as soon as the array has been read.
    with load(mask_path) as mask_file:
        mask = mask_file['arr_0']

    ct = sitk.ReadImage(ct_path, sitk.sitkFloat32)
    # GetArrayFromImage returns (z, y, x); moving axis 0 to the end gives (y, x, z).
    ct_arr = np.moveaxis(sitk.GetArrayFromImage(ct), 0, -1)
    ct_spacing = ct.GetSpacing()  # (x, y, z)

    # Build the zoom factors in array-axis order (y, x, z) so that each array axis
    # is scaled by its own physical spacing. With equal in-plane spacing this is
    # identical to pairing the (x, y, z) spacing directly with the array axes.
    array_axes = (1, 0, 2)
    zoom_factors = tuple(ct_spacing[a] / resample_dims[a] for a in array_axes)

    # Resample CT and mask to the specified target spacing; the interpolated mask
    # is rounded back to integer-valued labels.
    ct_arr = zoom(ct_arr, zoom_factors)
    mask = np.round(zoom(mask, zoom_factors))

    # NOTE: both output prefixes below are placeholders. They must be set to two
    # different locations, otherwise the mask file overwrites the CT file.
    savez_compressed("" + _id, ct_arr) #path to save processed CTs
    savez_compressed("" + _id, mask) #path to save processed masks


In [None]:
def preprocess_mp(id_batch, workers=30):
    """Run ``preprocess`` on every id in ``id_batch`` using a process pool.

    Parameters
    ----------
    id_batch : list of str
        Mask file names handed one by one to ``preprocess``.
    workers : int, optional
        Number of worker processes in the pool (default 30).

    The call blocks until the whole batch has been processed. The pool is used
    as a context manager so its worker processes are always released, including
    when ``preprocess`` raises inside a worker and the error propagates here.
    """
    with Pool(processes=workers) as pool:
        pool.map(preprocess, id_batch)
    

In [None]:
# Split the ids into consecutive chunks of at most `workers` items. Ceil division
# keeps a final, shorter chunk when len(ids) is not a multiple of `workers`.
n_batches = (len(ids) + workers - 1) // workers
id_batches = [ids[k * workers:(k + 1) * workers] for k in range(n_batches)]

# Process one chunk per progress-bar step, each with its own pool of workers.
for i in tnrange(len(id_batches)):
    id_batch = id_batches[i]
    preprocess_mp(id_batch, workers)