In [1]:
import os
import logging
import warnings
import numpy as np
import pandas
import SimpleITK as sitk

from config import config
from multiprocessing import Pool
from functools import partial
from pp_utils import resample, load_itk_image, display, get_box_from_mask, normalize, worldToVoxelCoord, plot_3d

%matplotlib inline

In [2]:
def savenpy_luna(id, annos, filelist, luna_segment, luna_data, savepath, resolution, force=False):
    """Preprocess one LUNA scan and write the cropped volume plus its labels.

    For the scan named ``filelist[id]`` this function:

    1. loads the segmentation mask, keeps voxels whose value is 3 or >= 4,
       and resamples it to ``resolution``;
    2. derives a crop box from the resampled mask via ``get_box_from_mask``;
    3. loads, resamples and normalizes the raw scan;
    4. replaces everything outside the mask, and every masked voxel above the
       configured bone threshold, with the configured pad value;
    5. crops to the box and writes ``<savepath>/<name>.mhd``;
    6. converts the scan's annotations to voxel coordinates relative to the
       crop box and writes them to ``<savepath>/<name>_label.npy``.

    Args:
        id: Index into ``filelist``. (Shadows the builtin; the name is kept so
            existing keyword callers keep working.)
        annos: 2-D annotation array. Column 0 is matched against the scan
            name, columns 1-3 are reversed and converted to voxel coordinates,
            column 4 is copied through unchanged.
            NOTE(review): presumably columns 1-3 are x, y, z world coordinates
            and column 4 is the nodule diameter -- confirm against the CSV.
        filelist: Scan names without the ``.mhd`` extension.
        luna_segment: Directory containing the segmentation masks.
        luna_data: Directory containing the raw scans.
        savepath: Output directory (must already exist).
        resolution: Target spacing handed to ``resample``.
        force: When falsy, do nothing if both output files already exist.

    Returns:
        None. Results are written to disk; ``"<name> done"`` is printed on
        completion.
    """
    name = filelist[id]
    label_path = os.path.join(savepath, name + '_label.npy')
    volume_path = os.path.join(savepath, name + '.mhd')

    # Cheap early exit first: no config parsing or image loading is needed
    # when both outputs of a previous run are already present.
    if not force and os.path.exists(label_path) and os.path.exists(volume_path):
        return

    bone_thresh = int(config['pp_bone_threshold'])
    pad_value = int(config['pp_pad_value'])

    # load lung segmentation mask data given with dataset
    mask, origin, spacing, _ = load_itk_image(os.path.join(luna_segment, name + '.mhd'))
    # keep only the voxels labelled 3 or >= 4 (element-wise OR of two boolean arrays)
    mask = (mask == 3) + (mask >= 4)
    mask, _ = resample(mask, spacing, resolution)

    # calculate the inflated bounding box for lungs
    box_coord = get_box_from_mask(mask)

    # load scan; origin/spacing/isflip of the raw scan replace those of the mask
    scan_or, origin, spacing, isflip = load_itk_image(os.path.join(luna_data, name + '.mhd'))
    scan, new_spacing = resample(scan_or, spacing, resolution)
    # NOTE(review): the uint8 cast below assumes normalize() returns values in 0..255 -- confirm
    scan = normalize(scan)

    # clean scan: pad outside the mask, then pad anything brighter than the bone threshold
    clean_scan = (scan * mask + pad_value * (1 - mask)).astype('uint8')
    bones = (clean_scan * mask) > bone_thresh
    clean_scan[bones] = pad_value

    # cut bbox
    boxed_scan = clean_scan[box_coord[0,0]:box_coord[0,1],
                            box_coord[1,0]:box_coord[1,1],
                            box_coord[2,0]:box_coord[2,1]]

    itkimage = sitk.GetImageFromArray(boxed_scan)
    # NOTE(review): SimpleITK spacing is ordered (x, y, z) while the numpy array is
    # indexed (z, y, x). If new_spacing follows the array order this is only correct
    # for isotropic resolutions -- confirm against resample().
    itkimage.SetSpacing(new_spacing)
    itkimage.SetOrigin([0,0,0])
    sitk.WriteImage(itkimage, volume_path, True)  # True -> write compressed

    # annotations belonging to this scan; iterating an empty selection is a no-op
    this_annos = np.copy(annos[annos[:,0]==name])
    label = []
    for c in this_annos:
        pos = worldToVoxelCoord(c[1:4][::-1], origin, spacing, resolution)
        if isflip:
            # mirror the two in-plane axes for flipped scans
            pos[1:] = clean_scan.shape[1:3]-pos[1:]
        label.append(np.concatenate([pos,[c[4]]]))

    label = np.array(label)
    if len(label)==0:
        # scans without annotations get a single all-zero placeholder row
        label2 = np.array([[0,0,0,0]])
    else:
        label2 = np.copy(label).T
        # shift coordinates so they are relative to the crop box origin
        label2[:3] = label2[:3]-np.expand_dims(box_coord[:,0],1)
        label2 = label2[:4].T
    np.save(label_path, label2)

    print(name+' done')

In [3]:
def preprocess_luna():
    """Preprocess every ``.mhd`` scan in the configured raw-data directory.

    Reads the input/output locations and target resolution from ``config``,
    loads the annotation CSV, makes sure the output directory exists, and runs
    ``savenpy_luna`` once per scan on a ``multiprocessing.Pool`` with the
    default number of workers. Scans whose outputs already exist are skipped
    by ``savenpy_luna`` itself (it is called with its default ``force``).

    Returns:
        None. Progress is reported with ``print``.
    """
    luna_segment = config['luna_segment']
    savepath = config['preprocess_result_path']
    luna_data = config['luna_raw']
    luna_label = config['luna_label']
    # The resolution is stored as text; drop the first and last character
    # (presumably enclosing brackets) and parse the comma-separated numbers.
    resolution = np.fromstring(config['pp_resolution'][1:-1], sep=',')

    print('starting preprocessing luna')

    # splitext strips only the trailing extension, so a name that happens to
    # contain '.mhd' elsewhere is not truncated.
    filelist = [os.path.splitext(f)[0] for f in os.listdir(luna_data) if f.endswith('.mhd')]
    annos = np.array(pandas.read_csv(luna_label))
    # makedirs also creates missing parent directories, which mkdir does not.
    if not os.path.isdir(savepath):
        os.makedirs(savepath)

    partial_savenpy_luna = partial(savenpy_luna, annos=annos, filelist=filelist, luna_segment=luna_segment,
                                   luna_data=luna_data, savepath=savepath, resolution=resolution)
    N = len(filelist)
    print('files: ' + str(N))

    pool = Pool()
    try:
        pool.map(partial_savenpy_luna, range(N))
    except BaseException:
        # Covers worker errors and KeyboardInterrupt: stop outstanding work
        # rather than leaving orphaned worker processes behind.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        pool.join()
    print('end preprocessing luna')

In [None]:
%%time

# `warnings` and `logging` are imported in the first cell; no re-import needed here.
# Suppress all Python warnings for the remainder of the session.
warnings.filterwarnings("ignore")
# Send records at WARNING level and above to pp_log.log, prefixed with a timestamp.
logging.basicConfig(
    filename='pp_log.log',
    format='%(asctime)s %(message)s',
    datefmt='%m/%d/%Y %I:%M:%S %p',
    level=logging.WARNING,
)
preprocess_luna()

starting preprocessing luna
files: 884
1.3.6.1.4.1.14519.5.2.1.6279.6001.160124400349792614505500125883 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.319066480138812986026181758474 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.153181766344026020914478182395 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.885292267869246639232975687131 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.223650122819238796121876338881 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.107109359065300889765026303943 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.219087313261026510628926082729 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.315187221221054114974341475212 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.280072876841890439628529365478 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.397062004302272014259317520874 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.262873069163227096134627700599 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.613212850444255764524630781782 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.237915456403882324748189195892 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.31333405502967147

1.3.6.1.4.1.14519.5.2.1.6279.6001.280125803152924778388346920341 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.265570697208310960298668720669 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.183056151780567460322586876100 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.276351267409869539593937734609 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.249404938669582150398726875826 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.250397690690072950000431855143 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.215104063467523905369326175410 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.252634638822000832774167856951 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.300246184547502297539521283806 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.202476538079060560282495099956 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.211956804948320236390242845468 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.248360766706804179966476685510 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.310548927038333190233889983845 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.339484970190920330170416228517 done
1.3.6.1.4.1.14519.5.

1.3.6.1.4.1.14519.5.2.1.6279.6001.179730018513720561213088132029 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.745109871503276594185453478952 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.308153138776443962077214577161 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.316393351033132458296975008261 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.106630482085576298661469304872 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.111496024928645603833332252962 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.152706273988004688708784163325 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.244681063194071446501270815660 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.240969450540588211676803094518 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.334166493392278943610545989413 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.103115201714075993579787468219 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.174692377730646477496286081479 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.134370886216012873213579659366 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.237428977311365557972720635401 done
1.3.6.1.4.1.14519.5.

1.3.6.1.4.1.14519.5.2.1.6279.6001.842980983137518332429408284002 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.193808128386712859512130599234 done
Scan/mask ../luna/seg-lungs-LUNA16/1.3.6.1.4.1.14519.5.2.1.6279.6001.127965161564033605177803085629.mhd is flip!
1.3.6.1.4.1.14519.5.2.1.6279.6001.188265424231150847356515802868 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.259543921154154401875872845498 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.183924380327950237519832859527 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.220205300714852483483213840572 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.979083010707182900091062408058 done
Scan/mask ../luna/raw/1.3.6.1.4.1.14519.5.2.1.6279.6001.964952370561266624992539111877.mhd is flip!
Scan/mask ../luna/raw/1.3.6.1.4.1.14519.5.2.1.6279.6001.123697637451437522065941162930.mhd is flip!
Scan/mask ../luna/raw/1.3.6.1.4.1.14519.5.2.1.6279.6001.127965161564033605177803085629.mhd is flip!
1.3.6.1.4.1.14519.5.2.1.6279.6001.283569726884265181140892667131 done
1.3.6.1.4.1.14519.5.2.1.627

1.3.6.1.4.1.14519.5.2.1.6279.6001.910607280658963002048724648683 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.640729228179368154416184318668 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.466284753932369813717081722101 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.182192086929819295877506541021 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.307835307280028057486413359377 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.275755514659958628040305922764 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.265133389948279331857097127422 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.323753921818102744511069914832 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.259124675432205040899951626253 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.316911475886263032009840828684 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.145759169833745025756371695397 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.837810280808122125183730411210 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.550599855064600241623943717588 done
1.3.6.1.4.1.14519.5.2.1.6279.6001.143412474064515942785157561636 done
1.3.6.1.4.1.14519.5.

In [None]:
# savepath = config['preprocess_result_path']

# filelist = [f.split('.mhd')[0] for f in os.listdir(savepath) if f.endswith('.mhd') ]
# name = filelist[0]
# scan_or, origin, spacing, isflip = load_itk_image(os.path.join(savepath,name+'.mhd'))
# display(scan_or[169],169)