In [1]:
import os
import ast
import numpy as np
import pandas as pd
from PIL import Image, ImageDraw
import imageio
from tqdm import tqdm
import matplotlib.pyplot as plt

import sys
sys.path.append('../../')
from jsonio import load
sys.path.append('../')
from jit import _det_overlap,_iou

In [2]:
# Root directory of the preprocessed multi-slice data; inputs are read from it and
# all outputs of this notebook are written beneath it.
base = '/data/liumingzhou/CounterAlign_output/preprocess/multiple_slices'
# Split definition file; it is expected to provide the 'train'/'val'/'test' id
# collections and the 'mindiam' value used to name the output folders.
splitfile = '../../single_slice/split_mindiam7.json'
# Slice filenames that are excluded from the output.
todel = ['LIDC-IDRI-0634-0-0', 'LIDC-IDRI-0545-1-6'] # these two slices have no lung area

In [3]:
# Inputs under `base`: the annotation table (raw.csv) and the directory of
# per-slice .npy arrays (raw/).
annofile, npypath = (os.path.join(base, leaf) for leaf in ('raw.csv', 'raw'))
# Annotation table, then the split definition (parsed by the project's jsonio.load).
annos = pd.read_csv(annofile)
split = load(splitfile)

In [4]:
# Output path templates, named after the 'mindiam' value stored in the split file.
# The trailing '{}' is a slot that is later filled with a subset name or a file name.
_mindiam = split['mindiam']
savebase = os.path.join(base, 'mindiam{}_fulllung'.format(_mindiam), '{}')
saveimgbase = os.path.join(base, 'visulization_mindiam{}_fulllung'.format(_mindiam), '{}')

# Create one folder per subset under both templates; existing folders are left as-is.
for subset in ('train', 'val', 'test'):
    for _template in (savebase, saveimgbase):
        os.makedirs(_template.format(subset), exist_ok=True)

#### process

In [5]:
def str2box(string,reversion=False):
    """Parse a bracketed, comma-separated box string into a list of ints.

    The first and last characters of ``string`` are dropped (they are assumed
    to be the enclosing brackets, but this is not checked) and the remainder
    is split on commas.

    Args:
        string: Text such as ``'[1, 2, 3, 4]'``.
        reversion: When False, every comma-separated field is converted and
            returned in order. When True, exactly the first four fields are
            used and each pair is swapped, giving ``[b1, b0, b3, b2]``.

    Returns:
        A list of ints.

    Raises:
        ValueError: If a field that is converted is not a valid integer.
        IndexError: If ``reversion`` is True and fewer than four fields exist.
    """
    fields = string[1:-1].split(',')
    if not reversion:
        return list(map(int, fields))
    # Swap the two coordinates inside each (first, second) pair.
    swapped_order = (1, 0, 3, 2)
    return [int(fields[pos]) for pos in swapped_order]

In [6]:
# Crop every annotated slice to its lung bounding box, save the crop under its subset
# folder, and collect one record per kept slice: filename, binary malignancy label and
# the nodule box expressed in the coordinates of the cropped image.
outputs = {'filename': [], 'malignancy': [], 'box': []}

# Map each id listed in the split file to its subset once, instead of re-scanning the
# three id collections for every row. Later subsets overwrite earlier ones, which
# matches a train/val/test scan that keeps the last match.
subset_of = {}
for _subset in ['train', 'val', 'test']:
    for _pid in split[_subset]:
        subset_of[_pid] = _subset

for ind,row in tqdm(annos.iterrows(),total=annos.shape[0]):
    filename = row['filename']
    # Split ids are the filename with its last '-'-separated field removed.
    pid = '-'.join(filename.split('-')[:-1])
    subset = subset_of.get(pid, 'none')
    if subset=='none' or filename in todel:
        continue # not in train/val/test, or explicitly excluded

    malign = row['malignancy']
    # reversion=True swaps each coordinate pair of the stored box; the result is used
    # below as [xmin, ymin, xmax, ymax], the same order as the lung box.
    xmin,ymin,xmax,ymax = str2box(row['box'],reversion=True)
    nodbox = [xmin, ymin, xmax, ymax]

    cropbox = str2box(row['lungbox']) # lung (segmentation) box, used as the crop window
    # In a few slices only one lung is segmented, so the nodule does not overlap the
    # lung box at all. In that case widen the box sideways by its own width, and at
    # least far enough to pass the nodule by 10 px.
    # NOTE(review): _iou is a project helper; a result of 0 is treated as "no overlap".
    iou = _iou(nodbox,cropbox)
    if iou==0:
        cropbox_width = cropbox[2] - cropbox[0]
        if nodbox[0]>cropbox[2]: # nodule lies to the right of the segmented lung
            newxmax = cropbox[2] + cropbox_width
            cropbox[2] = max(newxmax,nodbox[2]+10)
        else: # otherwise the nodule is assumed to lie to the left
            newxmin = cropbox[0] - cropbox_width
            cropbox[0] = min(newxmin,nodbox[0]-10)

        assert(_iou(nodbox,cropbox)!=0),'Lung segmentation error.'

    # Skip slices whose crop window is smaller than 50 px in both width and height.
    if cropbox[2]-cropbox[0]<50 and cropbox[3]-cropbox[1]<50:
        continue

    # Load the slice only now, so rows skipped above cost no disk read.
    npy = np.load(os.path.join(npypath, filename+'.npy'))
    img = Image.fromarray(npy)
    cropimg = img.crop(cropbox)
    cropnpy = np.array(cropimg)

    # Nodule box translated into the coordinate frame of the cropped image.
    nodxmin = nodbox[0] - cropbox[0]
    nodymin = nodbox[1] - cropbox[1]
    nodxmax = nodbox[2] - cropbox[0]
    nodymax = nodbox[3] - cropbox[1]
    nodcropbox = [nodxmin, nodymin, nodxmax, nodymax]
    assert np.min(nodcropbox)>=0, '{} has nodcropbox: {}'.format(filename,nodcropbox)

    np.save(os.path.join(savebase.format(subset), filename), cropnpy)
    # Write a preview image for every row whose index is a multiple of 100, plus one
    # specific case that is always rendered.
    if ind%100==0 or 'LIDC-IDRI-0801-3-4' in filename:
        # Scaling by 255 assumes the stored arrays hold values in [0, 1] -- TODO confirm.
        pilimg = Image.fromarray(cropnpy*255).convert("RGB")
        drawer = ImageDraw.Draw(pilimg)
        # Outline the nodule with a 10 px margin so the frame does not cover it.
        drawer.rectangle([nodxmin - 10, nodymin - 10, nodxmax + 10, nodymax + 10], outline='red')
        pilnpy = np.array(pilimg).astype('uint8')
        imageio.imsave(os.path.join(saveimgbase.format(subset), filename + '.jpg'),pilnpy)

    outputs['filename'].append(filename)
    outputs['box'].append(nodcropbox)
    # Binary label: scores up to and including 3 become 0, anything else becomes 1.
    outputs['malignancy'].append(0 if malign <= 3 else 1)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 23848/23848 [00:36<00:00, 662.08it/s]


In [7]:
# Turn the collected records into a table indexed by filename and write it as
# annos.csv; the '{}' slot of savebase is filled with the file name here, so the
# CSV lands next to the per-subset folders.
df = pd.DataFrame(outputs)
df = df.set_index('filename')
annos_csv = savebase.format('annos.csv')
df.to_csv(annos_csv)