## Subtract nodule from data

In [2]:
import nibabel as nib
from glob import glob
import os
import shutil
from tqdm import tqdm
import numpy as np
import pandas as pd
from PIL import Image

In [227]:
def normalize255(arr):
    """Linearly rescale ``arr`` to the range 0-255 and return it as ``uint8``.

    The minimum of ``arr`` maps to 0 and the maximum to 255; fractional
    results are truncated by the ``uint8`` cast.

    Parameters
    ----------
    arr : array-like
        Numeric values to rescale. Must contain at least one element
        (``np.min`` raises ``ValueError`` on empty input).

    Returns
    -------
    numpy.ndarray
        ``uint8`` array with the same shape as ``arr``.
    """
    arr = np.asarray(arr)
    lowest = np.min(arr)
    value_range = np.max(arr) - lowest
    if value_range == 0:
        # A constant input would divide 0 by 0, produce NaN and then cast NaN
        # to uint8 (undefined result + RuntimeWarning). Return an all-zero
        # image instead, i.e. every value sits at the minimum.
        return np.zeros(arr.shape, dtype=np.uint8)
    return np.array(((arr - lowest) / value_range) * 255).astype(np.uint8)

def subtract_nodule(img, nod):
    """Return a copy of ``img`` with the nodule region painted over.

    Every position where ``nod`` is non-zero is replaced by the global
    minimum of ``img``; all other positions keep their original value.
    """
    background = np.min(img)
    inside_nodule = nod != 0
    return np.where(inside_nodule, background, img)

In [5]:
# Load the split table; the cells below read its `split` and `img_path` columns.
with open('../train_val_test_v3.csv', mode='r') as split_file:
    split_csv = pd.read_csv(split_file)

### Data (train or valid)

In [6]:
########## train
# Collect the image paths of all rows whose `split` column is 'train' and make
# them the active `img_path` list used by the processing loops.
train_img_path = list(
    split_csv.loc[split_csv['split'] == 'train', 'img_path'].values
)
img_path = train_img_path

In [297]:
########## valid
# Collect the image paths of all rows whose `split` column is 'val' and make
# them the active `img_path` list used by the processing loops.
val_img_path = list(
    split_csv.loc[split_csv['split'] == 'val', 'img_path'].values
)
img_path = val_img_path

### Save valid data as .nii (nodules are not deleted)

In [301]:
########## for valid (nodules are NOT deleted)
# Copy every image in `img_path` and all of its mask files, unchanged, into
# the validation output folder.

out_dir = '../data/NII_normwinall_val_20mm'
# shutil.copyfile does not create missing folders, so make sure it exists.
os.makedirs(out_dir, exist_ok=True)

for src_img in tqdm(img_path):
    fname = src_img.split('/')[-1]
    # The image is copied whether or not it has masks.
    shutil.copyfile(src_img, f'{out_dir}/{fname}')

    # Mask files are located by replacing 'img' in the image path with the
    # glob pattern 'mask*'; an image without masks simply yields no matches.
    for src_mask in sorted(glob(src_img.replace('img', 'mask*'))):
        mask_fname = src_mask.split('/')[-1]
        shutil.copyfile(src_mask, f'{out_dir}/{mask_fname}')
            

100%|██████████| 192/192 [00:38<00:00,  4.95it/s]


### Save train, valid data as .nii (small nodules are deleted)

In [None]:
########## for train, valid (small nodules ARE deleted)
# For every image in `img_path`:
#   * no mask files        -> copy the image unchanged;
#   * otherwise, per mask  -> if the nodule is smaller than 10 voxels along
#     axis 1 or axis 2, erase it from the image with subtract_nodule();
#     if not, save the mask next to the image.
# The (possibly modified) image is then written to the output folder.

out_dir = '../data/NII_normwinall_train_10mm'
# Neither shutil.copyfile nor nib.save creates missing folders.
os.makedirs(out_dir, exist_ok=True)

for src_img in tqdm(img_path):
    fname = src_img.split('/')[-1]
    # Mask files are located by replacing 'img' in the image path with the
    # glob pattern 'mask*'.
    mask_path = sorted(glob(src_img.replace('img', 'mask*')))
    if not mask_path:
        shutil.copyfile(src_img, f'{out_dir}/{fname}')
        continue

    img = nib.load(src_img).get_fdata()

    for src_mask in mask_path:
        mask = nib.load(src_mask).get_fdata()

        # Voxels with a mask value >= 10 count as nodule
        # (presumably to ignore low-valued noise -- TODO confirm).
        nod_pos = np.nonzero(mask >= 10)
        if nod_pos[0].size == 0:
            # No voxel reaches the threshold: treat the nodule as having zero
            # extent instead of crashing on min()/max() of an empty sequence.
            nod_shape = (0,) * mask.ndim
        else:
            # Inclusive bounding-box extent per axis. The "+ 1" matters: a
            # slice min:max drops the last voxel and under-reports every
            # extent by one. ndarray.min()/max() are also far faster than the
            # Python built-ins on large index arrays.
            nod_shape = tuple(int(ax.max() - ax.min()) + 1 for ax in nod_pos)

        # nodule width, height threshold (original note: "10mm or 20mm";
        # this assumes 1 voxel == 1 mm -- TODO confirm)
        if nod_shape[1] < 10 or nod_shape[2] < 10:
            # Erases every non-zero mask voxel, not only those >= 10.
            img = subtract_nodule(img, mask)
        else:
            mask_fname = src_mask.split('/')[-1]
            # NOTE(review): an identity affine is written, so the source
            # file's affine/header is not carried over -- confirm intended.
            mask_nib = nib.Nifti1Image(mask, affine=np.eye(4))
            nib.save(mask_nib, f'{out_dir}/{mask_fname}')

    img_nib = nib.Nifti1Image(img, affine=np.eye(4))
    nib.save(img_nib, f'{out_dir}/{fname}')


 74%|███████▍  | 569/765 [46:12<21:03,  6.44s/it]  

### Save train, valid data as .png (small nodules are deleted)

In [None]:
# Save as PNG (10mm, 20mm)
# For every image in `img_path`: erase nodules smaller than 10 voxels along
# axis 1 or axis 2, save a binary PNG for each remaining mask, then save the
# (possibly modified) image as a PNG. Each PNG is the mean of the transposed
# volume over its last axis.

out_dir = '../data/PNG_normwinall_train_10mm'
# PIL's Image.save does not create missing folders.
os.makedirs(out_dir, exist_ok=True)

for src_img in tqdm(img_path):
    fname = src_img.split('/')[-1].replace('.nii.gz', '.png')
    # Mask files are located by replacing 'img' in the image path with the
    # glob pattern 'mask*'; an image without masks yields an empty list.
    mask_path = sorted(glob(src_img.replace('img', 'mask*')))

    # Load the volume once (it was previously read twice when masks existed).
    img = nib.load(src_img).get_fdata()

    for src_mask in mask_path:
        mask = nib.load(src_mask).get_fdata()

        # Voxels with a mask value >= 10 count as nodule
        # (presumably to ignore low-valued noise -- TODO confirm).
        nod_pos = np.nonzero(mask >= 10)
        if nod_pos[0].size == 0:
            # No voxel reaches the threshold: treat the nodule as having zero
            # extent instead of crashing on min()/max() of an empty sequence.
            nod_shape = (0,) * mask.ndim
        else:
            # Inclusive bounding-box extent per axis. The "+ 1" matters: a
            # slice min:max drops the last voxel and under-reports every
            # extent by one.
            nod_shape = tuple(int(ax.max() - ax.min()) + 1 for ax in nod_pos)

        if nod_shape[1] < 10 or nod_shape[2] < 10:    # width, height
            # Erases every non-zero mask voxel, not only those >= 10.
            img = subtract_nodule(img, mask)
        else:
            mask_fname = src_mask.split('/')[-1].replace('.nii.gz', '.png')
            mask_10 = np.where(mask >= 10, mask, 0)
            mean_mask = np.mean(mask_10.T, axis=2)
            # Binarise the projection directly: 255 where any thresholded
            # voxel projects, 0 elsewhere. This matches the old
            # "set to 255, then min-max normalise" result and also stays
            # valid when the projection has no zero pixel (old path: 0/0).
            norm_mask = np.where(mean_mask != 0, 255, 0).astype(np.uint8)
            Image.fromarray(norm_mask).save(f'{out_dir}/{mask_fname}')

    fimg = np.mean(img.T, axis=2)
    norm_fimg = normalize255(fimg)
    Image.fromarray(norm_fimg).save(f'{out_dir}/{fname}')

 11%|█▏        | 87/765 [03:51<43:16,  3.83s/it]  