In [1]:
from datasets import load_dataset

import nibabel as nib
import scipy.ndimage

from nibabel.processing import resample_to_output
from concurrent.futures import ThreadPoolExecutor
from matplotlib import pyplot as plt
import numpy as np
import os
import glob
import tempfile
from natsort import natsorted
from ipywidgets import interact, IntSlider
import cv2

from monai.apps import download_and_extract

In [5]:
data_dir = '/home/pawel/Documents/RISA/3D_segmentation'

In [14]:
resource = "https://zenodo.org/records/10069289/files/AeroPath.zip?download=1"
md5 = "3fd5106c175c85d60eaece220f5dfd87"

compressed_file = os.path.join(data_dir, "AeroPath.zip")
if not os.path.exists(data_dir):
    download_and_extract(resource, compressed_file, data_dir, md5)

AeroPath.zip: 4.70GB [02:51, 29.4MB/s]                               

2024-06-23 12:55:50,366 - INFO - Downloaded: /home/pawel/Documents/RiSA/3D_segmentation/AeroPath/AeroPath.zip





2024-06-23 12:55:57,824 - INFO - Verified 'AeroPath.zip', md5: 3fd5106c175c85d60eaece220f5dfd87.
2024-06-23 12:55:57,825 - INFO - Writing into directory: /home/pawel/Documents/RiSA/3D_segmentation/AeroPath.


In [6]:
pattern = os.path.join(data_dir, 'AeroPath/**/*_CT_HR_label_airways.nii.gz')
train_airways = natsorted(glob.glob(pattern, recursive=True))
print(len(train_airways), train_airways)

pattern = os.path.join(data_dir, 'AeroPath/**/*_CT_HR_label_lungs.nii.gz')
train_lungs = natsorted(glob.glob(pattern, recursive=True))
print(len(train_lungs), train_lungs)

pattern = os.path.join(data_dir, 'AeroPath/**/*_CT_HR.nii.gz')
train_scans = natsorted(glob.glob(pattern, recursive=True))
print(len(train_scans), train_scans)

27 ['/home/pawel/Documents/RISA/3D_segmentation/AeroPath/1/1_CT_HR_label_airways.nii.gz', '/home/pawel/Documents/RISA/3D_segmentation/AeroPath/2/2_CT_HR_label_airways.nii.gz', '/home/pawel/Documents/RISA/3D_segmentation/AeroPath/3/3_CT_HR_label_airways.nii.gz', '/home/pawel/Documents/RISA/3D_segmentation/AeroPath/4/4_CT_HR_label_airways.nii.gz', '/home/pawel/Documents/RISA/3D_segmentation/AeroPath/5/5_CT_HR_label_airways.nii.gz', '/home/pawel/Documents/RISA/3D_segmentation/AeroPath/6/6_CT_HR_label_airways.nii.gz', '/home/pawel/Documents/RISA/3D_segmentation/AeroPath/7/7_CT_HR_label_airways.nii.gz', '/home/pawel/Documents/RISA/3D_segmentation/AeroPath/8/8_CT_HR_label_airways.nii.gz', '/home/pawel/Documents/RISA/3D_segmentation/AeroPath/9/9_CT_HR_label_airways.nii.gz', '/home/pawel/Documents/RISA/3D_segmentation/AeroPath/10/10_CT_HR_label_airways.nii.gz', '/home/pawel/Documents/RISA/3D_segmentation/AeroPath/11/11_CT_HR_label_airways.nii.gz', '/home/pawel/Documents/RISA/3D_segmentation/Ae

In [4]:
for scan, lungs, airways in zip(train_scans, train_lungs, train_airways):
    scan_data = nib.load(scan)
    scan_data = scan_data.get_fdata()

    lungs_data = nib.load(lungs)
    lungs_data = lungs_data.get_fdata()

    airways_data = nib.load(airways)
    airways_data = airways_data.get_fdata()

    print(scan_data.shape, scan)

    # x, y, z = scan_data.shape

    # if x < 320:
    #     x_pad = 320 - x
    #     x_pad1 = x_pad // 2
    #     x_pad2 = x_pad - x_pad1
    #     lungs_data = np.pad(scan_data, ((x_pad1, x_pad2), (0, 0), (0, 0)), mode='constant', constant_values=0)
    #     airways_data = np.pad(scan_data, ((x_pad1, x_pad2), (0, 0), (0, 0)), mode='constant', constant_values=0)
    #     scan_data = np.pad(scan_data, ((x_pad1, x_pad2), (0, 0), (0, 0)), mode='constant', constant_values=-1024)

KeyboardInterrupt: 

In [7]:
def create_directory(path):
    if not os.path.exists(path):
        os.makedirs(path)

In [8]:
# train_scans = [train_scans[9]]
# train_airways = [train_airways[9]]
# train_lungs = [train_lungs[9]]

scan_base_path = '../dataset/scan_cut'
airways_base_path = '../dataset/airways_cut'
lungs_base_path = '../dataset/lungs_cut'
create_directory(scan_base_path)
create_directory(airways_base_path)
create_directory(lungs_base_path)

for scan_path, airways_path, lungs_path in zip(train_scans, train_airways, train_lungs):
    print(scan_path, airways_path, lungs_path)
    scan = nib.load(scan_path)
    airways = nib.load(airways_path)
    lungs = nib.load(lungs_path)
    affine = scan.affine

    image_shape = lungs.shape
    print('Image shape before convert', image_shape)
    x, y, max_slices = image_shape

    min_slice = np.NaN
    max_slice = np.NaN

    min_x = np.NaN
    min_y = np.NaN

    max_x = np.NaN
    max_y = np.NaN

    scan_data = scan.get_fdata().astype(np.float32)
    airways_data = airways.get_fdata().astype(np.float32)
    lungs_data = lungs.get_fdata().astype(np.float32)

    # if x < 320:
    #     x_pad = 320 - x
    #     x_pad1 = x_pad // 2
    #     x_pad2 = x_pad - x_pad1
    #     lungs_data = np.pad(lungs_data, ((x_pad1, x_pad2), (0, 0), (0, 0)), mode='constant', constant_values=0)
    #     airways_data = np.pad(airways_data, ((x_pad1, x_pad2), (0, 0), (0, 0)), mode='constant', constant_values=0)
    #     scan_data = np.pad(scan_data, ((x_pad1, x_pad2), (0, 0), (0, 0)), mode='constant', constant_values=-1024)

    # if y < 160:
    #     y_pad = 160 - y
    #     y_pad1 = y_pad // 2
    #     y_pad2 = y_pad - y_pad1
    #     lungs_data = np.pad(lungs_data, ((0, 0), (y_pad1, y_pad2), (0, 0)), mode='constant', constant_values=0)
    #     airways_data = np.pad(airways_data, ((0, 0), (y_pad1, y_pad2), (0, 0)), mode='constant', constant_values=0)
    #     scan_data = np.pad(scan_data, ((0, 0), (y_pad1, y_pad2), (0, 0)), mode='constant', constant_values=-1024)

    # if max_slices < 320:
    #     z_pad = 320 - max_slices
    #     z_pad1 = z_pad // 2
    #     z_pad2 = z_pad - z_pad1
    #     lungs_data = np.pad(lungs_data, ((0, 0), (0, 0), (z_pad1, z_pad2)), mode='constant', constant_values=0)
    #     airways_data = np.pad(airways_data, ((0, 0), (0, 0), (z_pad1, z_pad2)), mode='constant', constant_values=0)
    #     scan_data = np.pad(scan_data, ((0, 0), (0, 0), (z_pad1, z_pad2)), mode='constant', constant_values=-1024)


    # image_shape = lungs.shape
    # x, y, max_slices = image_shape

    # print('Image shape after padding', scan_data.shape)

    scan_data[scan_data > 1024] = 1024
    scan_data[scan_data < -1024] = -1024
    # scan_data = (scan_data - np.min(scan_data)) / (np.max(scan_data) - np.min(scan_data))
    
    print('min max scan', np.min(scan_data), np.max(scan_data))
    print('min max airways', np.min(airways_data), np.max(airways_data))
    print('min max lungs', np.min(lungs_data), np.max(lungs_data))

    for slice in range(max_slices):
        lungs_slice = lungs_data[:, :, slice]
        airways_slice = airways_data[:, :, slice]
        
        if np.any(lungs_slice) > 0 or np.any(airways_slice > 0):
            if np.isnan(min_slice):
                min_slice = slice
            max_slice = slice
            
            for slice_data in [lungs_slice, airways_slice]:
                slice_data_uint8 = slice_data.astype(np.uint8)
                contours, _ = cv2.findContours(slice_data_uint8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                
                if contours:
                    max_contour = max(contours, key=cv2.contourArea)
                    x, y, w, h = cv2.boundingRect(max_contour)
                    
                    min_x = x if np.isnan(min_x) else min(min_x, x)
                    min_y = y if np.isnan(min_y) else min(min_y, y)
                    max_x = x + w if np.isnan(max_x) else max(max_x, x + w)
                    max_y = y + h if np.isnan(max_y) else max(max_y, y + h)

    # if abs(max_x - min_x) < 320:
    #     differ = (320 - abs(max_x - min_x)) // 2
    #     max_x = np.clip(max_x + differ, 0, scan_data.shape[0])
    #     min_x = np.clip(min_x - differ, 0, scan_data.shape[0])
    #     if abs(max_x - min_x) < 320:
    #         max_x = np.clip(max_x + (320 - abs(max_x - min_x)), 0, scan_data.shape[0])

    # if abs(max_y - min_y) < 320:
    #     differ = (320 - abs(max_y - min_y)) // 2
    #     max_y = np.clip(max_y + differ, 0, scan_data.shape[1])
    #     min_y = np.clip(min_y - differ, 0, scan_data.shape[1])
    #     if abs(max_y - min_y) < 320:
    #         max_y = np.clip(max_y + (320 - abs(max_y - min_y)), 0, scan_data.shape[1])

    # if abs(min_slice - max_slice) < 320:
    #     print(abs(min_slice - max_slice))
    #     print(max_slice, min_slice)
    #     differ = (320 - abs(min_slice - max_slice)) // 2
    #     print(differ)
    #     max_slice = np.clip(max_slice + differ, 0, scan_data.shape[2])
    #     min_slice = np.clip(min_slice - differ, 0, scan_data.shape[2])
    #     if abs(min_slice - max_slice) < 320:
    #         max_slice = np.clip(max_slice + (320 - abs(min_slice - max_slice)), 0, scan_data.shape[2])

    #     print(max_slice, min_slice)

    print("Finish calculating bounding box")

    scan_data = scan_data[min_y:max_y, min_x:max_x, min_slice:max_slice]   
    print('Image shape after convert', scan_data.shape)    
    img = nib.Nifti1Image(scan_data, affine)
    resampled_scan = resample_to_output(img, [0.75, 0.75, 0.75], order=1)
    nib.save(resampled_scan, os.path.join(scan_base_path, scan_path.split('/')[-1]))

    airways_data = airways_data[min_y:max_y, min_x:max_x, min_slice:max_slice]
    img = nib.Nifti1Image(airways_data, affine)
    resampled_airways = resample_to_output(img, [0.75, 0.75, 0.75], order=1)
    # airways_data[airways_data > 0] = 1
    nib.save(resampled_airways, os.path.join(airways_base_path, airways_path.split('/')[-1]))

    lungs_data = lungs_data[min_y:max_y, min_x:max_x, min_slice:max_slice]
    img = nib.Nifti1Image(lungs_data, affine)
    resampled_lungs = resample_to_output(img, [0.75, 0.75, 0.75], order=1)
    # lungs_data[lungs_data > 0] = 1
    nib.save(resampled_lungs, os.path.join(lungs_base_path, lungs_path.split('/')[-1]))

    # print('min max scan', np.min(scan_data), np.max(scan_data))
    # print('min max airways', np.min(airways_data), np.max(airways_data))
    # print('min max lungs', np.min(lungs_data), np.max(lungs_data))

    print('Finish saving resampled images: ', os.path.join(scan_base_path, scan_path.split('/')[-1]),
                                                os.path.join(airways_base_path, airways_path.split('/')[-1]),
                                                os.path.join(lungs_base_path, lungs_path.split('/')[-1]))

/home/pawel/Documents/RISA/3D_segmentation/AeroPath/1/1_CT_HR.nii.gz /home/pawel/Documents/RISA/3D_segmentation/AeroPath/1/1_CT_HR_label_airways.nii.gz /home/pawel/Documents/RISA/3D_segmentation/AeroPath/1/1_CT_HR_label_lungs.nii.gz
Image shape before convert (512, 512, 767)
min max scan -1024.0 1024.0
min max airways 0.0 1.0
min max lungs 0.0 1.0
Finish calculating bounding box
Image shape after convert (388, 298, 650)
Finish saving resampled images:  ../dataset/scan_cut/1_CT_HR.nii.gz ../dataset/airways_cut/1_CT_HR_label_airways.nii.gz ../dataset/lungs_cut/1_CT_HR_label_lungs.nii.gz
/home/pawel/Documents/RISA/3D_segmentation/AeroPath/2/2_CT_HR.nii.gz /home/pawel/Documents/RISA/3D_segmentation/AeroPath/2/2_CT_HR_label_airways.nii.gz /home/pawel/Documents/RISA/3D_segmentation/AeroPath/2/2_CT_HR_label_lungs.nii.gz
Image shape before convert (512, 512, 829)
min max scan -1024.0 1024.0
min max airways 0.0 1.0
min max lungs 0.0 1.0
Finish calculating bounding box
Image shape after convert 

In [9]:
pattern = os.path.join(data_dir, '**/dataset/airways_cut/*_CT_HR_label_airways.nii.gz')
cut_airways = natsorted(glob.glob(pattern, recursive=True))
print(cut_airways)

pattern = os.path.join(data_dir, '**/dataset/lungs_cut/*_CT_HR_label_lungs.nii.gz')
cut_lungs = natsorted(glob.glob(pattern, recursive=True))
print(cut_lungs)

pattern = os.path.join(data_dir, '**/dataset/scan_cut/*_CT_HR.nii.gz')
cut_scans = natsorted(glob.glob(pattern, recursive=True))
print(cut_scans)

def save_patched_data(data, affine, base_path, idx, positions, prefix, patch_size=200):
    x, y, z = data.shape
    patches = [
        (data[:x//2-1, :, z//2:], f'{idx}_{positions[0]}_{prefix}.nii.gz'),
        (data[x//2:, :, z//2:], f'{idx}_{positions[1]}_{prefix}.nii.gz'),
        (data[:x//2-1, :, :z//2-1], f'{idx}_{positions[2]}_{prefix}.nii.gz'),
        (data[x//2:, :, :z//2-1], f'{idx}_{positions[3]}_{prefix}.nii.gz')
    ]
    for part, (patch, filename) in enumerate(patches):
        # patch[patch > 1024] = 1024
        # patch[patch < -1024] = -1024
        # patch = (patch - np.min(patch)) / (np.max(patch) - np.min(patch))
        # print("min: ", np.min(patch), "max: ", np.max(patch))
        # print("patched shape: ", patch.shape, "name: ", base_path, filename)
        img = nib.Nifti1Image(patch, affine)
        nib.save(img, os.path.join(base_path, positions[part], filename))

# scan_base_path = '../dataset/scan_patched_4'
# airways_base_path = '../dataset/airways_patched_4'
# lungs_base_path = '../dataset/lungs_patched_4'
# create_directory(scan_base_path)
# create_directory(airways_base_path)
# create_directory(lungs_base_path)

base_paths = {
    'scan': '../dataset/scan_patched_4',
    'airways': '../dataset/airways_patched_4',
    'lungs': '../dataset/lungs_patched_4'
}

positions = ['left_upper', 'right_upper', 'left_bottom', 'right_bottom']

for key, base_path in base_paths.items():
    for position in positions:
        path = f'{base_path}/{position}'
        create_directory(path)

for idx, (scan_path, airways_path, lungs_path) in enumerate(zip(cut_scans, cut_airways, cut_lungs)):
    scan = nib.load(scan_path)
    data_scan = scan.get_fdata()
    print("Input scan shape: ", data_scan.shape)
    print("min: ", np.min(data_scan), "max: ", np.max(data_scan))
    affine = scan.affine

    airways = nib.load(airways_path)
    data_airways = airways.get_fdata()
    print("Input label shape: ", data_airways.shape)
    # print("min: ", np.min(data_airways), "max: ", np.max(data_airways))

    lungs = nib.load(lungs_path)
    data_lungs= lungs.get_fdata()
    print("Input label shape: ", data_lungs.shape)
    # print("min: ", np.min(data_lungs), "max: ", np.max(data_lungs))

    save_patched_data(data_scan, affine, base_paths['scan'], idx, positions,  "CT_HR")
    save_patched_data(data_airways, affine, base_paths['airways'], idx, positions, "CT_HR")
    save_patched_data(data_lungs, affine, base_paths['lungs'], idx, positions, "CT_HR")


['/home/pawel/Documents/RISA/3D_segmentation/dataset/airways_cut/1_CT_HR_label_airways.nii.gz', '/home/pawel/Documents/RISA/3D_segmentation/dataset/airways_cut/2_CT_HR_label_airways.nii.gz', '/home/pawel/Documents/RISA/3D_segmentation/dataset/airways_cut/3_CT_HR_label_airways.nii.gz', '/home/pawel/Documents/RISA/3D_segmentation/dataset/airways_cut/4_CT_HR_label_airways.nii.gz', '/home/pawel/Documents/RISA/3D_segmentation/dataset/airways_cut/5_CT_HR_label_airways.nii.gz', '/home/pawel/Documents/RISA/3D_segmentation/dataset/airways_cut/6_CT_HR_label_airways.nii.gz', '/home/pawel/Documents/RISA/3D_segmentation/dataset/airways_cut/7_CT_HR_label_airways.nii.gz', '/home/pawel/Documents/RISA/3D_segmentation/dataset/airways_cut/8_CT_HR_label_airways.nii.gz', '/home/pawel/Documents/RISA/3D_segmentation/dataset/airways_cut/9_CT_HR_label_airways.nii.gz', '/home/pawel/Documents/RISA/3D_segmentation/dataset/airways_cut/10_CT_HR_label_airways.nii.gz', '/home/pawel/Documents/RISA/3D_segmentation/data

In [10]:
def load_and_resample_images(image_paths, new_spacing=(0.75, 0.75, 0.75), order=1):
    resampled_images = []
    for image_path in image_paths:
        img = nib.load(image_path)
        resampled_img = resample_to_output(img, new_spacing, order=order)
        resampled_images.append(resampled_img)
    return resampled_images

def show_slice(images, slice_index):
    fig, ax = plt.subplots(2, 2, figsize=(6, 6))

    slice_index_0 = min(slice_index, images[0].shape[2] - 1)
    slice_index_1 = min(slice_index, images[1].shape[2] - 1)
    slice_index_2 = min(slice_index, images[2].shape[2] - 1)
    slice_index_3 = min(slice_index, images[3].shape[2] - 1)

    ax[0, 0].imshow(images[0].get_fdata()[:, :, slice_index_0], cmap='gray')
    plt.axis('off')

    ax[0, 1].imshow(images[1].get_fdata()[:, :, slice_index_1], cmap='gray')
    plt.axis('off')

    ax[1, 0].imshow(images[2].get_fdata()[:, :, slice_index_2], cmap='gray')
    plt.axis('off')

    ax[1, 1].imshow(images[3].get_fdata()[:, :, slice_index_3], cmap='gray')
    plt.axis('off')
    plt.show()

def interactive_viewer(resampled_images):
    image_shape = resampled_images[0].shape
    max_slices = image_shape[2]

    def view_image(slice_index):
        show_slice(resampled_images, slice_index)

    interact(view_image,
             slice_index=IntSlider(min=0, max=max_slices-1, step=1, description='Slice'))

train_images = ['../dataset/airways_patched_4/left_bottom/2_left_bottom_CT_HR.nii.gz',
                '../dataset/airways_patched_4/left_upper/2_left_upper_CT_HR.nii.gz',
                '../dataset/airways_patched_4/right_bottom/2_right_bottom_CT_HR.nii.gz',
                '../dataset/airways_patched_4/right_upper/2_right_upper_CT_HR.nii.gz']

resampled_images = load_and_resample_images(train_images)

interactive_viewer(resampled_images)

interactive(children=(IntSlider(value=0, description='Slice', max=196), Output()), _dom_classes=('widget-inter…