In [None]:
from datasets import load_dataset
import nibabel as nib
from concurrent.futures import ThreadPoolExecutor
from matplotlib import pyplot as plt
import numpy as np
import os
import glob
from natsort import natsorted
from ipywidgets import interact, IntSlider
from monai.apps import download_and_extract
from monai.transforms import (
    Compose,
    CropForegroundd,
    LoadImaged,
    EnsureChannelFirstd,
    Orientationd,
)
from monai.data import Dataset, DataLoader, CacheDataset

In [None]:
# Root working directory for the AeroPath download and extracted files.
# Set the AEROPATH_DATA_DIR environment variable to run the notebook on another
# machine; the default keeps the original location.
data_dir = os.environ.get("AEROPATH_DATA_DIR", '/home/gasyna/RiSA_S3/3D_segmentation/')

In [None]:
# AeroPath archive on Zenodo and the MD5 checksum handed to download_and_extract.
resource = "https://zenodo.org/records/10069289/files/AeroPath.zip?download=1"
md5 = "3fd5106c175c85d60eaece220f5dfd87"

compressed_file = os.path.join(data_dir, "AeroPath.zip")
# Guard on the extracted dataset folder rather than on data_dir itself: data_dir
# usually exists already (it is the project working directory), which would make
# the download be skipped even though the data was never fetched.
extracted_dir = os.path.join(data_dir, "AeroPath")
if not os.path.exists(extracted_dir):
    download_and_extract(resource, compressed_file, data_dir, md5)

In [None]:

# Glob patterns (relative to data_dir) for the three kinds of file in each case,
# listed in the order they are searched and reported.
_relative_globs = {
    "airways": 'AeroPath/**/*_CT_HR_label_airways.nii.gz',
    "lungs": 'AeroPath/**/*_CT_HR_label_lungs.nii.gz',
    "scans": 'AeroPath/**/*_CT_HR.nii.gz',
}

# Recursively collect the matches for every kind, naturally sorted, and print
# the count followed by the paths.
_matches = {}
for _kind, _relative in _relative_globs.items():
    pattern = os.path.join(data_dir, _relative)
    _matches[_kind] = natsorted(glob.glob(pattern, recursive=True))
    print(len(_matches[_kind]), _matches[_kind])

train_airways = _matches["airways"]
train_lungs = _matches["lungs"]
train_scans = _matches["scans"]

In [None]:
def create_directory(path: str) -> None:
    """Create ``path`` (including missing parent directories) if it does not exist.

    Calling this on a directory that already exists is a no-op.

    Args:
        path: Directory to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # exist_ok=True avoids the check-then-create race of testing os.path.exists first.
    os.makedirs(path, exist_ok=True)

In [None]:
# Preprocessing applied to every case: load the three volumes, move the channel
# axis first, reorient to RAS, then crop all three using the `lungs` volume as
# the foreground source (margin of 2).
# NOTE: there must be no trailing comma after the closing parenthesis; a comma
# would turn this into a one-element tuple instead of a Compose.
prepare_data_transforms = Compose(
    [
    LoadImaged(keys=["scan", "lungs",  "airways"]),  # Load the NIfTI files
    EnsureChannelFirstd(keys=["scan", "lungs",  "airways"]),
    Orientationd(keys=["scan", "lungs",  "airways"], axcodes="RAS"),
    CropForegroundd(keys=["scan", "lungs",  "airways"], margin = 2, source_key="lungs"),
    ]
)

# zip() silently stops at the shortest list, so a missing file would misalign or
# drop cases without warning; fail loudly instead.
if not (len(train_scans) == len(train_lungs) == len(train_airways)):
    raise ValueError(
        "Mismatched file counts: "
        f"{len(train_scans)} scans, {len(train_lungs)} lung labels, "
        f"{len(train_airways)} airway labels"
    )

# One dict per case, pairing the i-th scan with the i-th lung and airway label.
data_dicts = [
    {"scan": scan, "lungs": lungs, "airways": airways} for scan, lungs, airways in zip(train_scans, train_lungs, train_airways)
]


transformed_ds = Dataset(data=data_dicts, transform=prepare_data_transforms)


In [None]:
import nibabel as nib

# Write each transformed case to ../dataset_cut/<case dir>/ under the original
# file names.
for data, train_airway, train_lung, train_scan in zip(transformed_ds, train_airways, train_lungs, train_scans):

    # The volumes were reoriented to RAS and cropped, so the affine stored in the
    # source files no longer describes these voxels. Use the affine carried by each
    # transformed tensor instead.
    # NOTE(review): assumes MONAI metadata tracking is enabled (the default), so each
    # item exposes `.affine` -- confirm for the installed MONAI version.
    airway_affine = np.asarray(data["airways"].affine, dtype=np.float64)
    lung_affine   = np.asarray(data["lungs"].affine, dtype=np.float64)
    scan_affine   = np.asarray(data["scan"].affine, dtype=np.float64)

    # Drop the leading channel axis and convert to NumPy for nibabel.
    scan, lungs, airways = data["scan"][0, :, :, :], data["lungs"][0, :, :, :], data["airways"][0, :, :, :]
    scan, lungs, airways = scan.numpy(), lungs.numpy(), airways.numpy()

    # The output folder is named after the directory containing the airway label.
    dirname = os.path.basename(os.path.dirname(train_airway))
    out_dir = os.path.join('../dataset_cut', dirname)
    create_directory(out_dir)

    # Same save order as before: airways, lungs, scan.
    exports = (
        (airways, airway_affine, train_airway),
        (lungs, lung_affine, train_lung),
        (scan, scan_affine, train_scan),
    )
    for voxels, affine, source_path in exports:
        out_path = os.path.join(out_dir, os.path.basename(source_path))
        nib.save(nib.Nifti1Image(voxels, affine), out_path)
