In [1]:
import os
from utils.dicom_to_nifti import dicom_read_LIDC_CT
from utils import convert
import SimpleITK as sitk
import numpy as np
import rt_utils

In [2]:
# Root directory of the original data. It should contain one folder per case, each holding
# the DICOM CT series and the RT struct file used for the masks.
# Set the ISP_DICOM_DIR environment variable to point somewhere else without editing this cell;
# the path below is only the default.
dir_base = os.environ.get(
    'ISP_DICOM_DIR',
    r'/media/terese/New Volume1/Lung_cancer_project/ISP_dataset/DICOM_Data',
)

# Root directory the processed NIfTI files are saved to. A case-based folder structure
# similar to the one in dir_base is created inside it.
# Set the ISP_NIFTI_DIR environment variable to override the default below.
output = os.environ.get(
    'ISP_NIFTI_DIR',
    r'/media/terese/New Volume1/Lung_cancer_project/ISP_dataset/NIFTY_Data',
)


In [None]:
## Run this cell to process all data in "dir_base" and save the output to "output".
## For 200 cases this process will take 2-3 hours.
##
## Expected layout: dir_base/<case number>/<sub-folder>/<files>. Sub-folders are told apart
## by their file count: a single file is an RT struct file, many files are the CT series.
## Cases without RT struct files, and folders holding several RT struct files, are not
## supported yet. They are skipped and listed in the summary printed at the end.

# A sub-folder with more than one file but fewer than this many is assumed to hold several
# RT struct files rather than a CT series.
MIN_CT_SERIES_FILES = 100
# Case folders whose name is longer than the bare case number carry no RT struct files.
CASE_NUM_LENGTH = 5
# '00184' was indicated to not have the correct CT scan associated with it.
# Need to figure out the issue before it can be processed.
EXCLUDED_CASES = {'00184'}

no_rt_struct_files = []
multi_rt_struct = []
multi_ct = []

# Sorted so that logs and the choice of CT folder are reproducible; os.listdir order is arbitrary.
for case_folder in sorted(os.listdir(dir_base)):
    case_dir = os.path.join(dir_base, case_folder)
    if not os.path.isdir(case_dir):
        # Stray files next to the case folders are not cases.
        continue

    if len(case_folder) > CASE_NUM_LENGTH:
        # The folder name is more than just the case number, so it has no structure files.
        # Only record the case number (the part before the first underscore) and move on.
        case_num = case_folder.partition('_')[0]
        no_rt_struct_files.append(case_num)
        continue

    case_num = case_folder
    print('case num: ', case_folder)
    if case_num in EXCLUDED_CASES:
        continue

    # ---- Step 1: classify the sub-folders of this case --------------------------------
    # Everything is reset per case and collected before any processing, so the result does
    # not depend on the order in which the folders are listed and no path from a previous
    # case can leak into this one.
    CT_series_path = None
    rt_struct_paths = []
    for data_f in sorted(os.listdir(case_dir)):
        data_path = os.path.join(case_dir, data_f)
        if not os.path.isdir(data_path):
            continue
        files = os.listdir(data_path)

        if len(files) == 1:
            # If there is only one file it is the RT struct file.
            rt_struct_paths.append(os.path.join(data_path, files[0]))
        elif 1 < len(files) < MIN_CT_SERIES_FILES:
            # Too few files for a CT series: several RT struct files. Not supported, skip
            # the folder instead of treating it as a CT series.
            print(f'Case: {case_num} has more RT struct files than 1! N files in folder data_f: {len(files)}, {files}')
            multi_rt_struct.append(case_num)
        elif len(files) >= MIN_CT_SERIES_FILES:
            if CT_series_path is None:
                CT_series_path = data_path
            else:
                # Only the first CT folder is used; additional ones are reported.
                print(f'Case {case_num} contains several folders with more than one file. Excluding from processing')
                multi_ct.append(case_num)

    if CT_series_path is None:
        # Neither the image nor the masks can be produced without the CT series.
        print(f'Case {case_num} has no CT series folder. Skipping case')
        continue

    # ---- Step 2: convert the CT series unless it was converted in an earlier run -------
    case_path = os.path.join(output, case_num)
    image_dir = os.path.join(case_path, 'images')
    image_path = os.path.join(image_dir, case_num + '_image.nii.gz')
    os.makedirs(image_dir, exist_ok=True)

    CT_image = None
    if not os.path.exists(image_path):
        CT_image = dicom_read_LIDC_CT(CT_series_path, output_dir=image_dir, case_num=case_num, save_ims=True)

    if not rt_struct_paths:
        continue

    # ---- Step 3: export every ROI of the RT struct file(s) as a mask -------------------
    if os.path.exists(image_path):
        # Use the saved image as the reference image for the masks.
        CT_image = sitk.ReadImage(image_path)

    output_mask_dir = os.path.join(case_path, 'labels')
    os.makedirs(output_mask_dir, exist_ok=True)

    # Nodules are numbered per case in the order they are found. Counting here, rather than
    # checking which files already exist, makes a re-run overwrite the same files instead
    # of adding duplicates under new numbers.
    nodule_count = 0
    for RT_struct_path in rt_struct_paths:
        print(f'Case {case_num} RT struct file')
        rtstruct = rt_utils.RTStructBuilder.create_from(
            dicom_series_path=CT_series_path,
            rt_struct_path=RT_struct_path,
        )
        struct_names = rtstruct.get_roi_names()
        print(f'Number of files in rt struct {len(struct_names)}')

        for struct_num, struct in enumerate(struct_names):
            if 'Nodule' in struct:
                nodule_count += 1
                save_name = 'Nodule_seg_' + str(nodule_count)
            elif 'Lung' in struct:
                # Keeps the first five characters of the ROI name as prefix
                # (presumably the side of the lung; verify against the ROI names in the data).
                save_name = struct[:5] + '_lung_seg'
            else:
                # Must be a real exception type: raising a plain string is a TypeError.
                raise ValueError(f'Image is neither nodule nor lung. Struct name: {struct}')

            # NOTE(review): the ROI is looked up by its position in get_roi_names();
            # confirm that get_roi_mask_by_number uses the same 0-based numbering.
            struct_array = rtstruct.get_roi_mask_by_number(struct_num)
            struct_sitk = convert.correct_struct_image(struct_array, CT_image)

            filename = case_num + "_" + save_name + ".nii.gz"
            filepath = os.path.join(output_mask_dir, filename)
            print(f'Saving struct {save_name} to {filepath}')
            writer = sitk.ImageFileWriter()
            writer.SetFileName(filepath)
            writer.Execute(struct_sitk)

print(f'\n\n{len(no_rt_struct_files)} Files with no RT struct file: {no_rt_struct_files}')
print(f'\n{len(multi_rt_struct)} files with more than one RT struct file: {multi_rt_struct}')
if len(multi_ct)> 0:
    print(f'\n\n{len(multi_ct)} Files with multiple CT files: {multi_ct}')