# Data preprocessing info for nifty data

I order to preprocess data from nifty to be able to use it for evaluation using a trained model the following steps need to be followed (based on information in readme and additional info regarding nifty files specifically).

Change the directories in the file "config.py" to correspond to your dataset file paths. Then run 
> preprocess.py 

with the function calling to preprocess_nifty (line 765). 

Once preprocess.py has been run, bounding box files have been created. 

Then change the paths below to correspond to the paths to the created bounding box data. Then run the following boxes to create a csv file in this script which is used during evaluation. 

Then you can run  

> python test.py eval

In [7]:
# import numpy as np
import os
import shutil
from utils.util import create_csv
import SimpleITK as sitk
import numpy as np

In [22]:
# Move data to correct folder prior to running preprocessing script

data_path = r'/media/terese/New Volume1/Lung_cancer_project/Nifty_data_LIDC-IDRI/LIDC-Fidan-3'
out_path = r'/media/terese/New Volume1/Lung_cancer_project/Nifty_data_LIDC-IDRI/LIDC-Fidan-3/temp_cases_Fidanpreprocessed_test'

CT_path = os.path.join(data_path, 'LIDC-NIfTi')
nod_path = os.path.join(data_path, 'LIDC-NIfTi-Annotations')
lung_path = os.path.join(data_path, 'LIDC-NIfTi-Lung-Masks')

CT_out_path = os.path.join(out_path,'CT_scans' )
nod_out_path = os.path.join(out_path,'nodule_segs' )
lung_out_path = os.path.join(out_path,'lung_segs' )

skip_cases = ['LIDC-IDRI-0146', 'LIDC-IDRI-0418', 'LIDC-IDRI-0514' ,'LIDC-IDRI-0672', 'LIDC-IDRI-0979' , 'LIDC-IDRI-0123' ,'LIDC-IDRI-0267','LIDC-IDRI-0085']
        
i=0
for root, _, files in os.walk(CT_path):
    for file in files:
        print('File name:', file)
        idx = file.find('.')
        case_num = file[:idx]
        path_ct = os.path.join(root, file)
        stop = False
        for case_name in skip_cases:
            if case_name in path_ct:
                stop = True
                break
        if stop:
            continue


        idx = path_ct.find('.')
        idx2 = path_ct.find('LIDC-NIfTi/')
        path_part = path_ct[idx2+11:idx]
        # print(path_part)
        path_lung = os.path.join(lung_path, path_part + '-lung-mask.nii.gz')
        path_nod = os.path.join(nod_path, path_part + '_nodule_mask.nii.gz')
        # print('Lung\n', path_lung, '\nNod\n', path_nod)
        if os.path.exists(path_lung) and os.path.exists(path_nod):
            new_ct_path = os.path.join(CT_out_path, file)
            new_lung_path = os.path.join(lung_out_path, case_num + '-lung-mask.nii.gz')
            new_nod_path = os.path.join(nod_out_path, case_num + '_nodule_mask.nii.gz')

            print('Copying from path\n', path_ct, '\nTo path', new_ct_path)
            # shutil.copy(path_ct, new_ct_path)
            # shutil.copy(path_lung, new_lung_path)
            # shutil.copy(path_nod, new_nod_path)
            # if i >100:
            #     end
            # i+=1


File name: 01-01-2000-30178.nii.gz
File name: 01-01-2000-94866.nii.gz
File name: 01-01-2000-30141.nii.gz
File name: 01-01-2000-38612.nii.gz
Copying from path
 /media/terese/New Volume1/Lung_cancer_project/Nifty_data_LIDC-IDRI/LIDC-Fidan-3/LIDC-NIfTi/LIDC-IDRI-0014/01-01-2000-38612.nii.gz 
To path /media/terese/New Volume1/Lung_cancer_project/Nifty_data_LIDC-IDRI/LIDC-Fidan-3/temp_cases_Fidanpreprocessed_test/CT_scans/01-01-2000-38612.nii.gz
File name: 01-01-2000-11146.nii.gz
Copying from path
 /media/terese/New Volume1/Lung_cancer_project/Nifty_data_LIDC-IDRI/LIDC-Fidan-3/LIDC-NIfTi/LIDC-IDRI-0015/01-01-2000-11146.nii.gz 
To path /media/terese/New Volume1/Lung_cancer_project/Nifty_data_LIDC-IDRI/LIDC-Fidan-3/temp_cases_Fidanpreprocessed_test/CT_scans/01-01-2000-11146.nii.gz
File name: 01-01-2000-34278.nii.gz
Copying from path
 /media/terese/New Volume1/Lung_cancer_project/Nifty_data_LIDC-IDRI/LIDC-Fidan-3/LIDC-NIfTi/LIDC-IDRI-0016/01-01-2000-34278.nii.gz 
To path /media/terese/New Volu

'/media/terese/New Volume1/Lung_cancer_project/Nifty_data_LIDC-IDRI/LIDC-Fidan-3/LIDC-NIfTi-Lung-Masks/LIDC-IDRI-0014/01-01-2000-38612-lung-mask.nii.gz'

## Now run preprocessing script with the above data

In [8]:
# File paths of data of bounding boxes and corresponding CT scans
"""
:param data_names: lists of patient names
:param path_save: path to save the submission file
:param dir_bbox: directory saving predicted bounded boxes
:param preprocessed_dir: directory saving preprocessing results, should include *_origin.npy, *_spacing.npy,
                    *_ebox_origin.npy
:return: None
"""
image_data_path = r'/media/terese/New Volume1/Lung_cancer_project/Nifty_data_LIDC-IDRI/LIDC-Fidan-3/temp_cases_Fidanpreprocessed_test/CT_scans'

data_names = []
for file_name in os.listdir(image_data_path):
    idx = file_name.find('.')
    case_num = file_name[:idx]
    data_names.append(case_num)

path_save = r'/media/terese/New Volume1/Lung_cancer_project/Nifty_data_LIDC-IDRI/LIDC-Fidan-3/temp_cases_Fidanpreprocessed_test/bbox_data.csv'
dir_bbox = r'/media/terese/New Volume1/Lung_cancer_project/Nifty_data_LIDC-IDRI/LIDC-Fidan-3/temp_cases_Fidanpreprocessed_test/preprocessed_test'
preprocessed_dir = r'/media/terese/New Volume1/Lung_cancer_project/Nifty_data_LIDC-IDRI/LIDC-Fidan-3/temp_cases_Fidanpreprocessed_test/preprocessed_test'

In [9]:
# Saves csv file of bounding box data
create_csv(data_names, path_save, dir_bbox, preprocessed_dir, postfix='bboxes')

Saving submission to /media/terese/New Volume1/Lung_cancer_project/Nifty_data_LIDC-IDRI/LIDC-Fidan-3/temp_cases_Fidanpreprocessed_test/bbox_data.csv


In [5]:

data_path = r'/media/terese/New Volume1/Lung_cancer_project/Nifty_data_LIDC-IDRI/LIDC-Fidan-3'
out_path = r'/media/terese/New Volume1/Lung_cancer_project/Nifty_data_LIDC-IDRI/LIDC-Fidan-3/temp_cases_Fidanpreprocessed_test'

CT_path = os.path.join(data_path, 'LIDC-NIfTi')
nod_path = os.path.join(data_path, 'LIDC-NIfTi-Annotations')
lung_path = os.path.join(data_path, 'LIDC-NIfTi-Lung-Masks')

CT_out_path = os.path.join(out_path,'CT_scans' )
nod_out_path = os.path.join(out_path,'nodule_segs' )
lung_out_path = os.path.join(out_path,'lung_segs' )

skip_cases = ['LIDC-IDRI-0146', 'LIDC-IDRI-0418', 'LIDC-IDRI-0514' ,'LIDC-IDRI-0672', 'LIDC-IDRI-0979' , 'LIDC-IDRI-0123' ,'LIDC-IDRI-0267','LIDC-IDRI-0085']

skip = ['LIDC-IDRI-0001']
i=0
size_of_cases = []
for root, _, files in os.walk(CT_path):
    for file in files:
        print('File name:', file)
        idx = file.find('.')
        case_num = file[:idx]
        path_ct = os.path.join(root, file)
        stop = False
        for case_name in skip:
            if case_name in path_ct:
                stop = True
                break
        if stop:
            continue
        


        idx = path_ct.find('.')
        idx2 = path_ct.find('LIDC-NIfTi/')
        path_part = path_ct[idx2+11:idx]
        # print(path_part)
        path_lung = os.path.join(lung_path, path_part + '-lung-mask.nii.gz')
        path_nod = os.path.join(nod_path, path_part + '_nodule_mask.nii.gz')

        if os.path.exists(path_lung) and os.path.exists(path_nod):
            print(path_ct)
            CT = sitk.ReadImage(path_ct)
            lung = sitk.ReadImage(path_lung)
            nod = sitk.ReadImage(path_nod)

            if (CT.GetSize() != lung.GetSize()) or (CT.GetSize() != nod.GetSize()):
                size_of_cases.append([case_num, CT.GetSize(), lung.GetSize(), nod.GetSize()])

'/media/terese/New Volume1/Lung_cancer_project/Nifty_data_LIDC-IDRI/LIDC-Fidan-3/LIDC-NIfTi/LIDC-IDRI-0001/01-01-2000-30178.nii.gz'
'/media/terese/New Volume1/Lung_cancer_project/Nifty_data_LIDC-IDRI/LIDC-Fidan-3/LIDC-NIfTi/LIDC-IDRI-0001/01-01-2000-30178.nii.gz'




File name: 01-01-2000-30178.nii.gz
File name: 01-01-2000-94866.nii.gz
File name: 01-01-2000-30141.nii.gz
File name: 01-01-2000-38612.nii.gz
/media/terese/New Volume1/Lung_cancer_project/Nifty_data_LIDC-IDRI/LIDC-Fidan-3/LIDC-NIfTi/LIDC-IDRI-0014/01-01-2000-38612.nii.gz


RuntimeError: Exception thrown in SimpleITK ImageFileReader_Execute: /tmp/SimpleITK-build/ITK/Modules/IO/NIFTI/src/itkNiftiImageIO.cxx:1980:
ITK ERROR: ITK only supports orthonormal direction cosines.  No orthonormal definition found!

In [6]:
p = r'/media/terese/New Volume1/Lung_cancer_project/Nifty_data_LIDC-IDRI/LIDC-Fidan-3/LIDC-NIfTi/LIDC-IDRI-0003/01-01-2000-94866.nii.gz'
# p = r'/media/terese/New Volume1/Lung_cancer_project/Nifty_data_LIDC-IDRI/LIDC-Fidan-3/LIDC-NIfTi/LIDC-IDRI-0001/01-01-2000-30178.nii.gz'


CT = sitk.ReadImage(p, sitk.sitkFloat32)