# 00_DICOM_to_NIFTI.ipynb

This notebook converts all the CT scans from DICOM format (.dcm) to NIFTI format (.nii).

Original DICOM files are located in the raw data folder (`raw_data`). The converted NIFTI files are saved in a new directory named `CT_data/CT_NIFTI_full`. 

P.S. `*/*/DICOM/*/*/*/*` is in the format of:

`normal_pat/PatientName/DICOM/basename1/basename2/basename3/filename_3d`

- `normal_pat`: abnormal / normal_pat
- `PatientName`: Patient1 / Patient2 / ...
- `DICOM`: DICOM / other patient information (only `DICOM` is considered)
- `basename1`; `basename2`; `basename3`: Pseudo case names of the patient
- `filename_3d`: Pseudo name of the CT scan

In [None]:
import sys
# Put the repository root first on the module search path for this notebook session.
# Replace the placeholder below with a string holding the local path.
repo_dir = <PATH TO THIS REPO> # local path to this repo.
sys.path.insert(0, repo_dir)

In [1]:
import dicom2nifti
import os
import tqdm
import glob
import pandas as pd

In [2]:
# Locating the folder paths used in this script.
work_dir = <PATH TO project_dir> # path to the directory of the current project (project master path)
data_path = work_dir + 'raw_data/' # path to original DICOM dataset
target_path = work_dir + 'CT_data/' + "CT_NIFTI_full/" # path to the converted NIFTI dataset 
target_abnormal_path = target_path + "abnormal/" # path to the converted NIFTI dataset (abnormal scans)
target_normal_path = target_path + "normal/" # path to the converted NIFTI dataset (normal scans)

In [None]:
# Set up the list that records failed conversions.
# If an earlier run left a fail_list.csv in the target folder, continue from
# its "fail_scan" column; otherwise start with an empty list.
if not os.path.exists(target_path+"fail_list.csv"):
    fail_list = []
else:
    # the first CSV column is the index written by a previous DataFrame.to_csv
    df_fail_orig = pd.read_csv(target_path+"fail_list.csv", index_col=[0])
    fail_list = df_fail_orig["fail_scan"].to_list()

## Convert the abnormal scans

In [None]:
# Loop through all the abnormal cases and convert each DICOM series folder to
# "<series folder name>.nii" inside target_abnormal_path.
# Series that cannot be converted are printed and appended to fail_list.

# Make sure the output folder exists before anything is written into it.
os.makedirs(target_abnormal_path, exist_ok=True)

for series_path in tqdm.tqdm(glob.glob(data_path + 'abnormal/*/DICOM/*/*/*/*')):
    series = series_path
    if not os.path.isdir(series):
        # the glob pattern also matches plain files; os.listdir would raise on them
        continue
    number_of_instance = len(os.listdir(series))
    if number_of_instance <= 3: # avoid converting localizer
        continue
    seriescode = os.path.basename(series)
    seriesfilename = seriescode + ".nii"
    output_file = target_abnormal_path + seriesfilename
    if os.path.isfile(output_file):
        # already converted by a previous run
        continue
    try:
        dicom2nifti.convert_dicom.dicom_series_to_nifti(series, output_file)
    except Exception as err:
        # `Exception` (not a bare except) so that Ctrl-C / kernel interrupt
        # still stops the loop instead of being logged as a failed scan.
        print('Can not convert: ', seriescode, '-', repr(err))
        fail_list.append(seriescode)

## Convert the normal scans

In [None]:
# Loop through all the normal cases and convert each DICOM series folder to
# "<series folder name>.nii" inside target_normal_path.
# Series that cannot be converted are printed and appended to fail_list.

# Make sure the output folder exists before anything is written into it.
os.makedirs(target_normal_path, exist_ok=True)

for series_path in tqdm.tqdm(glob.glob(data_path + 'normal/*/DICOM/*/*/*/*')):
    series = series_path
    if not os.path.isdir(series):
        # the glob pattern also matches plain files; os.listdir would raise on them
        continue
    number_of_instance = len(os.listdir(series))
    if number_of_instance <= 3: # avoid converting localizer
        continue
    seriescode = os.path.basename(series)
    seriesfilename = seriescode + ".nii"
    output_file = target_normal_path + seriesfilename
    if os.path.isfile(output_file):
        # already converted by a previous run
        continue
    try:
        dicom2nifti.convert_dicom.dicom_series_to_nifti(series, output_file)
    except Exception as err:
        # `Exception` (not a bare except) so that Ctrl-C / kernel interrupt
        # still stops the loop instead of being logged as a failed scan.
        print('Can not convert: ', seriescode, '-', repr(err))
        fail_list.append(seriescode)

## Record the fail scans
Record which scans failed during the conversion. The names of the failed scans are saved in `fail_list.csv`.

In [None]:
# Record the fail list of files.
# Duplicates are removed (a scan can fail again on a re-run) and the names are
# sorted so the CSV content is the same from run to run; key=str keeps the
# sort working if names reloaded from an earlier CSV were parsed as numbers.
# The data is passed as an inline literal so the builtin `dict` is not
# rebound for the rest of the notebook session.
os.makedirs(target_path, exist_ok=True)  # the CSV is written into the target folder
df_fail = pd.DataFrame({"fail_scan": sorted(set(fail_list), key=str)})
df_fail.to_csv(target_path+"fail_list.csv")