In [1]:
import nibabel as nib
import os
import pandas as pd
import shutil
import SimpleITK as sitk

from constants import Constants
constants = Constants()

In [2]:
def get_patientID(filename):
    """Return the patient identifier, i.e. the first 14 characters of ``filename``.

    Shorter names are returned unchanged, because slicing never raises.
    """
    id_length = 14
    return filename[0:id_length]


def check_possibilities(filename, possible_substrings):
    """Tell whether ``filename`` contains at least one of ``possible_substrings``.

    Returns ``True`` on the first match and ``False`` when nothing matches
    (including when ``possible_substrings`` is empty).
    """
    return any(candidate in filename for candidate in possible_substrings)


def is_t2(filename):
    """Return True when ``filename`` contains one of the T2 name fragments below."""
    # The first fragment is a prefix of the other two, so it alone decides
    # the outcome; the longer variants are listed for reference.
    t2_fragments = (
        't2_tse_tra',
        't2_tse_tra_320_p2',
        't2_tse_tra_Grappa3',
    )
    return check_possibilities(filename, t2_fragments)


def is_adc(filename):
    """Return True when ``filename`` contains one of the ADC name fragments below."""
    adc_fragments = (
        'ep2d_diff_tra_DYNDIST_ADC',
        'ep2d_diff_tra_DYNDIST_MIX_ADC',
        'diffusie-3Scan-4bval_fs_ADC',
        'ep2d-advdiff-MDDW-12dir_spair_511b_ADC',
        'ep2d-advdiff-3Scan-4bval_spair_511b_ADC',
        'ep2d_DIFF_tra_b50_500_800_1400_alle_spoelen_ADC',
        'diff tra b 50 500 800 WIP511b alle spoelen_ADC',
        'ADC_S3_1',
    )
    return check_possibilities(filename, adc_fragments)

    
def is_dwi(filename):
    """Return True when ``filename`` contains one of the DWI name fragments below.

    Several fragments here are prefixes of ADC fragments (for example
    ``ep2d_diff_tra_DYNDIST`` vs. ``ep2d_diff_tra_DYNDIST_ADC``), so this
    predicate is also True for those ADC file names. ``get_image_type``
    checks the "unknown" and ADC patterns before calling this function.
    """
    dwi_fragments = (
        'ep2d_diff_tra_DYNDIST',
        'ep2d_diff_tra_DYNDIST_MIX',
        'diffusie-3Scan-4bval_fs',
        'ep2d-advdiff-3Scan-high bvalue 100',
        'ep2d-advdiff-3Scan-high bvalue 500',
        'ep2d-advdiff-3Scan-high bvalue 1400',
        'ep2d_DIFF_tra_b50_500_800_1400_alle_spoelen',
        'diff tra b 50 500 800 WIP511b alle spoelen',
        'ep2d_diff_tra2x2_Noise0_FS_DYNDISTCALC_BVAL',
    )
    return check_possibilities(filename, dwi_fragments)
    

def is_unknown(filename):
    """Return True when ``filename`` contains a fragment that is explicitly excluded.

    ``get_image_type`` calls this first and labels such files ``'unknown'``.
    """
    excluded_fragments = (
        'ep2d_diff_tra_DYNDISTCALC_BVAL',
        'ep2d_diff_tra_DYNDIST_MIXCALC_BVAL',
        'tfl_3d PD ref_tra_1.5x1.5_t3',
        'tfl_3d_PD_reference',
        'fsCALC_BVAL',
    )
    return check_possibilities(filename, excluded_fragments)
    

def get_image_type(filename):
    """Classify ``filename`` as ``'T2'``, ``'ADC'``, ``'DWI'``, ``'KTrans'`` or ``'unknown'``.

    The checks run in a fixed order and the first match wins: explicitly
    excluded names, then T2, ADC, DWI, and finally the ``'Ktrans'`` substring.
    """
    # Explicit exclusions take precedence over every other pattern.
    if is_unknown(filename):
        return 'unknown'

    # Order matters: some DWI fragments are prefixes of ADC fragments.
    ordered_checks = (
        ('T2', is_t2),
        ('ADC', is_adc),
        ('DWI', is_dwi),
    )
    for label, matches in ordered_checks:
        if matches(filename):
            return label

    return 'KTrans' if 'Ktrans' in filename else 'unknown'

# Preprocess data

In [3]:
ROOT = constants.dataset_path

# Path to unprocessed data
UNPROCESSED = constants.unprocessed


# Path to data converted using MRIcoGL
UNPROCESSED_NIFTI = constants.unprocessed_nifti
# Path to KTrans data (*.mhd)
UNPROCESSED_KTRANS = constants.unprocessed_ktrans


# Path to information
FINDINGS = constants.findings
LESION_INFO = constants.plotting / 'info_df.pickle'

# Path to processed data
PROCESSED = constants.processed
# Create the output directory together with any missing parents. exist_ok
# makes the cell safe to re-run and removes the check-then-create race.
os.makedirs(PROCESSED, exist_ok=True)

## Sort the data files

In [4]:
# One dict per patient. Every later cell looks a patient up with
# findings_data[int(patient_id[10:])], so the list position MUST equal the
# numeric part of the patient ID.
findings_data = []
patients = []
dwi_files = []

# os.listdir returns entries in arbitrary order; sort them so patients are
# registered in ascending ID order and the position == ID invariant can hold.
for file in sorted(os.listdir(UNPROCESSED_NIFTI)):
    image_type = get_image_type(file)
    # Only classified NIfTI archives are processed (true suffix check).
    if image_type == 'unknown' or not file.endswith('.nii.gz'):
        continue

    patient_id = get_patientID(file)
    idx = int(patient_id[10:])

    if patient_id not in patients:
        # Fail fast instead of silently attaching files to the wrong patient
        # when IDs are not contiguous or do not start at 0.
        if idx != len(findings_data):
            raise ValueError(
                f'Patient {patient_id} would be stored at position '
                f'{len(findings_data)}, but is looked up by index {idx}; '
                f'patient IDs must be contiguous and start at 0.'
            )
        patients.append(patient_id)
        findings_data.append({
            'ProxID'  : patient_id,
            'ClinSig' : [],
            'fid'     : [],
            'pos'     : [],
            'zone'    : [],
            'T2'      : [],
            'ADC'     : [],
            'DWI'     : []
        })

    if image_type == 'DWI':
        # DWI files are only collected here; they are handled separately.
        dwi_files.append(file)
    else:
        src = UNPROCESSED_NIFTI / file
        dest = PROCESSED / file
        findings_data[idx][image_type].append(dest)

        shutil.copyfile(src, dest)

print(
    f'All NIFTI T2-weighted and ADC files were stored at path: {PROCESSED}\n'
    f'{len(dwi_files)} NIFTI DWI files were found.\n'
)

All NIFTI T2-weighted and ADC files were stored at path: data\processed
208 NIFTI DWI files were found.



## Separate DWI images

In [5]:
def save_img(data, path):
    """Write the nibabel image ``data`` to ``path`` as a new NIfTI-1 image.

    The voxel array is read with ``get_fdata()`` and copied, then wrapped in
    a fresh ``Nifti1Image`` that reuses the affine and header of ``data``.
    """
    voxels = data.get_fdata().copy()
    nib.save(nib.Nifti1Image(voxels, data.affine, data.header), path)

def separate_dwi(dwi_list):
    """Store every DWI file in ``PROCESSED`` as one or more 3-D volumes.

    A 4-D file is split along its last axis into ``<name>_<i>.nii.gz`` files;
    a 3-D file is copied unchanged. Each resulting path is appended to the
    ``'DWI'`` list of the matching ``findings_data`` entry. Files with any
    other dimensionality are reported and skipped.

    Parameters
    ----------
    dwi_list : iterable of str
        File names located inside ``UNPROCESSED_NIFTI``.
    """
    count_dwi = 0

    for dwi in dwi_list:
        patient_id = get_patientID(dwi)
        idx = int(patient_id[10:])

        dwi_file = UNPROCESSED_NIFTI / dwi
        dwi_load = nib.load(dwi_file)

        # The shape is available from the image itself, so there is no need
        # to load the full voxel array just to count dimensions.
        dwi_shape = dwi_load.shape
        n_dims = len(dwi_shape)

        if n_dims == 4:
            for i in range(dwi_shape[3]):
                new_dwi_load = dwi_load.slicer[:, :, :, i]

                new_name = dwi.replace('.nii.gz', f'_{i}.nii.gz')
                dest = PROCESSED / new_name

                save_img(new_dwi_load, dest)
                findings_data[idx]['DWI'].append(dest)
                count_dwi += 1
        elif n_dims == 3:
            dest = PROCESSED / dwi
            shutil.copyfile(dwi_file, dest)
            findings_data[idx]['DWI'].append(dest)
            count_dwi += 1
        else:
            # Previously such files were dropped without any trace.
            print(f'Skipped {dwi}: unsupported number of dimensions ({n_dims}).')

    print(f'Resulted {count_dwi} DWI files out of {len(dwi_list)} compressed DWI files.')

separate_dwi(dwi_files)

Resulted 620 DWI files out of 208 compressed DWI files.


## Convert KTrans

In [6]:
def convert_mhd_to_nifti(file, input_path, output_path):
    """Convert the image ``file`` found in ``input_path`` to ``.nii.gz``.

    Parameters
    ----------
    file : str
        Name of the ``.mhd`` file.
    input_path : str or path-like
        Directory that contains ``file``.
    output_path : path-like supporting ``/``
        Directory that receives the converted image.

    Returns
    -------
    The path of the written file (``output_path / <name>.nii.gz``).
    """
    mhd_image = os.path.join(input_path, file)
    img = sitk.ReadImage(mhd_image)

    nii_filename = file.replace('.mhd', '.nii.gz')
    img_nii_path = output_path / nii_filename

    # Write straight to the destination. The earlier write-then-move via the
    # current working directory needed a writable CWD and could overwrite an
    # unrelated file of the same name there.
    sitk.WriteImage(img, str(img_nii_path))
    return img_nii_path


# Walk the KTrans tree and convert every matching *.mhd image.
for root, _, files in os.walk(UNPROCESSED_KTRANS):
    for file in files:
        image_type = get_image_type(file)
        # Skip everything that is not a KTrans file with 'mhd' in its name.
        if image_type != 'KTrans' or 'mhd' not in file:
            continue

        patient_id = get_patientID(file)
        idx = int(patient_id[10:])
        # The converted path is stored directly (not in a list) under 'KTrans'.
        findings_data[idx][image_type] = convert_mhd_to_nifti(file, root, PROCESSED)

print(f'All converted KTrans files were stored at path: {PROCESSED}')

All converted KTrans files were stored at path: data\processed


## Combine information

In [7]:
findings_df = pd.read_csv(FINDINGS)
lesion_info_df = pd.read_pickle(LESION_INFO)

for i, patient in enumerate(findings_df.ProxID):
    idx = int(patient[10:])

    findings_row = findings_df.iloc[i]
    lesion_info_row = lesion_info_df.iloc[idx]

    patient_entry = findings_data[idx]

    # One value per finding is accumulated in each of these per-patient lists.
    for key in ('ClinSig', 'fid', 'pos', 'zone'):
        patient_entry[key].append(findings_row[key])

    # Voxel spacing and slices containing the prostate lesion
    # (overwritten with the same lesion-info row for every finding).
    patient_entry.update(
        spacing=lesion_info_row.spacing,
        slices=lesion_info_row.fg_slices,
    )

findings = pd.DataFrame(
    findings_data,
    columns=[
        'ProxID',
        'ClinSig',
        'fid',
        'pos',
        'zone',
        'spacing',
        'slices',
        'T2',
        'ADC',
        'DWI',
        'KTrans',
    ],
)

## Create pickle

In [8]:
# Persist the combined findings table at the location given by the constants.
findings_pickle = constants.labels_pkl
findings.to_pickle(findings_pickle)

print(f'The findings information can be found at: {findings_pickle}')

The findings information can be found at: data\processed\lesion_findings.pickle
