In [1]:
import os
import pandas as pd
import shutil
import SimpleITK as sitk

from constants import Constants
constants = Constants()

from pathlib import Path

In [2]:
def get_patientID(filename):
    """Return the patient identifier that prefixes ``filename``.

    The identifier is taken to be the first 14 characters of the name
    (the length of an ID such as ``ProstateX-0000``). Names shorter than
    that are returned unchanged, since slicing never raises.
    """
    id_length = 14
    patient_id = filename[:id_length]
    return patient_id


def get_excluded_filenames(filename):
    """Report whether ``filename`` is free of every excluded marker.

    Note that, despite the name, the result is ``True`` when the file
    should be KEPT (none of the markers occurs in it) and ``False`` as
    soon as one marker is found. Matching is a case-sensitive substring
    test.
    """
    excluded_markers = (
        'traa',
        'trab',
        'loc',
        'ADCa',
        'Noise0',
        'S3_2',
        'Eq_1',
        'copy',
    )
    for marker in excluded_markers:
        if marker in filename:
            return False
    return True


def get_image_type(filename):
    """Classify ``filename`` as ``'T2'``, ``'ADC'``, ``'KTrans'`` or ``'unknown'``.

    The rules are tried in this order:

    * ``'T2'``     - the name contains both ``t2`` and ``tra`` and passes
      the exclusion check.
    * ``'ADC'``    - the name contains ``ADC`` and passes the exclusion check.
    * ``'KTrans'`` - the name contains ``Ktrans`` (no exclusion check).
    * ``'unknown'`` - anything else.
    """
    # True when the name carries none of the excluded markers.
    is_usable = get_excluded_filenames(filename)

    if is_usable:
        if 't2' in filename and 'tra' in filename:
            return 'T2'
        if 'ADC' in filename:
            return 'ADC'

    return 'KTrans' if 'Ktrans' in filename else 'unknown'

# Preprocess data

In [3]:
# Root directory of the dataset; every other path is derived from it.
ROOT = constants.dataset_path

# Path to unprocessed data
UNPROCESSED = ROOT / 'unprocessed'


# Path to data converted using MRIcoGL
UNPROCESSED_NIFTI = UNPROCESSED / 'NIFTI'
# Path to KTrans data (*.mhd)
UNPROCESSED_KTRANS = UNPROCESSED / 'KTrans'


# Path to Findings.csv
FINDINGS = ROOT / 'ProstateX-Findings-Train.csv'


# Path to processed data
PROCESSED = ROOT / 'processed'
# exist_ok makes the cell safe to re-run; parents=True also creates ROOT
# when it does not exist yet instead of failing.
PROCESSED.mkdir(parents=True, exist_ok=True)

## Sort the data files

In [4]:
# One record (dict) per patient, stored at the position given by the numeric
# part of the patient ID, plus the list of patient IDs already seen.
findings_data = []
patients = []

# os.listdir() returns entries in arbitrary order, while the records are
# addressed by patient number. Sorting the names makes the zero-padded
# patient IDs arrive in ascending order, so positions and numbers line up.
for file in sorted(os.listdir(UNPROCESSED_NIFTI)):
    image_type = get_image_type(file)
    if image_type != 'unknown' and 'nii.gz' in file:
        patient_id = get_patientID(file)

        # Characters after the 10-character prefix are the patient number.
        idx = int(patient_id[10:])
        if patient_id not in patients:
            # A new patient must land exactly at the end of the list;
            # otherwise IDs are not contiguous and indexing would be wrong.
            if idx != len(findings_data):
                raise IndexError(
                    f'Patient {patient_id} maps to position {idx}, but the next '
                    f'free position is {len(findings_data)}; patient IDs must '
                    'be contiguous and start at 0.'
                )
            patients.append(patient_id)
            findings_data.append({'ProxID': patient_id, 'ClinSig': []})

        src = UNPROCESSED_NIFTI / file
        dest = PROCESSED / file
        findings_data[idx][image_type] = dest

        shutil.copyfile(src, dest)

print(f'All NIFTI T2-weighted and ADC files were stored at path: {PROCESSED}')

All NIFTI T2-weighted and ADC files were stored at path: data\processed


## Convert KTrans

In [5]:
def convert_mhd_to_nifti(file, input_path, output_path):
    """Convert one ``.mhd`` image to a ``.nii.gz`` file and return its path.

    Parameters
    ----------
    file : str
        Name of the ``.mhd`` file (without directory).
    input_path : str or path-like
        Directory that contains ``file``.
    output_path : str or path-like
        Directory in which the converted image is written.

    Returns
    -------
    pathlib.Path
        ``output_path`` joined with ``file`` where ``.mhd`` has been
        replaced by ``.nii.gz``.
    """
    mhd_image = os.path.join(input_path, file)
    img = sitk.ReadImage(mhd_image)

    nii_filename = file.replace('.mhd', '.nii.gz')
    img_nii_path = Path(output_path) / nii_filename

    # Write straight to the destination. Going through the current working
    # directory first would require it to be writable and could overwrite
    # an unrelated file that happens to have the same name.
    sitk.WriteImage(img, str(img_nii_path))
    return img_nii_path


# Walk the KTrans tree, convert every KTrans *.mhd image to NIfTI inside
# PROCESSED and record the converted path on the matching patient record.
for root, _, files in os.walk(UNPROCESSED_KTRANS):
    for file in files:
        image_type = get_image_type(file)
        if image_type != 'KTrans' or 'mhd' not in file:
            continue

        patient_id = get_patientID(file)
        # Characters after the 10-character prefix are the patient number.
        idx = int(patient_id[10:])
        converted_path = convert_mhd_to_nifti(file, root, PROCESSED)
        findings_data[idx][image_type] = converted_path

print(f'All converted KTrans files were stored at path: {PROCESSED}')

All converted KTrans files were stored at path: data\processed


## Create pickle 

In [6]:
findings_pickle = PROCESSED / 'lesion_findings.pickle'
df = pd.read_csv(FINDINGS)

# Start from empty label lists so that re-running this cell does not append
# the same ClinSig values a second time.
for entry in findings_data:
    entry['ClinSig'] = []

# One CSV row per finding: collect each row's ClinSig value on the record of
# the patient it belongs to (position = numeric part of the ProxID).
for patient, clin_sig in zip(df['ProxID'], df['ClinSig']):
    idx = int(patient[10:])
    findings_data[idx]['ClinSig'].append(clin_sig)

findings = pd.DataFrame(findings_data, columns=['ProxID', 'ClinSig', 'T2', 'ADC', 'KTrans'])
findings.to_pickle(findings_pickle)

print(f'The findings information can be found at: {findings_pickle}')
findings

The findings information can be found at: data\processed\lesion_findings.pickle


Unnamed: 0,ProxID,ClinSig,T2,ADC,KTrans
0,ProstateX-0000,[True],data\processed\ProstateX-0000_t2_tse_tra_t2_ts...,data\processed\ProstateX-0000_ep2d_diff_tra_ep...,data\processed\ProstateX-0000-Ktrans.nii.gz
1,ProstateX-0001,[False],data\processed\ProstateX-0001_t2_tse_tra_t2_ts...,data\processed\ProstateX-0001_ep2d_diff_tra_ep...,data\processed\ProstateX-0001-Ktrans.nii.gz
2,ProstateX-0002,"[True, False]",data\processed\ProstateX-0002_t2_tse_tra_t2_ts...,data\processed\ProstateX-0002_ep2d_diff_tra_ep...,data\processed\ProstateX-0002-Ktrans.nii.gz
3,ProstateX-0003,"[False, False]",data\processed\ProstateX-0003_t2_tse_tra_t2_ts...,data\processed\ProstateX-0003_ep2d_diff_tra_ep...,data\processed\ProstateX-0003-Ktrans.nii.gz
4,ProstateX-0004,[False],data\processed\ProstateX-0004_t2_tse_tra_t2_ts...,data\processed\ProstateX-0004_ep2d_diff_tra_ep...,data\processed\ProstateX-0004-Ktrans.nii.gz
...,...,...,...,...,...
199,ProstateX-0199,"[True, True]",data\processed\ProstateX-0199_t2_tse_tra_t2_ts...,data\processed\ProstateX-0199_diffusie-3Scan-4...,data\processed\ProstateX-0199-Ktrans.nii.gz
200,ProstateX-0200,"[True, False]",data\processed\ProstateX-0200_t2_tse_tra_t2_ts...,data\processed\ProstateX-0200_diffusie-3Scan-4...,data\processed\ProstateX-0200-Ktrans.nii.gz
201,ProstateX-0201,[True],data\processed\ProstateX-0201_t2_tse_tra_t2_ts...,data\processed\ProstateX-0201_diffusie-3Scan-4...,data\processed\ProstateX-0201-Ktrans.nii.gz
202,ProstateX-0202,"[True, False]",data\processed\ProstateX-0202_t2_tse_tra_t2_ts...,data\processed\ProstateX-0202_diffusie-3Scan-4...,data\processed\ProstateX-0202-Ktrans.nii.gz
