In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot
from pathlib import Path

In [2]:
# Load the three ProstateX training label tables (image metadata, KTrans image
# metadata, and lesion findings) in a single pass over their CSV paths.
# NOTE(review): the training CSVs are read from a 'test_labels' directory --
# confirm this is the intended location.
images_train, ktrans_train, findings = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/test_labels/ProstateX-Images-Train.csv',
        '../data/test_labels/ProstateX-Images-KTrans-Train.csv',
        '../data/test_labels/ProstateX-Findings-Train.csv',
    )
)

In [3]:
# The usual check for successful conversions: the text file one directory above
# the working directory is read as one entry per line.
dicom2nifti_success = Path.cwd().parent / 'dicom2nifti_successful.txt'
# Keep every non-empty line; empty strings (e.g. from a trailing newline) are dropped.
successful_conv = [
    entry for entry in dicom2nifti_success.read_text().split('\n') if entry
]


# Locations used to build the per-patient sequence tables (t2, bval, ...).

# generate_data walks the nifti folder to produce a dictionary with
# key = patient ID and value = cleaned sequence file name.
root_dir = Path.cwd().parent
nifti = root_dir / 'data/nifti'

def generate_data(sequence_type, nifti_dir=None, successful=None):
    """Map each successfully converted patient to a sequence file's cleaned name.

    For every patient folder under ``nifti_dir`` whose stem is listed in
    ``successful``, the sub-folder named exactly ``sequence_type`` is searched
    recursively for files whose name contains a dot. The stored value is the
    file name up to its first ``.``, with everything up to and including the
    first ``_`` removed (e.g. ``7_t2_tse_tra.nii.gz`` -> ``t2_tse_tra``).

    Parameters
    ----------
    sequence_type : str
        Name of the sequence sub-folder to inspect, e.g. ``'t2'`` or ``'bval'``.
    nifti_dir : str or pathlib.Path, optional
        Folder containing one sub-folder per patient. Defaults to the
        notebook-level ``nifti`` path.
    successful : iterable of str, optional
        Patient identifiers to include. Defaults to the notebook-level
        ``successful_conv`` list.

    Returns
    -------
    dict
        ``{patient folder name: cleaned sequence file name}``. Patients that
        are not listed, lack the sequence folder, or have no usable file in it
        are omitted. If a sequence folder holds several matching files, the
        one that sorts last by path is recorded.
    """
    # Fall back to the notebook globals so existing one-argument calls behave as before.
    base_dir = Path(nifti if nifti_dir is None else nifti_dir)
    # A set gives constant-time membership tests for each patient folder.
    wanted = set(successful_conv if successful is None else successful)

    patient_data = {}
    for patient in base_dir.iterdir():
        if not patient.is_dir() or patient.stem not in wanted:
            continue
        for sequence in patient.iterdir():
            if not sequence.is_dir() or sequence.name != sequence_type:
                continue
            # Directory listing order is filesystem-dependent; sorting makes the
            # "last match wins" outcome reproducible across machines.
            for item in sorted(sequence.rglob('*.*')):
                base_name = item.name.split('.')[0]
                # Skip dotted directories and dotfiles (e.g. '.DS_Store'), which
                # would otherwise overwrite the entry with a bogus or empty name.
                if not item.is_file() or not base_name:
                    continue
                # find() returns -1 when there is no underscore, so the slice
                # then starts at 0 and the whole name is kept.
                prefix_end = base_name.find('_') + 1
                patient_data[patient.name] = base_name[prefix_end:]
    return patient_data
                

In [4]:
# T2 sequence name per patient: the dictionary keys become the index and the
# names land in a single unnamed column (0).
t2_data = generate_data(sequence_type='t2')
t2_data_frame = pd.DataFrame.from_dict(data=t2_data, orient='index')
print(t2_data_frame.head())

                         0
ProstateX-0005  t2_tse_tra
ProstateX-0002  t2_tse_tra
ProstateX-0034  t2_tse_tra
ProstateX-0033  t2_tse_tra
ProstateX-0032  t2_tse_tra


In [5]:
# b-value sequence name per patient: the dictionary keys become the index and
# the names land in a single unnamed column (0).
bval_data = generate_data(sequence_type='bval')
bval_data_frame = pd.DataFrame.from_dict(data=bval_data, orient='index')
print(bval_data_frame.head())

                                                 0
ProstateX-0005  ep2d_diff_tra_dyndist_mixcalc_bval
ProstateX-0002      ep2d_diff_tra_dyndistcalc_bval
ProstateX-0034      ep2d_diff_tra_dyndistcalc_bval
ProstateX-0033      ep2d_diff_tra_dyndistcalc_bval
ProstateX-0032      ep2d_diff_tra_dyndistcalc_bval
