In [3]:
import os
import pydicom
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt


# Labels and meta information loading

In [4]:
# Input CSVs, all located in the raw Train_Information folder.
# Findings table (columns: ProxID, fid, pos, zone, ClinSig).
findings_info_path = "../data/raw/Train_Information/ProstateX-Findings-Train.csv"
# Image table for the DICOM series (ProxID, Name, fid, pos, WorldMatrix, ijk, ...).
images_info_path = "../data/raw/Train_Information/ProstateX-Images-Train.csv"
# Image table for the KTrans volumes.
mhd_images_info_path = "../data/raw/Train_Information/ProstateX-Images-KTrans-Train.csv"

In [5]:
# Load the image table from the CSV at `images_info_path` into a DataFrame.
images_info = pd.read_csv(images_info_path)
# Display the column names; ProxID, fid and pos are the keys used by the
# merge with the findings table further down.
images_info.columns

Index(['ProxID', 'Name', 'fid', 'pos', 'WorldMatrix', 'ijk', 'TopLevel',
       'SpacingBetweenSlices', 'VoxelSpacing', 'Dim', 'DCMSerDescr',
       'DCMSerNum'],
      dtype='object')

In [6]:
# Load the findings table from the CSV at `findings_info_path` into a DataFrame.
findings_info = pd.read_csv(findings_info_path)
# Display the column names; `zone` and `ClinSig` are the fields that the
# merges further down attach to the image rows.
findings_info.columns

Index(['ProxID', 'fid', 'pos', 'zone', 'ClinSig'], dtype='object')

## Import MHD Information

In [7]:
# Read the KTrans image table and set a constant "KTrans" series description
# on every row (presumably so these rows can be told apart from the DICOM
# series once both tables are stacked -- confirm against downstream use).
mhd_image_info = pd.read_csv(mhd_images_info_path).assign(DCMSerDescr="KTrans")
mhd_image_info.head()

Unnamed: 0,ProxID,fid,pos,WorldMatrix,ijk,DCMSerDescr
0,ProstateX-0000,1,25.7457 31.8707 -38.511,"1.5,3.00503e-010,0.00377059,-58.4317,-0.000597...",56 75 7,KTrans
1,ProstateX-0001,1,-40.5367071921656 29.320722668457 -16.70766907...,"1.5,3.07655e-010,-4.64581e-017,-116.977,-3.057...",51 61 6,KTrans
2,ProstateX-0002,1,-27.0102 41.5467 -26.0469,"1.5,3.07655e-010,7.98512e-017,-104.284,-2.9890...",52 73 11,KTrans
3,ProstateX-0002,2,-2.058 38.6752 -34.6104,"1.5,3.07655e-010,7.98512e-017,-104.284,-2.9890...",68 72 8,KTrans
4,ProstateX-0003,1,22.1495 31.2717 -2.45933,"1.5,3.07655e-010,3.75383e-017,-98.5005,-2.5684...",80 69 10,KTrans


In [8]:
# Attach the finding columns to every image row. This is an inner join on
# ProxID, fid and pos, so rows without an exact match on all three keys
# (pos is compared as a raw string) are dropped.
mri_metadata = images_info.merge(
    findings_info,
    how="inner",
    on=["ProxID", "fid", "pos"],
)


In [9]:
# Attach the finding columns to every KTrans row. This is an inner join on
# ProxID, fid and pos, so rows without an exact match on all three keys
# (pos is compared as a raw string) are dropped.
mhd_metadata = mhd_image_info.merge(
    findings_info,
    how="inner",
    on=["ProxID", "fid", "pos"],
)
mhd_metadata.head()

Unnamed: 0,ProxID,fid,pos,WorldMatrix,ijk,DCMSerDescr,zone,ClinSig
0,ProstateX-0000,1,25.7457 31.8707 -38.511,"1.5,3.00503e-010,0.00377059,-58.4317,-0.000597...",56 75 7,KTrans,PZ,True
1,ProstateX-0001,1,-40.5367071921656 29.320722668457 -16.70766907...,"1.5,3.07655e-010,-4.64581e-017,-116.977,-3.057...",51 61 6,KTrans,AS,False
2,ProstateX-0002,1,-27.0102 41.5467 -26.0469,"1.5,3.07655e-010,7.98512e-017,-104.284,-2.9890...",52 73 11,KTrans,PZ,True
3,ProstateX-0002,2,-2.058 38.6752 -34.6104,"1.5,3.07655e-010,7.98512e-017,-104.284,-2.9890...",68 72 8,KTrans,PZ,False
4,ProstateX-0003,1,22.1495 31.2717 -2.45933,"1.5,3.07655e-010,3.75383e-017,-98.5005,-2.5684...",80 69 10,KTrans,TZ,False


In [8]:
# Stack the DICOM-series rows and the KTrans rows into a single table.
# ignore_index=True gives the result a fresh 0..n-1 index: both inputs carry
# their own index starting at 0, so without it row labels would be duplicated,
# making label-based lookups ambiguous. Columns that exist only in
# `mri_metadata` are left missing (NaN) for the KTrans rows.
metadata_labels = pd.concat(
    [mri_metadata, mhd_metadata],
    ignore_index=True,
    sort=False,
)
# Split the space-separated "ijk" string into separate i / j / k columns.
# The values stay strings; cast them where integer indices are needed.
metadata_labels[["i", "j", "k"]] = metadata_labels["ijk"].str.split(" ", expand=True)
metadata_labels.head()

Unnamed: 0,ProxID,Name,fid,pos,WorldMatrix,ijk,TopLevel,SpacingBetweenSlices,VoxelSpacing,Dim,DCMSerDescr,DCMSerNum,zone,ClinSig,i,j,k
0,ProstateX-0000,ep2d_diff_tra_DYNDIST_ADC0,1,25.7457 31.8707 -38.511,"2,4.0067e-010,0.00377059,-46.6873,-0.000797221...",36 72 9,1.0,3.0,223,84x128x19x1,ep2d_diff_tra_DYNDIST_ADC,7.0,PZ,True,36,72,9
1,ProstateX-0000,ep2d_diff_tra_DYNDIST0,1,25.7457 31.8707 -38.511,"2,4.0067e-010,0.00377059,-46.6873,-0.000797221...",36 72 9,1.0,3.0,223,84x128x19x1,ep2d_diff_tra_DYNDIST,6.0,PZ,True,36,72,9
2,ProstateX-0000,ep2d_diff_tra_DYNDIST1,1,25.7457 31.8707 -38.511,"2,4.0067e-010,0.00377059,-46.6873,-0.000797221...",36 72 9,1.0,3.0,223,84x128x19x1,ep2d_diff_tra_DYNDIST,6.0,PZ,True,36,72,9
3,ProstateX-0000,ep2d_diff_tra_DYNDIST2,1,25.7457 31.8707 -38.511,"2,4.0067e-010,0.00377059,-46.6873,-0.000797221...",36 72 9,1.0,3.0,223,84x128x19x1,ep2d_diff_tra_DYNDIST,6.0,PZ,True,36,72,9
4,ProstateX-0000,ep2d_diff_tra_DYNDISTCALC_BVAL0,1,25.7457 31.8707 -38.511,"2,4.0067e-010,0.00377059,-46.6873,-0.000797221...",36 72 9,1.0,3.0,223,84x128x19x1,ep2d_diff_tra_DYNDISTCALC_BVAL,8.0,PZ,True,36,72,9


In [9]:
# Preview a bounded number of rows of the combined table instead of
# rendering the whole frame, which keeps the saved notebook output small.
metadata_labels.head(10)

Unnamed: 0,ProxID,Name,fid,pos,WorldMatrix,ijk,TopLevel,SpacingBetweenSlices,VoxelSpacing,Dim,DCMSerDescr,DCMSerNum,zone,ClinSig,i,j,k
0,ProstateX-0000,ep2d_diff_tra_DYNDIST_ADC0,1,25.7457 31.8707 -38.511,"2,4.0067e-010,0.00377059,-46.6873,-0.000797221...",36 72 9,1.0,3.0000,223,84x128x19x1,ep2d_diff_tra_DYNDIST_ADC,7.0,PZ,True,36,72,9
1,ProstateX-0000,ep2d_diff_tra_DYNDIST0,1,25.7457 31.8707 -38.511,"2,4.0067e-010,0.00377059,-46.6873,-0.000797221...",36 72 9,1.0,3.0000,223,84x128x19x1,ep2d_diff_tra_DYNDIST,6.0,PZ,True,36,72,9
2,ProstateX-0000,ep2d_diff_tra_DYNDIST1,1,25.7457 31.8707 -38.511,"2,4.0067e-010,0.00377059,-46.6873,-0.000797221...",36 72 9,1.0,3.0000,223,84x128x19x1,ep2d_diff_tra_DYNDIST,6.0,PZ,True,36,72,9
3,ProstateX-0000,ep2d_diff_tra_DYNDIST2,1,25.7457 31.8707 -38.511,"2,4.0067e-010,0.00377059,-46.6873,-0.000797221...",36 72 9,1.0,3.0000,223,84x128x19x1,ep2d_diff_tra_DYNDIST,6.0,PZ,True,36,72,9
4,ProstateX-0000,ep2d_diff_tra_DYNDISTCALC_BVAL0,1,25.7457 31.8707 -38.511,"2,4.0067e-010,0.00377059,-46.6873,-0.000797221...",36 72 9,1.0,3.0000,223,84x128x19x1,ep2d_diff_tra_DYNDISTCALC_BVAL,8.0,PZ,True,36,72,9
5,ProstateX-0000,t2_tse_cor0,1,25.7457 31.8707 -38.511,"0.6,0,0,-57.5639,0,-0.190383,2.84497,29.839,0,...",139 166 12,1.0,3.0000,"0.6,0.6,3",320x320x15x1,t2_tse_cor,5.0,PZ,True,139,166,12
6,ProstateX-0000,t2_tse_sag0,1,25.7457 31.8707 -38.511,"0,0,-3.6,73.1629,0.5625,0,0,-67.018,0,-0.5625,...",176 169 13,1.0,3.6000,"0.5625,0.5625,3.6",320x320x19x1,t2_tse_sag,3.0,PZ,True,176,169,13
7,ProstateX-0000,t2_tse_tra0,1,25.7457 31.8707 -38.511,"0.5,1.00168e-010,0.00377059,-57.9373,-0.000199...",167 224 9,1.0,3.0000,"0.5,0.5,3",384x384x19x1,t2_tse_tra,4.0,PZ,True,167,224,9
8,ProstateX-0000,tfl_3d_PD_ref_tra_1_5x1_5_t30,1,25.7457 31.8707 -38.511,"1.5,3.00503e-010,0.00377059,-58.4317,-0.000597...",56 75 7,1.0,3.0000,"1.5,1.5,3",128x128x16x1,tfl_3d PD ref_tra_1.5x1.5_t3,9.0,PZ,True,56,75,7
9,ProstateX-0001,ep2d_diff_tra_DYNDIST_MIX_ADC0,1,-40.5367071921656 29.320722668457 -16.70766907...,"2,4.10207e-010,-4.64581e-017,-107.955,-4.0773e...",34 63 10,1.0,3.0000,223,84x128x19x1,ep2d_diff_tra_DYNDIST_MIX_ADC,8.0,AS,False,34,63,10


In [10]:
# Persist the combined label/metadata table as a CSV file.
# The target directory is created first so the write also succeeds on a
# fresh checkout where ../data/interim does not exist yet.
# to_csv keeps its default index=True, so the row index is written as the
# first (unnamed) column of the file.
train_information_path = "../data/interim/train_information.csv"
os.makedirs(os.path.dirname(train_information_path), exist_ok=True)
metadata_labels.to_csv(train_information_path)