In [None]:
import tensorflow as tf
# Print the version of the installed TensorFlow package.
print(tf.__version__)

In [None]:
import pydicom
import os
import numpy as np
from matplotlib import pyplot, cm

#### Find all DICOM files

In [None]:
# Collect the complete path of every DICOM file below the export directory.
# Raw string: "\A" is not a valid escape sequence, so the plain literal only
# produced the intended path by accident (and warns on newer Python versions).
path_to_dicom_files = r"D:\AIM_EXPORT"
dcm_files = []
for root, dirs, files in os.walk(path_to_dicom_files): # walk the whole directory tree
    for filename in files:
        # Match on the file extension only, so that names which merely contain
        # ".dcm" somewhere (e.g. "scan.dcm.bak") are not picked up.
        if filename.lower().endswith(".dcm"):
            dcm_files.append(os.path.join(root, filename)) # save the complete path to the dicom file

In [None]:
# Number of DICOM file paths that were collected
len(dcm_files)

In [None]:
# Show a few of the collected paths (entries 1 to 4; entry 0 is not included)
dcm_files[1:5]

In [None]:
# Read one file as a quick sanity check.
# dcmread() is the current pydicom reader; read_file() is deprecated and
# no longer available in pydicom 3.x.
# NOTE: index 101 is an arbitrary pick and needs at least 102 collected files.
img = pydicom.dcmread(dcm_files[101])

In [None]:
img

# Header fields of the dicom file are accessed as attributes, e.g. img.PatientID
#   PatientID:         patient ID
#   SeriesDescription: series description (which sequence: DWI, FLAIR etc.)
#   SeriesNumber:      series number
#   InstanceNumber:    instance number
#   Rows:              rows (number of pixels)
#   Columns:           columns (number of pixels)
#   PixelData:         pixel data (the image itself)

In [None]:
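# Find the names of the sequences (SeriesDescription values) that occur in the export.
# The exploratory code is kept commented out below, followed by the values recorded from an earlier run.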

# descr = []
# for file in dcm_files[:10000]:
#     # read the image
#     img = pydicom.read_file(file)
#     if(hasattr(img,"SeriesDescription")): # check if attribute SeriesDescription even exists
#         descr.append(img.SeriesDescription)
# descr
# array(['<MIP Range>', '<MIP Range[1]>', '<MIP Range[2]>',
#        'ADC_OLEA_ANALYSIS', 'ADC_OLEA_COMPUTED', 'CBF_RGB', 'CBV_RGB',
#        'DWI_OLEA_COMPUTED', 'MIP Booster', 'MIP TOF 1', 'MIP TOF 2',
#        'MTT_OLEA_ANALYSIS', 'MTT_OLEA_COMPUTED', 'MTT_RGB',
#        'NOVA_MOVE_OLEA_COMPUTED', 'OLEA_STUDY_SETTINGS',
#        'OVERLAY_OLEA_ANALYSIS', 'PERFUSION_OLEA_COMPUTED',
#        'RESULTS_OLEA_ANALYSIS', 'SEGMENTATION_OLEA_COMPUTED',
#        'SURFACE_SEGMENTATION_OLEA_COMPUTED', 'SWI_Images',
#        'TMAX_OLEA_ANALYSIS', 'TMAX_OLEA_COMPUTED', 'TOF_3D_multi-slab',
#        'TTP_OLEA_ANALYSIS', 'TTP_OLEA_COMPUTED', 'TTP_RGB',
#        'Test_Bolus_cor', 'Testbolus_cor', 'Tirm_tra_fs_TI_2440',
#        'Tirm_tra_fs_TI_2500', 'ep2d_diff_3scan_trace_p2',
#        'ep2d_diff_3scan_trace_p2_ADC', 'ep2d_diff_M128_b0_1000_DIN',
#        'ep2d_diff_M128_b0_1000_DIN_ADC', 'ep2d_perf_p2',
#        'ep2d_perf_p2_GBP', 'ep2d_perf_p2_PBP', 'ep2d_perf_p2_TTP',
#        'fl3d-cor', 'fl3d-cor_SUB', 'fl3d-cor_SUB_MIP_COR', 'localizer',
#        'mIP_Images(SW)', 'rBF_OLEA_ANALYSIS', 'rBF_OLEA_COMPUTED',
#        'rBV_OLEA_ANALYSIS', 'rBV_OLEA_COMPUTED', 't1_fl2d_tra',
#        't1_se_r_tra_KM', 'tMIP_OLEA_COMPUTED', 'vessels_neck_MIP_SAG'],
#       dtype='<U34')

In [None]:
# Series descriptions of the sequences that should be kept.

# DWI and ADC images (different names)
dwi_adc_sequences = [
    "ep2d_diff_M128_b0_1000_DIN",
    "ep2d_diff_M128_b0_1000_DIN_ADC",
    "ep2d_diff_3scan_trace_p2",
    "ep2d_diff_3scan_trace_p2_ADC",
    "resolve_3scan_trace_tra_176_p2_TRACEW",
    "resolve_3scan_trace_tra_176_p2_ADC",
    "ep2d_diff_4scan_trace_p2_s2_inkl.Neckcoil_TRACEW",
    "ep2d_diff_4scan_trace_p2_s2_inkl.Neckcoil_ADC",
    "ep2d_diff_3scan_trace_p3",
    "ep2d_diff_3scan_trace_p3_ADC",
    "ep2d_diffusion",
    "ep2d_diffusion_ADC",
]

# OLEA perfusion maps: tMIP is not available.
# These are the maps we have looked at so far --> use these maps.
olea_perfusion_maps = [
    "MTT_OLEA_ANALYSIS",
    "TMAX_OLEA_ANALYSIS",
    "TTP_OLEA_ANALYSIS",
    "rBF_OLEA_ANALYSIS",
    "rBV_OLEA_ANALYSIS",
]

seq_of_interest = dwi_adc_sequences + olea_perfusion_maps

# Not included: OLEA perfusion maps (black and white), the old maps
#   "MTT_OLEA_COMPUTED", "TMAX_OLEA_COMPUTED", "TTP_OLEA_COMPUTED",
#   "rBF_OLEA_COMPUTED", "rBV_OLEA_COMPUTED", "tMIP_OLEA_COMPUTED"

In [None]:
# Scan every DICOM file and record the header information of the files whose
# SeriesDescription is one of the sequences of interest.
#
# The arrays are preallocated with one slot per file. Only the first j slots
# are filled; all remaining slots keep their initial value None.
# dtype=object: the alias np.object was deprecated in NumPy 1.20 and removed
# in NumPy 1.24, the builtin `object` is the supported spelling.
n_files = len(dcm_files)
seq = np.empty(n_files, dtype=object)
p_id = np.empty(n_files, dtype=object)
rows = np.empty(n_files, dtype=object)
cols = np.empty(n_files, dtype=object)
instance_no = np.empty(n_files, dtype=object)
path = np.empty(n_files, dtype=object)

j = 0  # index of the next free slot in the result arrays
for i, file in enumerate(dcm_files):
    # report progress occasionally instead of printing one line per file
    if i % 1000 == 0:
        print(i, "of", n_files, "files read")
    # read the dicom file (dcmread replaces the deprecated read_file)
    # NOTE(review): only header fields are used here; passing
    # stop_before_pixels=True would presumably speed this up, but `img` would
    # then lack the pixel data - confirm that nothing later relies on it.
    img = pydicom.dcmread(file)
    # a missing SeriesDescription yields None, which is never a sequence of interest
    if getattr(img, 'SeriesDescription', None) in seq_of_interest: # DWI, ADC, etc.
        path[j] = file
        seq[j] = img.SeriesDescription
        # attributes that are missing in the file stay None
        p_id[j] = getattr(img, 'PatientID', None)
        rows[j] = getattr(img, 'Rows', None)
        cols[j] = getattr(img, 'Columns', None)
        instance_no[j] = getattr(img, 'InstanceNumber', None)
        j = j + 1

In [None]:
import pandas as pd

# Put the collected arrays side by side in one table, one column per array.
dat = pd.DataFrame(dict(
    path=path,
    sequence=seq,
    p_id=p_id,
    rows=rows,
    columns=cols,
    instance_no=instance_no,
))

In [None]:
# Preview only: show the table without the rows in which every column is null.
# The result is not assigned, so `dat` itself is left unchanged here.
# `thresh` must not be passed together with `how`: recent pandas versions
# raise a TypeError for that combination, even for thresh=None.
dat.dropna(axis=0,    # rows
           how='all') # complete row has to be null
# Looks good

In [None]:
# Actually remove the rows in which every column is null.
# Reassigning replaces inplace=True; `thresh` is no longer passed because
# recent pandas versions raise a TypeError when it is combined with `how`.
dat = dat.dropna(axis=0,    # rows
                 how='all') # complete row has to be null

In [None]:
# Save the table as CSV (without the index column) in the data/ subfolder of the project.
# NOTE(review): absolute, user-specific path - adjust when running on another machine.
dat.to_csv("C:/Users/hezo/Dropbox/PhD/Stroke/Stroke_perfusion" + "/data/data_sept_2019.csv", index=False)

In [None]:
# Save the same table again under a second file name, directly in the project folder.
# NOTE(review): this writes ".../Stroke_perfusion/data_march_2019.csv", while the read_csv
# call under "Check data" loads ".../Stroke_perfusion/data/data_march_2019.csv" (inside data/) -
# confirm which location is intended.
dat.to_csv("C:/Users/hezo/Dropbox/PhD/Stroke/Stroke_perfusion" + "/data_march_2019.csv", index=False)

### Check data

In [None]:
import pandas as pd
# Load a previously saved table from the data/ subfolder; this replaces the `dat` built above.
# NOTE(review): the file is decoded as latin-1, whereas the to_csv calls in this notebook
# do not pass an encoding - confirm that the file on disk really is latin-1.
dat = pd.read_csv("C:/Users/hezo/Dropbox/PhD/Stroke/Stroke_perfusion/data/data_march_2019.csv", encoding='latin-1')

In [None]:
# Display the loaded table
dat