Let's create a CSV with all the metadata of the DICOM images so we can do a better analysis of that data.

# Import

In [1]:
import glob

import pandas as pd

import pydicom

import tqdm

# Read loop

In [2]:
# Paths
input_path = "../input/rsna-intracranial-hemorrhage-detection"
train_imgs = glob.glob(f"{input_path}/stage_1_train_images/*")
test_imgs = glob.glob(f"{input_path}/stage_1_test_images/*")

In [3]:
def save_value(img_data, name, value):
    if type(value) == pydicom.multival.MultiValue:
        for i, j in enumerate(value):
            save_value(img_data, f"{name}_{i}", j)
    else:
        if type(value) == pydicom.uid.UID:
            value = str(value)
        elif type(value) == pydicom.valuerep.DSfloat:
            value = float(value)
        img_data[name] = value

def get_data_dict(img):
    img_data = {}
    for i in img.iterall():
        if i.name == "Pixel Data":
            continue
        name = i.name.replace(" ", "_").replace("(", "").replace(")", "").lower()
        save_value(img_data, name, i.value)
        
    return img_data

def get_list_data(imgs):
    list_data = []
    for i in tqdm.tqdm(imgs):
        img = pydicom.read_file(i)
        img_data = get_data_dict(img)
        
        img_data["filename"] = i
        
        list_data.append(img_data)
    return list_data

def get_df_data(imgs):
    list_data = get_list_data(imgs)
    return pd.DataFrame(list_data)

df_imgs_train = get_df_data(train_imgs)
df_imgs_train.to_csv("df_dicom_metadata_train.csv", index=False)

df_imgs_test = get_df_data(test_imgs)
df_imgs_test.to_csv("df_dicom_metadata_test.csv", index=False)

100%|██████████| 78545/78545 [10:39<00:00, 122.83it/s]


In [4]:
df_imgs_train.head()

Unnamed: 0,sop_instance_uid,modality,patient_id,study_instance_uid,series_instance_uid,study_id,image_position_patient_0,image_position_patient_1,image_position_patient_2,image_orientation_patient_0,...,pixel_representation,window_center,window_width,rescale_intercept,rescale_slope,filename,window_center_0,window_center_1,window_width_0,window_width_1
0,ID_ff816e9b6,CT,ID_b14d854e,ID_c65a8f2f00,ID_79d2473b55,,-125.0,-86.586,-77.761,1.0,...,1,40.0,150.0,-1024.0,1.0,../input/rsna-intracranial-hemorrhage-detectio...,,,,
1,ID_282a7f3cd,CT,ID_79cf35b0,ID_9ee24d714d,ID_fe49a86511,,-131.052632,-6.157896,326.099976,1.0,...,0,,,-1024.0,1.0,../input/rsna-intracranial-hemorrhage-detectio...,36.0,36.0,80.0,80.0
2,ID_f1c65b76e,CT,ID_43dd2890,ID_43af13416f,ID_143018582f,,-114.5,25.5,91.900024,1.0,...,0,,,-1024.0,1.0,../input/rsna-intracranial-hemorrhage-detectio...,36.0,36.0,80.0,80.0
3,ID_3b59681d3,CT,ID_174f992b,ID_31f18d004c,ID_def4bb5674,,-125.0,-95.561577,145.102463,1.0,...,1,30.0,80.0,-1024.0,1.0,../input/rsna-intracranial-hemorrhage-detectio...,,,,
4,ID_f506d79aa,CT,ID_5758a541,ID_0f7a5b14c1,ID_a701a416fc,,-116.5,-10.5,42.400024,1.0,...,0,,,-1024.0,1.0,../input/rsna-intracranial-hemorrhage-detectio...,36.0,36.0,80.0,80.0
