Let's create CSV files (one for the training images, one for the test images) with all the metadata of the DICOM images, plus basic per-image pixel statistics, so we can do a better analysis of that data.

# Import

In [1]:
import glob
import pandas as pd
import pydicom
import tqdm

import pdb

# Read loop

In [None]:
def get_data_dict(img):
    """Collect the elements of ``img`` into a flat ``{key: value}`` dict.

    Every element yielded by ``img.iterall()`` contributes one entry, except
    the element named "Pixel Data", which is left out. Keys are the element
    names lower-cased, with spaces turned into underscores and parentheses
    removed. If two elements normalize to the same key, the later one wins.
    """
    # Single translation pass: " " -> "_", while "(" and ")" are deleted.
    key_table = str.maketrans(" ", "_", "()")
    return {
        elem.name.translate(key_table).lower(): elem.value
        for elem in img.iterall()
        if elem.name != "Pixel Data"
    }

def _rescale_pixels(pixels, slope, intercept):
    """Return ``pixels * slope + intercept`` computed in a wide dtype.

    The stored pixel dtype is often a narrow integer type, where the rescale
    can wrap around (or fail to combine with a negative intercept). Integer
    pixels with an integral slope and intercept are rescaled in int64 so the
    results stay integers; everything else is rescaled in float64.
    """
    integral = float(slope).is_integer() and float(intercept).is_integer()
    if integral and pixels.dtype.kind in "iu":
        return pixels.astype("int64") * int(slope) + int(intercept)
    return pixels.astype("float64") * float(slope) + float(intercept)


def get_list_data(imgs):
    """Read every DICOM file in ``imgs`` and return one metadata dict per file.

    Each dict holds the fields produced by ``get_data_dict`` plus four
    statistics of the rescaled pixel values: ``pxl_min``, ``pxl_max``,
    ``pxl_std`` and ``pxl_mean``.

    Args:
        imgs: iterable of DICOM file paths (strings).

    Returns:
        A list of dicts, in the same order as ``imgs`` (minus the skipped
        file).

    Raises:
        KeyError: if a file has no (0028,1052) or (0028,1053) element.
    """
    list_data = []
    for path in tqdm.tqdm(imgs):
        # This one file is excluded by name.
        # NOTE(review): presumably it is unreadable/corrupt in the dataset -
        # confirm before removing the guard.
        if path.endswith('ID_6431af929.dcm'):
            continue
        # dcmread is the supported reader; read_file was a deprecated alias
        # that newer pydicom releases no longer ship.
        img = pydicom.dcmread(path)
        img_data = get_data_dict(img)

        # (0028,1052) and (0028,1053) are used as the additive and the
        # multiplicative rescale term. They are parsed as floats so that
        # fractional values are not truncated.
        intercept = float(img[('0028', '1052')].value)
        slope = float(img[('0028', '1053')].value)
        pic = _rescale_pixels(img.pixel_array, slope, intercept)

        img_data['pxl_min'] = pic.min()
        img_data['pxl_max'] = pic.max()
        img_data['pxl_std'] = pic.std()
        img_data['pxl_mean'] = pic.mean()

        list_data.append(img_data)
    return list_data

def get_df_data(imgs):
    """Return a DataFrame built from the per-image dicts of ``get_list_data``.

    Args:
        imgs: iterable of image file paths, passed straight through to
            ``get_list_data``.

    Returns:
        A ``pandas.DataFrame`` with one row per dict returned by
        ``get_list_data``.
    """
    return pd.DataFrame(get_list_data(imgs))


In [None]:
# Root folder of the competition data; the image folders are resolved below it.
input_path = "../input/rsna-intracranial-hemorrhage-detection"

# Collect every file in the stage 1 train and test image folders.
train_imgs = glob.glob(f"{input_path}/stage_1_train_images/*")
test_imgs = glob.glob(f"{input_path}/stage_1_test_images/*")

# Write one metadata CSV per image set: train first, then test.
# The stage 2 test set is currently disabled; to include it, glob
# f"{input_path}/stage_2_test_images/*" and add a
# (paths, "df_dicom_test2_metadata.csv") entry to the tuple below.
for img_paths, csv_name in (
    (train_imgs, "df_dicom_metadata.csv"),
    (test_imgs, "df_dicom_test_metadata.csv"),
):
    df_imgs = get_df_data(img_paths)
    df_imgs.to_csv(csv_name, index=False)