### Directory of DICOM Files to Pandas Dataframe 
The code below scans a folder for `.dcm` / `.dicom` files and returns a pandas DataFrame populated with their DICOM metadata (pixel data is not loaded).

In [None]:
import os
import collections
import ntpath
import pydicom
import pandas as pd
from tqdm.notebook import tqdm

"""
pandas_df_from_dicoms_folder:
params:
    folder: directory with dicom files
returns:
    pandas dataframe of the dicom files' data with filename as index

dataset_to_dic and data_element_to_dic from:
https://github.com/pydicom/contrib-pydicom/blob/master/plotting-visualization/dcm_qt_tree.py
"""

def data_element_to_dic(data_element):
    """Convert one DICOM data element into a single-entry ordered mapping.

    params:
        data_element: object exposing ``VR`` and ``name`` (and ``value`` for
            non-sequence elements); sequence ("SQ") elements are iterated to
            reach their contained datasets
    returns:
        OrderedDict keyed by the element name. A sequence element maps to an
        OrderedDict of 'item <n>' -> converted dataset; the 'Pixel Data'
        element is skipped, which yields an empty mapping.
    """
    result = collections.OrderedDict()

    if data_element.VR == "SQ":
        # Sequences are expanded recursively, one numbered entry per item.
        children = collections.OrderedDict()
        result[data_element.name] = children
        for position, item in enumerate(data_element):
            children[f'item {position}'] = dataset_to_dic(item)
        return result

    if data_element.name != 'Pixel Data':
        result[data_element.name] = data_element.value
    return result


def dataset_to_dic(dataset, index=None):
    """Flatten a DICOM dataset into a plain dict keyed by element name.

    params:
        dataset: iterable of data elements; it may additionally carry a
            ``file_meta`` attribute holding the file-header elements
        index: optional value stored under the 'index' key of the result
    returns:
        dict containing the file-meta elements (when present) followed by
        the dataset's own elements, plus 'index' when one was given. If two
        elements share a name, the later one overwrites the earlier.
    """
    dic = {}

    # This function is also invoked recursively on the items of a sequence
    # element. Such nested datasets may have no ``file_meta`` attribute (or
    # have it set to None), so it is looked up defensively instead of being
    # assumed to exist.
    file_meta = getattr(dataset, 'file_meta', None)
    if file_meta is not None:
        for data_element in file_meta:
            dic.update(data_element_to_dic(data_element))

    for data_element in dataset:
        dic.update(data_element_to_dic(data_element))

    if index is not None:
        dic['index'] = index
    return dic


def pandas_df_from_dicoms_folder(folder):
    """Read the metadata of every DICOM file in ``folder`` into a DataFrame.

    params:
        folder: directory containing ``.dcm`` / ``.dicom`` files. The
            extension is matched case-insensitively; sub-directories are
            not descended into.
    returns:
        pandas DataFrame with one row per file (sorted by file name), one
        column per DICOM element name, and the file name as index. An empty
        DataFrame is returned when the folder holds no matching files.
    """
    # The context manager closes the directory handle promptly. Only regular
    # files are kept, so a directory that merely has a DICOM-looking name is
    # never handed to dcmread. Sorting makes the row order reproducible.
    with os.scandir(folder) as entries:
        dcm_names = sorted(
            entry.name
            for entry in entries
            if entry.is_file()
            and entry.name.lower().endswith(('.dcm', '.dicom'))
        )

    if not dcm_names:
        # Nothing to read: return an empty frame rather than failing later.
        # The object-dtype index keeps ``df.index.str`` usable for callers.
        return pd.DataFrame(index=pd.Index([], dtype=object), dtype=object)

    records = []
    for name in tqdm(dcm_names):
        dcm = pydicom.dcmread(os.path.join(folder, name), stop_before_pixels=True)
        records.append(dataset_to_dic(dcm))

    print('Done reading dicom data. Converting to dataframe...', end='')
    # Build the frame in one pass from the list of dicts; keys missing from a
    # given file become NaN. dtype=object stores the raw values as read
    # instead of letting pandas infer per-column types.
    df = pd.DataFrame(
        records,
        index=pd.Index(dcm_names, dtype=object),
        dtype=object,
    )
    print('done.')
    return df

In [None]:
### Usage ###
dicom_folder = '/kaggle/input/vinbigdata-chest-xray-abnormalities-detection/train/'
df = pandas_df_from_dicoms_folder(dicom_folder)
# Strip the file extension so the index holds the bare file stem. The dot is
# escaped because an unescaped '.' matches any character (it would also trim
# a name ending in e.g. 'Xdicom'); both extensions the loader accepts are
# handled.
df.index = df.index.str.replace(r'\.(?:dcm|dicom)$', '', regex=True)
df