In [None]:
import os
import pandas as pd
import pydicom as dicom
from glob import glob

# A Quick Look at a Data Sample

In [None]:
# Path of one DICOM file from the training set, used as the worked example below.
file_path = '/kaggle/input/siim-covid19-detection/train/00086460a852/9e8302230c91/65761e66de9f.dcm'
# Parse the file with pydicom.
data = dicom.dcmread(file_path)
# Bare expression so the notebook renders the parsed dataset.
data

We can access metadata by looping through elements:

In [None]:
# Print every element of the sample together with its position and Python type.
for position, element in enumerate(data.elements()):
    print(position, element, '\t', type(element))

More metadata can be found in the **`file_meta`** attribute:

In [None]:
# Unpack the file-meta elements into a list so the notebook displays all of them.
[*data.file_meta.elements()]

# Extracting the Meta-Data

In [None]:
def extractMetaData(file_path):
    """Read a DICOM file and return its metadata as a flat dictionary.

    Parameters
    ----------
    file_path : str
        Path of the DICOM file to read.

    Returns
    -------
    dict
        One entry per data element, keyed by the element keyword. Elements
        named 'Pixel Data', 'Private Creator' and 'De-identification Method'
        are left out. Multi-valued elements become lists; every other value
        is converted with ``str``. In addition the dict holds:

        * the file-meta elements (values converted with ``str``),
        * 'PrivateCreator': the values of all 'Private Creator' elements,
        * 'DeidentificationMethodCodeSequence': the ``CodeValue`` of each
          item of that sequence, or an empty list when the file has no such
          sequence,
        * 'file_path': the path that was read.
    """
    # Only metadata is needed, so stop parsing before the pixel data instead of
    # loading the image payload of every file.
    data = dicom.dcmread(file_path, stop_before_pixels=True)
    data.decode()  # to access the elements data should be decoded from raw format

    skipped_names = ('Pixel Data', 'Private Creator', 'De-identification Method')
    elements = {}
    private_creators = []
    for e in data.elements():  # one pass collects both the fields and the private creators
        if e.name == 'Private Creator':
            private_creators.append(e.value)
        if e.name in skipped_names:
            continue
        value = e.value
        elements[e.keyword] = list(value) if isinstance(value, dicom.multival.MultiValue) else str(value)

    data.file_meta.decode()
    elements.update({e.keyword: str(e.value) for e in data.file_meta.elements()})
    elements['PrivateCreator'] = private_creators
    # Fall back to an empty list when the sequence is absent, so that a single
    # file without it cannot abort a bulk extraction with AttributeError.
    code_sequence = data.get('DeidentificationMethodCodeSequence', [])
    elements['DeidentificationMethodCodeSequence'] = [DMC.CodeValue for DMC in code_sequence]
    elements['file_path'] = file_path
    return elements

# Try the extractor on the sample file and display the resulting dictionary.
metainfo = extractMetaData(file_path)
metainfo

## Important note
The study ID, series ID, and image ID are stored in the `StudyInstanceUID`, `SeriesInstanceUID`, and `SOPInstanceUID` attributes, respectively:

In [None]:
# Show the sample's path next to the three UIDs taken from its metadata.
(
    file_path,
    [metainfo[uid_key] for uid_key in ('StudyInstanceUID', 'SeriesInstanceUID', 'SOPInstanceUID')],
)

# Making the Dataset

In [None]:
# Sort the matches: glob returns paths in a file-system-dependent order, and the
# rows of the metadata table built from this list follow that order.
Files = sorted(glob('/kaggle/input/siim-covid19-detection/train/*/*/*.dcm'))
len(Files)

In [None]:
# Build one metadata record per training file and stack them into a single table.
Data = pd.DataFrame([extractMetaData(dcm_path) for dcm_path in Files])
print(Data.shape)
Data.head()

In [None]:
# Write the training metadata table to CSV; index=False leaves out the DataFrame index.
Data.to_csv('siim_2021_train_metadata.csv', index=False)

In [None]:
# Repeat the extraction for the test split.
# NOTE: this rebinds `Files`, which held the training paths until this cell runs.
# The matches are sorted because glob's ordering depends on the file system.
Files = sorted(glob('/kaggle/input/siim-covid19-detection/test/*/*/*.dcm'))
Data_ts = pd.DataFrame((extractMetaData(file_path) for file_path in Files))
Data_ts.to_csv('siim_2021_test_metadata.csv', index=False)
Data_ts.shape