# about
This notebook extracts metadata from the DICOM files. You can download the resulting CSV file from this notebook's output.

# reference
- https://www.kaggle.com/c/rsna-miccai-brain-tumor-radiogenomic-classification/discussion/252942

In [None]:
import numpy as np
import pandas as pd
import os
import pydicom

In [None]:
# List the contents of the competition input directory (IPython line magic).
%ls ../input/rsna-miccai-brain-tumor-radiogenomic-classification

In [None]:
from pydicom import dcmread

# Root of the competition data and the path of one sample DICOM file in it.
data_dir = '/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification'
fpath = f'{data_dir}/train/00000/FLAIR/Image-1.dcm'

# Parse the sample file so that its contents can be inspected below.
ds = dcmread(fpath)

In [None]:
# Print the dataset parsed from the sample file (`ds`) to see which
# elements it contains.
print(ds)

## DICOM metadata


In [None]:
from tqdm import tqdm

class Dicom:
    """Collect DICOM header metadata for one dataset split into a DataFrame.

    Each row describes a single ``.dcm`` file: identifiers derived from its
    path (``BraTS21ID``, ``dataset``, ``type``, ``dicom_src``), the dataset
    elements named in ``fields``, the file-meta elements named in
    ``fm_fields``, and a few attributes of the parsed dataset object.

    Attributes set by this class:
        fields: keywords looked up on the parsed dataset.
        fm_fields: keywords looked up on the dataset's ``file_meta``.
        metadata: list of row dictionaries accumulated by ``runner``.
        df: DataFrame with the collected rows (set by ``runner``/``exec``).
    """

    def __init__(self):
        """Initialise the keyword lists and an empty row accumulator."""
        self.fields = [
            'AccessionNumber',
            'AcquisitionMatrix',
            'B1rms',
            'BitsAllocated',
            'BitsStored',
            'Columns',
            'ConversionType',
            'DiffusionBValue',
            'DiffusionGradientOrientation',
            'EchoNumbers',
            'EchoTime',
            'EchoTrainLength',
            'FlipAngle',
            'HighBit',
            'HighRRValue',
            'ImageDimensions',
            'ImageFormat',
            'ImageGeometryType',
            'ImageLocation',
            'ImageOrientation',
            'ImageOrientationPatient',
            'ImagePosition',
            'ImagePositionPatient',
            'ImageType',
            'ImagedNucleus',
            'ImagingFrequency',
            'InPlanePhaseEncodingDirection',
            'InStackPositionNumber',
            'InstanceNumber',
            'InversionTime',
            'Laterality',
            'LowRRValue',
            'MRAcquisitionType',
            'MagneticFieldStrength',
            'Modality',
            'NumberOfAverages',
            'NumberOfPhaseEncodingSteps',
            'PatientID',
            'PatientName',
            'PatientPosition',
            'PercentPhaseFieldOfView',
            'PercentSampling',
            'PhotometricInterpretation',
            'PixelBandwidth',
            'PixelPaddingValue',
            'PixelRepresentation',
            'PixelSpacing',
            'PlanarConfiguration',
            'PositionReferenceIndicator',
            'PresentationLUTShape',
            'ReconstructionDiameter',
            'RescaleIntercept',
            'RescaleSlope',
            'RescaleType',
            'Rows',
            'SAR',
            'SOPClassUID',
            'SOPInstanceUID',
            'SamplesPerPixel',
            'SeriesDescription',
            'SeriesInstanceUID',
            'SeriesNumber',
            'SliceLocation',
            'SliceThickness',
            'SpacingBetweenSlices',
            'SpatialResolution',
            'SpecificCharacterSet',
            'StudyInstanceUID',
            'TemporalResolution',
            'TransferSyntaxUID',
            'TriggerWindow',
            'WindowCenter',
            'WindowWidth'
        ]

        self.fm_fields = [
            'FileMetaInformationGroupLength',
            'FileMetaInformationVersion',
            'ImplementationClassUID',
            'ImplementationVersionName',
            'MediaStorageSOPClassUID',
            'MediaStorageSOPInstanceUID',
            'SourceApplicationEntityTitle',
            'TransferSyntaxUID',
        ]

        self.metadata = []

    def get_meta_info(self, dicom):
        """Return one flat dict of metadata values for a parsed DICOM dataset.

        Args:
            dicom: object exposing ``get(keyword)``, a ``file_meta`` with
                ``get(keyword)``, and the attributes read below.

        Returns:
            dict mapping every name in ``fields`` and ``fm_fields`` to the
            looked-up value (``None`` when ``get`` finds nothing), plus the
            keys ``is_original_encoding``, ``is_implicit_VR``,
            ``is_little_endian`` and ``timestamp``.
        """
        row = {f: dicom.get(f) for f in self.fields}
        row_fm = {f: dicom.file_meta.get(f) for f in self.fm_fields}
        row_other = {
            'is_original_encoding': dicom.is_original_encoding,
            'is_implicit_VR': dicom.is_implicit_VR,
            'is_little_endian': dicom.is_little_endian,
            'timestamp': dicom.timestamp,
        }
        # Later dicts win on duplicate keys, so 'TransferSyntaxUID' (present
        # in both lists) ends up holding the file-meta value.
        return {**row, **row_fm, **row_other}

    def get_dicom_files(self, input_dir, ds='train'):
        """Return the paths of all ``.dcm`` files below ``{input_dir}/{ds}``.

        Args:
            input_dir: directory that contains the dataset sub-directories.
            ds: name of the sub-directory to scan.

        Returns:
            list of file paths, in the order produced by ``os.walk``.
        """
        return [
            os.path.join(subdir, filename)
            for subdir, _dirs, files in os.walk(f"{input_dir}/{ds}")
            for filename in files
            if filename.endswith(".dcm")
        ]

    def process_dicom(self, dicom_src):
        """Read one DICOM file and return its metadata row.

        The last four path components are interpreted as
        ``<dataset>/<BraTS21ID>/<type>/<file name>``.

        Args:
            dicom_src: path of the ``.dcm`` file.

        Returns:
            dict with ``BraTS21ID``, ``dataset``, ``type``, ``dicom_src`` and
            everything produced by ``get_meta_info``.
        """
        # Only header elements are exported, so skip loading the pixel data.
        dicom = pydicom.dcmread(dicom_src, stop_before_pixels=True)

        # Paths are built with os.sep in get_dicom_files; normalise to "/" so
        # the component lookup also works where os.sep is not "/".
        file_data = dicom_src.replace(os.sep, "/").split("/")
        file_src = "/".join(file_data[-4:])

        tmp = {
            "BraTS21ID": file_data[-3],
            "dataset": file_data[-4],
            "type": file_data[-2],
            "dicom_src": f"./{file_src}",
        }
        tmp.update(self.get_meta_info(dicom))

        return tmp

    def update(self, res):
        """Append ``res`` to ``metadata`` unless it is ``None``.

        NOTE(review): not called by the other methods of this class;
        presumably a callback kept for a parallel runner - confirm before
        removing.
        """
        if res is not None:
            self.metadata.append(res)

    def error(self, e):
        """Print ``e``.

        NOTE(review): not called by the other methods of this class;
        presumably an error callback - confirm before removing.
        """
        print(e)

    def runner(self, input, output, dataset, debug=0):
        """Scan a dataset directory and store the result in ``self.df``.

        Args:
            input: directory that contains the dataset sub-directories.
            output: unused; kept so existing callers keep working.
            dataset: name of the sub-directory to scan (e.g. ``'train'``).
            debug: any value other than ``0`` limits processing to the first
                10 files found.
        """
        dicom_files = self.get_dicom_files(input, dataset)

        if debug != 0:
            dicom_files = dicom_files[:10]

        for dicom_file in tqdm(dicom_files):
            self.metadata.append(self.process_dicom(dicom_file))

        self.df = pd.DataFrame(self.metadata)

    def exec(self, dataset, debug=0):
        """Populate ``self.df`` for ``dataset`` from the cache or from disk.

        If ``./dicom_metadata.csv`` exists and contains rows whose
        ``dataset`` column equals ``dataset``, only those rows are loaded.
        Otherwise the DICOM files are scanned with ``runner``.

        Args:
            dataset: name of the split to load (e.g. ``'train'``, ``'test'``).
            debug: forwarded to ``runner``.
        """
        cache_path = './dicom_metadata.csv'

        if os.path.exists(cache_path):
            # Keep BraTS21ID as text so ids such as "00000" keep their
            # leading zeros, matching what a fresh scan produces.
            cached = pd.read_csv(cache_path, dtype={'BraTS21ID': str})
            if 'dataset' in cached.columns:
                # The cache may hold several splits; loading all of them for
                # every split would duplicate rows when splits are combined.
                subset = cached[cached['dataset'] == dataset]
                if not subset.empty:
                    self.df = subset.reset_index(drop=True)
                    return

        self.runner('../input/rsna-miccai-brain-tumor-radiogenomic-classification/', './', dataset, debug)

In [None]:
# Populate dicom_train.df by running exec for the 'train' dataset.
dicom_train = Dicom()
dicom_train.exec('train')

In [None]:
# Populate dicom_test.df by running exec for the 'test' dataset.
dicom_test = Dicom()
dicom_test.exec('test')

In [None]:
# Preview the first rows of the train metadata table.
dicom_train.df.head()

In [None]:
# Preview the first rows of the test metadata table.
dicom_test.df.head()

In [None]:
# Stack the train and test tables and write them to a single CSV file,
# leaving out the DataFrame index.
pd.concat((dicom_train.df, dicom_test.df)).to_csv(
    './dicom_metadata.csv',
    index=False,
)