# Basic functions to read and parse DICOM metadata (and images)

Goal: Use `pydicom` to extract all metadata from a DICOM file without assuming which elements the file contains.

Result: Here is a starting point for working with DICOM metadata. I have tested it on only one MRI image, so it should also be tested on other files.

In [60]:
import pydicom as dcm
import pandas as pd


def dicom_dict(dcm_file, verbose=False):
    """Flatten the elements of a DICOM dataset into a plain dictionary.

    Parameters
    ----------
    dcm_file : iterable of data elements
        Anything that yields objects exposing ``keyword``, ``tag``,
        ``value``, ``is_empty`` and ``is_undefined_length`` (for example
        the dataset returned by ``pydicom.dcmread``).
    verbose : bool, optional
        When True, print each key followed by a short description of its
        value (empty, type only, or type plus string length).

    Returns
    -------
    dict
        Maps each element's keyword to its value. Elements whose keyword
        is empty are stored under ``str(element.tag)`` instead.
    """
    d = {}
    for element in dcm_file:
        # pydicom reports an empty keyword for private/unknown elements.
        # Keying all of them on "" would make each one overwrite the
        # previous, so fall back to the (unique) tag for those elements.
        _k = element.keyword or str(element.tag)
        _v = element.value
        if verbose:
            print(_k)
            if element.is_empty:
                print("    Found empty value")
            elif element.is_undefined_length:
                # No meaningful length to report for undefined-length values.
                print(f"   Value: {type(_v)}")
            else:
                print(f"   Value: {type(_v)}, Length: {len(str(_v))}")
            print()
        d[_k] = _v
    return d


def describe_dicom_dict(dicom_dict):
    """Print one line per entry: the value's type, then the key.

    The type (as rendered by ``str(type(value))``) is right-aligned in a
    40-character column; the key is left-aligned in a 15-character column.
    Nothing is returned.
    """
    for keyword, value in dicom_dict.items():
        type_label = str(type(value))
        line = f"{type_label:>40s}  {keyword:15s} "
        print(line)


def sort_elements_by_type(dicom_dict):
    """Group the entries of ``dicom_dict`` by the type of their value.

    Returns a dictionary keyed by the type's name as it appears between
    the quotes of ``str(type(value))`` (e.g. ``'str'``); each value is a
    dictionary holding the original key/value pairs of that type.
    """
    grouped = {}
    for keyword, value in dicom_dict.items():
        # "<class 'pkg.mod.Name'>" -> "pkg.mod.Name"
        type_name = str(type(value)).split("'")[1]
        grouped.setdefault(type_name, {})[keyword] = value
    return grouped


def describe_elements_by_type(dicom_dict):
    """Print how many entries of ``dicom_dict`` fall under each value type.

    Entries are grouped with ``sort_elements_by_type``; after an
    "Elements" header, one line per type shows the count and type name.
    """
    grouped = sort_elements_by_type(dicom_dict)
    print("Elements")
    for type_name, members in grouped.items():
        count = len(members)
        print(f" - {count:3d} {type_name}")


def summarize_elements_by_type(dicom_dict):
    """Return a DataFrame counting the entries of each value type.

    Entries are grouped with ``sort_elements_by_type``. The result has the
    columns ``Count`` and ``Type``, is sorted by ``Count`` in descending
    order, and carries a fresh 0-based index.
    """
    grouped = sort_elements_by_type(dicom_dict)
    rows = [[len(members), type_name] for type_name, members in grouped.items()]
    summary = pd.DataFrame(rows, columns=['Count', 'Type'])
    ranked = summary.sort_values('Count', ascending=False)
    return ranked.reset_index(drop=True)


# Load one sample file, list every element with the type of its value, and
# build a per-type count table.
# `dcmread` is pydicom's reader; `read_file` is a legacy alias for it that
# newer pydicom releases no longer provide.
dcm_file = dcm.dcmread('PancreaticTumor/EnIm1.dcm')
element_dict = dicom_dict(dcm_file)
describe_dicom_dict(element_dict)
element_by_type = sort_elements_by_type(element_dict)
# describe_elements_by_type(element_dict)
# Summarise the dictionary built above; the name `d` that was passed here
# before is not defined anywhere in this cell.
df = summarize_elements_by_type(element_dict)
df

   <class 'pydicom.multival.MultiValue'>  ImageType       
                           <class 'str'>  InstanceCreationDate 
                           <class 'str'>  InstanceCreationTime 
               <class 'pydicom.uid.UID'>  SOPClassUID     
               <class 'pydicom.uid.UID'>  SOPInstanceUID  
                           <class 'str'>  StudyDate       
                           <class 'str'>  SeriesDate      
                           <class 'str'>  ContentDate     
                           <class 'str'>  AcquisitionDateTime 
                           <class 'str'>  StudyTime       
                           <class 'str'>  SeriesTime      
                           <class 'str'>  ContentTime     
                           <class 'str'>  AccessionNumber 
                           <class 'str'>  Modality        
                           <class 'str'>  Manufacturer    
                           <class 'str'>  InstitutionName 
   <class 'pydicom.valuerep.PersonName'>  

Unnamed: 0,Count,Type
0,51,str
1,10,int
2,8,pydicom.sequence.Sequence
3,5,pydicom.uid.UID
4,3,pydicom.valuerep.PersonName
5,3,pydicom.valuerep.IS
6,2,pydicom.multival.MultiValue
7,2,pydicom.valuerep.DSfloat
8,1,float
9,1,bytes
