# Enumerated Values and Defined Terms
DICOM Standard Part 3 describes the relationship between Attributes and Value Sets per Information Object Definition (IOD). The Value Sets can be in form of Context Identifier (CIDs), Enumerated Values, and Defined Terms. The CID elements are stored in Part 16, and other two are described in Part 3. This script extracts them from Part 3 file from this repo, which had been extracted from DICOM Standard XML files.

In [3]:
import pandas as pd
part3 = pd.read_pickle('./files/DICOM Standard/part3_mapping.pkl')

In [4]:
part3['attribute_description_str'] = part3['Attribute Description'].astype(str)

In [5]:
part3[part3['attribute_description_str']!='{}'].head(5)

Unnamed: 0,xml_id,iod,IE,Module,Reference,Usage,Usage_code,Reference_adjusted,Attribute Name,Tag,Type,Attribute Description,CID,SOP Class UID,attribute_description_str
2,table_A.2-1,Computed Radiography Image IOD Modules,Patient,Patient,sect_C.7.1.1,M,M,sect_C.7.1.1,Type of Patient ID,00100022,3,"{'Defined Terms': ['TEXT', 'RFID', 'BARCODE']}",,1.2.840.10008.5.1.4.1.1.1,"{'Defined Terms': ['TEXT', 'RFID', 'BARCODE']}"
7,table_A.2-1,Computed Radiography Image IOD Modules,Patient,Patient,sect_C.7.1.1,M,M,sect_C.7.1.1,Patient's Sex,00100040,2,"{'Enumerated Values': ['M', 'F', 'O']}",,1.2.840.10008.5.1.4.1.1.1,"{'Enumerated Values': ['M', 'F', 'O']}"
9,table_A.2-1,Computed Radiography Image IOD Modules,Patient,Patient,sect_C.7.1.1,M,M,sect_C.7.1.1,Quality Control Subject,00100200,3,"{'Enumerated Values': ['YES', 'NO']}",,1.2.840.10008.5.1.4.1.1.1,"{'Enumerated Values': ['YES', 'NO']}"
31,table_A.2-1,Computed Radiography Image IOD Modules,Patient,Patient,sect_C.7.1.1,M,M,sect_C.7.1.1,Patient Identity Removed,00120062,3,"{'Enumerated Values': ['YES', 'NO']}",,1.2.840.10008.5.1.4.1.1.1,"{'Enumerated Values': ['YES', 'NO']}"
79,table_A.2-1,Computed Radiography Image IOD Modules,Study,Patient Study,sect_C.7.2.2,U,U,sect_C.7.2.2,Smoking Status,001021A0,3,"{'Enumerated Values': ['YES', 'NO', 'UNKNOWN']}",,1.2.840.10008.5.1.4.1.1.1,"{'Enumerated Values': ['YES', 'NO', 'UNKNOWN']}"


In [6]:
defined_term = part3[part3['attribute_description_str'].str.contains('Defined')]['Attribute Description'].reset_index(drop=True)
enumerated_values = part3[part3['attribute_description_str'].str.contains('Enumerated')]['Attribute Description'].reset_index(drop=True)

In [7]:
enumerated_values.shape

(3931,)

In [8]:
defined_term.shape

(773,)

In [9]:
combined_defined_terms = []

for terms in defined_term:
    for key in terms:
        value = terms[key]
        combined_defined_terms.extend(value)

combined_defined_terms = list(set(combined_defined_terms))

In [10]:
len(combined_defined_terms) #773 to 345

345

In [11]:
combined_defined_terms

['RESEARCH',
 'SILICON_GEL',
 'CYLINDRICAL_RING',
 '30CMX40CM',
 'CG',
 'NAT',
 'GATING',
 'LLD',
 'IN',
 'FREE_PRECESSION',
 '2',
 'ENDOCAV_CLA',
 'PA',
 'IV_PHASED',
 '22',
 'CENTRIC',
 'VECTOR_PHASED',
 'VOLUME',
 'ISIS',
 'PLAN_OVERVIEW',
 'FAT',
 'CARBON DIOXIDE',
 'SIGMOID',
 '10',
 'LLO',
 'MP',
 'PANORAMIC',
 'CIRCULAR',
 'FILM',
 'LONGITUDINAL',
 'CUBIC',
 'OSP',
 'SIMULATOR',
 'SYN',
 '2D',
 'ILOPS',
 'SCAT',
 'FAT_AND_WATER',
 'GONADS',
 'MANUAL',
 'RR_INTERVAL',
 'INTERFEROMETRY',
 'TIME',
 'ROTA',
 '15',
 'CARM',
 '1H',
 'MLEV',
 'WOBBLE',
 'CURVED LINEAR',
 'SUB',
 'SUM',
 'OCT',
 'RFID',
 '8',
 'PMOT',
 'ERROR',
 'DYNAMIC',
 'GATED_PULLBACK',
 'PHASE_RESCANNING',
 'LINEAR_TILT',
 '19',
 'TRANSVERSE',
 'SLIT_BASED',
 'SPLIT XTAL CWD',
 'REFLECTION',
 'MONOPLANE',
 'FULL',
 'HEXAGONAL',
 'NCO',
 'BELT',
 'MANUAL_PULLBACK',
 'UNIF',
 'MOTOR_PULLBACK',
 'WEIGHTED',
 'CORRECTION',
 'GAUSSIAN',
 'SINGLE PHASE',
 'SS',
 'ROW',
 'OPTICAL',
 'CLAMSHELL',
 'PDR',
 '19F',
 'DV',
 '

In [12]:
combined_enumerated_values = []

for terms in enumerated_values:
    for key in terms:
        value = terms[key]
        combined_enumerated_values.extend(value)

combined_enumerated_values = list(set(combined_enumerated_values))

In [13]:
combined_enumerated_values

['RESEARCH',
 'VA',
 'SITE',
 '2',
 'YES',
 'SWIVEL',
 'VOLUME',
 'SCOORD',
 'OSS',
 'B',
 'UNALTERED',
 'VERIFIED',
 '10',
 'LOCAL',
 'USED_DISCARDED',
 'CTPROTOCOL',
 '90',
 'PR',
 'CIRCULAR',
 'LBMJANMA',
 '0000H',
 'INCONCLUSIVE',
 'IDENTITY',
 'NO',
 'PCN',
 'SLAB',
 'DEVICE',
 'SEG',
 'K',
 'S',
 'SOURCE',
 '2D',
 'MANUAL',
 'NS',
 'TIME',
 'WAVEFORM',
 '15',
 'OAM',
 'QUADRUPED',
 'SUB',
 'LBMJAMES128',
 'FEET_TO_HEAD',
 'DATETIME',
 'FID',
 '8',
 'PALETTE COLOR',
 'DYNAMIC',
 'PLANAR',
 'MINERAL_OIL',
 'EXTERNAL',
 '00181065H',
 'PORTRAIT',
 'P',
 'FINAL',
 '0002',
 'AO',
 'MINIMUM_IP',
 'FULL',
 'RELATIVE',
 'HEXAGONAL',
 'RTDOSE',
 'FLUORESCENCE',
 'M',
 'DIGITAL_DETECTOR',
 'NOT_ACCEPTABLE',
 'OP',
 'MONOCHROME2',
 'REGISTRATION',
 'L',
 'CROSSCURVE',
 'ROW',
 'COMPLETED',
 'IRIG',
 'SERVICE',
 'G',
 '32',
 'REAL',
 'MANUFACTURER',
 '0001',
 'PRODUCT',
 'SERIES',
 'LIN',
 'GPS',
 'MG',
 'MINUTE',
 'Y',
 'EYE',
 'VOLUME_RENDERED',
 'PARTIAL',
 'RETROSPECTIVE',
 'FRAME',
 'REC

In [14]:
len(combined_enumerated_values) #3931 to 337

337

In [15]:
combined_defined_terms = set(combined_defined_terms)
combined_enumerated_values = set(combined_enumerated_values)

In [16]:
overlap = combined_defined_terms.intersection(combined_enumerated_values)

In [17]:
len(overlap)

55

In [18]:
overlap

{'0',
 '1',
 '10',
 '11',
 '12',
 '13',
 '14',
 '15',
 '16',
 '2',
 '2D',
 '3',
 '3D',
 '3D_TEMPORAL',
 '6',
 '7',
 '8',
 '9',
 'AUTO',
 'BOTH',
 'BREAST',
 'CEPHALOSTAT',
 'CIRCULAR',
 'COLUMN',
 'CONTINUOUS',
 'DYNAMIC',
 'FIXED',
 'FREQUENCY',
 'FULL',
 'HEXAGONAL',
 'LIN',
 'MAMMOGRAPHIC',
 'MANUAL',
 'MR',
 'NONE',
 'NORMAL',
 'PLAN',
 'PULSED',
 'REALTIME',
 'RECTANGLE',
 'RESEARCH',
 'RETROSPECTIVE',
 'RIGID',
 'ROUND',
 'ROW',
 'SELECTIVE',
 'SERVICE',
 'STATIC',
 'STEP AND SHOOT',
 'SUB',
 'TEXT',
 'TIME',
 'US',
 'VOLUME',
 'WATER'}

## Are all Defined Terms and Enumerated Values valuable to add in the OMOP CDM? 

In [5]:
import pandas as pd
attributes = pd.read_csv("./files/DICOM Standard/part6_attributes.csv")
part3_att = part3.merge(attributes[['Tag_cleaned', 'VR']], how = 'inner', left_on = 'Tag', right_on = 'Tag_cleaned')

In [None]:
part3_att[(part3_att['VR']== "CS") & 
          (part3_att['IE']=="Patient") & 
          (part3_att['attribute_description_str']!="{}")][['Module','Attribute Name', 'Tag', 'attribute_description_str']].drop_duplicates()

Unnamed: 0,Module,Attribute Name,Tag,attribute_description_str
2,Patient,Type of Patient ID,100022,"{'Defined Terms': ['TEXT', 'RFID', 'BARCODE']}"
7,Patient,Patient's Sex,100040,"{'Enumerated Values': ['M', 'F', 'O']}"
9,Patient,Quality Control Subject,100200,"{'Enumerated Values': ['YES', 'NO']}"
31,Patient,Patient Identity Removed,120062,"{'Enumerated Values': ['YES', 'NO']}"


In [23]:
part3_att[(part3_att['VR']== "CS") & 
          (part3_att['IE']=="Study") & 
          (part3_att['Module']=="General Study") & 
          (part3_att['attribute_description_str']!="{}")][['Module', 'Attribute Name', 'Tag', 'attribute_description_str']].drop_duplicates()

Unnamed: 0,Module,Attribute Name,Tag,attribute_description_str


In [10]:
part3_att[(part3_att['VR']== "CS") & (part3_att['IE']=="Series") & (part3_att['attribute_description_str']!="{}")][['Module', 'Attribute Name', 'Tag', 'attribute_description_str']].drop_duplicates()

Unnamed: 0,Module,Attribute Name,Tag,attribute_description_str
103,General Series,Laterality,00200060,"{'Enumerated Values': ['R', 'L']}"
120,General Series,Anatomical Orientation Type,00102210,"{'Enumerated Values': ['BIPED', 'QUADRUPED']}"
123,CR Series,View Position,00185101,"{'Defined Terms': ['AP', 'PA', 'LL', 'RL', 'RL..."
4191,RT Series,Modality,00080060,"{'Enumerated Values': ['RTIMAGE', 'RTDOSE', 'R..."
5141,PET Series,SUV Type,00541006,"{'Enumerated Values': ['BSA', 'BW', 'LBM', 'LB..."
...,...,...,...,...
35073,CT Protocol Series,Modality,00080060,{'Enumerated Values': ['CTPROTOCOL']}
35430,XA Protocol Series,Modality,00080060,{'Enumerated Values': ['XAPROTOCOL']}
35729,Ophthalmic Tomography En Face Series,Modality,00080060,{'Enumerated Values': ['OPTENF']}
35988,Ophthalmic Tomography B-scan Volume Analysis S...,Modality,00080060,{'Enumerated Values': ['OPTBSV']}


In [13]:
part3_att[(part3_att['VR']== "CS") & (part3_att['IE']=="Series") & (part3_att['attribute_description_str']!="{}")]['Attribute Name'].unique()

array(['Laterality', 'Anatomical Orientation Type', 'View Position',
       'Modality', 'SUV Type', 'Counts Source', 'Series Type',
       'Reprojection Method', 'Corrected Image',
       'Randoms Correction Method', 'Decay Correction',
       'Acquisition Start Condition', 'Acquisition Termination Condition',
       'Field of View Shape', 'Type of Detector Motion',
       'Collimator Type', 'Secondary Counts Type',
       'Scan Progression Direction', 'Beat Rejection Flag',
       'Presentation Intent Type', 'Performed Protocol Type',
       'Body Part Examined'], dtype=object)

In [22]:
part3_att[(part3_att['VR']== "CS") & 
          (part3_att['IE']=="Series") & 
          (part3_att['Module']=="General Series") &
          (part3_att['attribute_description_str']!="{}") &
          ~(part3_att['Attribute Name'].isin(["Modality", 'Body Part Examined']))][['Module', 'Attribute Name', 'Tag', 'attribute_description_str']].drop_duplicates()

Unnamed: 0,Module,Attribute Name,Tag,attribute_description_str
103,General Series,Laterality,200060,"{'Enumerated Values': ['R', 'L']}"
120,General Series,Anatomical Orientation Type,102210,"{'Enumerated Values': ['BIPED', 'QUADRUPED']}"


In [11]:
part3_att[(part3_att['VR']== "CS") & (part3_att['IE']=="Image") & (part3_att['attribute_description_str']!="{}")][['Module', 'Attribute Name', 'Tag', 'attribute_description_str']].drop_duplicates()

Unnamed: 0,Module,Attribute Name,Tag,attribute_description_str
166,General Image,Quality Control Image,00280300,"{'Enumerated Values': ['YES', 'NO', 'BOTH']}"
167,General Image,Burned In Annotation,00280301,"{'Enumerated Values': ['YES', 'NO']}"
168,General Image,Recognizable Visual Features,00280302,"{'Enumerated Values': ['YES', 'NO']}"
169,General Image,Lossy Image Compression,00282110,"{'Enumerated Values': ['00', '01']}"
173,General Image,Presentation LUT Shape,20500020,"{'Enumerated Values': ['IDENTITY', 'INVERSE']}"
...,...,...,...,...
40997,Confocal Microscopy Image,Lossy Image Compression,00282110,"{'Enumerated Values': ['00', '01']}"
40998,Confocal Microscopy Image,Confocal Mode,00480114,"{'Enumerated Values': ['REFLECTANCE', 'FLUORES..."
40999,Confocal Microscopy Image,Tissue Location,00480115,"{'Enumerated Values': ['INVIVO', 'EXVIVO']}"
41002,Cutaneous Confocal Microscopy Image Acquisitio...,Field of View Shape,00181147,{'Defined Terms': ['RECTANGLE']}


In [17]:
part3_att[(part3_att['VR']== "CS") & 
          (part3_att['IE']=="Image") & 
          (part3_att['Module']=="General Image") &
          (part3_att['attribute_description_str']!="{}")]['Attribute Name'].unique()

array(['Quality Control Image', 'Burned In Annotation',
       'Recognizable Visual Features', 'Lossy Image Compression',
       'Presentation LUT Shape', 'Image Laterality'], dtype=object)

In [20]:
part3_att[(part3_att['VR']== "CS") & (part3_att['IE']=="Image") & (part3_att['Module']=="General Image") & (part3_att['attribute_description_str']!="{}")][['Module', 'Attribute Name', 'Tag', 'attribute_description_str']].drop_duplicates()

Unnamed: 0,Module,Attribute Name,Tag,attribute_description_str
166,General Image,Quality Control Image,280300,"{'Enumerated Values': ['YES', 'NO', 'BOTH']}"
167,General Image,Burned In Annotation,280301,"{'Enumerated Values': ['YES', 'NO']}"
168,General Image,Recognizable Visual Features,280302,"{'Enumerated Values': ['YES', 'NO']}"
169,General Image,Lossy Image Compression,282110,"{'Enumerated Values': ['00', '01']}"
173,General Image,Presentation LUT Shape,20500020,"{'Enumerated Values': ['IDENTITY', 'INVERSE']}"
175,General Image,Image Laterality,200062,"{'Enumerated Values': ['R', 'L', 'U', 'B']}"


In [12]:
part3_att[(part3_att['VR']== "CS") & (part3_att['IE']=="Acquisition") & (part3_att['attribute_description_str']!="{}")][['Module', 'Attribute Name', 'Tag', 'attribute_description_str']].drop_duplicates()

Unnamed: 0,Module,Attribute Name,Tag,attribute_description_str


In [32]:
body_part = pd.read_pickle('./files/DICOM Standard/part16_body_part_examined.pkl')

In [34]:
body_part.head(20)

Unnamed: 0,Coding Scheme Designator,Code Value,Code Meaning,Body Part Examined,SNOMED-RT ID (Retired),FMA Code Value,UMLS Concept UniqueID
0,SCT,818981001,Abdomen,ABDOMEN,,,
1,SCT,818982008,Abdomen and Pelvis,ABDOMENPELVIS,,,
2,SCT,7832008,Abdominal aorta,ABDOMINALAORTA,T-42500,,
3,SCT,85856004,Acromioclavicular joint,ACJOINT,T-15420,,
4,SCT,23451007,Adrenal gland,ADRENAL,T-B3000,,
5,SCT,77012006,Amniotic fluid,AMNIOTICFLUID,T-F1320,,
6,SCT,70258002,Ankle joint,ANKLE,T-15750,,
7,SCT,128585006,Anomalous pulmonary vein,,T-48503,,
8,SCT,128553008,Antecubital vein,ANTECUBITALV,T-49215,,
9,SCT,194996006,Anterior cardiac vein,ANTCARDIACV,T-48403,,


In [20]:
# Looking through General Series Attributes (5)
part3_att[(part3_att['VR']=="CS") & (part3_att['Tag'].isin(['00200060','00185100', '00102210']))]
# Modality: C.7.3.1.1.1 for Defined Terms (https://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_C.7.3.html#sect_C.7.3.1.1.1)
# Body Part Examined: Annex L Correspondence of Anatomic Region Codes and Body Part Examined Defined Terms (https://dicom.nema.org/medical/dicom/current/output/chtml/part16/chapter_l.html)
# Laterality: R = Right, L = Left
# Patient Position: Section C.7.3.1.1.2 (https://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_C.7.3.html#sect_C.7.3.1.1.2)
# Anatomic Orientation Type: BIPED, QUADRUPED

Unnamed: 0,xml_id,iod,IE,Module,Reference,Usage,Usage_code,Reference_adjusted,Attribute Name,Tag,Type,Attribute Description,CID,SOP Class UID,attribute_description_str,Tag_cleaned,VR
103,table_A.2-1,Computed Radiography Image IOD Modules,Series,General Series,sect_C.7.3.1,M,M,sect_C.7.3.1,Laterality,00200060,2C,"{'Enumerated Values': ['R', 'L']}",,1.2.840.10008.5.1.4.1.1.1,"{'Enumerated Values': ['R', 'L']}",00200060,CS
116,table_A.2-1,Computed Radiography Image IOD Modules,Series,General Series,sect_C.7.3.1,M,M,sect_C.7.3.1,Patient Position,00185100,2C,{},,1.2.840.10008.5.1.4.1.1.1,{},00185100,CS
120,table_A.2-1,Computed Radiography Image IOD Modules,Series,General Series,sect_C.7.3.1,M,M,sect_C.7.3.1,Anatomical Orientation Type,00102210,1C,"{'Enumerated Values': ['BIPED', 'QUADRUPED']}",,1.2.840.10008.5.1.4.1.1.1,"{'Enumerated Values': ['BIPED', 'QUADRUPED']}",00102210,CS
391,table_A.3-1,CT Image IOD Modules,Series,General Series,sect_C.7.3.1,M,M,sect_C.7.3.1,Laterality,00200060,2C,"{'Enumerated Values': ['R', 'L']}",,1.2.840.10008.5.1.4.1.1.2,"{'Enumerated Values': ['R', 'L']}",00200060,CS
404,table_A.3-1,CT Image IOD Modules,Series,General Series,sect_C.7.3.1,M,M,sect_C.7.3.1,Patient Position,00185100,2C,{},,1.2.840.10008.5.1.4.1.1.2,{},00185100,CS
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40885,table_A.90.1.3-1,Confocal Microscopy Image IOD Modules,Series,General Series,sect_C.7.3.1,M,M,sect_C.7.3.1,Patient Position,00185100,2C,{},,1.2.840.10008.5.1.4.1.1.77.1.8,{},00185100,CS
40889,table_A.90.1.3-1,Confocal Microscopy Image IOD Modules,Series,General Series,sect_C.7.3.1,M,M,sect_C.7.3.1,Anatomical Orientation Type,00102210,1C,"{'Enumerated Values': ['BIPED', 'QUADRUPED']}",,1.2.840.10008.5.1.4.1.1.77.1.8,"{'Enumerated Values': ['BIPED', 'QUADRUPED']}",00102210,CS
41143,table_A.90.2.3-1,Confocal Microscopy Tiled Pyramidal Image IOD ...,Series,General Series,sect_C.7.3.1,M,M,sect_C.7.3.1,Laterality,00200060,2C,"{'Enumerated Values': ['R', 'L']}",,1.2.840.10008.5.1.4.1.1.77.1.9,"{'Enumerated Values': ['R', 'L']}",00200060,CS
41156,table_A.90.2.3-1,Confocal Microscopy Tiled Pyramidal Image IOD ...,Series,General Series,sect_C.7.3.1,M,M,sect_C.7.3.1,Patient Position,00185100,2C,{},,1.2.840.10008.5.1.4.1.1.77.1.9,{},00185100,CS


In [21]:
# Looking through General Image Attributes (9)
part3_att[(part3_att['VR']=="CS") & (part3_att['Module'] == "General Image" )][['Attribute Name', 'attribute_description_str']].drop_duplicates()
# Patient Orientation: C.7.6.1.1.1 (https://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_C.7.6.html#sect_C.7.6.1.1.1)
    # different values are allowed based on Anatomic Orientation Type (0010,2210) BIPED > A (anterior), P (posterior), etc.; QUADRUPED > LE (Left), RT (Right), D (Dorsal), etc.
# Image Type: C.7.6.1.1.2 (multi values: value 1= ORIGINAL, DERIVED; value 2 = PRIMARY, SECONDARY)
# Quality Control Image: YES, NO, BOTH
# Burned In Annotation: YES, NO
# Recognizable Visual Features: YES, NO
# Lossy Image Compression: 00 (image has not been subjected to lossy compression ~ NO), 01 (image has been subjected to lossy compression ~ YES)
# Lossy Image Compression Method: C.7.6.1.1.5.1 (https://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_C.7.6.html#sect_C.7.6.1.1.5.1)
# Presentation LUT Shape: 'IDENTITY', 'INVERSE'
# Image Laterality: 'R' Right, 'L' Left, 'U' Unparied, 'B' Both left and right

Unnamed: 0,Attribute Name,attribute_description_str
161,Patient Orientation,{}
164,Image Type,{}
166,Quality Control Image,"{'Enumerated Values': ['YES', 'NO', 'BOTH']}"
167,Burned In Annotation,"{'Enumerated Values': ['YES', 'NO']}"
168,Recognizable Visual Features,"{'Enumerated Values': ['YES', 'NO']}"
169,Lossy Image Compression,"{'Enumerated Values': ['00', '01']}"
171,Lossy Image Compression Method,{}
173,Presentation LUT Shape,"{'Enumerated Values': ['IDENTITY', 'INVERSE']}"
175,Image Laterality,"{'Enumerated Values': ['R', 'L', 'U', 'B']}"


# Harvest select Defined Terms
## Modality

In [51]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# URL of the DICOM section
url = 'https://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_C.7.3.html#sect_C.7.3.1.1.1'

# Fetch the page content
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')
soup

# Find the section that contains the list of codes and descriptions
# The structure is a <dl> element containing <dt> (codes) and <dd> (descriptions)
dl_section = soup.find('dl', class_='variablelist compact')

# Locate the section by finding the <a> tag with the correct id (sect_C.7.3.1.1.1)
section_anchor = soup.find('a', {'id': 'sect_C.7.3.1.1.1'})

if section_anchor:
    # Find the next <dl> (description list) tag that contains the codes and descriptions
    dl_section = section_anchor.find_next('dl', class_='variablelist compact')

    if dl_section:
        # Create lists to store codes and their descriptions
        codes = []
        descriptions = []

        # Loop through each <dt> (codes) and <dd> (descriptions)
        for dt in dl_section.find_all('dt'):
            # Extract the code text from the <span class="term"> within the <dt> tag
            code = dt.find('span', class_='term').get_text(strip=True)

            # Extract the description text from the <p> tag within the corresponding <dd> tag
            dd = dt.find_next_sibling('dd')
            description = dd.find('p').get_text(strip=True)

            # Append the code and description to the lists
            codes.append(code)
            descriptions.append(description)

        # Create a DataFrame with the extracted codes and descriptions
        modality_defined_terms = pd.DataFrame({
            'code': codes,
            'description': descriptions
        })
    else:
        print("Description list (dl) not found after the section anchor.")
else:
    print("Section anchor with id 'sect_C.7.3.1.1.1' not found.")


In [52]:
modality_defined_terms

Unnamed: 0,code,description
0,ANN,Annotation
1,AR,Autorefraction
2,ASMT,Content Assessment Results
3,AU,Audio
4,BDUS,Bone Densitometry (ultrasound)
...,...,...
74,US,Ultrasound
75,VA,Visual Acuity
76,XA,X-Ray Angiography
77,XAPROTOCOL,XA Protocol (Performed)


In [57]:
modality_defined_terms.to_csv('./files/DICOM Standard/part3_modality.csv', index=False)

## Annex L Body Part Examined

In [38]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# URL of the DICOM section
url = 'https://dicom.nema.org/medical/dicom/current/output/chtml/part16/chapter_l.html'

# Fetch the page content
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')
soup

# Locate the specific table by finding the <a> tag with the id="table_L-1"
table_anchor = soup.find('a', {'id': 'table_L-1'})

if table_anchor:
    # Find the table contents after the anchor
    table = table_anchor.find_next('table')

    # Extract table headers
    headers = []
    for th in table.find_all('th'):
        header_text = th.get_text(strip=True)
        headers.append(header_text)

    # Extract table rows
    rows = []
    for tr in table.find_all('tr')[1:]:  # Skip the header row
        row = []
        for td in tr.find_all('td'):
            cell_text = td.get_text(strip=True)
            row.append(cell_text)
        rows.append(row)

    # Create a DataFrame with the extracted headers and rows
    body_part_examined_defined_terms = pd.DataFrame(rows, columns=headers)

else:
    print("Table L-1 not found")

In [39]:
body_part_examined_defined_terms

Unnamed: 0,Coding Scheme Designator,Code Value,Code Meaning,Body Part Examined,SNOMED-RT ID (Retired),FMA Code Value,UMLS Concept UniqueID
0,SCT,818981001,Abdomen,ABDOMEN,,,
1,SCT,818982008,Abdomen and Pelvis,ABDOMENPELVIS,,,
2,SCT,7832008,Abdominal aorta,ABDOMINALAORTA,T-42500,,
3,SCT,85856004,Acromioclavicular joint,ACJOINT,T-15420,,
4,SCT,23451007,Adrenal gland,ADRENAL,T-B3000,,
...,...,...,...,...,...,...,...
393,SCT,85234005,Vertebral artery,VERTEBRALA,T-45700,,
394,SCT,110517009,Vertebral column and cranium,,T-11011,,
395,SCT,45292006,Vulva,VULVA,T-81000,,
396,SCT,74670003,Wrist joint,WRIST,T-15460,,


In [65]:
body_part_examined_defined_terms.to_pickle('./files/DICOM Standard/part16_body_part_examined.pkl')

## Patient Position

In [53]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# URL of the DICOM section
url = 'https://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_C.7.3.html#sect_C.7.3.1.1.2'

# Fetch the page content
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

# Locate the section by finding the <a> tag with the correct id (sect_C.7.3.1.1.2)
section_anchor = soup.find('a', {'id': 'sect_C.7.3.1.1.2'})

if section_anchor:
    # Find the next <dl> (description list) tag that contains the codes and descriptions
    dl_section = section_anchor.find_next('dl', class_='variablelist compact')

    if dl_section:
        # Create lists to store codes and their descriptions
        codes = []
        descriptions = []

        # Loop through each <dt> (codes) and <dd> (descriptions)
        for dt in dl_section.find_all('dt'):
            # Extract the code text from the <span class="term"> within the <dt> tag
            code = dt.find('span', class_='term').get_text(strip=True)

            # Extract the description text from the <p> tag within the corresponding <dd> tag
            dd = dt.find_next_sibling('dd')
            description = dd.find('p').get_text(strip=True)

            # Append the code and description to the lists
            codes.append(code)
            descriptions.append(description)

        # Create a DataFrame with the extracted codes and descriptions
        patient_position_defined_terms = pd.DataFrame({
            'code': codes,
            'description': descriptions
        })

    else:
        print("Description list (dl) not found after the section anchor.")
else:
    print("Section anchor with id 'sect_C.7.3.1.1.2' not found.")

In [54]:
patient_position_defined_terms

Unnamed: 0,code,description
0,HFP,Head First-Prone
1,HFS,Head First-Supine
2,HFDR,Head First-Decubitus Right
3,HFDL,Head First-Decubitus Left
4,FFDR,Feet First-Decubitus Right
5,FFDL,Feet First-Decubitus Left
6,FFP,Feet First-Prone
7,FFS,Feet First-Supine
8,LFP,Left First-Prone
9,LFS,Left First-Supine


In [62]:
patient_position_defined_terms.to_csv('./files/DICOM Standard/part3_patient_position.csv', index=False)

## Lossy Image Compression Method

In [55]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# URL of the DICOM section
url = 'https://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_C.7.6.html#sect_C.7.6.1.1.5.1'

# Fetch the page content
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

# Locate the section by finding the <a> tag with the correct id (sect_C.7.6.1.1.5.1)
section_anchor = soup.find('a', {'id': 'sect_C.7.6.1.1.5.1'})

if section_anchor:
    # Find the next <dl> (description list) tag that contains the codes and descriptions
    dl_section = section_anchor.find_next('dl', class_='variablelist compact')

    if dl_section:
        # Create lists to store codes and their descriptions
        codes = []
        descriptions = []

        # Loop through each <dt> (codes) and <dd> (descriptions)
        for dt in dl_section.find_all('dt'):
            # Extract the code text from the <span class="term"> within the <dt> tag
            code = dt.find('span', class_='term').get_text(strip=True)

            # Extract the description text from the <p> tag within the corresponding <dd> tag
            dd = dt.find_next_sibling('dd')
            description = dd.find('p').get_text(strip=True)

            # Append the code and description to the lists
            codes.append(code)
            descriptions.append(description)

        # Create a DataFrame with the extracted codes and descriptions
        lossy_image_comp_method_defined_terms = pd.DataFrame({
            'code': codes,
            'description': descriptions
        })

    else:
        print("Description list (dl) not found after the section anchor.")
else:
    print("Section anchor with id 'sect_C.7.6.1.1.5.1' not found.")

In [56]:
lossy_image_comp_method_defined_terms

Unnamed: 0,code,description
0,ISO_10918_1,JPEG Lossy Compression[ISO/IEC 10918-1]
1,ISO_14495_1,JPEG-LS Near-lossless Compression[ISO/IEC 1449...
2,ISO_15444_1,JPEG 2000 Irreversible Compression[ISO/IEC 154...
3,ISO_15444_15,High-Throughput JPEG 2000 Irreversible Compres...
4,ISO_18181_1,JPEG XL Image Coding System - Part 1 Core Codi...
5,ISO_13818_2,MPEG2 Compression[ISO/IEC 13818-2]
6,ISO_14496_10,MPEG-4 AVC/H.264 Compression[ISO/IEC 14496-10]
7,ISO_23008_2,HEVC/H.265 Lossy Compression[ISO/IEC 23008-2]


In [61]:
lossy_image_comp_method_defined_terms.to_csv('./files/DICOM Standard/part3_lossy_image_comp_methods.csv', index=False)