In [1]:
# importing element tree
import xml.etree.ElementTree as ET 

# Parse the XML document. The path is relative, so 'desc2022.xml' must be in
# the notebook's working directory (presumably the 2022 MeSH descriptor export
# -- confirm the data source).
tree = ET.parse('desc2022.xml') 

# Root element of the parsed document; the cells below navigate from `root`.
root = tree.getroot() 

 

## Getting to Know the XML structure

In [2]:
## tag name of the document's root element
root.tag

'DescriptorRecordSet'

In [3]:
## attributes carried by the root element, as a dict
root.attrib

{'LanguageCode': 'eng'}

In [4]:
## Inspect the first record of the file to learn how a record is laid out.
child = root[0]
print('\n')
print(child.tag, child.attrib)
print('\n')


## List the record's direct sub-elements (tag and attributes), in document order.
for subchild in child:
    print(subchild.tag, subchild.attrib)

## Look the two elements up by tag rather than by position (previously indices
## 1 and 11): the intent is readable, and the lookup does not silently pick the
## wrong element if a record has a different set of sub-elements.
## The text sits one level down, in the first child of each element.
descriptor_name = child.find('DescriptorName')[0].text
first_tree_number = child.find('TreeNumberList')[0].text

print('\n')
print(f'DescriptorName:  {descriptor_name}')
print(f'First item of TreeNumberList:  {first_tree_number}')




DescriptorRecord {'DescriptorClass': '1'}


DescriptorUI {}
DescriptorName {}
DateCreated {}
DateRevised {}
DateEstablished {}
AllowableQualifiersList {}
HistoryNote {}
OnlineNote {}
PublicMeSHNote {}
PreviousIndexingList {}
PharmacologicalActionList {}
TreeNumberList {}
ConceptList {}


DescriptorName:  Calcimycin
First item of TreeNumberList:  D03.633.100.221.173


## Finding the DescriptorName using the DescriptorUI

In [5]:
def find_name_with_ui(ui, records=None):
    """Return the DescriptorName of the record whose DescriptorUI equals ``ui``.

    Parameters
    ----------
    ui : str
        Descriptor unique identifier to look up, e.g. 'D007154'.
    records : iterable of xml.etree.ElementTree.Element, optional
        Element (or any iterable) whose items are the records to search.
        Defaults to the notebook-level ``root``.

    Returns
    -------
    str
        Text of the String element nested in the record's DescriptorName
        (None if a matching record has no such element), or the message
        'No such DescriptorUI in XML file.' when no record matches.
    """
    if records is None:
        records = root  ## fall back to the document parsed at the top of the notebook
    for record in records:
        ## Elements are looked up by tag, not by position, so a record with an
        ## unexpected layout is skipped instead of raising IndexError.
        if record.findtext('DescriptorUI') == ui:
            ## The name text lives in the String element nested in DescriptorName.
            return record.findtext('DescriptorName/String')
    return 'No such DescriptorUI in XML file.'
        
        

In [6]:
## look up the heading that belongs to a known UI
print(find_name_with_ui('D007154'))

Immune System Diseases


In [46]:
print(find_name_with_ui('D007184')) ## another valid UI
print(find_name_with_ui('D007888')) ## another valid UI
print(find_name_with_ui('D9999999999')) ## unknown UI: the "no such DescriptorUI" message is returned

Incontinentia Pigmenti
Leigh Disease
No such DescriptorUI in XML file.


## Finding the DescriptorUI using the DescriptorName

In [8]:
def find_ui_with_name(name, records=None):
    """Return the DescriptorUI of the record whose DescriptorName equals ``name``.

    Parameters
    ----------
    name : str
        Descriptor heading to look up, e.g. 'Nervous System Diseases'.
        The comparison is exact (case-sensitive, no trimming).
    records : iterable of xml.etree.ElementTree.Element, optional
        Element (or any iterable) whose items are the records to search.
        Defaults to the notebook-level ``root``.

    Returns
    -------
    str
        Text of the record's DescriptorUI element (None if a matching record
        has no such element), or the message
        'No such DescriptorName in XML file.' when no record matches.
    """
    if records is None:
        records = root  ## fall back to the document parsed at the top of the notebook
    for record in records:
        ## The name text lives in the String element nested in DescriptorName.
        ## Looking it up by tag means a record without a name is skipped
        ## instead of raising IndexError.
        if record.findtext('DescriptorName/String') == name:
            return record.findtext('DescriptorUI')
    return 'No such DescriptorName in XML file.'

In [9]:
## look up the UI that belongs to a known heading
print(find_ui_with_name('Nervous System Diseases'))

D009422


In [10]:
print(find_ui_with_name('Incontinentia Pigmenti')) ## another valid name
print(find_ui_with_name('Abdomen')) ## another valid name
print(find_ui_with_name('Pop tarts addiction')) ## bogus name: the "no such DescriptorName" message is returned

D007184
D000005
No such DescriptorName in XML file.


## Common parents

In [47]:
def get_treeNumber_with_name_or_ui(heading_or_ui, records=None):
    """Return the first tree number of a descriptor given its name or its UI.

    Only the first entry of the record's TreeNumberList is returned; this is a
    deliberate simplification, so a descriptor filed under several tree
    positions is represented by the first one only.

    Parameters
    ----------
    heading_or_ui : str
        Either a DescriptorName (e.g. 'Abdomen') or a DescriptorUI
        (e.g. 'D007154').
    records : iterable of xml.etree.ElementTree.Element, optional
        Element (or any iterable) whose items are the records to search.
        Defaults to the notebook-level ``root``.

    Returns
    -------
    str
        Text of the first tree number of the matching record, or 'not found'
        when no record matches or the match has no tree number.
    """
    if records is None:
        records = root  ## fall back to the document parsed at the top of the notebook
    for record in records:
        ## Compare the input against both the UI and the name of each record.
        ## This avoids guessing the input kind from its spelling (a heading that
        ## merely contains 'D0' would be mistaken for a UI) and resolves a
        ## heading in one pass over the file instead of two.
        is_match = (
            record.findtext('DescriptorUI') == heading_or_ui
            or record.findtext('DescriptorName/String') == heading_or_ui
        )
        if not is_match:
            continue
        tree_number_list = record.find('TreeNumberList')
        ## A record may lack tree numbers; report 'not found' rather than crash.
        if tree_number_list is not None and len(tree_number_list) > 0:
            return tree_number_list[0].text
    return 'not found'


In [48]:
## the lookup accepts a heading or a UI interchangeably
print(get_treeNumber_with_name_or_ui('Nervous System Diseases'))
print(get_treeNumber_with_name_or_ui('D007154'))
print(get_treeNumber_with_name_or_ui('Abdomen'))



C10
C20
A01.923.047


In [53]:
def common_parents(parent1, parent2):
    """Return the names of the descriptors that descend from both parents.

    A descriptor qualifies when its TreeNumberList holds at least one tree
    number below ``parent1`` and at least one below ``parent2``. "Below" means
    the tree number starts with the parent's tree number followed by a dot, so
    the parents themselves are never part of the result.

    Each parent is represented by the single tree number returned by
    ``get_treeNumber_with_name_or_ui``; descendants reachable only through
    another tree position of a parent are therefore not reported.

    Parameters
    ----------
    parent1, parent2 : str
        A DescriptorName or a DescriptorUI each; the two may be mixed.

    Returns
    -------
    set of str
        DescriptorName of every qualifying record. The set is empty when
        nothing qualifies or when either parent cannot be resolved.
    """
    tree1 = get_treeNumber_with_name_or_ui(parent1)
    tree2 = get_treeNumber_with_name_or_ui(parent2)

    ## An unresolved parent cannot have descendants: stop here instead of
    ## scanning the whole file for the meaningless prefix 'not found.'.
    if 'not found' in (tree1, tree2):
        return set()

    ## The trailing dot restricts matches to strict descendants and keeps
    ## 'C10' from matching a sibling branch such as 'C100'.
    prefix1 = tree1 + '.'
    prefix2 = tree2 + '.'

    result = set()  ## a set, so a descriptor is reported once however many of its tree numbers match
    for record in root:
        for tree_number_list in record.iter('TreeNumberList'):
            ## `or ''` guards against an element that carries no text
            tree_numbers = [(node.text or '') for node in tree_number_list]
            ## A parent's tree number is a leading path, so test the prefix
            ## rather than searching for it anywhere in the string.
            below_first = any(number.startswith(prefix1) for number in tree_numbers)
            below_second = any(number.startswith(prefix2) for number in tree_numbers)
            if below_first and below_second:
                result.add(record.findtext('DescriptorName/String'))
    return result

In [54]:
## the two parents may be given as a heading and a UI; D007154 is 'Immune System Diseases'
print(common_parents('Nervous System Diseases', 'D007154'))

{'Polyradiculoneuropathy', 'Mevalonate Kinase Deficiency', 'Autoimmune Diseases of the Nervous System', 'Encephalomyelitis, Acute Disseminated', 'Multiple Sclerosis', 'Stiff-Person Syndrome', 'Diffuse Cerebral Sclerosis of Schilder', 'Miller Fisher Syndrome', 'Polyradiculoneuropathy, Chronic Inflammatory Demyelinating', 'Multiple Sclerosis, Relapsing-Remitting', 'Multiple Sclerosis, Chronic Progressive', 'Microscopic Polyangiitis', 'Anti-N-Methyl-D-Aspartate Receptor Encephalitis', 'AIDS Dementia Complex', 'Leukoencephalitis, Acute Hemorrhagic', 'Ataxia Telangiectasia', 'Myasthenia Gravis', 'Neuromyelitis Optica', 'Lambert-Eaton Myasthenic Syndrome', 'Myasthenia Gravis, Autoimmune, Experimental', 'Myelitis, Transverse', 'Guillain-Barre Syndrome', 'Vasculitis, Central Nervous System', 'Lupus Vasculitis, Central Nervous System', 'Demyelinating Autoimmune Diseases, CNS', 'Autoimmune Hypophysitis', 'Myasthenia Gravis, Neonatal', 'Neuritis, Autoimmune, Experimental', 'Nervous System Autoimm

The result above lists all the medical conditions that are both Nervous System Diseases and Immune System Diseases: in short, immune system diseases of the nervous system. For instance, Guillain-Barre Syndrome (GBS) "is a rare neurological disorder in which the body's immune system mistakenly attacks part of its peripheral nervous system" (NINDS definition).

In [55]:
## Testing other input: a UI as the first parent and a heading as the second
print(common_parents('D001187', 'Prostheses and Implants')) 
## Which artificial organs (D001187) are also prostheses or implants?

{'Heart-Assist Devices', 'Heart, Artificial', 'Urinary Sphincter, Artificial', 'Artificial Limbs', 'Larynx, Artificial'}
