### Exploring SNOMED Hierarchies

In [68]:
import pandas as pd

In [69]:
# Terms already mapped to concept codes ('terms' sheet); both code columns are read as strings.
mapped_terms = pd.read_excel(
    'terms_mapped_to_codes.xlsx',
    sheet_name='terms',
    dtype={'concept_code': str, 'concept_code_2': str},
)

# Tab-separated vocabulary tables from the ohdsi_athena_vocab directory.
# The listed code/identifier columns are forced to str.
concept_df = pd.read_csv(
    'ohdsi_athena_vocab/CONCEPT.csv',
    sep='\t',
    dtype={'concept_code': str, 'concept_id': str},
    low_memory=False,
)
concept_relationship_df = pd.read_csv(
    'ohdsi_athena_vocab/CONCEPT_RELATIONSHIP.csv',
    sep='\t',
    dtype={'concept_code': str},
    low_memory=False,
)
concept_ancestor_df = pd.read_csv(
    'ohdsi_athena_vocab/CONCEPT_ANCESTOR.csv',
    sep='\t',
    dtype={'ancestor_concept_id': str, 'descendant_concept_id': str},
    low_memory=False,
)

In [70]:
# Attach the CONCEPT.csv columns to every mapped term, matching on concept_code.
# how='left' keeps each mapped term even when no concept row matches.
# NOTE(review): concept_code may not be unique across vocabularies in CONCEPT.csv;
# confirm whether this join should also be restricted by vocabulary.
mapped_terms_merge = pd.merge(mapped_terms, concept_df, how='left', on='concept_code')

In [82]:
# Concept ids of the mapped terms. mapped_terms_merge comes from a left merge,
# so a term whose code matched no concept row has concept_id = NaN. Drop those
# so NaN is neither counted as a term nor reported as a root below.
my_terms = set(mapped_terms_merge['concept_id'].dropna())

# Find those terms that are related (are ancestor and descendant):
# keep only rows where both ends of the pair are mapped terms.
both_ends_mapped = (
    concept_ancestor_df['ancestor_concept_id'].isin(my_terms)
    & concept_ancestor_df['descendant_concept_id'].isin(my_terms)
)

# Delete self references (rows with zero levels of separation)
has_separation = concept_ancestor_df['max_levels_of_separation'] > 0

filtered_ancestors = concept_ancestor_df[both_ends_mapped & has_separation]

# A root is a mapped term that is not the descendant of any other mapped term.
descendants_terms = set(filtered_ancestors['descendant_concept_id'])
root_terms = my_terms - descendants_terms

In [93]:
# Hierarchy keyed by root term; every root starts with an empty descendants list
hierarchy = {root: {'descendants': []} for root in root_terms}

# Recursive function to build the hierarchy
def build_hierarchy(term, level=1, ancestors=None):
    """Return the nested tree of descendants of ``term``.

    A descendant counts as a *direct* child of ``term`` when it cannot be
    reached through any other descendant of ``term`` present in the pairs.
    This does not rely on the stored separation count, so children are found
    even when the intermediate concepts between two terms are absent from the
    table, and each recursive step looks one level below the current node.

    Parameters
    ----------
    term
        Concept id whose descendants are wanted.
    level
        Retained so existing ``build_hierarchy(term, 1)`` calls keep working;
        it is no longer used to select rows.
    ancestors
        Table-like object exposing ``'ancestor_concept_id'`` and
        ``'descendant_concept_id'`` columns of equal length (e.g. a
        DataFrame). Defaults to the notebook-level ``filtered_ancestors``.

    Returns
    -------
    list of dict
        One ``{'term': id, 'descendants': [...]}`` entry per direct child, in
        the order the children first appear in ``ancestors``.
    """
    if ancestors is None:
        ancestors = filtered_ancestors

    # ancestor id -> its proper descendants; a dict is used as an ordered set
    # so that output order follows row order and duplicates collapse.
    descendants_of = {}
    for anc, desc in zip(ancestors['ancestor_concept_id'],
                         ancestors['descendant_concept_id']):
        if anc != desc:  # ignore self-reference rows
            descendants_of.setdefault(anc, {})[desc] = None

    def nest(node, path):
        below = descendants_of.get(node, {})

        # Anything that is itself below another descendant of `node` is a
        # grandchild (or deeper), not a direct child.
        indirect = set()
        for other in below:
            indirect.update(descendants_of.get(other, ()))

        # `path` holds the ids from the starting term down to `node`; skipping
        # them keeps the recursion finite even if the pairs contain a cycle.
        return [
            {'term': child, 'descendants': nest(child, path | {child})}
            for child in below
            if child not in indirect and child not in path
        ]

    return nest(term, frozenset([term]))

# Attach the descendant tree to each root term's entry
for root in root_terms:
    subtree = build_hierarchy(root, 1)
    hierarchy[root]['descendants'] = subtree

hierarchy

{'37018196': {'descendants': []},
 '4234649': {'descendants': []},
 '197672': {'descendants': []},
 '4148407': {'descendants': []},
 '4287112': {'descendants': []},
 '4206526': {'descendants': []},
 '4046360': {'descendants': []},
 '4150054': {'descendants': []},
 '31317': {'descendants': []},
 '4229715': {'descendants': []},
 '3655005': {'descendants': []},
 '317002': {'descendants': []},
 '4024166': {'descendants': []},
 '4276568': {'descendants': []},
 '43530623': {'descendants': []},
 '4245997': {'descendants': []},
 '432250': {'descendants': []},
 '4286359': {'descendants': []},
 '4009409': {'descendants': []},
 '37153627': {'descendants': []},
 '4024618': {'descendants': []},
 '4116678': {'descendants': []},
 '4098413': {'descendants': []},
 '4224118': {'descendants': []},
 '4148509': {'descendants': []},
 '436940': {'descendants': []},
 '4084390': {'descendants': []},
 '4103842': {'descendants': [{'term': '4306703', 'descendants': []}]},
 '4234469': {'descendants': []},
 '416553

In [92]:
# Report how many mapped terms there are and how many of them are roots
n_total, n_roots = len(my_terms), len(hierarchy)
print(n_total, 'total terms and ', n_roots, 'root terms.')

191 total terms and  152 root terms.


In [102]:
# Spot check: first descendant entry recorded under root term '321588'
first_child = hierarchy['321588']['descendants'][0]
first_child

{'term': '44784217', 'descendants': []}