In [1]:
import csv
import re

import pandas as pd

## Load SNOMED CT from its original file and lowercase all terms

In [2]:
# Path to the SNOMED CT description snapshot (tab-separated text) that backs every lookup below.
SNOMED_TERMS = "./terminologies/SnomedCT_InternationalRF2_PRODUCTION_20200731T120000Z/Snapshot/Terminology/sct2_Description_Snapshot-en_INT_20200731.txt"
# The release file is plain tab-delimited text without quoting or escaping, so
# quote characters inside a term must be read literally (QUOTE_NONE); with the
# default quote handling a stray '"' can make the parser merge several rows.
# keep_default_na=False keeps any term whose text is e.g. "NA" or "null" as a
# string instead of silently converting it to NaN.
snomed = pd.read_csv(
    SNOMED_TERMS,
    sep='\t',
    quoting=csv.QUOTE_NONE,
    keep_default_na=False,
)
# Lowercase once here so that search() can do a case-insensitive exact match.
snomed['term'] = snomed['term'].str.lower()

## Search and filter functions

In [3]:
def search(term):
    """Return the plain-text synonyms of the concept whose description is `term`.

    The lookup is a case-insensitive exact match against the active rows of
    the module-level `snomed` table. If several rows match, only the concept
    of the first matching row is used.

    Parameters
    ----------
    term : str
        Term to look up.

    Returns
    -------
    list of str
        Active terms sharing the matched row's conceptId that pass the
        notebook's `filter` function. The queried term itself is kept in the
        list. If nothing matches, "NOT FOUND!" is printed and `[term]`
        (lowercased) is returned so callers always get a list back.
    """
    term = term.lower()
    active = snomed[snomed.active == 1]
    matches = active[active.term == term]
    # Explicit empty check instead of a bare `except:` so that genuine errors
    # (missing column, interrupted kernel, ...) are not reported as "NOT FOUND!".
    if matches.empty:
        print("NOT FOUND!")
        return [term]
    term_conceptId = matches.iloc[0]['conceptId']
    synonyms = active[active.conceptId == term_conceptId]['term'].tolist()
    # NOTE: `filter` is this notebook's own function (it shadows the builtin).
    return filter(synonyms)

# Filter to remove long, unhelpful names containing parentheses, digits, etc.
# For now only terms made up of letters and whitespace are kept.
def filter(synonyms):
    """Keep only the terms that consist solely of ASCII letters and whitespace.

    NOTE: this function shadows the builtin `filter` for the rest of the
    notebook; the name is kept because `search` calls it.

    Parameters
    ----------
    synonyms : iterable
        Candidate terms. Entries that are not strings (for example NaN values
        coming out of a pandas column) are skipped instead of raising.

    Returns
    -------
    list of str
        The accepted terms, in their original order.
    """
    KEEP_IT = re.compile(r'^[a-zA-Z\s]+$')
    return [
        term
        for term in synonyms
        # re.match raises TypeError on non-strings, so check the type first.
        if isinstance(term, str) and KEEP_IT.match(term)
    ]

## A few tests

In [4]:
# The term and its chemical-name synonym are returned together.
search("aspirin")

['aspirin', 'acetylsalicylic acid']

In [5]:
# A term with many synonyms, including spelling variants (tumour / tumor).
search("cancer")

['blastoma',
 'malignancy',
 'cancer',
 'malignant neoplasm',
 'malignant tumour morphology',
 'malignant tumor morphology',
 'cancer morphology']

In [6]:
# The result also contains the longer "... structure" form of the term.
search("stomach")

['stomach', 'ventriculus', 'stomach structure']

In [7]:
# The synonym list mixes nouns, adjectives and short phrases ("painful", "part hurts").
search("pain")

['pain', 'dolor', 'painful', 'part hurts', 'pain observations']

In [8]:
# Only the queried term itself comes back for this one.
search("pantoprazole")

['pantoprazole']

In [9]:
# search() only does exact matches: this prints "NOT FOUND!" and echoes the
# input, presumably because no active description is exactly "diabetes".
search("diabetes")

NOT FOUND!


['diabetes']