# Terminology Server API Demonstration

Let's set up the environment and then build a helper function that we can use to exercise the API.

In [52]:
import json
import os
from pprint import pprint as pp

import dotenv
import requests
import pandas as pd
from openpyxl import load_workbook, Workbook

# Load variables from a .env file (if one is found) into the process environment.
dotenv.load_dotenv()

# Root URL of the terminology server; None when TERM_SERVER_URL is not set.
base_url = os.getenv("TERM_SERVER_URL")

## Search Helper

This function helps us send a search request to the terminology server

In [7]:
def search(chunks, vocab=None, valuesets=None, domain_id=None, topk=15, timeout=120):
    """Send a search request to the terminology server and return the decoded JSON.

    Args:
        chunks: List of terms to look up (e.g. ``["cough"]``).
        vocab: Optional list sent as ``vocabulary_id``; an empty list is sent
            when omitted.
        valuesets: Optional list sent as ``valueset_metadata_ids``. When it is
            non-empty, ``filter_by_valueset`` is sent as true.
        domain_id: Optional list sent as ``domain_id``; an empty list is sent
            when omitted.
        topk: Value sent as ``topk`` (presumably the maximum number of matches
            returned per chunk -- confirm against the server API docs).
        timeout: Seconds to wait for the server before giving up, so that an
            unreachable server cannot hang the notebook indefinitely.

    Returns:
        The parsed JSON response body.

    Raises:
        RuntimeError: If ``TERM_SERVER_URL`` was not configured.
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    if not base_url:
        raise RuntimeError(
            "TERM_SERVER_URL is not set; cannot reach the terminology server"
        )

    # Normalise omitted filters to fresh lists instead of sharing mutable defaults.
    vocab = [] if vocab is None else vocab
    valuesets = [] if valuesets is None else valuesets
    domain_id = [] if domain_id is None else domain_id

    # Tolerate a trailing slash in the configured base URL.
    url = base_url.rstrip("/") + "/search"

    headers = {"Content-Type": "application/json"}
    payload = {"chunks": chunks,
               "vocabulary_id": vocab,
               "domain_id": domain_id,
               "valueset_metadata_ids": valuesets,
               "topk": topk,
               "filter_by_valueset": len(valuesets) > 0}
    response = requests.post(url,
                             headers=headers,
                             data=json.dumps(payload),
                             timeout=timeout)
    # Surface HTTP failures here rather than as confusing errors downstream.
    response.raise_for_status()
    return response.json()

In [31]:
# Try the helper with a single term, using the default filters and topk.
search(["cough"])

{'cough': [{'chunk': 'cough',
   'document': 'cough',
   'cmetadata': {'ID': 46431,
    'source': 'Athena',
    'validity': True,
    'domain_id': 'Condition',
    'domain_name': 'Condition',
    'concept_code': 'R05',
    'concept_name': 'cough and cold preparations',
    'vocabulary_id': 'ICD10',
    'valid_end_date': '2099-12-31',
    'vocabulary_name': 'International Classification of Diseases, Tenth Revision (WHO)',
    'chunk_concept_id': 45606792,
    'concept_class_id': 'ICD10 Hierarchy',
    'standard_concept': 'Non-Standard',
    'valid_start_date': '1990-05-01',
    'domain_concept_id': 19,
    'concept_class_name': 'ICD10 Hierarchy',
    'vocabulary_concept_id': 44819124,
    'concept_class_concept_id': 45754906},
   'score': 1.0,
   'value_set_records': [],
   'code_map_records': {}},
  {'chunk': 'cough',
   'document': 'cough (finding)',
   'cmetadata': {'ID': 7249291,
    'source': 'JSL',
    'validity': False,
    'domain_id': 'Condition',
    'domain_name': 'Condition'

In [49]:
wb = load_workbook('JSL file.xlsx')

# One entry per worksheet to process: (sheet name, column holding the terms to
# look up, domain passed to the search as a filter).
_sheet_specs = [
    ('Medication Example', "Medication Name", "Drug"),
    # ('Med Frequency', "Frequency Value"),  # currently excluded, no domain assigned
    ('Anesthesia Type', "Anesthesia code", "Procedure"),
    ('Procedure Names', "PROC_Name Code", "Procedure"),
    ('ICD O Topography', "description", "Spec Anatomic Site"),
    ('Radiology or Proc', "Radiology/Procedure Name", "Procedure"),
    ('Labs', "test_name", "Measurement"),
    ('DRG Quebec French', "DRG (French)", "Procedure"),
]

# Per-sheet working state, keyed by sheet name; "df" starts out empty.
wb_sheets = {
    sheet_name: {"df": None, "lookup_col": lookup_col, "domain": domain}
    for sheet_name, lookup_col, domain in _sheet_specs
}

In [50]:
# Read every configured sheet in a single pass over the workbook; passing a
# list of sheet names makes read_excel return a dict of DataFrames keyed by
# sheet name, so the file is not re-opened and re-parsed once per sheet.
sheet_frames = pd.read_excel('JSL file.xlsx', sheet_name=list(wb_sheets))
for sheet, frame in sheet_frames.items():
    wb_sheets[sheet]['df'] = frame

* Generate a spreadsheet which has a response for each of the first 25 rows in each of the 8 tabs they sent?
* I don’t think we need to respond for everything in the ‘Med-Frequency’, ‘Procedure Names’, or ‘Labs’ tabs – it doesn’t seem reasonable for a test/demo. We can give them a 30-day trial later or ask them to explain why they really need all of them before deciding about buying.
* For each of the first 25 terms within each tab, we should return the top several matches we find – across all terminologies. For some tabs, we may want to limit to some terminologies (e.g. only ICD-10-PCS and SNOMED-CT for procedure names), but in general we should return whatever is found.
* Do we have the ATC and DRG ontologies in the tool? If not, we should.
* For Med-Frequency and Labs, they may be looking for a Concept Map, to map the variety of input codes they get to a smaller list. That we’ll have to ask them.
* Once the response spreadsheet is generated - @Radu Bisca, we need a manual review of it to check that it’s correct.

In [55]:
# For each configured sheet, search the first 25 lookup values and collect the
# returned concept metadata into one DataFrame stored under 'response_data'.
for sheet, sheet_data in wb_sheets.items():
    print(sheet)
    # Take the first 25 rows, then drop blank cells: NaN would otherwise be
    # serialised into the request body as invalid JSON.
    chunks = sheet_data['df'][sheet_data['lookup_col']].head(25).dropna().tolist()
    response = search(chunks,
                      topk=5,
                      domain_id=[sheet_data['domain']])

    # Build one frame per input term and concatenate once at the end instead
    # of growing a DataFrame inside the loop.
    frames = []
    for chunk, responses in response.items():
        resp_df = pd.DataFrame([x['cmetadata'] for x in responses])
        # Put the originating term first so each match can be traced back.
        resp_df.insert(0, 'input', chunk)
        frames.append(resp_df)

    # pd.concat rejects an empty list, so fall back to an empty frame.
    sheet_data['response_data'] = pd.concat(frames) if frames else pd.DataFrame()


Medication Example
Anesthesia Type
Procedure Names
ICD O Topography
Radiology or Proc
Labs
DRG Quebec French


In [56]:
# Build the response workbook: one worksheet per input sheet, holding a header
# row followed by the collected response rows.
new_sheet = Workbook()
# Workbook() always starts with one empty default worksheet; remember it so it
# can be dropped once the real sheets exist.
default_ws = new_sheet.active
for sheet, sheet_data in wb_sheets.items():
    ws = new_sheet.create_sheet(title=sheet)
    ws.append(sheet_data['response_data'].columns.tolist())
    for row in sheet_data['response_data'].values.tolist():
        ws.append(row)

# Remove the blank default tab, but only if at least one real sheet was added
# (a workbook must keep at least one worksheet to be saved).
if len(new_sheet.worksheets) > 1:
    new_sheet.remove(default_ws)

In [57]:
# Write the response workbook to disk (path is relative to the working directory).
new_sheet.save('response_data.xlsx')

In [76]:
# Look up concepts by code via the /retrieve_by_codes endpoint.
headers = {"accept": "application/json", "Content-Type": "application/json"}
payload = {
    "codes": ["I21.9"],
    # Optional filters, currently disabled:
    # "vocabulary_id":"SNOMED",
    # source: None,
    # validity: None,
    # domain_id:  None,
    # concept_class_id:  None,
    # standard_concept:  None,
    "topk":  10,
}
# Use the configured server (TERM_SERVER_URL) rather than a hard-coded address,
# so this cell targets the same server as the search helper.
response = requests.post(url = base_url.rstrip('/') + '/retrieve_by_codes',
                         headers=headers,
                         data=json.dumps(payload),
                         timeout=120)
# Fail here on a 4xx/5xx answer instead of when the body is decoded later.
response.raise_for_status()

In [77]:
# Decode and display the JSON body of the /retrieve_by_codes response.
response.json()

{'I21.9': [{'concept_name': 'acute myocardial infarction, unspecified',
   'vocabulary_id': 'ICD10'},
  {'concept_name': 'acute myocardial infarction, unspecified',
   'vocabulary_id': 'ICD10CM'},
  {'concept_name': 'myocardial infarction', 'vocabulary_id': 'ICD10CM'},
  {'concept_name': 'heart attack', 'vocabulary_id': 'ICD10CM'},
  {'concept_name': 'myocardial ischemia', 'vocabulary_id': 'ICD10CM'},
  {'concept_name': 'ventricular septal rupture', 'vocabulary_id': 'ICD10CM'},
  {'concept_name': 'coronary artery thrombosis', 'vocabulary_id': 'ICD10CM'},
  {'concept_name': 'myocardial necrosis', 'vocabulary_id': 'ICD10CM'},
  {'concept_name': 'new myocardial infarction compared to prior study',
   'vocabulary_id': 'ICD10CM'},
  {'concept_name': 'acute infarction of papillary muscle',
   'vocabulary_id': 'ICD10CM'}]}