# MIMIC FHIR Terminology Generation
Convert MIMIC terminology into FHIR CodeSystem and ValueSet resources.

In [None]:
# Import libraries
import numpy as np
import pandas as pd
import json
import psycopg2
import requests
from pathlib import Path
import os
from datetime import datetime
from dotenv import load_dotenv

from fhir.resources.codesystem import CodeSystem, CodeSystemConcept
from fhir.resources.valueset import ValueSet

In [None]:
# Load environment variables from the .env file one directory above the
# current working directory. load_dotenv is called exactly once: nesting the
# call fed the first call's boolean return value back in as a path.
load_dotenv(Path.cwd().parent / '.env')

# Database credentials and output locations, all supplied by the environment
SQLUSER = os.getenv('SQLUSER')
SQLPASS = os.getenv('SQLPASS')
DBNAME_MIMIC = os.getenv('DBNAME_MIMIC')
HOST = os.getenv('DBHOST')
FHIR_SERVER = os.getenv('FHIR_SERVER')
MIMIC_TERMINOLOGY_PATH = os.getenv('MIMIC_TERMINOLOGY_PATH')

# Connect to database; `con` is reused by every query in the cells below
con = psycopg2.connect(dbname=DBNAME_MIMIC, user=SQLUSER, password=SQLPASS, host=HOST)

In [None]:
# Bare expression: as the last line of a notebook cell this echoes the value,
# a quick check that the output path was picked up from the environment.
MIMIC_TERMINOLOGY_PATH

In [None]:
# Names of the CodeSystems to generate. Each name is used below to build the
# query against fhir_trm.cs_<name> and, with '_' swapped for '-', the FHIR id.
codesystems = [
    'admission_class',
    'admission_type',
    'admission_type_icu',
    'admit_source',
    'bodysite',
    'd_items',
    'd_labitems',
    'diagnosis_icd9',
    'discharge_disposition',
    'identifier_type',
    'lab_flags',
    'lab_fluid',
    'lab_priority',
    'medadmin_category_icu',
    'medication_method',
    'medication_route',
    'medication_site',
    'microbiology_antibiotic',
    'microbiology_interpretation',
    'microbiology_organism',
    'microbiology_test',
    'observation_category',
    'procedure_category',
    'procedure_icd9',
    'procedure_icd10',
    'units',
]

# Names of the ValueSets to generate, in output order.
valuesets = [
    'admission_class',
    'admission_type',
    'admission_type_icu',
    'admit_source',
    'bodysite',
    'chartevents_d_items',
    'd_labitems',
    'datetimeevents_d_items',
    'diagnosis_icd',
    'discharge_disposition',
    'identifier_type',
    'lab_flags',
    'lab_fluid',
    'lab_priority',
    'outputevents_d_items',
    'medadmin_category_icu',
    'medication_method',
    'medication_route',
    'medication_site',
    'microbiology_antibiotic',
    'microbiology_interpretation',
    'microbiology_organism',
    'microbiology_test',
    'observation_category',
    'procedure_category',
    'procedureevents_d_items',
    'procedure_icd',
    'units',
]

# ValueSets that enumerate their codes explicitly instead of pointing at a
# whole CodeSystem.
valueset_coded = [
    'chartevents_d_items',
    'datetimeevents_d_items',
    'outputevents_d_items',
    'procedureevents_d_items',
]

# ValueSets whose include systems are read from their fhir_trm.vs_<name>
# table rather than derived from the ValueSet id.
valueset_double_system = [
    'procedure_icd',
    'diagnosis_icd',
]

## Steps to generate terminology
0. Generate the terminology tables in Postgres (this should already be done as part of the initial table generation). If not, run *create_fhir_terminology.sql*.
1. Pull terminology tables into Python
2. Create codesystem/valueset in python 
3. Output CodeSystem and ValueSet json

In [None]:
# Static components shared by every generated CodeSystem and ValueSet
fhir_status = 'active'
fhir_content = 'complete'
version = '0.4'
publisher = 'KinD Lab'
# Timestamp carrying the machine's actual UTC offset. The previous hard-coded
# '-04:00' suffix was only correct while the local zone happened to be UTC-4.
current_date = datetime.now().astimezone().isoformat(timespec='seconds')
base_url = 'http://mimic.mit.edu/fhir/mimic'

### CodeSystem Generation

In [None]:
# Pull in all the terminology descriptions
q_cs_descriptions = "SELECT * FROM fhir_trm.cs_descriptions;"
cs_descriptions = pd.read_sql_query(q_cs_descriptions, con)

# Build output paths with pathlib so the result does not depend on
# MIMIC_TERMINOLOGY_PATH ending in a path separator (the POST cell reads the
# files back through Path as well). Fails fast if the variable is unset.
terminology_dir = Path(MIMIC_TERMINOLOGY_PATH)

for codesystem in codesystems:
    print(codesystem)
    cs = CodeSystem(status=fhir_status, content=fhir_content)
    cs.id = codesystem.replace('_', '-')
    cs.url = f'{base_url}/CodeSystem/{cs.id}'
    cs.version = version
    cs.language = 'en'
    cs.name = codesystem.title().replace('_', '')
    cs.title = cs.name
    cs.date = current_date
    cs.publisher = publisher
    # First matching description row; raises IndexError if the codesystem has
    # no entry in cs_descriptions.
    cs.description = cs_descriptions[cs_descriptions['codesystem'] == codesystem]['description'].iloc[0]

    # Generate code/display combos from the fhir_trm tables
    q_codesystem = f"SELECT * FROM fhir_trm.cs_{codesystem};"
    df_codesystem = pd.read_sql_query(q_codesystem, con)

    # Whether this table has a display column at all is a per-table fact,
    # so check it once instead of once per row.
    has_display = 'display' in df_codesystem.columns

    concept = []
    for _, row in df_codesystem.iterrows():
        elem = {'code': row['code']}
        # Skip missing/blank displays, matching the ValueSet generation:
        # a NaN display would otherwise end up in the output JSON.
        if has_display and row['display'] != '' and not pd.isna(row['display']):
            elem['display'] = row['display']
        concept.append(elem)

    cs.concept = concept

    # Write out CodeSystem json to terminology folder
    with open(terminology_dir / f'CodeSystem-{cs.id}.json', 'w') as outfile:
        json.dump(json.loads(cs.json()), outfile, indent=4)



## ValueSet Generation

In [None]:
# Pull in all the valueset descriptions
q_vs_descriptions = "SELECT * FROM fhir_trm.vs_descriptions;"
vs_descriptions = pd.read_sql_query(q_vs_descriptions, con)

# Build output paths with pathlib so the result does not depend on
# MIMIC_TERMINOLOGY_PATH ending in a path separator (the POST cell reads the
# files back through Path as well). Fails fast if the variable is unset.
terminology_dir = Path(MIMIC_TERMINOLOGY_PATH)

for valueset in valuesets:
    print(valueset)
    vs = ValueSet(status=fhir_status)
    vs.id = valueset.replace('_', '-')
    vs.url = f'{base_url}/ValueSet/{vs.id}'
    vs.version = version
    vs.language = 'en'
    vs.name = valueset.title().replace('_', '')
    vs.title = vs.name
    vs.date = current_date
    vs.publisher = publisher
    # First matching description row; raises IndexError if the valueset has
    # no entry in vs_descriptions.
    vs.description = vs_descriptions[vs_descriptions['valueset'] == valueset]['description'].iloc[0]

    if valueset in valueset_coded:
        print('coded valueset')
        # Generate code/display combos from the fhir_trm tables
        q_valueset = f"SELECT * FROM fhir_trm.vs_{valueset};"
        df_valueset = pd.read_sql_query(q_valueset, con)

        # Create valueset codes, leaving out blank or missing displays
        concept = []
        for _, row in df_valueset.iterrows():
            elem = {'code': row['code']}
            if row['display'] != '' and not pd.isna(row['display']):
                elem['display'] = row['display']
            concept.append(elem)

        # Only coded values right now are d-items valuesets, would need to
        # change system otherwise. The '/' after base_url was missing before,
        # which produced a system URL that matched no generated CodeSystem.
        include_dict = {
            'system': f'{base_url}/CodeSystem/d-items',
            'concept': concept,
        }
        vs.compose = {'include': [include_dict]}
    elif valueset in valueset_double_system:
        # For valuesets who inherit from more than one CodeSystem
        # Store both systems in the ValueSet include
        print('double system valueset')

        # Grab systems from fhir_trm table
        q_valueset = f"SELECT * FROM fhir_trm.vs_{valueset};"
        df_valueset = pd.read_sql_query(q_valueset, con)

        include_list = [{'system': system_url} for system_url in df_valueset.system]
        vs.compose = {'include': include_list}
    else:
        # Default: include the whole CodeSystem that shares this ValueSet's id
        system_include = {'system': f'{base_url}/CodeSystem/{vs.id}'}
        vs.compose = {'include': [system_include]}

    # Write out ValueSet json to terminology folder
    with open(terminology_dir / f'ValueSet-{vs.id}.json', 'w') as outfile:
        json.dump(json.loads(vs.json()), outfile, indent=4)


## POST TERMINOLOGY

In [2]:
import requests
import json
import os
import pandas as pd
from pathlib import Path
from dotenv import load_dotenv
from py_mimic_fhir.lookup import MIMIC_CODESYSTEMS, MIMIC_VALUESETS

# Environment variables: read the .env file one directory above the current
# working directory. load_dotenv is called exactly once: nesting the call fed
# the first call's boolean return value back in as a path.
load_dotenv(Path.cwd().parent / '.env')

FHIR_SERVER = os.getenv('FHIR_SERVER')
MIMIC_TERMINOLOGY_PATH = os.getenv('MIMIC_TERMINOLOGY_PATH')

def put_resource(resource, fhir_data):
    """PUT a FHIR resource to the configured server and return the parsed reply.

    Args:
        resource: FHIR resource type used as the URL path segment,
            e.g. 'CodeSystem' or 'ValueSet'.
        fhir_data: Resource content as a dict. Its 'id' entry completes the
            URL and the whole dict is sent as the JSON request body.

    Returns:
        The response body decoded from JSON. The HTTP status is not checked
        here, so callers must inspect the returned content themselves.

    Raises:
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    # Accept FHIR_SERVER with or without a trailing slash; plain concatenation
    # produced '<server>CodeSystem/<id>' when the slash was missing.
    server = FHIR_SERVER.rstrip('/')
    url = f"{server}/{resource}/{fhir_data['id']}"

    resp = requests.put(
        url, json=fhir_data, headers={"Content-Type": "application/fhir+json"}
    )
    return json.loads(resp.text)

In [6]:
# Resources to upload in this run.
# NOTE(review): 'chartevents_d_items' is not among the CodeSystems generated
# earlier in this notebook (only 'd_items' is) - confirm that
# CodeSystem-chartevents-d-items.json exists in the terminology folder.
codesystems = ['chartevents_d_items']
valuesets = ['chartevents_d_items']

# Folder holding the generated terminology json files
base_path = Path(MIMIC_TERMINOLOGY_PATH)
version = '0.1.4'  # Need to change version to trigger expansion (does not need to be greater just different)

# Upload every CodeSystem first, then every ValueSet. For each one: load the
# generated json, override its version, and PUT it to the server.
for resource_type, resource_names in (('CodeSystem', codesystems), ('ValueSet', valuesets)):
    for resource_name in resource_names:
        file_name = f"{resource_type}-{resource_name.replace('_', '-')}.json"
        with open(base_path / file_name, mode='r') as resource_file:
            resource_json = json.load(resource_file)

        resource_json['version'] = version
        put_resource(resource_type, resource_json)
