# MIMIC-FHIR Tutorial
This tutorial uses Pathling to import, search, aggregate, and extract the MIMIC-FHIR resources.

In [None]:
from pathlib import Path
import requests
import json
import ndjson
import pandas as pd

import matplotlib.pyplot as plt
plt.style.use('ggplot')
plt.rcParams.update({'font.size': 20})

from fhirclient.models.parameters import Parameters, ParametersParameter
from py_mimic_fhir.lookup import MIMIC_FHIR_RESOURCES

In [None]:
# Base location of the NDJSON files; `{profile}.ndjson` is appended to it when
# the import parameters are built.
# NOTE(review): presumably this path is resolved by the server, not by this notebook — confirm.
import_folder = 'file:///usr/share/staging'
# Base URL of the FHIR endpoint that the requests below are sent to.
server = 'http://localhost:8080/fhir'

<a id='index'></a>
### Index of operations
- [Import](#import-resources)
    - [Import MIMIC-FHIR](#import-all-mimic-fhir-resources-to-pathling)
- [Validation](#validate-resources)
- [Search](#search-resources)
    - [Search and export by gender](#search-and-export-by-gender)
- [Aggregate](#aggregate-resources)
    - [Conditions](#aggregate-conditions)
    - [Gender](#aggregate-gender)
    - [Heparin Medication](#aggregate-heparin-medication)
    - [Male patients with hypertension](#aggregate-male-patients-with-hypertension)
    - [Top EMAR medication](#aggregate-top-emar-medication)
    - [Top ICU medication](#aggregate-top-icu-medication)
    - [Medication for hypertension patients](#aggregate-medication-from-patients-with-hypertension)
    - [Top lab events](#aggregate-lab-events)
    - [Top labs for hypertension patients](#aggregate-top-labs-for-hypertension-patient)
- [Extract](#extract-resource-table)




# Import Resources
[back to index](#index-of-operations)

In [None]:
def generate_import_parameters(import_folder, profile, resource, mode):
    """Build the JSON body of a Parameters resource describing one import source.

    The resource holds a single `source` parameter whose parts are, in order,
    the resource type, the NDJSON file URL and the import mode.

    Args:
        import_folder: Base location of the staged files; `/{profile}.ndjson`
            is appended to it to form the file URL.
        profile: Name of the NDJSON file (without extension) to import.
        resource: Value stored as the `resourceType` code.
        mode: Value stored as the `mode` code.

    Returns:
        The result of `Parameters.as_json()` for the assembled resource.
    """
    def code_part(name, code):
        # Small factory for the two parts that carry a valueCode.
        part = ParametersParameter()
        part.name = name
        part.valueCode = code
        return part

    # The url part is a plain dict rather than a ParametersParameter.
    # NOTE(review): presumably because the model class does not expose
    # `valueUrl` — confirm before converting it to a model object.
    url_part = {
        'name': 'url',
        'valueUrl': f'{import_folder}/{profile}.ndjson',
    }

    source = ParametersParameter()
    source.name = 'source'
    # Keep the model object first and the dict second, as in the original order.
    source.part = [
        code_part('resourceType', resource),
        url_part,
        code_part('mode', mode),
    ]

    parameters = Parameters()
    parameters.parameter = [source]
    return parameters.as_json()

In [None]:
def post_import_ndjson(server, param):
    """POST an import Parameters payload to the server's `$import` endpoint.

    Args:
        server: Base URL of the FHIR server.
        param: JSON-serialisable body, sent with a FHIR JSON content type.

    Returns:
        The `requests` response object, unmodified.
    """
    fhir_headers = {"Content-Type": "application/fhir+json"}
    return requests.post(f'{server}/$import', json=param, headers=fhir_headers)

### Import all mimic-fhir resources to Pathling
[back to index](#index-of-operations)

In [None]:
# Import mode sent with every file. 'overwrite' would suit a fresh load, but
# 'merge' is used because the Observation imports must be merged rather than
# overwritten.
# NOTE(review): presumably several profiles load into the same Observation
# resource type — confirm against MIMIC_FHIR_RESOURCES.
mode = 'merge'

# Import one NDJSON file per profile and print the server's first diagnostic
# message for each request.
for profile, resource in MIMIC_FHIR_RESOURCES.items():
    # ObservationChartevents is skipped: it is too large and was crashing all
    # of the Observation searches.
    if (profile != 'ObservationChartevents'):
        param = generate_import_parameters(import_folder, profile, resource, mode)
        resp = post_import_ndjson(server, param)
        print(f"{profile}: {resp.json()['issue'][0]['diagnostics']}")

# Validate Resources
[back to index](#index-of-operations)

In [None]:
# Build the Parameters resource that is posted to `$validate` in the next cell.
# This cell reuses `resource`, `profile` and `mode` as left behind by the
# import loop, so it describes the last profile that the loop visited.
# NOTE(review): the parts mirror the `$import` source layout — confirm this is
# the payload the server's `$validate` operation expects.
param_validate = Parameters()

param_resource_type = ParametersParameter()
param_resource_type.name = 'resourceType'
param_resource_type.valueCode = resource

# Plain dict, matching how generate_import_parameters builds the url part.
param_url = {}
param_url['name'] = 'url'
param_url['valueUrl'] = f'{import_folder}/{profile}.ndjson'

param_mode = ParametersParameter()
param_mode.name = 'mode'
param_mode.valueCode = mode

param_source = ParametersParameter()
param_source.name = 'source'
param_source.part = [param_resource_type, param_url, param_mode]
# Attach the source to the resource created in this cell. The previous target,
# `param_resource`, only exists inside generate_import_parameters and raised a
# NameError here.
param_validate.parameter = [param_source]



In [None]:
# POST the validation parameters to the Patient `$validate` endpoint and show
# the parsed JSON reply.
url = f'{server}/Patient/$validate'

# `as_json()` is the serialiser used for Parameters elsewhere in this notebook;
# the keyword arguments also need a comma between them.
resp = requests.post(
    url,
    json=param_validate.as_json(),
    headers={"Content-Type": "application/fhir+json"},
)
resp.json()

# Search Resources
[back to index](#index-of-operations)

In [None]:
def get_query(server, resource_type, filter_path, count=10):
    """Run a `fhirPath` named-query search and return the parsed JSON reply.

    Args:
        server: Base URL of the FHIR server.
        resource_type: Resource type to search, used as the URL path segment.
        filter_path: FHIRPath expression sent as the `filter` parameter.
        count: Value sent as the `_count` parameter.

    Returns:
        The decoded JSON body of the response.
    """
    # Let requests build the query string so characters in the FHIRPath
    # expression (such as '&', '+', '#' or '|') are percent-encoded instead of
    # being interpreted as URL syntax.
    query = {'_query': 'fhirPath', 'filter': filter_path, '_count': count}
    resp = requests.get(
        f'{server}/{resource_type}',
        params=query,
        headers={"Content-Type": "application/fhir+json"},
    )
    return resp.json()

def export_resources(resp, resource_type, output_path):
    """Write the resources of a search result to `<output_path>/<resource_type>.ndjson`.

    Each entry's `resource` is written as one JSON document per line. The
    output directory is created when it does not exist, and a result without
    an `entry` key produces an empty file instead of raising KeyError.

    Args:
        resp: Parsed search result (dict) whose optional `entry` list holds
            dicts with a `resource` key.
        resource_type: Used as the file name stem.
        output_path: Directory the file is written to.
    """
    Path(output_path).mkdir(parents=True, exist_ok=True)
    output_file = f'{output_path}/{resource_type}.ndjson'

    # Write-only access is enough; the file is never read back here.
    with open(output_file, 'w') as outfile:
        for entry in resp.get('entry', []):
            outfile.write(json.dumps(entry['resource']) + '\n')


#### Search and export by gender
[back to index](#index-of-operations)

In [None]:
resource_type = 'Patient'
filter_path = "gender='male'" # ((reverseResolve(Condition.subject).code.coding.where($this.subsumedBy(http://fhir.mimic.mit.edu/CodeSystem/diagnosis-icd9|4019)).code).empty()) and (gender = 'male')
count = 100

resp = get_query(server, resource_type, filter_path, count)
resp

In [None]:
output_path = 'output'
export_resources(resp, resource_type, output_path)

# Aggregate Resources
[back to index](#index-of-operations)

In [None]:
def get_aggregate(resource_type, element_path, filter_path=None,
                  server='http://localhost:8080/fhir'):
    """Request a `count()` aggregation grouped by one expression.

    Args:
        resource_type: Resource type whose `$aggregate` operation is called.
        element_path: FHIRPath expression sent as the `grouping` parameter.
        filter_path: Optional FHIRPath expression sent as the `filter`
            parameter; omitted from the request when None.
        server: Base URL of the FHIR server. Defaults to the endpoint used by
            the other cells of this notebook (port 8080); the previous
            hard-coded value pointed at port 8000.

    Returns:
        The decoded JSON body of the response.
    """
    # Passing the expressions via `params` makes requests percent-encode them,
    # so characters such as '|', '&' or '+' cannot corrupt the query string.
    query = {'aggregation': 'count()', 'grouping': element_path}
    if filter_path is not None:
        query['filter'] = filter_path

    resp = requests.get(
        f'{server}/{resource_type}/$aggregate',
        params=query,
        headers={"Content-Type": "application/fhir+json"},
    )
    return resp.json()

def plot_aggregate(resp, title, limit, size=(12, 8), rotation=90):
    """Plot the largest groups of an aggregate response as horizontal bars.

    Args:
        resp: Parsed response (dict). Each item of `resp['parameter']` is read
            as one group: `part[0]` carries the group label and
            `part[1]['valueUnsignedInt']` the count.
        title: Title of the figure.
        limit: Number of largest groups to show.
        size: Figure size handed to `plt.figure(figsize=...)`.
        rotation: Accepted for backward compatibility; currently unused.
    """
    labels = []
    counts = []
    # NOTE(review): presumably a response without groups omits 'parameter';
    # it is treated as empty here instead of raising KeyError.
    for group in resp.get('parameter', []):
        label_part = group['part'][0]
        # Pick the value[x] entry by key rather than by its position in the
        # dict; a label part that carries no value is shown as 'WITHOUT'.
        label = next(
            (value for key, value in label_part.items() if key.startswith('value')),
            'WITHOUT',
        )
        labels.append(label)
        counts.append(group['part'][1]['valueUnsignedInt'])

    df = pd.DataFrame({'label': labels, 'value': counts})
    # Ascending sort followed by tail() keeps the `limit` largest counts, with
    # the largest one plotted last; tail(0) yields no rows.
    df_sorted = df.sort_values(by=['value'], ascending=True).tail(limit)

    plt.figure(figsize=size)
    plt.barh(df_sorted['label'], df_sorted['value'])
    plt.title(title)
    plt.show()


#### Aggregate conditions
[back to index](#index-of-operations)

In [None]:
# Count Condition resources grouped by the display text of their codings and
# plot the 20 largest groups.
resource_type = 'Condition'
element_path = 'code.coding.display'
title = f'{resource_type}: {element_path}'
limit = 20

resp = get_aggregate(resource_type, element_path)
plot_aggregate(resp, title, limit)

#### Aggregate gender
[back to index](#index-of-operations)

In [None]:
# Count Patient resources grouped by gender; the grouping expression doubles
# as the plot title.
resource_type = 'Patient'
element_path = 'gender'
limit=10
size = [6,6]

resp = get_aggregate(resource_type, element_path)
# `rotation` is accepted by plot_aggregate but is not applied to the plot.
plot_aggregate(resp, element_path, limit, size, rotation=45)

#### Aggregate male patients with hypertension
[back to index](#index-of-operations)

In [None]:
# Count male patients, grouped by the codes of their Conditions that are
# subsumed by diagnosis-icd9 code 4019.
resource_type = 'Patient'
# Earlier variant that grouped on the whole coding instead of its code:
#element_path = "reverseResolve(Condition.subject).code.coding.where(subsumedBy(http://fhir.mimic.mit.edu/CodeSystem/diagnosis-icd9|4019))"
element_path = "reverseResolve(Condition.subject).code.coding.where(subsumedBy(http://fhir.mimic.mit.edu/CodeSystem/diagnosis-icd9|4019)).code"
# Restrict the aggregation to male patients.
filter_path="gender='male'"
title = f'{resource_type}: Males with hypertension'
limit = 10

resp = get_aggregate(resource_type, element_path, filter_path)
resp
plot_aggregate(resp, title, limit, size=[6,6])

#### Aggregate heparin medication
[back to index](#index-of-operations)

In [None]:
# Count patients grouped by the codes of their MedicationAdministrations that
# are subsumed by formulary drug code HEPA5I (no filter is applied).
resource_type = 'Patient'
element_path = "reverseResolve(MedicationAdministration.subject).medicationCodeableConcept.coding.where(subsumedBy(http://fhir.mimic.mit.edu/CodeSystem/medication-formulary-drug-cd|HEPA5I)).code"
title = f'{resource_type}: Heparin meds'
limit = 10

resp = get_aggregate(resource_type, element_path)
resp
plot_aggregate(resp, title, limit, size=[6,6])

#### Aggregate top emar medication
[back to index](#index-of-operations)

In [None]:
# Count MedicationAdministration resources by medication code, keeping only
# resources whose first meta.profile is the mimic-medication-administration
# profile, and plot the 10 largest groups.
resource_type = 'MedicationAdministration'
element_path = "medicationCodeableConcept.coding.code"
filter_path = "meta.where(profile.first()='http://fhir.mimic.mit.edu/StructureDefinition/mimic-medication-administration').empty().not()"
title = f'{resource_type}: Top EMAR medication administered'
limit = 10

resp = get_aggregate(resource_type, element_path, filter_path)
resp
plot_aggregate(resp, title, limit)

#### Aggregate top ICU medication 
[back to index](#index-of-operations)

In [None]:
# Count MedicationAdministration resources by medication display text, keeping
# only resources whose first meta.profile is the
# mimic-medication-administration-icu profile, and plot the 10 largest groups.
resource_type = 'MedicationAdministration'
element_path = "medicationCodeableConcept.coding.display"
filter_path = "meta.where(profile.first()='http://fhir.mimic.mit.edu/StructureDefinition/mimic-medication-administration-icu').empty().not()"
title = f'{resource_type}: Top ICU medication administered'
limit = 10

resp = get_aggregate(resource_type, element_path, filter_path)
resp
plot_aggregate(resp, title, limit)

#### Aggregate medication from patients with hypertension
[back to index](#index-of-operations)

In [None]:
# For patients that have a Condition subsumed by diagnosis-icd10 code I2510,
# count them by the display text of their administered medications and plot
# the 20 largest groups.
# NOTE(review): the title says "Hypertension", but I2510 does not look like a
# hypertension code (the icd9 cell above uses 4019) — confirm the intended code.
resource_type = 'Patient'
element_path = "reverseResolve(MedicationAdministration.subject).medicationCodeableConcept.coding.display"
filter_path = "reverseResolve(Condition.subject).code.coding.where(subsumedBy(http://fhir.mimic.mit.edu/CodeSystem/diagnosis-icd10|I2510)).empty().not()"
title = f'{resource_type}: Top medication administered for Hypertension patients'
limit = 20

resp = get_aggregate(resource_type, element_path, filter_path)
resp
plot_aggregate(resp, title, limit)

In [None]:
# For patients that have a Condition subsumed by diagnosis-icd10 code Z794,
# count them by the display text of their Procedures and plot the 15 largest
# groups.
# NOTE(review): the title says "Atrial Fibrillation"; confirm that Z794 is the
# intended diagnosis code for that condition.
resource_type = 'Patient'
element_path = "reverseResolve(Procedure.subject).code.coding.display"
filter_path = "reverseResolve(Condition.subject).code.coding.where(subsumedBy(http://fhir.mimic.mit.edu/CodeSystem/diagnosis-icd10|Z794)).empty().not()"
title = f'{resource_type}: Top procedures for Atrial Fibrillation Patients'
limit = 15

resp = get_aggregate(resource_type, element_path, filter_path)
resp
plot_aggregate(resp, title, limit)

In [None]:
# For patients that have a Condition subsumed by diagnosis-icd9 code 53081,
# count them by the display text of their Procedures and plot the 15 largest
# groups.
# NOTE(review): the title says "Acute Kidney Failure"; confirm that 53081 is
# the intended diagnosis code for that condition.
resource_type = 'Patient'
element_path = "reverseResolve(Procedure.subject).code.coding.display"
filter_path = "reverseResolve(Condition.subject).code.coding.where(subsumedBy(http://fhir.mimic.mit.edu/CodeSystem/diagnosis-icd9|53081)).empty().not()"
title = f'{resource_type}: Top procedures for Acute Kidney Failure Patients'
limit = 15

resp = get_aggregate(resource_type, element_path, filter_path)
resp
plot_aggregate(resp, title, limit)

#### Aggregate lab events
[back to index](#index-of-operations)

In [None]:
# Observation is very large because of chartevents (removing chartevents for
# the demo would at least let the other observations be inspected). Here the
# aggregate is restricted to resources whose first meta.profile is the
# mimic-observation-labevents profile, grouped by coding display text.
resource_type = 'Observation'
element_path = "code.coding.display"
filter_path = "meta.where(profile.first()='http://fhir.mimic.mit.edu/StructureDefinition/mimic-observation-labevents').empty().not()"
title = f'{resource_type}: Top observation labs'
limit = 10

resp = get_aggregate(resource_type, element_path, filter_path)
resp
plot_aggregate(resp, title, limit)

#### Aggregate top labs for hypertension patient
[back to index](#index-of-operations)

In [None]:
# For patients that have a Condition subsumed by diagnosis-icd10 code R188,
# count them by the display text of their Observation codings from the
# d-labitems code system and plot the 20 largest groups.
# NOTE(review): the section heading says hypertension while the title says
# Atrial Fibrillation — confirm which condition code R188 is meant to select.
resource_type = 'Patient'
element_path = "reverseResolve(Observation.subject).code.coding.where(system.first()='http://fhir.mimic.mit.edu/CodeSystem/d-labitems').display"
filter_path = "reverseResolve(Condition.subject).code.coding.where(subsumedBy(http://fhir.mimic.mit.edu/CodeSystem/diagnosis-icd10|R188)).empty().not()"
title = f'{resource_type}: Top labs runs for Atrial Fibrillation patients'
limit = 20

resp = get_aggregate(resource_type, element_path, filter_path)
resp
plot_aggregate(resp, title, limit)

#### Aggregate conditions associated with lab tests

In [None]:
# For patients that have an Observation subsumed by d-labitems code 50843
# (labelled as the Lactate Dehydrogenase lab test in the title), count them by
# the codes of their diagnosis-icd10 Conditions and plot the 10 largest groups.
resource_type = 'Patient'
element_path = "reverseResolve(Condition.subject).code.coding.where(system.first()='http://fhir.mimic.mit.edu/CodeSystem/diagnosis-icd10').code"
filter_path = "reverseResolve(Observation.subject).code.coding.where(subsumedBy(http://fhir.mimic.mit.edu/CodeSystem/d-labitems|50843)).empty().not()"
title = f'{resource_type}: Top conditions with Lactate Dehydrogenase lab test'
limit = 10

resp = get_aggregate(resource_type, element_path, filter_path)
resp
plot_aggregate(resp, title, limit)

### Extract resource table
[back to index](#index-of-operations)

In [None]:
def get_extract(server, resource_type, columns, limit):
    """Call the `$extract` operation and return the parsed JSON reply.

    Args:
        server: Base URL of the FHIR server.
        resource_type: Resource type whose `$extract` operation is called.
        columns: Iterable of expressions; each one is sent as its own
            `column` query parameter, in the given order.
        limit: Value sent as the `limit` parameter.

    Returns:
        The decoded JSON body of the response.
    """
    # A list value makes requests emit one `column=` pair per item and
    # percent-encode every expression, which the manual string building did not.
    query = {'column': list(columns), 'limit': limit}
    resp = requests.get(
        f'{server}/{resource_type}/$extract',
        params=query,
        headers={"Content-Type": "application/fhir+json"},
    )
    return resp.json()

In [None]:
# Extract the first 10 rows of a two-column Patient table (gender, birthDate).
server = 'http://localhost:8080/fhir'
resource_type = 'Patient'
column1 = 'gender'
column2 = 'birthDate'
columns = [column1, column2]
# The unused `title` assignment was dropped: it read `element_path` from an
# unrelated aggregate cell and failed when this cell was run on its own.
limit = 10

resp = get_extract(server, resource_type, columns, limit)
resp

#### Streaming data 

In [None]:
# Request the server's metadata endpoint with mode=terminology and show the
# parsed JSON reply.
server = 'http://localhost:8080/fhir'
# No trailing space after the mode value: it would be sent as part of the
# parameter.
url = f'{server}/metadata?mode=terminology'
resp = requests.get(url=url, headers={"Content-Type": "application/fhir+json"})
resp.json()

#### $everything
- Only Patient, Condition, and Procedure resources are written out right now.

In [None]:
# Call `$everything` for one patient, restricted with `_type=Encounter`, and
# write every returned resource to an NDJSON file, reporting progress.
url = 'http://localhost:8080/fhir/Patient/a6e7e991-6801-5425-b435-4ca6b7decfcc/$everything?_type=Encounter'
resp = requests.get(url=url, headers={"Content-Type": "application/fhir+json"})

# Write-only mode is sufficient; the file is not read back in this cell.
with open('output/patient_everything.ndjson', 'w') as patfile:
    writer = ndjson.writer(patfile)
    # A reply without an 'entry' key yields an empty file instead of KeyError.
    for i, entry in enumerate(resp.json().get('entry', []), start=1):
        print(f'writing resource {i}')
        writer.writerow(entry['resource'])



In [None]:
# Plain search request: Patient resources with the `gender=male` parameter.
url = 'http://localhost:8080/fhir/Patient?gender=male'
resp = requests.get(url=url, headers={"Content-Type": "application/fhir+json"})

# Show the parsed JSON reply as the cell output.
resp.json()



In [None]:
# Plain search request: Specimen resources with a fixed `subject` id.
url = 'http://localhost:8080/fhir/Specimen/?subject=0a8eebfd-a352-522e-89f0-1d4a13abdebc'
resp = requests.get(url=url, headers={"Content-Type": "application/fhir+json"})

# Show the parsed JSON reply as the cell output.
resp.json()