#### This notebook is intended to explore how to get patient bundle from sandboxes from all sorts of publically available servers

In [78]:
from fhirclient import client
from fhirclient.models import bundle, patient, condition, parameters


In [41]:
# Define base API
NLM_API_BASE = "https://lforms-fhir.nlm.nih.gov/baseR4"

In [42]:
# create an instance of the FHIR client
settings = {
    'app_id': 'my_web_app',
    'api_base': NLM_API_BASE
}
smart = client.FHIRClient(settings=settings)
print(f'smart.authorize_url = {smart.authorize_url}')
print(f'smart.ready = {smart.ready}')


smart.authorize_url = None
smart.ready = True


In [51]:
# find all patients
search = patient.Patient.where(struct={})
patients = search.perform_resources(smart.server)
for p in patients[0:5]:
    print(p.as_json())
p_id_lists = [p.id for p in patients]

{'id': 'pat-106', 'meta': {'lastUpdated': '2021-03-02T13:37:31.000-05:00', 'source': '#5Xcz9XcsjCn0FRLU', 'tag': [{'code': 'ri-10k', 'system': 'urn:lf:fhir-tags'}], 'versionId': '1'}, 'text': {'div': '<div xmlns="http://www.w3.org/1999/xhtml"><div class="hapiHeaderText">JIAN <b>MCINTOSH </b></div><table class="hapiPropertyTable"><tbody><tr><td>Address</td><td><span>83637 Fake AIRPORT BLVD </span><br/><span>CUMBERLAND </span><span>MD </span><span>US </span></td></tr><tr><td>Date of birth</td><td><span>01 January 2073</span></td></tr></tbody></table></div>', 'status': 'generated'}, 'active': False, 'address': [{'city': 'CUMBERLAND', 'country': 'US', 'line': ['83637 Fake AIRPORT BLVD'], 'postalCode': '21502', 'state': 'MD', 'text': '83637 Fake AIRPORT BLVD, CUMBERLAND, MD 21502', 'type': 'both', 'use': 'home'}], 'birthDate': '2073', 'deceasedDateTime': '2128', 'gender': 'male', 'name': [{'family': 'MCINTOSH', 'given': ['JIAN'], 'use': 'official'}], 'resourceType': 'Patient'}
{'id': 'pat-2

In [52]:
# search one patient using read
patient_id = p_id_lists[0]
p = patient.Patient.read(patient_id, smart.server)
# Display patient information
print(p.as_json())

{'id': 'pat-106', 'meta': {'lastUpdated': '2021-03-02T13:37:31.000-05:00', 'source': '#5Xcz9XcsjCn0FRLU', 'tag': [{'code': 'ri-10k', 'system': 'urn:lf:fhir-tags'}], 'versionId': '1'}, 'text': {'div': '<div xmlns="http://www.w3.org/1999/xhtml"><div class="hapiHeaderText">JIAN <b>MCINTOSH </b></div><table class="hapiPropertyTable"><tbody><tr><td>Address</td><td><span>83637 Fake AIRPORT BLVD </span><br/><span>CUMBERLAND </span><span>MD </span><span>US </span></td></tr><tr><td>Date of birth</td><td><span>01 January 2073</span></td></tr></tbody></table></div>', 'status': 'generated'}, 'active': False, 'address': [{'city': 'CUMBERLAND', 'country': 'US', 'line': ['83637 Fake AIRPORT BLVD'], 'postalCode': '21502', 'state': 'MD', 'text': '83637 Fake AIRPORT BLVD, CUMBERLAND, MD 21502', 'type': 'both', 'use': 'home'}], 'birthDate': '2073', 'deceasedDateTime': '2128', 'gender': 'male', 'name': [{'family': 'MCINTOSH', 'given': ['JIAN'], 'use': 'official'}], 'resourceType': 'Patient'}


In [53]:
# return a patient bundle using search
# bundle is a list
patient_id = p_id_lists[0]
search = patient.Patient.where(struct={'_id': patient_id})
p_bundle = search.perform_resources(smart.server)
print(p_bundle[0].as_json())

{'id': 'pat-106', 'meta': {'lastUpdated': '2021-03-02T13:37:31.000-05:00', 'source': '#5Xcz9XcsjCn0FRLU', 'tag': [{'code': 'ri-10k', 'system': 'urn:lf:fhir-tags'}], 'versionId': '1'}, 'text': {'div': '<div xmlns="http://www.w3.org/1999/xhtml"><div class="hapiHeaderText">JIAN <b>MCINTOSH </b></div><table class="hapiPropertyTable"><tbody><tr><td>Address</td><td><span>83637 Fake AIRPORT BLVD </span><br/><span>CUMBERLAND </span><span>MD </span><span>US </span></td></tr><tr><td>Date of birth</td><td><span>01 January 2073</span></td></tr></tbody></table></div>', 'status': 'generated'}, 'active': False, 'address': [{'city': 'CUMBERLAND', 'country': 'US', 'line': ['83637 Fake AIRPORT BLVD'], 'postalCode': '21502', 'state': 'MD', 'text': '83637 Fake AIRPORT BLVD, CUMBERLAND, MD 21502', 'type': 'both', 'use': 'home'}], 'birthDate': '2073', 'deceasedDateTime': '2128', 'gender': 'male', 'name': [{'family': 'MCINTOSH', 'given': ['JIAN'], 'use': 'official'}], 'resourceType': 'Patient'}


In [79]:
# return a patient bundle using search with two patient ids
bundle_obj = bundle.Bundle()
bundle_obj.entry = []
for patient_id in p_id_lists[0:2]:
    search = patient.Patient.where(struct={'_id': patient_id})
    patients = search.perform_resources(smart.server)
    for patient_resource in patients:
        entry = bundle.BundleEntry()
        entry.resource = patient_resource
        bundle_obj.entry.append(entry)
# Set the type of the Bundle to 'collection'
bundle_obj.type = 'Bundle'
# Print the Bundle as JSON
print(bundle_obj.as_json())

{'entry': [{'resource': {'id': 'pat-106', 'meta': {'lastUpdated': '2021-03-02T13:37:31.000-05:00', 'source': '#5Xcz9XcsjCn0FRLU', 'tag': [{'code': 'ri-10k', 'system': 'urn:lf:fhir-tags'}], 'versionId': '1'}, 'text': {'div': '<div xmlns="http://www.w3.org/1999/xhtml"><div class="hapiHeaderText">JIAN <b>MCINTOSH </b></div><table class="hapiPropertyTable"><tbody><tr><td>Address</td><td><span>83637 Fake AIRPORT BLVD </span><br/><span>CUMBERLAND </span><span>MD </span><span>US </span></td></tr><tr><td>Date of birth</td><td><span>01 January 2073</span></td></tr></tbody></table></div>', 'status': 'generated'}, 'active': False, 'address': [{'city': 'CUMBERLAND', 'country': 'US', 'line': ['83637 Fake AIRPORT BLVD'], 'postalCode': '21502', 'state': 'MD', 'text': '83637 Fake AIRPORT BLVD, CUMBERLAND, MD 21502', 'type': 'both', 'use': 'home'}], 'birthDate': '2073', 'deceasedDateTime': '2128', 'gender': 'male', 'name': [{'family': 'MCINTOSH', 'given': ['JIAN'], 'use': 'official'}], 'resourceType': 

In [82]:
# a more concise way to return a patient bundle using search with two patient ids
search = patient.Patient.where(struct={'_id': ','.join(p_id_lists[0:2])})
# Perform the search to get the bundle
p_bundle = search.perform(smart.server)
# Print the bundle
print(p_bundle.as_json())

{'id': '2c1e0d97-17a8-4ca0-9ab9-e22a6641a98a', 'meta': {'lastUpdated': '2024-04-17T14:23:36.000-04:00'}, 'entry': [{'fullUrl': 'https://lforms-fhir.nlm.nih.gov/baseR4/Patient/pat-106', 'resource': {'id': 'pat-106', 'meta': {'lastUpdated': '2021-03-02T13:37:31.000-05:00', 'source': '#5Xcz9XcsjCn0FRLU', 'tag': [{'code': 'ri-10k', 'system': 'urn:lf:fhir-tags'}], 'versionId': '1'}, 'text': {'div': '<div xmlns="http://www.w3.org/1999/xhtml"><div class="hapiHeaderText">JIAN <b>MCINTOSH </b></div><table class="hapiPropertyTable"><tbody><tr><td>Address</td><td><span>83637 Fake AIRPORT BLVD </span><br/><span>CUMBERLAND </span><span>MD </span><span>US </span></td></tr><tr><td>Date of birth</td><td><span>01 January 2073</span></td></tr></tbody></table></div>', 'status': 'generated'}, 'active': False, 'address': [{'city': 'CUMBERLAND', 'country': 'US', 'line': ['83637 Fake AIRPORT BLVD'], 'postalCode': '21502', 'state': 'MD', 'text': '83637 Fake AIRPORT BLVD, CUMBERLAND, MD 21502', 'type': 'both',

In [72]:
# TO BE TESTED: use bulk fhir to achieve the similar result
export_request = parameters.Parameters()
export_request.parameter = []
export_request.parameter.append(parameters.ParametersParameter(dict(name='type', valueString='Patient')))
export_request.parameter.append(parameters.ParametersParameter(dict(name='patient', valueString=','.join(p_id_lists[0:2]))))
# Send the export request
response = export_request.perform(smart.server, '_export')
# Check if the request was successful
if response.status_code == 200:
    # Process the response
    bundle = response.resource
    # Print the bundle
    print(bundle.as_json())
else:
    print(f'Failed to retrieve resources. Status code: {response.status_code}')


AttributeError: 'Parameters' object has no attribute 'perform'

In [89]:
import requests
# FHIR server base URL

# Patient IDs for which you want to retrieve resources
patient_ids = ['12345', '67890']

# Construct the search query
kickoff_request_headers = {
    'Prefer': 'respond-async',
    'Accept': 'application/fhir+json',
}
kickoff_request_params = {'_type': 'Observation'}
kickoff_request_url = f'{settings["api_base"]}/Patient/$export'

# Perform the export request
response = requests.get(kickoff_request_url, params=kickoff_request_params, headers=kickoff_request_headers)


In [178]:
# search everything related to a patient.
# Note that in order to avoid overwhelming the client and server, the results of this operation are paged. 
# When this operation is used to access multiple patient records at once, the return bundle could be rather a lot of data; servers may choose to require that such requests are made asynchronously
# get total count
count = 0
p_id = p_id_lists[3]
patient_centered_url = f'{settings["api_base"]}/Patient/{p_id}/$everything?_count={count}'
response = requests.get(patient_centered_url)
response_json = response.json()
total_count = response_json['total']
print(f'Total count: {total_count}')
# Somehow the pagination does not work. Use the total count to get all resources
# if the count is 100, then it will work for 200 link, for the next link it will return error
# if count is 1, it will fail at 251.
# if count is 10, it will fail at 250.
# if count is 1000, it will fail at 501.
if total_count >= 100000:
    total_count = 100000
    print(f'results too large. Only return first {count} resources')
patient_centered_url = f'{settings["api_base"]}/Patient/{p_id_lists[1]}/$everything?_count={total_count}'
response = requests.get(patient_centered_url)
response_json = response.json()
print(f'Number of total resources: {len(response_json["entry"])}')
# the entry number is different (smaller than) from the total count.
# for example, in pat-232. 1474 entries (when count=0), but the total is 738.
# actually if go to next link, it report total is 1475.
# in pat-106, 1475 entries, but the total is 6419.
# in pat-269, 1023 entries, but the total is 2044
import json
json.dump(response_json, open(f'../../data/processed/v0/{p_id}_everything_{len(response_json["entry"])}.json', 'w'), indent=2)

Total count: 3503
Number of total resources: 1475


In [184]:
count=10000
p_id = p_id_lists[3]
patient_centered_url = f'{settings["api_base"]}/Patient/{p_id}/$everything?_count={count}'
all_resources = []
while True:
    response = requests.get(patient_centered_url)
    if response.status_code == 200:
        not_break = False
        resources = response.json()
        if 'entry' in resources:
            all_resources.extend(resources['entry'])
        if 'link' in resources:
            for link in resources['link']:
                # get next page
                if link['relation'] == 'next':
                    patient_centered_url = link['url']
                    print(f'Next page: {patient_centered_url}')
                    not_break = True
        if not not_break:
            break
    else:
        print(f'Failed. Status code: {response.status_code}')
        break
print(f'Number of total resources: {len(all_resources)}')
import json
json.dump(all_resources, open(f'../../data/processed/v0/{p_id}_everything_page_{len(all_resources)}.json', 'w'), indent=2)

Number of total resources: 3504


### A pattern can be found.
- if count = 0, the total number entries in the summary page is less than the actually total.
- if use the total count in the summary page, the actual entries returned is less than total.
- use pagination approach, it will fail at 200, 250, 251, 501, for count = 1, 10, 100, 1000
- Based on testing, it seems like it is to go for count = 10000.