In [0]:
%load_ext autoreload
%autoreload 2
# Enables autoreload; learn more at https://docs.databricks.com/en/files/workspace-modules.html#autoreload-for-python-modules
# To disable autoreload, run %autoreload 0

In [0]:
# Look up the run-as user once; two of the widget defaults below are derived from it.
_run_as_user = spark.sql("select current_user()").collect()[0][0]

# Databricks secret scope for the run-as user.
# Default: the part of current_user() before "@", with "." replaced by "-".
dbutils.widgets.text(
  "databricks_secret_scope",
  _run_as_user.split(sep="@")[0].replace(".", "-"),
  "User's Personal DB Secrets Scope",
)

# Key, inside the scope above, under which the GitHub PAT is stored.
dbutils.widgets.text(
  "github_token_secret_key",
  "gh_pat",
  "GitHub Token DB Secret Key",
)

# GitHub user name of the run-as user.
# NOTE(review): the default is the raw current_user() value, which may not be a
# valid GitHub user name -- override the widget if it is not.
dbutils.widgets.text(
  "github_username",
  _run_as_user,
  "User's Personal GitHub Username",
)

In [0]:

from urllib.parse import quote

# Percent-encode both values so they are safe to embed in a
# git+https://<user>:<token>@github.com/... URL (see the %pip cell below).
github_username = quote(dbutils.widgets.get("github_username"))
github_token = quote(
  dbutils.secrets.get(
    scope=dbutils.widgets.get("databricks_secret_scope"),
    key=dbutils.widgets.get("github_token_secret_key"),
  )
)

# Never echo the token: cell output is stored with the notebook, and the
# percent-encoded form may differ from the raw secret that output redaction
# looks for. Only report whether a token was found.
_github_token_status = "[REDACTED]" if github_token else "<missing>"

print(f"""
   github_username: {github_username}
   github_token: {_github_token_status}
""")

In [0]:
# %pip install git+https://github.com/databricks-industry-solutions/redox-ehr-api

In [0]:
# %pip install --upgrade git+https://$github_username:$github_token@github.com/databricks-industry-solutions/redox-ehr-api 

## Connect to Redox

In [0]:
# All Redox credentials live in the secret scope selected by the widget.
_redox_secret_scope = dbutils.widgets.get("databricks_secret_scope")

redox_private_key = dbutils.secrets.get(scope=_redox_secret_scope, key="redox_private_key")
redox_client_id = dbutils.secrets.get(scope=_redox_secret_scope, key="redox_client_id")
# redox_source_id = dbutils.secrets.get(scope=_redox_secret_scope, key="redox_source_id")
redox_source_id = "91b0ab2f-7b86-441d-9cbf-9b9a6d648d59"
redox_public_kid = dbutils.secrets.get(scope=_redox_secret_scope, key="redox_kid")


def _secret_status(secret_value):
  """Return a placeholder telling whether a secret was found, never its value."""
  return "[REDACTED]" if secret_value else "<missing>"


# Do not print secret material (private key, client id, key id): cell output is
# persisted with the notebook. Only the hard-coded source id is shown verbatim.
print(f""" 
      redox_private_key: {_secret_status(redox_private_key)}
      redox_client_id: {_secret_status(redox_client_id)}
      redox_source_id: {redox_source_id}
      redox_public_kid: {_secret_status(redox_public_kid)}
""")

In [0]:
import json

# Key metadata (kty / kid / alg / use) handed to RedoxApiAuth as a JSON string.
# Serialize a dict with json.dumps instead of formatting JSON by hand so the
# key id is always escaped correctly, whatever characters it contains.
redox_auth_json = json.dumps(
  {
    "kty": "RSA",
    "kid": redox_public_kid,
    "alg": "RS384",
    "use": "sig",
  },
  indent=2,
)

# Parse it back so the cell output shows the resulting structure.
json.loads(redox_auth_json)

In [0]:
# Import only the names this notebook uses rather than `import *`, so it is
# clear where each name comes from and nothing else is pulled into the namespace.
from redoxwrite.auth import RedoxApiAuth
from redoxwrite.endpoint import RedoxApiRequest

# Authentication handle built from the credentials loaded above.
auth = RedoxApiAuth(
  redox_client_id,
  redox_private_key,
  redox_auth_json,
  redox_source_id,
)
print(f"Is connection successful? {auth.can_connect()}")

In [0]:
# Every Redox FHIR request URL begins with:
#   https://api.redoxengine.com/fhir/R4/[organization-name]/[environment-type]/
redox_base_url = "https://api.redoxengine.com/fhir/R4/redox-fhir-sandbox/Development/"

# Request helper bound to the auth handle and the base URL above.
rapi = RedoxApiRequest(auth, base_url=redox_base_url)

## Interact with FHIR

In [0]:
# Request payload (JSON text) for creating an Observation: a Bundle with two entries.
#   1. An Observation coded LOINC 78033-8 ("Remaining Hospital Stay") with a
#      valueQuantity of 4 days, whose subject is
#      Patient/58117110-ae47-452a-be2c-2d82b3a9e24b.
#   2. A Patient resource carrying two identifiers (an MR number and an SSN).
# The value is kept as a string; it is passed as-is as the `data` of the create request.
observation = """
{
   "resourceType":"Bundle",
   "entry":[
      {
         "resource":{
            "category":[
               {
                  "coding":[
                     {
                        "code":"survey",
                        "display":"Survey",
                        "system":"http://terminology.hl7.org/CodeSystem/observation-category"
                     }
                  ]
               }
            ],
            "code":{
               "coding":[
                  {
                     "code":"78033-8",
                     "display":"Remaining Hospital Stay",
                     "system":"http://loinc.org"
                  }
               ],
               "text":"Remaining Hospital Stay"
            },
            "effectiveDateTime":"2024-01-28T18:06:33.245-05:00",
            "issued":"2024-01-28T18:06:33.245-05:00",
            "resourceType":"Observation",
            "status":"final",
            "valueQuantity":{
               "code":"days",
               "system":"https://www.nubc.org/CodeSystem/RevenueCodes",
               "unit":"days",
               "value":4
            },
            "subject": {
              "reference": "Patient/58117110-ae47-452a-be2c-2d82b3a9e24b"
            },
            "identifier": [
            {
              "system": "urn:databricks",
              "value": "1234567890"
            }
          ]
         }
      },
      {
         "resource":{
           "resourceType": "Patient",
           "identifier": [
            {
              "system": "urn:redox:health-one:MR",
              "value": "0000991458"
            },
            {
              "system": "http://hl7.org/fhir/sid/us-ssn",
              "value": "547-01-9991"
            }
          ]
         }
      }
   ]
}
"""

In [0]:
# `observation` is already JSON text: parse it first, then re-serialize with
# indentation. Dumping the string and loading it back (the previous order)
# just returns the original text, so `indent` had no effect.
print(json.dumps(json.loads(observation), indent=2))

In [0]:
# POST the bundle to the Observation resource using the $observation-create action.
result = rapi.make_request(
  "post",
  resource="Observation",
  action="$observation-create",
  data=observation,
)

In [0]:
# Report a failed create with the actual status code. The request above creates
# an Observation, so the message must not talk about updating a patient.
_create_status = result['response']['response_status_code']
if _create_status != 200:
  print(f"Failed to create the observation (HTTP status {_create_status})")

# Pretty-print the body when it is JSON; fall back to the raw text otherwise
# (an error response is not guaranteed to be JSON).
_create_body = result['response']['response_text']
try:
  print(json.dumps(json.loads(_create_body), indent=2))
except (TypeError, ValueError):
  print(_create_body)

In [0]:
# The first entry of the create response carries a `location`; its
# third-from-last "/"-separated segment is used as the Observation id.
_create_response = json.loads(result['response']['response_text'])
_created_location = _create_response['entry'][0]['response']['location']
observation_id = _created_location.split('/')[-3]

# Read the Observation back and check that it holds the value that was posted.
response = rapi.make_request("get", resource="Observation", action=observation_id)
data = json.loads(response['response']['response_text'])

assert data['valueQuantity']['value'] == 4
print(json.dumps(data, indent=2))

## Running On Spark

In [0]:
# Single-row demo frame: the patient id, the new length of stay (3), the id of
# the Observation created above, and the resource type.
_demo_columns = ['PATIENT_MRN', 'LENGTH_OF_STAY', "OBSERVATION_ID", "resource_type"]
_demo_rows = [
  ('58117110-ae47-452a-be2c-2d82b3a9e24b', 3, observation_id, "Observation"),
]
df = spark.createDataFrame(_demo_rows, _demo_columns)

display(df)

In [0]:
import pyarrow
from pyspark.sql.types import StructType, StructField, StringType
from pyspark.sql.functions import col, parse_json, try_parse_json

def process_batches(iterator, http_method, resource, action, data=None):
    """Call the Redox API once per row of every batch and attach the reply text.

    Args:
        iterator: Iterable of batches; each batch must expose ``to_pandas()``.
        http_method: ``"get"`` or ``"post"``.
        resource: Resource name forwarded to ``rapi.make_request``.
        action: For ``"get"``, the name of the column whose values are sent as
            the request action; for ``"post"``, the action forwarded unchanged.
        data: For ``"post"``, the name of the column whose values are sent as
            the request payload; for ``"get"``, forwarded unchanged.

    Yields:
        One ``pyarrow.RecordBatch`` per input batch, with an extra ``response``
        column holding ``['response']['response_text']`` of each call.

    Raises:
        Exception: If ``http_method`` is neither ``"get"`` nor ``"post"``
            (raised while the first batch is being processed).
    """
    for record_batch in iterator:
        frame = record_batch.to_pandas()

        if http_method not in ("get", "post"):
            raise Exception("Invalid http_method")

        if http_method == "get":
            # The per-row value is the action; `data` is passed through untouched.
            source_column = action

            def send(cell):
                return rapi.make_request(http_method=http_method, resource=resource, action=cell, data=data)
        else:
            # The per-row value is the payload; `action` is passed through untouched.
            source_column = data

            def send(cell):
                return rapi.make_request(http_method=http_method, resource=resource, action=action, data=cell)

        frame["response"] = frame[source_column].apply(
            lambda cell: send(cell)['response']['response_text']
        )
        yield pyarrow.RecordBatch.from_pandas(frame)

# Output schema = the input columns plus a string "response" column.
# Build a fresh StructType instead of calling df.schema.add(...): add() appends
# to the StructType it is called on, so on runtimes where df.schema returns a
# cached object, re-running this cell would keep adding "response" fields.
_observation_schema = StructType(
    df.schema.fields + [StructField("response", StringType())]
)

# Fetch every row's Observation (the action is taken from OBSERVATION_ID) and
# parse the returned JSON text into a VARIANT column.
observation_df = (
    df
    .mapInArrow(
      lambda iterator: process_batches(
        iterator,
        http_method="get",
        resource="Observation",
        action="OBSERVATION_ID",
      ),
      _observation_schema,
    )
    .withColumn("response", parse_json("response"))
)

display(observation_df)

In [0]:
from pyspark.sql.functions import variant_get, try_variant_get

# The Observation fetched through Spark must still report the value 4.
_fetched_value = observation_df.select(
  variant_get(col("response"), "$.valueQuantity.value", "int")
).collect()[0][0]
assert _fetched_value == 4

In [0]:
from pyspark.sql.functions import expr, schema_of_variant, schema_of_variant_agg

# Aggregate one schema string per resource_type from the VARIANT responses.
_schema_per_type = (
  observation_df
  .groupBy(col('resource_type'))
  .agg(schema_of_variant_agg(col("response")).alias("response_schema"))
)

# Rewrite OBJECT to STRUCT in the schema text; the result is used later as the
# target type when extracting the VARIANT.
variant_schemas = _schema_per_type.withColumn(
  "response_schema",
  expr("regexp_replace(response_schema, 'OBJECT', 'STRUCT')"),
)

# display(variant_schemas)
# Collect to the driver as {resource_type: schema string}.
variant_schemas_dict = {}
for _schema_row in variant_schemas.collect():
  variant_schemas_dict[_schema_row['resource_type']] = _schema_row['response_schema']
variant_schemas_dict

In [0]:

# Extract the whole VARIANT response using the schema collected for "Observation" ...
_observation_struct = try_variant_get(
  col("response"), "$", variant_schemas_dict["Observation"]
)
# ... and replace valueQuantity.value with the row's LENGTH_OF_STAY.
_patched_observation = _observation_struct.withField(
  "valueQuantity.value", col("LENGTH_OF_STAY")
)

updated_observataions = observation_df.withColumn("update_data", _patched_observation)

display(updated_observataions)

In [0]:
from pyspark.sql.functions import array, to_json, struct, col, lit

# Wrap each updated Observation in a Bundle shaped like the create payload:
#   {"resourceType": "Bundle", "entry": [{"resource": <observation>}]}
# "entry" must be an array of {"resource": ...} structs directly under the
# bundle; previously it was a single struct nested inside an unnamed struct.
updated_observataions = updated_observataions.withColumn(
    "new_bundle",
    struct(
        lit("Bundle").alias("resourceType"),
        array(
            struct(col("update_data").alias("resource"))
        ).alias("entry"),
    ),
)

display(updated_observataions)

In [0]:
def new_bundle(data, value=None):
  """Wrap an Observation resource in a single-entry Bundle and return it as JSON.

  Args:
    data: Observation resource as a dict. It is modified in place when
      ``value`` is given.
    value: New ``valueQuantity.value``. When None the resource is left as is.

  Returns:
    JSON text of ``{"resourceType": "Bundle", "entry": [{"resource": data}]}``.
  """
  # `value` used to be read from an undefined global; it is now a parameter.
  if value is not None:
    data['valueQuantity']['value'] = value
  return json.dumps(
    {
      "resourceType": "Bundle",
      "entry": [{'resource': data}]
    })

# Pair every DataFrame row with the GET response for its Observation.
# NOTE(review): `observation_rdd` was referenced below but never defined in this
# notebook; this definition is reconstructed from how its elements are used
# (element [0] is the row, element [1] is a make_request result) -- confirm.
observation_rdd = df.rdd.map(
  lambda row: (
    row,
    rapi.make_request("get", resource="Observation", action=row.asDict().get('OBSERVATION_ID')),
  )
)

#Create a tuple of (row, post response)
updated_rdd = observation_rdd.map(
  lambda row_response_tuple: (
    row_response_tuple[0],  # the row from the DataFrame
    rapi.make_request(
      "post",
      resource="Observation",
      action="$observation-update",
      # The API payload: the fetched Observation with its value replaced by
      # LENGTH_OF_STAY. The undefined `update_observation` is replaced by new_bundle.
      data=new_bundle(
        json.loads(row_response_tuple[1]['response']['response_text']),
        row_response_tuple[0].asDict().get('LENGTH_OF_STAY'),  # value from DF
      ),
    ),
  )
)

In [0]:
# Show the status code and body returned by each update call.
(
  updated_rdd
  .toDF(['row', 'response'])
  .select("response.response.response_status_code", "response.response.response_text")
  .show(truncate=False)
)

In [0]:
# Re-derive the Observation id from the create response (third-from-last
# "/"-separated segment of the first entry's location) and fetch it again.
_created_entry = json.loads(result['response']['response_text'])['entry'][0]
observation_id = _created_entry['response']['location'].split('/')[-3]
response = rapi.make_request("get", resource="Observation", action=observation_id)
data = json.loads(response['response']['response_text'])

# The value is expected to be 3 now instead of 4.
assert data['valueQuantity']['value'] == 3
print(json.dumps(data, indent=2))