AkHQ: http://localhost:8082  

https://github.com/aehrc/pathling/tree/issue/452/lib/python#python-api-for-pathling

# Try Connect To Test FHIR Kafka

In [1]:
!python --version

Python 3.9.12


In [2]:
import findspark
findspark.init()
findspark.find()

'/usr/local/spark'

In [3]:
appName = "Kafka, Spark and FHIR Data"
master = "local[*]"
kafka_topic = "fhir.post-gateway-kdb"

In [4]:
from pyspark.sql import SparkSession
from pathling.etc import find_jar

spark = SparkSession.builder \
    .appName(appName) \
    .master(master) \
    .config('spark.jars', find_jar()) \
    .getOrCreate()

In [5]:
# https://spark.apache.org/docs/latest/structured-streaming-kafka-integration.html
# default for startingOffsets is "latest", but "earliest" allows rewind for missed alerts    
df = spark \
  .readStream  \
  .format("kafka") \
  .option("kafka.bootstrap.servers", "kafka1:19092") \
  .option("subscribe", kafka_topic) \
  .option("startingOffsets", "earliest") \
  .load()

In [6]:
df.printSchema()

root
 |-- key: binary (nullable = true)
 |-- value: binary (nullable = true)
 |-- topic: string (nullable = true)
 |-- partition: integer (nullable = true)
 |-- offset: long (nullable = true)
 |-- timestamp: timestamp (nullable = true)
 |-- timestampType: integer (nullable = true)



In [7]:
import os
os.getenv("PYSPARK_SUBMIT_ARGS")

'     --packages org.apache.spark:spark-sql-kafka-0-10_2.12:3.2.1,au.csiro.pathling:encoders:5.0.3-SNAPSHOT     --repositories https://oss.sonatype.org/content/repositories/snapshots     pyspark-shell'

In [8]:
query = df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") \
          .writeStream \
          .queryName("gettable") \
          .format("memory") \
          .start()

# close connection after 15 seconds
query.awaitTermination(15)

False

In [10]:
kafka_data = spark.sql("select * from gettable")
kafka_data.show()
type(kafka_data)

+-------------------+--------------------+
|                key|               value|
+-------------------+--------------------+
|    Patient/patient|{"resourceType": ...|
|Condition/condition|{"resourceType": ...|
|Condition/condition|{"resourceType": ...|
|Procedure/procedure|{"resourceType": ...|
|Encounter/encounter|{"resourceType": ...|
+-------------------+--------------------+



pyspark.sql.dataframe.DataFrame

In [11]:
pd_df = kafka_data.toPandas()
pd_df

Unnamed: 0,key,value
0,Patient/patient,"{""resourceType"": ""Bundle"", ""meta"": {""security""..."
1,Condition/condition,"{""resourceType"": ""Bundle"", ""meta"": {""security""..."
2,Condition/condition,"{""resourceType"": ""Bundle"", ""meta"": {""security""..."
3,Procedure/procedure,"{""resourceType"": ""Bundle"",""meta"": {""security"":..."
4,Encounter/encounter,"{""resourceType"": ""Bundle"", ""meta"": {""security""..."


# Bring Pathling into the game

In [12]:
from pathling.r4 import bundles

resources = bundles.from_json(kafka_data, 'value')

In [13]:
patients = bundles.extract_entry(spark, resources, 'Patient')
encounter = bundles.extract_entry(spark, resources, 'Encounter')
condition = bundles.extract_entry(spark, resources, 'Condition')

In [14]:
patients.printSchema()

root
 |-- id: string (nullable = true)
 |-- id_versioned: string (nullable = true)
 |-- meta: struct (nullable = true)
 |    |-- id: string (nullable = true)
 |    |-- versionId: string (nullable = true)
 |    |-- versionId_versioned: string (nullable = true)
 |    |-- lastUpdated: timestamp (nullable = true)
 |    |-- source: string (nullable = true)
 |    |-- profile: array (nullable = true)
 |    |    |-- element: string (containsNull = true)
 |    |-- security: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- id: string (nullable = true)
 |    |    |    |-- system: string (nullable = true)
 |    |    |    |-- version: string (nullable = true)
 |    |    |    |-- code: string (nullable = true)
 |    |    |    |-- display: string (nullable = true)
 |    |    |    |-- userSelected: boolean (nullable = true)
 |    |-- tag: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- id: string (nullable

In [15]:
patients.select("id", "birthDate", "gender", "address.postalCode").toPandas()

Unnamed: 0,id,birthDate,gender,postalCode
0,patient,1990-01-01,female,[12345]


In [16]:
encounter.printSchema()

root
 |-- id: string (nullable = true)
 |-- id_versioned: string (nullable = true)
 |-- meta: struct (nullable = true)
 |    |-- id: string (nullable = true)
 |    |-- versionId: string (nullable = true)
 |    |-- versionId_versioned: string (nullable = true)
 |    |-- lastUpdated: timestamp (nullable = true)
 |    |-- source: string (nullable = true)
 |    |-- profile: array (nullable = true)
 |    |    |-- element: string (containsNull = true)
 |    |-- security: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- id: string (nullable = true)
 |    |    |    |-- system: string (nullable = true)
 |    |    |    |-- version: string (nullable = true)
 |    |    |    |-- code: string (nullable = true)
 |    |    |    |-- display: string (nullable = true)
 |    |    |    |-- userSelected: boolean (nullable = true)
 |    |-- tag: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- id: string (nullable

In [19]:
encounter.select("id", "subject.reference", "serviceType.coding.code", "period.start", "period.end").toPandas()

Unnamed: 0,id,reference,code,start,end
0,encounter,Patient/patient,"[2200, 2200]",2021-11-09T05:41:53+00:00,


In [20]:
condition.printSchema()

root
 |-- id: string (nullable = true)
 |-- id_versioned: string (nullable = true)
 |-- meta: struct (nullable = true)
 |    |-- id: string (nullable = true)
 |    |-- versionId: string (nullable = true)
 |    |-- versionId_versioned: string (nullable = true)
 |    |-- lastUpdated: timestamp (nullable = true)
 |    |-- source: string (nullable = true)
 |    |-- profile: array (nullable = true)
 |    |    |-- element: string (containsNull = true)
 |    |-- security: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- id: string (nullable = true)
 |    |    |    |-- system: string (nullable = true)
 |    |    |    |-- version: string (nullable = true)
 |    |    |    |-- code: string (nullable = true)
 |    |    |    |-- display: string (nullable = true)
 |    |    |    |-- userSelected: boolean (nullable = true)
 |    |-- tag: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- id: string (nullable

In [21]:
condition.select("id", "encounter.reference", "code.coding.code").toPandas()

Unnamed: 0,id,reference,code
0,condition,Encounter/encounter,[H57.0]
1,condition,Encounter/encounter,[H57.0]
