# Condition Occurence Mapping

This is an attempt at mapping FHIR to OMOP using the following guide: https://build.fhir.org/ig/HL7/cdmh/profiles.html#omop-to-fhir-mappings
In this notebook we are mapping FHIR to the OMOP Provider Table



# Load Data Frame from Parquet Catalog File

In [1]:
from pyspark.sql import SparkSession

from pyspark.sql.functions import dayofmonth,month,year,to_date,trunc,split,explode,array,col


# Create a local Spark session
spark = SparkSession.builder.appName('etl').getOrCreate()

# Read in our data
df = spark.read.parquet('catalog.parquet')

In [2]:
df.printSchema()

root
 |-- occurrenceDateTime: string (nullable = true)
 |-- vid: long (nullable = true)
 |-- documentStatus: string (nullable = true)
 |-- resourceType: string (nullable = true)
 |-- lockEndTs: long (nullable = true)
 |-- primarySource: boolean (nullable = true)
 |-- id: string (nullable = true)
 |-- meta: struct (nullable = true)
 |    |-- lastUpdated: string (nullable = true)
 |    |-- versionId: string (nullable = true)
 |-- status: string (nullable = true)
 |-- vaccineCode: struct (nullable = true)
 |    |-- coding: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- system: string (nullable = true)
 |    |    |    |-- code: string (nullable = true)
 |    |    |    |-- display: string (nullable = true)
 |    |-- text: string (nullable = true)
 |-- encounter: struct (nullable = true)
 |    |-- reference: string (nullable = true)
 |-- patient: struct (nullable = true)
 |    |-- reference: string (nullable = true)
 |    |-- display: string 

In [57]:
conditions = df.filter(df.resourceType=='Condition')
conditions.show(1,truncate= False)

+------------------+---+--------------+------------+-------------+-------------+------------------------------------+-----------------------------+------+-----------+-----------------------------------------------+-------+--------+-----+--------+---------+----------+-------+-----+-------+--------+--------------+---------+----+----+-------+--------+-------+----+--------------+--------+------+-----------------------------------------------------+-------+---------+----+-------+----------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------+------------------------------------------------------------------------+-------------------------+-----------------------------------------------------------------------------------+-------------------------+--------+-----------+---------------+-----+------+-------------+------+-----------------+---------+----+-------+------+----

In [102]:
occurence = conditions.select(['id','encounter','clinicalStatus','subject','code','abatementDateTime','onsetDateTime'])
occurence.show(truncate=False)

+------------------------------------+-----------------------------------------------+------------------------------------------------------------------------+-----------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+-------------------------+-------------------------+
|id                                  |encounter                                      |clinicalStatus                                                          |subject                                              |code                                                                                                                               |abatementDateTime        |onsetDateTime            |
+------------------------------------+-----------------------------------------------+------------------------------------------------------------------------+---------------------------------------------

In [165]:
from pyspark.sql.functions import to_date,to_timestamp,hour, minute, second,date_trunc
occ1=occurence\
    .withColumn("encounter",col("encounter.reference").substr(10,47))\
    .withColumn("subject",col("subject.reference").substr(10,47))\
    .withColumn("aDate", col("abatementDateTime").substr(1,10))\
    .withColumn("aDateTime", col("abatementDateTime").substr(12,8))\
    .withColumn("oDate",col("onsetDateTime").substr(1,10))\
    .withColumn("oDateTime",col("onsetDateTime").substr(12,8))\
    .withColumn("clinicalStatus",col("clinicalStatus.coding.code"))\
    .withColumn("code",col("code.coding.code"))
occ1=occ1.drop(col("abatementDateTime"))
occ1=occ1.drop(col("onsetDateTime"))
occ1.show(truncate=False)

+------------------------------------+------------------------------------+--------------+------------------------------------+-----------------+----------+---------+----------+---------+
|id                                  |encounter                           |clinicalStatus|subject                             |code             |aDate     |aDateTime|oDate     |oDateTime|
+------------------------------------+------------------------------------+--------------+------------------------------------+-----------------+----------+---------+----------+---------+
|8f3eeac6-9b0f-4872-a750-64e12495f343|ee997928-a6cd-540b-187a-79e1876192a5|[resolved]    |f71f0e3d-ec58-117a-3475-256887f6b1ae|[198992004]      |2014-11-10|16:51:45 |2014-10-20|17:51:45 |
|114c3a28-15a2-4fd9-abd7-11228cb97a9c|d29b8016-5348-6962-2331-3ce4affca2fa|[resolved]    |46c99701-db47-cf30-c99c-06c71d319a8e|[84229001]       |2020-03-26|15:21:57 |2020-03-07|14:25:57 |
|c85c19e6-d19c-4df2-bf12-c3d2a2c0a0cc|d3f52592-a370-97a5-1f4

In [166]:
from pyspark.sql.functions import explode, format_string
occ2 = occ1.withColumn("clinicalStatus",explode(col("clinicalStatus")))\
    .withColumn("code",explode(col("code")))\
    .withColumn("aDateTime",format_string("%s %s",col("aDate"),col("aDateTime")))\
    .withColumn("oDateTime",format_string("%s %s",col("oDate"),col("oDateTime")))\
    
occ2.show()
occ2.printSchema()

+--------------------+--------------------+--------------+--------------------+---------------+----------+-------------------+----------+-------------------+
|                  id|           encounter|clinicalStatus|             subject|           code|     aDate|          aDateTime|     oDate|          oDateTime|
+--------------------+--------------------+--------------+--------------------+---------------+----------+-------------------+----------+-------------------+
|8f3eeac6-9b0f-487...|ee997928-a6cd-540...|      resolved|f71f0e3d-ec58-117...|      198992004|2014-11-10|2014-11-10 16:51:45|2014-10-20|2014-10-20 17:51:45|
|114c3a28-15a2-4fd...|d29b8016-5348-696...|      resolved|46c99701-db47-cf3...|       84229001|2020-03-26|2020-03-26 15:21:57|2020-03-07|2020-03-07 14:25:57|
|c85c19e6-d19c-4df...|d3f52592-a370-97a...|        active|cdb934bb-d4ff-e40...|       22298006|      null|          null null|1991-06-16|1991-06-16 13:02:29|
|422cd290-ff34-43e...|222c29db-3c06-89e...|        a

In [172]:
occ3 = occ2.withColumn("aDate",to_date(col("aDate")))\
    .withColumn("aDateTime",to_timestamp(col("aDateTime")))\
    .withColumn("oDate",to_date(col("aDate")))\
    .withColumn("oDateTime",to_timestamp(col("oDateTime")))\
    .withColumn("code",col("code").cast("int"))
    
                
occ3.show()
occ3.printSchema()

+--------------------+--------------------+--------------+--------------------+---------+----------+-------------------+----------+-------------------+
|                  id|           encounter|clinicalStatus|             subject|     code|     aDate|          aDateTime|     oDate|          oDateTime|
+--------------------+--------------------+--------------+--------------------+---------+----------+-------------------+----------+-------------------+
|8f3eeac6-9b0f-487...|ee997928-a6cd-540...|      resolved|f71f0e3d-ec58-117...|198992004|2014-11-10|2014-11-10 16:51:45|2014-11-10|2014-10-20 17:51:45|
|114c3a28-15a2-4fd...|d29b8016-5348-696...|      resolved|46c99701-db47-cf3...| 84229001|2020-03-26|2020-03-26 15:21:57|2020-03-26|2020-03-07 14:25:57|
|c85c19e6-d19c-4df...|d3f52592-a370-97a...|        active|cdb934bb-d4ff-e40...| 22298006|      null|               null|      null|1991-06-16 13:02:29|
|422cd290-ff34-43e...|222c29db-3c06-89e...|        active|760fa8ba-9525-a1b...| 49436004

In [173]:
occ4=occ3.withColumnRenamed("id","condition_occurence_id")\
    .withColumnRenamed("encounter","visit_occurence_id")\
    .withColumnRenamed("clinicalStatus","condition_status_concept_id")\
    .withColumnRenamed("subject","person_id")\
    .withColumnRenamed("code","condition_concept_id")\
    .withColumnRenamed("aDate","condition_start_date")\
    .withColumnRenamed("aDateTime","condition_start_datetime")\
    .withColumnRenamed("oDate","condition_end_date")\
    .withColumnRenamed("oDateTime","condition_end_datetime")
occ4.show()

+----------------------+--------------------+---------------------------+--------------------+--------------------+--------------------+------------------------+------------------+----------------------+
|condition_occurence_id|  visit_occurence_id|condition_status_concept_id|           person_id|condition_concept_id|condition_start_date|condition_start_datetime|condition_end_date|condition_end_datetime|
+----------------------+--------------------+---------------------------+--------------------+--------------------+--------------------+------------------------+------------------+----------------------+
|  8f3eeac6-9b0f-487...|ee997928-a6cd-540...|                   resolved|f71f0e3d-ec58-117...|           198992004|          2014-11-10|     2014-11-10 16:51:45|        2014-11-10|   2014-10-20 17:51:45|
|  114c3a28-15a2-4fd...|d29b8016-5348-696...|                   resolved|46c99701-db47-cf3...|            84229001|          2020-03-26|     2020-03-26 15:21:57|        2020-03-26|   2

In [218]:
provider = df.filter(df['resourceType'] == 'Practitioner')
provider.printSchema()

root
 |-- occurrenceDateTime: string (nullable = true)
 |-- vid: long (nullable = true)
 |-- documentStatus: string (nullable = true)
 |-- resourceType: string (nullable = true)
 |-- lockEndTs: long (nullable = true)
 |-- primarySource: boolean (nullable = true)
 |-- id: string (nullable = true)
 |-- meta: struct (nullable = true)
 |    |-- lastUpdated: string (nullable = true)
 |    |-- versionId: string (nullable = true)
 |-- status: string (nullable = true)
 |-- vaccineCode: struct (nullable = true)
 |    |-- coding: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- system: string (nullable = true)
 |    |    |    |-- code: string (nullable = true)
 |    |    |    |-- display: string (nullable = true)
 |    |-- text: string (nullable = true)
 |-- encounter: struct (nullable = true)
 |    |-- reference: string (nullable = true)
 |-- patient: struct (nullable = true)
 |    |-- reference: string (nullable = true)
 |    |-- display: string 

In [220]:
provider.show(2,truncate=False)

+------------------+---+--------------+------------+-------------+-------------+------------------------------------+-----------------------------+------+-----------+---------+-------+--------+-----+--------+---------+----------------------------------------------------------------------------+-------+-----+-------+--------+--------------+---------+----+----+-------+--------+-------+----+--------------+--------+------+-------+-------+---------+----+-------+--------+------------------+--------------+------------+----+-------------+--------+-----------+---------------+-----+------+-------------+------+-----------------+---------+------------------------------------------------------+-------------------------------------------------------------------------------------------------------+------+---------------------------------------------------------------------------------------------------------------------+---------+---------------+---------------+-------------+----+---------+----------