In [1]:
# Root of the "bronze" container in ADLS Gen2; the raw .ndjson files are read from below this path.
bronze_location = "abfss://bronze@prsynapselab.dfs.core.windows.net/"
# Intended save mode for table writes.
# NOTE(review): no cell visible in this notebook references write_mode — confirm whether the writes should use it.
write_mode="overwrite"


In [2]:
# Infer the schema once from a single sample Patient file, then apply that fixed schema
# when loading every .ndjson file found one directory level below reference_data/.
# NOTE(review): the glob matches all *.ndjson files, not only Patient.ndjson — confirm that
# the sub-folders contain nothing but Patient resources.
sample_patient_path = bronze_location+"reference_data/000047ca-00c7-492b-bf65-740805144cd2/Patient.ndjson"
all_ndjson_glob = bronze_location+"reference_data/*/*.ndjson"

patientSchema = spark.read.json(sample_patient_path).schema
patient_df = spark.read.schema(patientSchema).json(all_ndjson_glob)

In [3]:
# Print the schema that was applied to patient_df as a quick sanity check.
patient_df.printSchema()

In [4]:
# Show the loaded rows in the cell output for visual inspection.
display(patient_df)

In [5]:
# Columns carried forward from the raw Patient resource. The nested "text.div" and
# "text.status" selections come out as top-level columns named "div" and "status";
# the full "text" struct is kept as well.
selected_columns = [
    "id",
    "birthDate",
    "deceasedDateTime",
    "gender",
    "text",
    "multipleBirthBoolean",
    "multipleBirthInteger",
    "resourceType",
    "text.div",
    "text.status",
    "identifier",
    "address",
]

# Only "id" changes name (to "patient_id"); every other column keeps the name it was selected with.
patient_df_selected = patient_df.select(*selected_columns).withColumnRenamed("id", "patient_id")

In [6]:
# Register the selection as a temp view; the Scala (%%spark) cells below read it back
# through spark.sql("... from patient_df_selected").
patient_df_selected.createOrReplaceTempView("patient_df_selected")

In [7]:
%%spark
import org.apache.spark.sql.types.{StructType,ArrayType}
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.Column
import org.apache.spark.sql.functions.col
import org.apache.spark.sql.functions.explode_outer

/**
 * Recursively flattens a DataFrame until no top-level column is an array or a struct.
 *
 * On each pass the first column (in schema order) whose type is an array or a struct is
 * rewritten, and the function recurses on the result:
 *   - an array column is replaced by `explode_outer` of itself (one output row per element;
 *     a null or empty array yields a single row with null) and moves to the end of the
 *     column list;
 *   - a struct column is replaced by one column per child field, named `<parent>_<child>`,
 *     appended after the remaining columns.
 *
 * Column names are backtick-quoted before being handed to Spark, so names containing dots,
 * hyphens or spaces are treated as plain identifiers. Names that themselves contain a
 * backtick are not supported. Dots in output column names are replaced by underscores.
 *
 * Note that exploding several array columns multiplies rows: each input row produces the
 * cross product of the elements of all its arrays.
 *
 * @param df the DataFrame to flatten
 * @return a DataFrame whose schema has no array or struct columns
 */
def flattenDataFrame(df: DataFrame): DataFrame = {
  val fields = df.schema.fields
  val fieldNames = fields.map(_.name)

  // Quote a name so Spark resolves it as one identifier instead of parsing it as an
  // expression (a - b) or a nested path (a.b).
  def quoted(name: String): String = "`" + name + "`"

  // collectFirst stops at the first nested column, so at most one rewrite is built per pass.
  // This replaces the previous `return` from inside a for-loop body (a non-local return).
  val rewritten: Option[DataFrame] =
    fields.iterator.map(f => (f.name, f.dataType)).collectFirst {
      case (arrayName, _: ArrayType) =>
        val keptExprs = fieldNames.filter(_ != arrayName).map(quoted)
        val explodeExpr = s"explode_outer(${quoted(arrayName)}) as ${quoted(arrayName)}"
        val selectExprs = keptExprs :+ explodeExpr
        df.selectExpr(selectExprs: _*)

      case (structName, structType: StructType) =>
        val keptCols = fieldNames.filter(_ != structName).map { name =>
          col(quoted(name)).as(name.replace(".", "_"))
        }
        val childCols = structType.fieldNames.map { child =>
          col(quoted(structName) + "." + quoted(child))
            .as((structName + "." + child).replace(".", "_"))
        }
        val selectCols = keptCols ++ childCols
        df.select(selectCols: _*)
    }

  // Keep flattening until a pass finds nothing left to rewrite.
  rewritten.map(flattenDataFrame).getOrElse(df)
}

## Creating Patient Identifier Table

In [8]:
%%spark
// Read the temp view registered by the Python cell, then leave out the address column,
// which is written to its own table further down.
val patientsFromView = spark.sql("select * from patient_df_selected")
val patient_identifier_df = patientsFromView.drop("address")


In [9]:
%%spark
// Explode array columns and expand struct columns (e.g. identifier, text) into flat columns.
val patient_identifier_df_flattened = flattenDataFrame(patient_identifier_df)

In [10]:
%%spark
// Show the flattened rows in the cell output for visual inspection before writing them.
display(patient_identifier_df_flattened)

In [11]:
%%spark
// Root of the "silver" container; the Delta files for the table are written below this path.
val silver_location = "abfss://silver@prsynapselab.dfs.core.windows.net/"
// Persist the flattened rows as a Delta table stored at <silver>/PatientIdentifier and
// registered as fhir.PatientIdentifier, using at most 50 output partitions.
// The save mode is set explicitly: without it the writer defaults to error-if-exists, so
// re-running this cell fails once the table has been created. "overwrite" matches the
// write_mode declared in the first cell, which was never applied to this write.
patient_identifier_df_flattened.coalesce(50).write.format("delta").mode("overwrite").option("path", silver_location+"PatientIdentifier").saveAsTable("fhir.PatientIdentifier")


## Creating Patient Address Table

In [12]:
%%spark
// Keep only the patient key and the address column; addresses are flattened and stored
// in a separate table from the rest of the patient data.
val patient_address_df = spark.sql("select patient_id, address from patient_df_selected")

In [13]:
%%spark
// Show the un-flattened address rows in the cell output for visual inspection.
display(patient_address_df)

In [14]:
%%spark
// Explode the address column and expand its nested fields into flat address_* columns.
val patient_address_df_flattened = flattenDataFrame(patient_address_df)

In [15]:
%%spark
// Root of the "silver" container; the Delta files for the table are written below this path.
val silver_location = "abfss://silver@prsynapselab.dfs.core.windows.net/"

// Persist the flattened address rows as a Delta table stored at <silver>/PatientAddress and
// registered as fhir.PatientAddress, using at most 50 output partitions.
// The save mode is set explicitly: without it the writer defaults to error-if-exists, so
// re-running this cell fails once the table has been created. "overwrite" matches the
// write_mode declared in the first cell, which was never applied to this write.
patient_address_df_flattened.coalesce(50).write.format("delta").mode("overwrite").option("path", silver_location+"PatientAddress").saveAsTable("fhir.PatientAddress")