In [0]:
%sql
-- Ensure the gold-layer database exists before any gold tables are written to it.
-- IF NOT EXISTS makes this statement safe to re-run.
CREATE DATABASE IF NOT EXISTS clinical_trial_gold;

1. Gold - Patient Summary

In [0]:
# Build the gold patient summary: patient attributes joined with per-patient
# dosing exposure, a serious-adverse-event flag and the recorded best response,
# then persist it as a Delta table.

from pyspark.sql import functions as F
# Retained unchanged in case later cells rely on these names. Note that `sum`
# and `max` shadow the Python builtins; this cell uses the `F.` namespace so
# the aggregations below are unambiguous.
from pyspark.sql.functions import col, sum, max, datediff

patients = spark.table("clinical_trial_silver.patients")
dosing = spark.table("clinical_trial_silver.drug_dosing")
aes = spark.table("clinical_trial_silver.adverse_events")
outcomes = spark.table("clinical_trial_silver.outcomes")


# Exposure summary: total of exposed_flag and latest dose_date per patient.

exposure = dosing.groupBy("patient_id").agg(
    F.sum("exposed_flag").alias("doses_taken"),
    F.max("dose_date").alias("last_dose_date"),
)


# SAE flag: maximum of `serious` per patient, cast to int.
# NOTE(review): assumes `serious` is boolean or 0/1 numeric so the cast yields
# 0/1 -- confirm against the silver schema.

sae_flag = aes.groupBy("patient_id").agg(
    F.max("serious").cast("int").alias("any_serious_ae"),
)

# Left joins keep every patient, including those with no dosing, adverse-event
# or outcome rows.
# NOTE(review): assumes `outcomes` holds at most one row per patient_id;
# otherwise this join duplicates patient rows -- confirm.
gold_patient_summary = (
    patients
    .join(exposure, on="patient_id", how="left")
    .join(sae_flag, on="patient_id", how="left")
    .join(outcomes.select("patient_id", "best_response"), on="patient_id", how="left")
    # Patients without dosing / adverse-event rows come out of the left joins
    # as NULL; report them as 0 doses and no serious AE so downstream filters
    # and averages do not silently skip them. last_dose_date and best_response
    # intentionally stay NULL when absent.
    .fillna(0, subset=["doses_taken", "any_serious_ae"])
)

# overwriteSchema lets the overwrite replace the table schema in place, so no
# prior DROP TABLE is needed: the swap is a single Delta commit and the
# existing table remains available if this cell fails before the write.
(
    gold_patient_summary
    .write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable("clinical_trial_gold.patient_summary")
)