In [None]:
# Silver-layer OBSERVATION table (Delta format).
# Holds the clinical observations pulled out of CCDA documents. Several CCDA
# sections (Results, Vital Signs, etc.) feed this single, unified target table.
#
# The DDL lives in its own variable so the statement can be inspected before
# it is executed. Because it uses CREATE TABLE IF NOT EXISTS, re-running this
# cell is harmless: an existing table is left as it is.
#
# NOTE(review): the clinical *_TMSTP and VALUE_* columns are declared STRING,
# whereas the record/audit columns are TIMESTAMP/BOOLEAN — presumably the
# source values are kept as extracted; confirm with the load logic.
_observation_table_ddl = """
CREATE TABLE IF NOT EXISTS observation (
    OBSERVATION_ID STRING COMMENT 'Unique ID for the observation',
    ENCOUNTER_ID STRING COMMENT 'ID of the related encounter',
    SERVICE_REQUEST_ID STRING,
    SPECIMEN_ID STRING,
    DEVICE_ID STRING,
    PATIENT_ID STRING COMMENT 'Patient identifier',
    PROVENANCE_ID STRING,
    MEDICATION_REQUEST_ID STRING,
    IMMUNIZATION_ID STRING,
    ORGANIZATION_ID STRING COMMENT 'Organization identifier',
    PRACTITIONER_ID STRING COMMENT 'Practitioner identifier',
    PRACTITIONER_ROLE_ID STRING,
    OBSERVATION_STATUS_CODE STRING COMMENT 'Status: registered, preliminary, final, amended, corrected, cancelled, entered-in-error, unknown',
    OBSERVATION_TYPE_SYSTEM_NAME STRING COMMENT 'Code system name (e.g. LOINC, SNOMED)',
    OBSERVATION_TYPE_SYSTEM_CODE STRING COMMENT 'Code value within the system',
    OBSERVATION_TYPE_DISPLAY_NAME STRING COMMENT 'Human-readable display name for the observation type',
    OBSERVATION_TYPE_TEXT STRING,
    COMMENT STRING,
    EFFECTIVE_START_TMSTP STRING COMMENT 'Observation effective period start',
    EFFECTIVE_END_TMSTP STRING COMMENT 'Observation effective period end',
    EFFECTIVE_TMSTP STRING COMMENT 'Point-in-time effective timestamp',
    ISSUE_TMSTP STRING,
    RESULT_VALUE_TYPE STRING COMMENT 'Data type of the result value (PQ, ST, CD, etc.)',
    VALUE_NUMERIC STRING COMMENT 'Numeric result value',
    VALUE_TEXT STRING COMMENT 'Text result value',
    VALUE_BOOLEAN STRING COMMENT 'Boolean result value',
    VALUE_TMSTP STRING COMMENT 'Datetime result value',
    UNIT_UCUM_CODE STRING COMMENT 'Unit of measure (UCUM)',
    DATA_ABSENT_REASON_CODE STRING COMMENT 'Reason for missing value (NullFlavor)',
    ABNORMAL_IND STRING COMMENT 'Abnormal indicator based on interpretation code',
    BODY_SITE_CODE STRING,
    BODY_SITE_TEXT STRING,
    METHOD_CODE STRING,
    COMPONENT_CODE STRING,
    COMPONENT_VAL STRING,
    COMPONENT_DATA_ABSENT_REASON_TEXT STRING,
    OBSERVATION_INTERPRETATION STRING,
    OBSERVATION_CATEGORY STRING COMMENT 'Category derived from CCDA section (laboratory, vital-signs, etc.)',
    OBSERVATION_NOTE STRING,
    OBSERVATION_REFERENCE_RANGE STRING,
    SOURCE_SYSTEM_CODE STRING,
    RECORD_START_TMSTP TIMESTAMP,
    RECORD_END_TMSTP TIMESTAMP,
    LATEST_RECORD_IND BOOLEAN,
    DELETED_IND BOOLEAN,
    SOURCE_INSERT_TMSTP TIMESTAMP,
    SOURCE_UPDATE_TMSTP TIMESTAMP,
    RAW_JSON_TEXT STRING COMMENT 'Original JSON entry for audit and reprocessing',
    SECTION_LOINC_CODE STRING COMMENT 'CCDA section LOINC code this observation was extracted from',
    BRONZE_ID STRING COMMENT 'Reference back to the bronze source row',
    ROW_INSERT_TIMESTAMP TIMESTAMP COMMENT 'Silver layer insert timestamp'
) USING DELTA
"""

# Run the DDL against the active Spark session, then report completion.
spark.sql(_observation_table_ddl)
print("Silver OBSERVATION table created or already exists.")

In [None]:
# Silver-layer ENCOUNTER table (Delta format).
# Holds encounter/visit records pulled out of CCDA documents. Rows originate
# from the Encounters section (LOINC 46240-8) and carry patient demographics
# joined in from the document root.
#
# The DDL lives in its own variable so the statement can be inspected before
# it is executed. Because it uses CREATE TABLE IF NOT EXISTS, re-running this
# cell is harmless: an existing table is left as it is.
#
# NOTE(review): ENCOUNTER_START_TMSTP / ENCOUNTER_END_TMSTP are declared
# STRING, whereas the record/audit columns are TIMESTAMP/BOOLEAN — presumably
# the source values are kept as extracted; confirm with the load logic.
_encounter_table_ddl = """
CREATE TABLE IF NOT EXISTS encounter (
    ENCOUNTER_ID STRING COMMENT 'Unique ID for the encounter',
    EPISODE_OF_CARE_ID STRING,
    PATIENT_ID STRING COMMENT 'Patient identifier',
    PATIENT_NAME_FAMILY STRING COMMENT 'Patient family name',
    PATIENT_NAME_GIVEN STRING COMMENT 'Patient given name',
    ENCOUNTER_PARENT_ID STRING,
    SERVICE_PROVIDER_ID STRING,
    ENCOUNTER_STATUS_CODE STRING COMMENT 'Status: in-progress, finished, cancelled, etc.',
    ENCOUNTER_CLASS_CODE STRING,
    ENCOUNTER_SERVICE_TYPE_CODE STRING,
    ENCOUNTER_SERVICE_TYPE_TEXT STRING COMMENT 'Description of the encounter type',
    PRIORITY_CODE STRING,
    PRIORITY_TEXT STRING,
    ENCOUNTER_START_TMSTP STRING COMMENT 'Encounter start date/time',
    ENCOUNTER_END_TMSTP STRING COMMENT 'Encounter end date/time',
    ENCOUNTER_DURATION_VAL STRING,
    PRE_ADMISSION_ID STRING,
    SOURCE_LOCATION_ID STRING,
    SOURCE_ORGANIZATION_ID STRING,
    ADMIT_SOURCE_CODE STRING,
    ADMIT_SOURCE_TEXT STRING,
    READMISSION_SOURCE_CODE STRING,
    READMISSION_SOURCE_TEXT STRING,
    DESTINATION_LOCATION_ID STRING,
    DESTINATION_ORGANIZATION_ID STRING,
    DISCHARGE_DISPOSITION_CODE STRING,
    DISCHARGE_DISPOSITION_TEXT STRING,
    ENCOUNTER_STATUS_HISTORY STRING,
    ENCOUNTER_CLASS_HISTORY STRING,
    ENCOUNTER_TYPE STRING,
    ENCOUNTER_REASON STRING,
    ENCOUNTER_DIET_PREFERENCE STRING,
    ENCOUNTER_DIAGNOSIS_CONDITION STRING,
    ENCOUNTER_LOCATION STRING,
    ENCOUNTER_SPECIAL_COURTESY STRING,
    ENCOUNTER_SPECIAL_ARRANGEMENT STRING,
    ENCOUNTER_PARTICIPANT STRING,
    ENCOUNTER_PERFORMER_NAME STRING COMMENT 'Performing provider name',
    ENCOUNTER_PERFORMER_PHONE STRING COMMENT 'Performing provider phone',
    ENCOUNTER_LOCATION_CITY STRING COMMENT 'Location city',
    ENCOUNTER_LOCATION_STATE STRING COMMENT 'Location state',
    SOURCE_SYSTEM_CODE STRING,
    RECORD_START_TMSTP TIMESTAMP,
    RECORD_END_TMSTP TIMESTAMP,
    LATEST_RECORD_IND BOOLEAN,
    DELETED_IND BOOLEAN,
    SOURCE_INSERT_TMSTP TIMESTAMP,
    SOURCE_UPDATE_TMSTP TIMESTAMP,
    RAW_JSON_TEXT STRING COMMENT 'Original JSON entry for audit and reprocessing',
    BRONZE_ID STRING COMMENT 'Reference back to the bronze source row',
    ROW_INSERT_TIMESTAMP TIMESTAMP COMMENT 'Silver layer insert timestamp'
) USING DELTA
"""

# Run the DDL against the active Spark session, then report completion.
spark.sql(_encounter_table_ddl)
print("Silver ENCOUNTER table created or already exists.")