In [0]:
-- Session variables holding the catalog and schema names used by later cells.
-- The defaults below stay in effect until a later SET VARIABLE overrides them.
DECLARE OR REPLACE VARIABLE catalog_use STRING DEFAULT 'fhir_workshop';
DECLARE OR REPLACE VARIABLE schema_use STRING DEFAULT 'matthew_giglia';

In [0]:
-- Overwrite the session variables with the values bound to the named
-- parameter markers (presumably notebook widgets of the same names -- confirm).
SET VAR catalog_use = :`catalog_use`;
SET VAR schema_use = :`schema_use`;

In [0]:
-- Echo the catalog and schema names currently held in the session variables.
SELECT
  catalog_use,
  schema_use;

In [0]:
-- Switch the session to the "<catalog_use>.<schema_use>" name built from the
-- two session variables, so later unqualified table names resolve there.
USE IDENTIFIER(catalog_use || '.' || schema_use);

In [0]:
-- Confirm which catalog and schema the session currently points at.
SELECT
  current_catalog(),
  current_schema();

In [0]:
-- List the volumes visible in the current schema.
SHOW VOLUMES;

In [0]:
-- Holds the text of a dynamically built LIST statement.
DECLARE OR REPLACE VARIABLE list_stmnt STRING;

-- Assemble the landing-volume path from the catalog and schema variables.
SET VARIABLE list_stmnt =
  "LIST '/Volumes/"
  || catalog_use
  || "/"
  || schema_use
  || "/landing/'";

-- Preview the statement text before it is executed.
SELECT
  list_stmnt;

In [0]:
-- Run the statement text stored in the list_stmnt session variable.
EXECUTE IMMEDIATE list_stmnt;

In [0]:
-- Rebuild the LIST statement for the landing volume from the catalog and
-- schema variables (list_stmnt must already be declared in the session).
SET VARIABLE list_stmnt =
  "LIST '/Volumes/"
  || catalog_use
  || "/"
  || schema_use
  || "/landing/'";

-- Show the statement text, then run it.
SELECT
  list_stmnt;

EXECUTE IMMEDIATE list_stmnt;

In [0]:
-- Remove any earlier copy so the streaming table is created fresh.
DROP TABLE IF EXISTS fhir_bronze;

-- Bronze layer: stream the landing volume with read_files in 'text' format
-- with wholeText enabled, keeping the file _metadata next to the content.
-- NOTE(review): the volume path is hard-coded here rather than derived from
-- the catalog_use / schema_use session variables.
CREATE OR REFRESH STREAMING TABLE fhir_bronze
COMMENT 'Ingest FHIR JSON records as Full Text STRING'
TBLPROPERTIES (
  'delta.enableChangeDataFeed' = 'true',
  'delta.enableDeletionVectors' = 'true',
  'delta.enableRowTracking' = 'true',
  'quality' = 'bronze'
)
AS
SELECT
  _metadata AS file_metadata,
  *
FROM STREAM read_files(
  '/Volumes/fhir_workshop/matthew_giglia/landing/',
  format => 'text',
  wholeText => true
);

In [0]:
-- Inspect a single row of the bronze table.
SELECT * FROM fhir_bronze LIMIT 1;

In [0]:
-- Number of rows currently in the bronze table.
SELECT
  COUNT(*) AS rcrd_cnt
FROM fhir_bronze;

In [0]:
-- Remove any earlier copy so the streaming table is created fresh.
DROP TABLE IF EXISTS fhir_bronze_variant;

-- Parse the raw text of each bronze row with try_parse_json, which yields
-- NULL instead of failing when the text is not valid JSON.
CREATE OR REFRESH STREAMING TABLE fhir_bronze_variant
COMMENT 'Evaluate FHIR JSON records as VARIANT'
TBLPROPERTIES (
  'delta.enableChangeDataFeed' = 'true',
  'delta.enableDeletionVectors' = 'true',
  'delta.enableRowTracking' = 'true',
  'quality' = 'bronze',
  'pipelines.channel' = 'PREVIEW',
  'delta.feature.variantType-preview' = 'supported'
)
AS
SELECT
  file_metadata,
  try_parse_json(value) AS fhir
FROM STREAM(fhir_bronze);

In [0]:
-- Inspect a single parsed row.
SELECT *
FROM fhir_bronze_variant
LIMIT 1;

In [0]:
-- Explode the `entry` field of each parsed document: variant_explode emits
-- one row (pos, key, value) per element. Capped at 100 rows for a quick look.
SELECT
  file_metadata,
  fhir,
  bundle_entry.*
FROM fhir_bronze_variant,
  LATERAL variant_explode(fhir:entry) AS bundle_entry
LIMIT 100;

In [0]:
-- One row per exploded entry, with fullUrl and the resource's resourceType
-- extracted as strings and the whole entry kept as a VARIANT.
SELECT
  file_metadata,
  CAST(bundle_entry.value:fullUrl AS STRING) AS fullUrl,
  CAST(bundle_entry.value:resource.resourceType AS STRING) AS resourceType,
  bundle_entry.value AS entry
FROM fhir_bronze_variant,
  LATERAL variant_explode(fhir:entry) AS bundle_entry;

In [0]:
-- Two-level explode: first each entry, then each field of that entry's
-- resource, producing one (pos, key, value) row per resource field.
SELECT
  file_metadata,
  CAST(bundle_entry.value:fullUrl AS STRING) AS fullUrl,
  CAST(bundle_entry.value:resource.resourceType AS STRING) AS resourceType,
  resource_field.*
FROM fhir_bronze_variant,
  LATERAL variant_explode(fhir:entry) AS bundle_entry,
  LATERAL variant_explode(bundle_entry.value:resource) AS resource_field;

In [0]:
-- Remove any earlier copy so the streaming table is created fresh.
DROP TABLE IF EXISTS fhir_resources;

-- Key/value layout of every resource: each entry is exploded, then each
-- field of its resource, giving one (pos, key, value) row per field tagged
-- with the entry's fullUrl and resourceType.
CREATE OR REFRESH STREAMING TABLE fhir_resources
COMMENT 'Exploded FHIR Resources'
TBLPROPERTIES (
  'delta.enableChangeDataFeed' = 'true',
  'delta.enableDeletionVectors' = 'true',
  'delta.enableRowTracking' = 'true',
  'quality' = 'bronze',
  'pipelines.channel' = 'PREVIEW',
  'delta.feature.variantType-preview' = 'supported'
)
AS
SELECT
  file_metadata,
  CAST(bundle_entry.value:fullUrl AS STRING) AS fullUrl,
  CAST(bundle_entry.value:resource.resourceType AS STRING) AS resourceType,
  resource_field.*
FROM STREAM(fhir_bronze_variant),
  LATERAL variant_explode(fhir:entry) AS bundle_entry,
  LATERAL variant_explode(bundle_entry.value:resource) AS resource_field;

In [0]:
-- Distinct fullUrl count per resource type, most frequent type first.
-- Reads the fhir_resources table built by this notebook in the schema
-- selected with USE, rather than a copy in some other catalog, so the counts
-- describe the data that was just ingested.
SELECT
  resourceType,
  COUNT(DISTINCT fullUrl) AS cnt
FROM fhir_resources
GROUP BY resourceType
ORDER BY cnt DESC;

In [0]:
-- Session variable that will hold the distinct field names (keys) seen on
-- Patient resources.
DECLARE OR REPLACE VARIABLE patient_keys ARRAY<STRING>;

-- Collect every distinct key among the Patient rows of fhir_resources.
SET VARIABLE patient_keys = (
  SELECT collect_list(DISTINCT key)
  FROM fhir_resources
  WHERE resourceType = 'Patient'
);

-- Display the collected keys.
SELECT
  patient_keys;

In [0]:
-- Turn the key/value rows of Patient resources into one wide row per
-- (file_metadata, fullUrl), with one column per listed key.
-- NOTE(review): the key list is written out literally here; it is not read
-- from the patient_keys session variable.
WITH patient_attributes AS (
  SELECT
    file_metadata,
    fullUrl,
    key,
    value
  FROM fhir_resources
  WHERE resourceType = 'Patient'
)
SELECT *
FROM patient_attributes
  PIVOT (
    first(value) FOR key IN ("multipleBirthInteger","name","birthDate","id","address","gender","telecom","resourceType","text","communication","maritalStatus","identifier","multipleBirthBoolean","deceasedDateTime","meta","extension")
  );

In [0]:
-- Materialize the pivoted Patient resources: one row per
-- (file_metadata, fullUrl) with one column per listed key.
-- NOTE(review): the key list is written out literally here; it is not read
-- from the patient_keys session variable.
CREATE OR REPLACE TABLE patient
AS
WITH patient_attributes AS (
  SELECT
    file_metadata,
    fullUrl,
    key,
    value
  FROM fhir_resources
  WHERE resourceType = 'Patient'
)
SELECT *
FROM patient_attributes
  PIVOT (
    first(value) FOR key IN ("multipleBirthInteger","name","birthDate","id","address","gender","telecom","resourceType","text","communication","maritalStatus","identifier","multipleBirthBoolean","deceasedDateTime","meta","extension")
  );

In [0]:
-- Display the DDL of the patient table to inspect the column types the
-- pivot produced.
SHOW CREATE TABLE patient;

In [0]:
-- Patient count and average age per gender.
-- Reads the patient table created earlier in this notebook (current schema)
-- instead of a copy in an unrelated catalog, so the summary matches the data
-- that was just loaded.
WITH patient_fixed AS (
  -- Cast the pivoted columns this summary needs to concrete types.
  SELECT
    CAST(gender AS STRING) AS gender,
    CAST(id AS STRING) AS patient_id,
    CAST(birthDate AS DATE) AS birthDate
  FROM patient
)
SELECT
  gender,
  COUNT(DISTINCT patient_id) AS cnt,
  -- Age in years approximated as days since birth / 365.25.
  AVG(DATEDIFF(current_date(), birthDate) / 365.25) AS avg_age
FROM patient_fixed
GROUP BY gender;

In [0]:
-- Patient count per city of the first listed address, largest city first.
-- Reads the patient table created earlier in this notebook (current schema)
-- instead of a copy in an unrelated catalog, so the summary matches the data
-- that was just loaded.
WITH patient_fixed AS (
  SELECT
    CAST(id AS STRING) AS patient_id,
    -- Element 0 of the address array is treated as the primary address.
    CAST(address:[0].city AS STRING) AS primary_city
  FROM patient
)
SELECT
  primary_city,
  COUNT(DISTINCT patient_id) AS cnt
FROM patient_fixed
GROUP BY primary_city
ORDER BY cnt DESC;

In [0]:
-- Latitude and longitude per patient, taken from the nested extensions of
-- each address and pivoted into one row per patient_id.
-- The decimal values are cast to DOUBLE rather than FLOAT: a 4-byte FLOAT
-- keeps only about 7 significant digits, which truncates coordinates.
-- Lateral aliases are named distinctly so they do not shadow the `address`
-- column or the projected coordinate value.
WITH address_fixed AS (
  SELECT
    CAST(id AS STRING) AS patient_id,
    CAST(coord_ext.value:url AS STRING) AS coordinate_type,
    CAST(coord_ext.value:valueDecimal AS DOUBLE) AS coordinate_value
  FROM patient,
    -- one row per address of the patient
    LATERAL variant_explode(address) AS addr,
    -- one row per extension attached to that address
    LATERAL variant_explode(addr.value:extension) AS addr_ext,
    -- one row per nested extension carrying a url / valueDecimal pair
    LATERAL variant_explode(addr_ext.value:extension) AS coord_ext
)
SELECT
  *
FROM address_fixed
  PIVOT (
    first(coordinate_value) FOR coordinate_type IN ("latitude","longitude")
  )
;

Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.