In [0]:
-- Make the price-transparency dev schema the default for the unqualified
-- table names used by the statements that follow.
USE mgiglia.dev_matthew_giglia_price_transparency;

-- Refresh column-level statistics on the bronze index table.
ANALYZE TABLE index_json_bronze
  COMPUTE STATISTICS FOR ALL COLUMNS;

-- Flattens the JSON text in index_json_bronze.value into one row per entry
-- found under each reporting_structure element, then adds a SHA-256 id.
WITH index_variant_bronze AS (
  -- try_parse_json yields NULL instead of raising when value is not valid JSON.
  FROM index_json_bronze |>
  SELECT
    index_file_source_id,
    file_metadata,
    ingest_time,
    try_parse_json(value) AS variant_col
)
, first_explode AS (
  -- Explode the top-level object into key/value rows, then pivot the three
  -- listed keys back into columns. PIVOT implicitly groups by every other
  -- input column (index_file_source_id, file_metadata, ingest_time).
  FROM index_variant_bronze
  , LATERAL variant_explode(variant_col) |>
  SELECT
    index_file_source_id,
    file_metadata,
    ingest_time,
    key,
    value |>
  PIVOT (
    first(value)
    FOR key IN ('reporting_entity_name', 'reporting_entity_type', 'reporting_structure')
  ) |>
  SELECT
    index_file_source_id,
    file_metadata,
    ingest_time,
    reporting_entity_name::string,
    reporting_entity_type::string,
    reporting_structure
)
, second_explode AS (
  -- First LATERAL: one row per reporting_structure element.
  -- Second LATERAL: one row per key inside that element; the key becomes file_type.
  FROM first_explode
  , LATERAL variant_explode(reporting_structure) AS reporting_structure
  , LATERAL variant_explode(reporting_structure.value) AS reporting_structure_value |>
  SELECT
    index_file_source_id,
    file_metadata,
    ingest_time,
    reporting_entity_name,
    reporting_entity_type,
    -- Keep the element position so the PIVOT below does not merge entries that
    -- belong to different reporting_structure elements but share file_type and
    -- entry position (previously first() could mix fields across elements).
    reporting_structure.pos AS reporting_structure_pos,
    reporting_structure_value.key AS file_type,
    reporting_structure_value.value
)
FROM second_explode
, LATERAL variant_explode(value) AS file_desc
, LATERAL variant_explode(file_desc.value) AS file_desc_value |>
-- reporting_structure_pos and file_desc_pos are selected only to act as
-- implicit PIVOT grouping columns; neither is projected in the final output.
SELECT
  index_file_source_id,
  file_metadata,
  ingest_time,
  reporting_entity_name,
  reporting_entity_type,
  reporting_structure_pos,
  file_type,
  file_desc.pos AS file_desc_pos,
  file_desc_value.key AS file_desc_value_key,
  file_desc_value.value AS file_desc_value_value |>
PIVOT (
  first(file_desc_value_value)
  FOR file_desc_value_key IN ('description','plan_id_type','plan_id','plan_market_type','plan_name','location')
) |>
SELECT
  index_file_source_id,
  file_metadata,
  ingest_time,
  reporting_entity_name,
  reporting_entity_type,
  file_type,
  description::string AS file_description,
  plan_id_type::string AS reporting_plan_id_type,
  plan_id::string AS reporting_plan_id,
  plan_market_type::string AS reporting_plan_market_type,
  plan_name::string AS reporting_plan_name,
  location::string AS file_location |>
-- NOTE(review): concat() returns NULL when any argument is NULL, so rows with
-- no description or location get a NULL id — confirm that is intended.
SELECT
  *,
  sha2(concat(file_metadata.file_path, ingest_time, reporting_entity_name, reporting_entity_type, file_type, file_description, file_location), 256) AS in_network_file_index_id;

-- EXPLAIN
-- Same flattening pipeline as the CTE query, written as nested FROM subqueries
-- and reading index_json_bronze with a 2-second watermark on rcrd_timestamp.
FROM (
  FROM (
    FROM (
      -- Innermost level: parse the JSON text and tag each row with its 2-second window.
      FROM index_json_bronze WATERMARK rcrd_timestamp DELAY OF INTERVAL 2 SECONDS |>
      SELECT
        index_file_source_id,
        file_metadata,
        ingest_time,
        rcrd_timestamp,
        window(rcrd_timestamp, '2 SECONDS') AS rcrd_timestamp_window,
        try_parse_json(value) AS variant_col
    )
    , LATERAL variant_explode(variant_col) |>
    -- rcrd_timestamp_window is kept here as an implicit PIVOT grouping column
    -- and dropped by the SELECT that follows the PIVOT.
    SELECT
      index_file_source_id,
      file_metadata,
      ingest_time,
      rcrd_timestamp,
      rcrd_timestamp_window,
      key,
      value |>
    PIVOT (
      first(value)
      FOR key IN ('reporting_entity_name', 'reporting_entity_type', 'reporting_structure')
    ) |>
    SELECT
      index_file_source_id,
      file_metadata,
      ingest_time,
      rcrd_timestamp,
      reporting_entity_name::string,
      reporting_entity_type::string,
      reporting_structure
  )
  , LATERAL variant_explode(reporting_structure) AS reporting_structure
  , LATERAL variant_explode(reporting_structure.value) AS reporting_structure_value |>
  SELECT
    index_file_source_id,
    file_metadata,
    ingest_time,
    rcrd_timestamp,
    reporting_entity_name,
    reporting_entity_type,
    -- Keep the element position so the final PIVOT does not merge entries from
    -- different reporting_structure elements that share file_type and position.
    reporting_structure.pos AS reporting_structure_pos,
    reporting_structure_value.key AS file_type,
    reporting_structure_value.value
)
, LATERAL variant_explode(value) AS file_desc
, LATERAL variant_explode(file_desc.value) AS file_desc_value |>
-- reporting_structure_pos and file_desc_pos exist only as implicit PIVOT
-- grouping columns; neither is projected in the final output.
SELECT
  index_file_source_id,
  file_metadata,
  ingest_time,
  rcrd_timestamp,
  reporting_entity_name,
  reporting_entity_type,
  reporting_structure_pos,
  file_type,
  file_desc.pos AS file_desc_pos,
  file_desc_value.key AS file_desc_value_key,
  file_desc_value.value AS file_desc_value_value |>
PIVOT (
  first(file_desc_value_value)
  FOR file_desc_value_key IN ('description','plan_id_type','plan_id','plan_market_type','plan_name','location')
) |>
SELECT
  index_file_source_id,
  file_metadata,
  ingest_time,
  rcrd_timestamp,
  reporting_entity_name,
  reporting_entity_type,
  file_type,
  description::string AS file_description,
  plan_id_type::string AS reporting_plan_id_type,
  plan_id::string AS reporting_plan_id,
  plan_market_type::string AS reporting_plan_market_type,
  plan_name::string AS reporting_plan_name,
  location::string AS file_location |>
-- NOTE(review): concat() returns NULL when any argument is NULL, so rows with
-- no description or location get a NULL id — confirm that is intended.
SELECT
  *,
  sha2(concat(file_metadata.file_path, ingest_time, reporting_entity_name, reporting_entity_type, file_type, file_description, file_location), 256) AS in_network_file_index_id;



-----------------------------------------------
-- Expose the change data feed of index_json_bronze, starting at table version 1,
-- as a session-scoped view. OR REPLACE keeps the cell re-runnable: a plain
-- CREATE fails once the view already exists in the session.
-- The table name is a single-quoted string literal so it is never parsed as an
-- identifier when double-quoted identifiers are enabled.
CREATE OR REPLACE TEMPORARY VIEW index_json_bronze_cdf AS
SELECT *
FROM TABLE_CHANGES('mgiglia.dev_matthew_giglia_price_transparency.index_json_bronze', 1);

-- Preview the change feed rows.
SELECT * FROM index_json_bronze_cdf;

-- Flattens change-feed rows from index_json_bronze_cdf, keeps only entries
-- whose file_type starts with 'allowed', and adds a SHA-256 id. The change-feed
-- columns _change_type and _commit_version are carried through every level.
FROM (
  FROM (
    FROM (
      -- Innermost level: drop rows without rcrd_timestamp, parse the JSON text,
      -- and tag each row with a 1-millisecond window over _commit_timestamp.
      FROM index_json_bronze_cdf WATERMARK _commit_timestamp DELAY OF INTERVAL 1 MILLISECOND |>
      WHERE rcrd_timestamp IS NOT NULL |>
      SELECT
        index_file_source_id,
        file_metadata,
        ingest_time,
        rcrd_timestamp,
        try_parse_json(value) AS variant_col,
        window(_commit_timestamp, '1 MILLISECOND') AS _commit_window,
        _change_type,
        _commit_version
    )
    , LATERAL variant_explode(variant_col) |>
    SELECT
      index_file_source_id,
      file_metadata,
      ingest_time,
      rcrd_timestamp,
      key,
      value,
      _commit_window,
      _change_type,
      _commit_version |>
    PIVOT (
      first(value)
      FOR key IN ('reporting_entity_name', 'reporting_entity_type', 'reporting_structure')
    ) |>
    SELECT
      index_file_source_id,
      file_metadata,
      ingest_time,
      rcrd_timestamp,
      reporting_entity_name::string,
      reporting_entity_type::string,
      reporting_structure,
      _commit_window,
      _change_type,
      _commit_version
  )
  , LATERAL variant_explode(reporting_structure) AS reporting_structure
  , LATERAL variant_explode(reporting_structure.value) AS reporting_structure_value |>
  SELECT
    index_file_source_id,
    file_metadata,
    ingest_time,
    rcrd_timestamp,
    reporting_entity_name,
    reporting_entity_type,
    -- Keep the element position so the final PIVOT does not merge entries from
    -- different reporting_structure elements that share file_type and position.
    reporting_structure.pos AS reporting_structure_pos,
    reporting_structure_value.key AS file_type,
    reporting_structure_value.value,
    _commit_window,
    _change_type,
    _commit_version
)
, LATERAL variant_explode(value) AS file_desc
, LATERAL variant_explode(file_desc.value) AS file_desc_value |>
-- NOTE(review): variant_explode emits rows only for VARIANT arrays/objects. If
-- the 'allowed…' value is a single object rather than an array of objects, the
-- second explode sees scalar values and this query returns no rows — confirm
-- against the data.
WHERE file_type LIKE 'allowed%' |>
-- reporting_structure_pos and file_desc_pos exist only as implicit PIVOT
-- grouping columns; neither is projected in the final output.
SELECT
  index_file_source_id,
  file_metadata,
  ingest_time,
  rcrd_timestamp,
  reporting_entity_name,
  reporting_entity_type,
  reporting_structure_pos,
  file_type,
  file_desc.pos AS file_desc_pos,
  file_desc_value.key AS file_desc_value_key,
  file_desc_value.value AS file_desc_value_value,
  _commit_window,
  _change_type,
  _commit_version |>
PIVOT (
  first(file_desc_value_value)
  FOR file_desc_value_key IN ('description','plan_id_type','plan_id','plan_market_type','plan_name','location')
) |>
SELECT
  index_file_source_id,
  file_metadata,
  ingest_time,
  rcrd_timestamp,
  reporting_entity_name,
  reporting_entity_type,
  file_type,
  description::string AS file_description,
  plan_id_type::string AS reporting_plan_id_type,
  plan_id::string AS reporting_plan_id,
  plan_market_type::string AS reporting_plan_market_type,
  plan_name::string AS reporting_plan_name,
  location::string AS file_location,
  _commit_window,
  _change_type,
  _commit_version |>
-- NOTE(review): concat() returns NULL when any argument is NULL, so rows with
-- no description or location get a NULL id — confirm that is intended.
SELECT
  *,
  sha2(concat(file_metadata.file_path, ingest_time, reporting_entity_name, reporting_entity_type, file_type, file_description, file_location), 256) AS allowed_amount_file_index_id;