In [0]:
%sql
-- Ensure the yogurt catalog and its bronze schema exist, and make them the
-- session defaults so later unqualified table names resolve to yogurt.bronze.
CREATE CATALOG IF NOT EXISTS yogurt;
USE CATALOG yogurt;

-- The schema is created inside the catalog selected just above.
CREATE SCHEMA IF NOT EXISTS bronze;
USE SCHEMA bronze;


In [0]:
%sql

-- Copy every samples.tpch table verbatim into the current schema as a Delta
-- table. CREATE OR REPLACE keeps the cell re-runnable: each run rebuilds the
-- table from the source.
CREATE OR REPLACE TABLE customer
USING DELTA
AS SELECT * FROM samples.tpch.customer;

CREATE OR REPLACE TABLE orders
USING DELTA
AS SELECT * FROM samples.tpch.orders;

CREATE OR REPLACE TABLE lineitem
USING DELTA
AS SELECT * FROM samples.tpch.lineitem;

CREATE OR REPLACE TABLE part
USING DELTA
AS SELECT * FROM samples.tpch.part;

CREATE OR REPLACE TABLE supplier
USING DELTA
AS SELECT * FROM samples.tpch.supplier;

CREATE OR REPLACE TABLE partsupp
USING DELTA
AS SELECT * FROM samples.tpch.partsupp;

CREATE OR REPLACE TABLE nation
USING DELTA
AS SELECT * FROM samples.tpch.nation;

CREATE OR REPLACE TABLE region
USING DELTA
AS SELECT * FROM samples.tpch.region;


In [0]:
%sql
-- List the tables in the current schema (no schema is named, so the session
-- default applies).
SHOW TABLES;

In [0]:
%sql
-- Reconcile each copied table against its samples.tpch source.
-- For every table, compare the total row count (c) and the number of distinct
-- key values (d). Two-column keys (lineitem, partsupp) are flattened into one
-- '#'-separated string so a single COUNT(DISTINCT ...) covers both columns.
-- Output: one row per table with both sides' counts and an OK/MISMATCH status.
WITH src AS (
  -- Counts taken from the fully qualified source tables.
  SELECT 'customer'  AS t, COUNT(*) AS c, COUNT(DISTINCT c_custkey) AS d FROM samples.tpch.customer
  UNION ALL SELECT 'orders',    COUNT(*), COUNT(DISTINCT o_orderkey) FROM samples.tpch.orders
  UNION ALL SELECT 'lineitem',  COUNT(*), COUNT(DISTINCT concat_ws('#', CAST(l_orderkey AS STRING), CAST(l_linenumber AS STRING))) FROM samples.tpch.lineitem
  UNION ALL SELECT 'part',      COUNT(*), COUNT(DISTINCT p_partkey) FROM samples.tpch.part
  UNION ALL SELECT 'supplier',  COUNT(*), COUNT(DISTINCT s_suppkey) FROM samples.tpch.supplier
  UNION ALL SELECT 'partsupp',  COUNT(*), COUNT(DISTINCT concat_ws('#', CAST(ps_partkey AS STRING), CAST(ps_suppkey AS STRING))) FROM samples.tpch.partsupp
  UNION ALL SELECT 'nation',    COUNT(*), COUNT(DISTINCT n_nationkey) FROM samples.tpch.nation
  UNION ALL SELECT 'region',    COUNT(*), COUNT(DISTINCT r_regionkey) FROM samples.tpch.region
),
brz AS (
  -- Same measurements on the copies; unqualified names resolve to the
  -- session's current schema.
  SELECT 'customer'  AS t, COUNT(*) AS c, COUNT(DISTINCT c_custkey) AS d FROM customer
  UNION ALL SELECT 'orders',    COUNT(*), COUNT(DISTINCT o_orderkey) FROM orders
  UNION ALL SELECT 'lineitem',  COUNT(*), COUNT(DISTINCT concat_ws('#', CAST(l_orderkey AS STRING), CAST(l_linenumber AS STRING))) FROM lineitem
  UNION ALL SELECT 'part',      COUNT(*), COUNT(DISTINCT p_partkey) FROM part
  UNION ALL SELECT 'supplier',  COUNT(*), COUNT(DISTINCT s_suppkey) FROM supplier
  UNION ALL SELECT 'partsupp',  COUNT(*), COUNT(DISTINCT concat_ws('#', CAST(ps_partkey AS STRING), CAST(ps_suppkey AS STRING))) FROM partsupp
  UNION ALL SELECT 'nation',    COUNT(*), COUNT(DISTINCT n_nationkey) FROM nation
  UNION ALL SELECT 'region',    COUNT(*), COUNT(DISTINCT r_regionkey) FROM region
)
SELECT s.t AS table_name,
       s.c AS src_count,
       b.c AS bronze_count,
       s.d AS src_pk_distinct,
       b.d AS bronze_pk_distinct,
       CASE WHEN s.c = b.c AND s.d = b.d THEN 'OK' ELSE 'MISMATCH' END AS status
FROM src AS s
INNER JOIN brz AS b
  ON s.t = b.t
-- Sort by the named column rather than by position so the ordering survives
-- any future change to the select list.
ORDER BY table_name;


In [0]:
%sql
-- Audit log of validation runs: one row per table per run.
CREATE TABLE IF NOT EXISTS _validation_runs (
  ts TIMESTAMP,               -- when the validation row was written
  table_name STRING,          -- table being compared
  src_count BIGINT,           -- row count in samples.tpch
  bronze_count BIGINT,        -- row count in the copied table
  src_pk_distinct BIGINT,     -- distinct key values in samples.tpch
  bronze_pk_distinct BIGINT,  -- distinct key values in the copied table
  status STRING               -- 'OK' when both counts match, else 'MISMATCH'
) USING DELTA;

-- Append the current comparison to the log. Each execution adds a new set of
-- rows stamped with current_timestamp(); earlier runs are kept.
WITH src AS (
  -- Row count (c) and distinct-key count (d) per source table.
  SELECT 'customer'  AS t, COUNT(*) AS c, COUNT(DISTINCT c_custkey) AS d FROM samples.tpch.customer
  UNION ALL SELECT 'orders',    COUNT(*), COUNT(DISTINCT o_orderkey) FROM samples.tpch.orders
  UNION ALL SELECT 'lineitem',  COUNT(*), COUNT(DISTINCT concat_ws('#', CAST(l_orderkey AS STRING), CAST(l_linenumber AS STRING))) FROM samples.tpch.lineitem
  UNION ALL SELECT 'part',      COUNT(*), COUNT(DISTINCT p_partkey) FROM samples.tpch.part
  UNION ALL SELECT 'supplier',  COUNT(*), COUNT(DISTINCT s_suppkey) FROM samples.tpch.supplier
  UNION ALL SELECT 'partsupp',  COUNT(*), COUNT(DISTINCT concat_ws('#', CAST(ps_partkey AS STRING), CAST(ps_suppkey AS STRING))) FROM samples.tpch.partsupp
  UNION ALL SELECT 'nation',    COUNT(*), COUNT(DISTINCT n_nationkey) FROM samples.tpch.nation
  UNION ALL SELECT 'region',    COUNT(*), COUNT(DISTINCT r_regionkey) FROM samples.tpch.region
),
brz AS (
  -- Same measurements on the copies; unqualified names resolve to the
  -- session's current schema.
  SELECT 'customer'  AS t, COUNT(*) AS c, COUNT(DISTINCT c_custkey) AS d FROM customer
  UNION ALL SELECT 'orders',    COUNT(*), COUNT(DISTINCT o_orderkey) FROM orders
  UNION ALL SELECT 'lineitem',  COUNT(*), COUNT(DISTINCT concat_ws('#', CAST(l_orderkey AS STRING), CAST(l_linenumber AS STRING))) FROM lineitem
  UNION ALL SELECT 'part',      COUNT(*), COUNT(DISTINCT p_partkey) FROM part
  UNION ALL SELECT 'supplier',  COUNT(*), COUNT(DISTINCT s_suppkey) FROM supplier
  UNION ALL SELECT 'partsupp',  COUNT(*), COUNT(DISTINCT concat_ws('#', CAST(ps_partkey AS STRING), CAST(ps_suppkey AS STRING))) FROM partsupp
  UNION ALL SELECT 'nation',    COUNT(*), COUNT(DISTINCT n_nationkey) FROM nation
  UNION ALL SELECT 'region',    COUNT(*), COUNT(DISTINCT r_regionkey) FROM region
)
-- Name the target columns explicitly: because the table is created with
-- IF NOT EXISTS it may already exist, and a positional insert would silently
-- misalign values if its column order ever differed from the DDL above.
INSERT INTO _validation_runs (
  ts,
  table_name,
  src_count,
  bronze_count,
  src_pk_distinct,
  bronze_pk_distinct,
  status
)
SELECT current_timestamp(),
       s.t,
       s.c,
       b.c,
       s.d,
       b.d,
       CASE WHEN s.c = b.c AND s.d = b.d THEN 'OK' ELSE 'MISMATCH' END
FROM src AS s
INNER JOIN brz AS b
  ON s.t = b.t;


In [0]:
%sql
-- Display the validation log, newest rows first and alphabetically by table
-- within the same timestamp.
SELECT *
FROM _validation_runs
ORDER BY
  ts DESC,
  table_name;


In [0]:
%sql
-- Content checksum for the two largest tables, source vs. copy.
-- Each row is serialised with to_json(struct(*)), hashed with crc32, and the
-- hashes are summed; a sum does not depend on row order, so matching totals
-- for a src/bronze pair indicate the same set of row contents.
-- Written as ONE statement so that all four checksums come back in a single
-- result set (a multi-statement notebook cell typically surfaces only the
-- result of its last statement), with the same column names on every row.
SELECT 'orders src' AS scope, SUM(crc32(to_json(struct(*)))) AS checksum FROM samples.tpch.orders
UNION ALL
SELECT 'orders bronze',   SUM(crc32(to_json(struct(*)))) FROM orders
UNION ALL
SELECT 'lineitem src',    SUM(crc32(to_json(struct(*)))) FROM samples.tpch.lineitem
UNION ALL
SELECT 'lineitem bronze', SUM(crc32(to_json(struct(*)))) FROM lineitem
-- Deterministic output; also places each table's bronze/src rows side by side.
ORDER BY scope;
