### Demo of Delta Lake change data feed

#### Create a silver table that tracks the absolute number of vaccinations and available doses by country

In [0]:
# Read the target catalog and schema names from the notebook widgets
# named "catalog" and "schema" (in that order).
catalog, schema = (
    dbutils.widgets.get(widget_name) for widget_name in ("catalog", "schema")
)

In [0]:
# Make the widget-supplied catalog and schema the session defaults so the
# unqualified table names used below (silverTable, goldTable) resolve there.
# The catalog is set first, then the database (schema) inside it.
spark.catalog.setCurrentCatalog(catalog)
spark.catalog.setCurrentDatabase(schema)

In [0]:
# Seed rows, one tuple per country, in the column order given by `columns`.
countries = [
    ("USA", 10000, 20000),
    ("India", 1000, 1500),
    ("UK", 7000, 10000),
    ("Canada", 500, 700),
]
columns = ["Country", "NumVaccinated", "AvailableDoses"]

# Write the seed rows to the Delta table `silverTable`, overwriting any
# existing contents.
# NOTE(review): the version numbers hard-coded later in this notebook appear to
# assume this write is the table's first commit — confirm when re-running.
(
    spark.createDataFrame(data=countries, schema=columns)
    .write.format("delta")
    .mode("overwrite")
    .saveAsTable("silverTable")
)

In [0]:
%sql
-- Show the rows just written to silverTable.
SELECT * FROM silverTable

In [0]:
import pyspark.sql.functions as F

# Build goldTable from silverTable: add VaccinationRate
# (NumVaccinated / AvailableDoses), drop the two source columns, and
# overwrite the Delta table `goldTable` with the result.
(
    spark.read.format("delta")
    .table("silverTable")
    .withColumn(
        "VaccinationRate",
        F.col("NumVaccinated") / F.col("AvailableDoses"),
    )
    .drop("NumVaccinated", "AvailableDoses")
    .write.format("delta")
    .mode("overwrite")
    .saveAsTable("goldTable")
)

#### Generate gold table showing vaccination rate by country

In [0]:
%sql
-- Show goldTable as derived from the initial silverTable rows.
SELECT * FROM goldTable

### Enable change data feed on silver table

In [0]:
%sql
-- Turn on the change data feed for silverTable by setting the
-- delta.enableChangeDataFeed table property.
ALTER TABLE silverTable SET TBLPROPERTIES (delta.enableChangeDataFeed = true)

### Update silver table daily

In [0]:
# Insert new records: append one additional country to silverTable, reusing
# the same column names as the initial load.
new_countries = [
    ("Australia", 100, 3000),
]
(
    spark.createDataFrame(data=new_countries, schema=columns)
    .write.format("delta")
    .mode("append")
    .saveAsTable("silverTable")
)

In [0]:
%sql
-- Update a record.
-- NumVaccinated was created from Python integers, so it is a numeric column:
-- assign a numeric literal instead of a quoted string that would need an
-- implicit string-to-number cast.
UPDATE silverTable SET NumVaccinated = 11000 WHERE Country = 'USA'

In [0]:
%sql
-- Remove the UK row from silverTable.
DELETE FROM silverTable
WHERE Country = 'UK'

In [0]:
%sql
-- Show silverTable after the insert, update and delete above.
SELECT * FROM silverTable

### Explore the change data in SQL and PySpark 

In [0]:
%sql
-- List the commit history of silverTable; use it to check which version
-- numbers the change-feed queries below should start from.
DESCRIBE HISTORY silverTable

In [0]:
%sql
-- Show table-level details for silverTable.
DESCRIBE DETAIL silverTable

In [0]:
%sql
-- Read the change feed of silverTable from version 2 onwards, oldest commit first.
-- NOTE(review): version 2 assumes a freshly created table (0 = initial write,
-- 1 = enabling the change data feed, 2 = first append) — confirm with DESCRIBE HISTORY.
SELECT *
FROM table_changes('silverTable', 2)
ORDER BY _commit_timestamp

In [0]:
# PySpark equivalent of the SQL change-feed query: read silverTable's change
# data starting at version 2 and render it.
changes_df = (
    spark.read.format("delta")
    .option("readChangeData", True)
    .option("startingVersion", 2)
    .table('silverTable')
)
display(changes_df)

### Propagate changes from silver to gold table

In [0]:
%sql
-- Collect only the latest version for each country
CREATE OR REPLACE TEMPORARY VIEW silverTable_latest_version as
SELECT * 
    FROM 
         (SELECT *, rank() over (partition by Country order by _commit_version desc) as rank
          FROM table_changes('silverTable', 2, 5)
          WHERE _change_type !='update_preimage')
    WHERE rank=1

In [0]:
%sql
-- Inspect the per-country latest change rows collected by the view.
SELECT * FROM silverTable_latest_version

In [0]:
%sql
-- Merge the changes to gold, keyed on Country.
--   * latest change is a delete and the country is in gold  -> remove it from gold
--   * latest change is an insert/update and it is in gold   -> refresh VaccinationRate
--   * country is not in gold and its latest change is not a delete -> insert it
-- Without the delete handling, a country deleted from silverTable would stay in
-- goldTable, and a country whose latest change is a delete could be inserted.
MERGE INTO goldTable t USING silverTable_latest_version s ON s.Country = t.Country
        WHEN MATCHED AND s._change_type = 'delete' THEN DELETE
        WHEN MATCHED AND s._change_type IN ('insert', 'update_postimage') THEN UPDATE SET VaccinationRate = s.NumVaccinated/s.AvailableDoses
        WHEN NOT MATCHED AND s._change_type != 'delete' THEN INSERT (Country, VaccinationRate) VALUES (s.Country, s.NumVaccinated/s.AvailableDoses)

In [0]:
%sql
-- Show goldTable after the merge.
SELECT * FROM goldTable

### Clean up tables

In [0]:
# Optional cleanup, left commented out so running the whole notebook keeps the
# demo tables. To drop them, uncomment the lines below and make `%sql` the
# first line of the cell.
# %sql
# DROP TABLE silverTable;
# DROP TABLE goldTable;