Silver Table Generation

In [0]:
# Seed rows for the silver table; each tuple is (Country, Product, Stock, Sales).
products = [
    ("USA", "ProductA", 1000, 200),
    ("India", "ProductB", 500, 50),
    ("UK", "ProductC", 700, 100),
    ("Canada", "ProductD", 200, 20),
]

# Column names, listed in the same order as the tuple fields above.
columns = ["Country", "Product", "Stock", "Sales"]

# Build the DataFrame first, then overwrite the Delta table silver_Table with it.
silver_seed_df = spark.createDataFrame(data=products, schema=columns)
silver_seed_df.write.format("delta").mode("overwrite").saveAsTable("silver_Table")

In [0]:
%sql
-- Show the freshly written silver table.
SELECT *
FROM silver_Table

Country,Product,Stock,Sales
Canada,ProductD,200,20
India,ProductB,500,50
USA,ProductA,1000,200
UK,ProductC,700,100


Gold Table Generation

In [0]:
import pyspark.sql.functions as F
# Derive the gold table from silver: keep Country and Product, and replace the
# raw Stock and Sales columns with their ratio (Sales / Stock) as SaleRate.
silver_df = spark.read.format("delta").table("silver_Table")
sale_rate = F.col("Sales") / F.col("Stock")
gold_df = silver_df.withColumn("SaleRate", sale_rate).drop("Stock", "Sales")
gold_df.write.format("delta").mode("overwrite").saveAsTable("gold_Table")

In [0]:
%sql
-- Show the gold table derived from the initial silver rows.
SELECT *
FROM gold_Table

Country,Product,SaleRate
Canada,ProductD,0.1
India,ProductB,0.1
USA,ProductA,0.2
UK,ProductC,0.1428571428571428


In [0]:
%sql
-- Turn on the Delta change data feed for silver_Table so that subsequent
-- commits can be read back as row-level changes.
ALTER TABLE silver_Table
SET TBLPROPERTIES (delta.enableChangeDataFeed = true)

Insert new records

In [0]:
# One additional product row, appended to silver_Table.
# `columns` is the column-name list defined in the silver table generation cell.
new_products = [("Australia", "ProductE", 300, 30)]
new_products_df = spark.createDataFrame(data=new_products, schema=columns)
new_products_df.write.format("delta").mode("append").saveAsTable("silver_Table")

Update a record in silver_Table

In [0]:
%sql
-- Raise the stock of USA / ProductA to 1200.
-- Stock is a numeric column, so assign a numeric literal instead of the string
-- '1200' and avoid relying on an implicit string-to-number cast.
UPDATE silver_Table
SET Stock = 1200
WHERE Country = 'USA' AND Product = 'ProductA'

num_affected_rows
1


Delete a record from silver_Table

In [0]:
%sql
-- Remove the UK / ProductC row from the silver table.
DELETE FROM silver_Table
WHERE Country = 'UK' AND Product = 'ProductC'

num_affected_rows
1


In [0]:
%sql
-- Show silver_Table after the append, update and delete above.
SELECT *
FROM silver_Table

Country,Product,Stock,Sales
USA,ProductA,1200,200
Australia,ProductE,300,30
Canada,ProductD,200,20
India,ProductB,500,50


In [0]:
%sql
-- List the commit history of the silver table (one row per table version).
DESCRIBE HISTORY silver_table

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
4,2024-09-26T17:05:16Z,5199045096932667,kammaramakrishna12@gmail.com,DELETE,"Map(predicate -> [""((Country#3577 = UK) AND (Product#3578 = ProductC))""])",,List(2851002469361937),0926-163150-vm1ikvdx,3.0,WriteSerializable,False,"Map(numRemovedFiles -> 1, numRemovedBytes -> 1360, numCopiedRows -> 0, numDeletionVectorsAdded -> 0, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 1, executionTimeMs -> 3218, numDeletedRows -> 1, scanTimeMs -> 1850, numAddedFiles -> 0, numAddedBytes -> 0, rewriteTimeMs -> 1368)",,Databricks-Runtime/13.3.x-scala2.12
3,2024-09-26T17:03:35Z,5199045096932667,kammaramakrishna12@gmail.com,UPDATE,"Map(predicate -> [""((Country#2894 = USA) AND (Product#2895 = ProductA))""])",,List(2851002469361937),0926-163150-vm1ikvdx,2.0,WriteSerializable,False,"Map(numRemovedFiles -> 1, numRemovedBytes -> 1367, numCopiedRows -> 0, numDeletionVectorsAdded -> 0, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 1, executionTimeMs -> 4579, scanTimeMs -> 2563, numAddedFiles -> 1, numUpdatedRows -> 1, numAddedBytes -> 1584, rewriteTimeMs -> 1980)",,Databricks-Runtime/13.3.x-scala2.12
2,2024-09-26T17:02:37Z,5199045096932667,kammaramakrishna12@gmail.com,WRITE,"Map(mode -> Append, statsOnLoad -> false, partitionBy -> [])",,List(2851002469361937),0926-163150-vm1ikvdx,1.0,WriteSerializable,True,"Map(numFiles -> 1, numOutputRows -> 1, numOutputBytes -> 1408)",,Databricks-Runtime/13.3.x-scala2.12
1,2024-09-26T17:02:16Z,5199045096932667,kammaramakrishna12@gmail.com,SET TBLPROPERTIES,"Map(properties -> {""delta.enableChangeDataFeed"":""true""})",,List(2851002469361937),0926-163150-vm1ikvdx,0.0,WriteSerializable,True,Map(),,Databricks-Runtime/13.3.x-scala2.12
0,2024-09-26T16:50:39Z,5199045096932667,kammaramakrishna12@gmail.com,CREATE OR REPLACE TABLE AS SELECT,"Map(partitionBy -> [], description -> null, isManaged -> true, properties -> {}, statsOnLoad -> false)",,List(2851002469361937),0926-163150-vm1ikvdx,,WriteSerializable,False,"Map(numFiles -> 4, numOutputRows -> 4, numOutputBytes -> 5496)",,Databricks-Runtime/13.3.x-scala2.12


In [0]:
%sql
-- Row-level changes recorded for silver_Table from version 1 onwards, oldest first.
-- `*` already includes the change-feed metadata columns (_change_type,
-- _commit_version, _commit_timestamp), so _commit_timestamp is not selected a
-- second time.
SELECT *
FROM table_changes('silver_Table', 1)
ORDER BY _commit_timestamp

Country,Product,Stock,Sales,_change_type,_commit_version,_commit_timestamp
Australia,ProductE,300,30,insert,2,2024-09-26T17:02:37Z
USA,ProductA,1000,200,update_preimage,3,2024-09-26T17:03:35Z
USA,ProductA,1200,200,update_postimage,3,2024-09-26T17:03:35Z
UK,ProductC,700,100,delete,4,2024-09-26T17:05:16Z


In [0]:
# Read the change data feed of silver_Table through the DataFrame API, starting
# at table version 2. Uses the documented option name `readChangeFeed` rather
# than the legacy `readChangeData` spelling.
changes_df = (
    spark.read.format("delta")
    .option("readChangeFeed", "true")
    .option("startingVersion", 2)
    .table("silver_Table")
)
display(changes_df)

Country,Product,Stock,Sales,_change_type,_commit_version,_commit_timestamp
USA,ProductA,1000,200,update_preimage,3,2024-09-26T17:03:35Z
USA,ProductA,1200,200,update_postimage,3,2024-09-26T17:03:35Z
UK,ProductC,700,100,delete,4,2024-09-26T17:05:16Z
Australia,ProductE,300,30,insert,2,2024-09-26T17:02:37Z


Propagate changes from silver to gold table

In [0]:
%sql
-- For every (Country, Product) key, keep only the change with the highest
-- commit version among silver_Table versions 2 through 5. Pre-update row images
-- are ignored so that an update is represented by its post-image only.
CREATE OR REPLACE TEMPORARY VIEW silver_Table_latest_version AS
WITH ranked_changes AS (
    SELECT
        *,
        RANK() OVER (PARTITION BY Country, Product ORDER BY _commit_version DESC) AS rnk
    FROM table_changes('silver_Table', 2, 5)
    WHERE _change_type != 'update_preimage'
)
SELECT *
FROM ranked_changes
WHERE rnk = 1

In [0]:
%sql
-- Inspect the latest change per key that will be merged into the gold table.
SELECT *
FROM silver_Table_latest_version

Country,Product,Stock,Sales,_change_type,_commit_version,_commit_timestamp,rnk
Australia,ProductE,300,30,insert,2,2024-09-26T17:02:37Z,1
UK,ProductC,700,100,delete,4,2024-09-26T17:05:16Z,1
USA,ProductA,1200,200,update_postimage,3,2024-09-26T17:03:35Z,1


Merge Changes into Gold Table

In [0]:
%sql
-- Apply the latest silver change per (Country, Product) key to the gold table:
--   * update_postimage on an existing key -> recompute SaleRate
--   * delete on an existing key           -> remove the gold row
--   * any non-delete change on a new key  -> insert the gold row
-- The NOT MATCHED branch skips 'delete' changes: a delete whose key is not in
-- gold_Table (for example when this merge is re-run after the row was already
-- removed) must not insert the deleted row back.
MERGE INTO gold_Table AS t
USING silver_Table_latest_version AS s
ON s.Country = t.Country AND s.Product = t.Product
WHEN MATCHED AND s._change_type = 'update_postimage' THEN
    UPDATE SET t.SaleRate = s.Sales / s.Stock
WHEN MATCHED AND s._change_type = 'delete' THEN
    DELETE
WHEN NOT MATCHED AND s._change_type != 'delete' THEN
    INSERT (Country, Product, SaleRate) VALUES (s.Country, s.Product, s.Sales / s.Stock);

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
3,1,1,1


In [0]:
%sql
-- Show the gold table after the silver changes have been merged in.
SELECT *
FROM gold_Table

Country,Product,SaleRate
Canada,ProductD,0.1
Australia,ProductE,0.1
USA,ProductA,0.1666666666666666
India,ProductB,0.1


Secret Scope

In [0]:
# Fetch the value stored under key "test" in the "ramakrishna" secret scope.
# Despite its name, `secret_name` holds the value returned by the lookup.
secret_name = dbutils.secrets.get(scope="ramakrishna", key="test")

In [0]:
# Prints the value fetched from the secret scope.
# NOTE(review): Databricks presumably redacts secret values in cell output —
# confirm before sharing this notebook with its outputs.
print(secret_name)

In [0]:
# Report whether the fetched secret equals the expected value.
# NOTE(review): the expected value is hard-coded in plain text here, which
# exposes the secret to anyone who can read the notebook — consider removing it.
print("Success" if secret_name == "ramakrishna1" else "Failure")