In [0]:
from pyspark import pipelines as dp
from pyspark.sql.functions import col,current_timestamp

In [0]:
@dp.table(
    name="bronze_customers_py",
    table_properties={"quality": "bronze"},
    comment="this is bronze table",
)
def create_bronze_customers():
    """Ingest raw customer JSON files into the bronze streaming table.

    Reads the customers landing directory incrementally with the
    ``cloudFiles`` (Auto Loader) source and appends two audit columns:

    * ``input_file_name`` - source file path taken from ``_metadata.file_path``.
    * ``ingestion_timestamp`` - value of ``current_timestamp()`` at processing.

    Returns:
        A streaming DataFrame holding the raw records plus the audit columns.
    """
    return (
        spark.readStream.format("cloudFiles")
        .option("cloudFiles.format", "json")
        .load("/Volumes/circuitbox/landing/operationaldata/customers/")
        # Audit columns: which file each row came from and when it was read.
        .withColumn("input_file_name", col("_metadata.file_path"))
        .withColumn("ingestion_timestamp", current_timestamp())
    )

In [0]:
@dp.table(
    name="silver_customers_clean_py",
    table_properties={"quality": "silver"},
    comment="this is silver table with cleaned data",
)
# A null customer_id stops the update; a null customer_name drops the row.
@dp.expect_or_fail("validate_customer_id", "customer_id is not null")
@dp.expect_or_drop("validate_customer_name", "customer_name is not null")
# Violations of the remaining rules are recorded but the rows are kept.
@dp.expect_all(
    {
        "validate_telephone": "length(telephone) >= 10",
        "validate_email": "email is not null",
        "validate_date_of_birth": "date_of_birth >= '1920-01-01'",
    }
)
def create_silver_customers_clean():
    """Project and type-cast bronze customer rows for the silver layer.

    Streams from the ``bronze_customers_py`` pipeline dataset, keeps only the
    customer attributes needed downstream, and casts ``date_of_birth`` and
    ``created_date`` to ``date``. Data-quality expectations are attached via
    the decorators above.

    Returns:
        A streaming DataFrame with columns ``customer_id``, ``customer_name``,
        ``email``, ``telephone``, ``date_of_birth`` and ``created_date``.
    """
    return (
        # Referenced by bare dataset name (no legacy ``live.`` prefix), the
        # same way the CDC flow in this notebook references its source.
        spark.readStream.table("bronze_customers_py")
        .select(
            "customer_id",
            "customer_name",
            "email",
            "telephone",
            col("date_of_birth").cast("date"),
            col("created_date").cast("date"),
        )
    )

In [0]:
# Declare the empty streaming table that the CDC flow in this notebook
# writes into; no query is attached to it here.
dp.create_streaming_table(
    name="silver_customers_py",
    table_properties={"quality": "silver"},
    comment="this is final silver table",
)

In [0]:
# Upsert cleaned customer rows into the final silver table, keeping one
# current row per customer_id (SCD type 1 - no history is retained).
# NOTE(review): created_date is cast to DATE in silver_customers_clean_py, so
# several changes to the same customer on one day tie on the sequencing
# column - confirm this granularity is sufficient.
dp.create_auto_cdc_flow(
    source="silver_customers_clean_py",
    target="silver_customers_py",
    keys=["customer_id"],
    sequence_by="created_date",
    stored_as_scd_type=1,
)