In [0]:
import dlt
from pyspark.sql.functions import col,current_timestamp,explode

In [0]:
@dlt.table(
    name="bronze_orders",
    comment="This is the bronze table",
    table_properties={"quality": "bronze"},
)
def create_bronze_orders():
    """Define the ``bronze_orders`` streaming table.

    Streams JSON files from the orders landing volume through the
    ``cloudFiles`` source with column type inference enabled, then appends
    two audit columns:

    * ``input_file_name`` - copied from ``_metadata.file_path``.
    * ``ingest_time``     - ``current_timestamp()`` at processing time.

    Returns:
        A streaming DataFrame that DLT materialises as ``bronze_orders``.
    """
    landing_path = "/Volumes/circuitbox/landing/operationaldata/orders/"

    # Incremental file reader over the landing directory.
    orders_reader = (
        spark.readStream
        .format("cloudFiles")
        .option("cloudFiles.format", "json")
        .option("cloudFiles.inferColumnTypes", "true")
    )
    raw_orders = orders_reader.load(landing_path)

    # Record which file each row came from and when it was ingested.
    with_source_file = raw_orders.withColumn(
        "input_file_name", col("_metadata.file_path")
    )
    return with_source_file.withColumn("ingest_time", current_timestamp())

In [0]:
@dlt.table(
    name="sliver_orders_clean",
    comment="This is the silver table",
    table_properties={"quality": "silver"},
)
@dlt.expect_or_fail("valid_customer_id", "customer_id is not null")
@dlt.expect_or_fail("valid_order_id", "order_id is not null")
@dlt.expect(
    "valid_order_status",
    "order_status in ('Pending','Completed','Shipped','Cancelled')",
)
@dlt.expect(
    "valid_payment",
    "payment_method in ('Credit Card','PayPal','Bank Transfer')",
)
def Create_sliver_orders_clean():
    """Define the ``sliver_orders_clean`` streaming table.

    Streams rows from ``LIVE.bronze_orders`` and casts ``order_timestamp``
    to a timestamp type. Data-quality expectations are attached through the
    decorators above:

    * ``customer_id`` and ``order_id`` must be non-null (``expect_or_fail``).
    * ``order_status`` and ``payment_method`` are checked against fixed
      value lists (``expect``).

    NOTE: the table name is spelled "sliver" (not "silver"); it is kept
    as-is because ``create_silver_orders`` reads the table by that name.

    Returns:
        A streaming DataFrame that DLT materialises as
        ``sliver_orders_clean``.
    """
    bronze_stream = spark.readStream.table("LIVE.bronze_orders")
    order_ts_as_timestamp = col("order_timestamp").cast("timestamp")
    return bronze_stream.withColumn("order_timestamp", order_ts_as_timestamp)

In [0]:
@dlt.table(
    name = "silver_orders",
    table_properties ={"quality" : "silver"},
    comment = "This is the silver table"
)
def create_silver_orders():
    """Define the ``silver_orders`` streaming table (one row per order item).

    Streams rows from ``LIVE.sliver_orders_clean``, explodes the ``items``
    array so each element becomes its own row, promotes the item fields
    ``category``, ``item_id``, ``name``, ``price`` and ``quantity`` to
    top-level columns, and drops the original ``items`` array together with
    the temporary ``new_items`` column.

    NOTE: ``explode`` emits no row for an order whose ``items`` is null or
    empty, so such orders do not appear in this table.

    Returns:
        A streaming DataFrame that DLT materialises as ``silver_orders``.
    """
    # Fields lifted out of each exploded item into top-level columns.
    item_fields = ("category", "item_id", "name", "price", "quantity")

    return (
        # Qualify with LIVE. so the in-pipeline dataset is resolved the same
        # way the upstream table reads LIVE.bronze_orders. The "sliver"
        # spelling matches the upstream table's declared name.
        spark.readStream.table("LIVE.sliver_orders_clean")
        .withColumn("new_items", explode(col("items")))
        .select("*", *[col(f"new_items.{field}") for field in item_fields])
        # Drop by name: passing several Column objects to drop() raises
        # TypeError on PySpark versions before 3.4.
        .drop("items", "new_items")
    )