- **Streaming table** - Each record is processed exactly once. This assumes an append-only source.
- **Materialized view** - Records are processed as required to return accurate results for the current data state. Materialized views should be used for data processing tasks such as transformations, aggregations, or pre-computing slow queries and frequently used computations.
- **View** - Records are processed each time the view is queried. Use views for intermediate transformations and data quality checks that should not be published to public datasets.

In [0]:
import dlt


In [0]:
# Create a streaming table for Orders
@dlt.table(
  comment = "Order bronze table",
  table_properties = {"quality": "bronze"},
)
def orders_bronze():
  """Bronze orders dataset, read as a stream from the raw orders table.

  Returns:
    A streaming DataFrame over ``dev.bronze.orders_raw``, returned unchanged.
  """
  # readStream (rather than read) makes this a streaming source.
  return spark.readStream.table("dev.bronze.orders_raw")

In [0]:
# Create a materialized view for customers
@dlt.table(
  name = "customer_bronze",
  comment = "Customer bronze table",
  table_properties = {"quality": "bronze"},
)
def cust_bronze():
  """Bronze customers dataset, read in batch mode from the raw customers table.

  The dataset is registered under the explicit name ``customer_bronze``
  given to the decorator, not under this function's name.

  Returns:
    A batch DataFrame over ``dev.bronze.customer_raw``, returned unchanged.
  """
  return spark.read.table("dev.bronze.customer_raw")

In [0]:
# Create a view to join orders with customers
@dlt.view(
  comment = "Joined View"
)
def joined_vw():
  """Orders left-joined to customers on the customer key.

  Declared with ``@dlt.view`` rather than ``@dlt.table``: this dataset is an
  intermediate join step, so it is defined as a view instead of being
  materialized as a table.

  Returns:
    A DataFrame with every row of ``LIVE.orders_bronze`` and the matching
    columns of ``LIVE.customer_bronze`` where ``o_custkey == c_custkey``.
    Because the join is a left outer join, orders without a matching
    customer are kept with null customer columns.
  """
  customers = spark.read.table("LIVE.customer_bronze")
  orders = spark.read.table("LIVE.orders_bronze")

  # Orders are the left side so that no order is dropped by the join.
  return orders.join(
    customers,
    on = customers.c_custkey == orders.o_custkey,
    how = "left_outer",
  )

In [0]:
# Create MV to add new column
from pyspark.sql.functions import current_timestamp, count, sum

@dlt.table(
  name = "joined_silver",
  comment = "Joined Table",
  table_properties = {"quality": "silver"},
)
def joined_silver():
  """Silver dataset: the joined view plus a load-timestamp column.

  Returns:
    The rows of ``LIVE.joined_vw`` with an extra ``__insert_date`` column
    set to ``current_timestamp()``.
  """
  joined = spark.read.table("LIVE.joined_vw")
  return joined.withColumn("__insert_date", current_timestamp())

In [0]:
# Aggregate by c_mktsegment: count the orders (o_orderkey) and sum their total price (o_totalprice)
@dlt.table(
  comment = "Orders aggregated table",
  table_properties = {"quality": "gold"},
)
def orders_agg_gold():
  """Gold dataset: per-market-segment order count and total order price.

  Returns:
    One row per ``c_mktsegment`` value in ``LIVE.joined_silver`` with:
      * ``count_orders``   - count of ``o_orderkey`` values,
      * ``sum_totalprice`` - sum of ``o_totalprice``,
      * ``__insert_date``  - ``current_timestamp()``.
  """
  silver = spark.read.table("LIVE.joined_silver")

  # NOTE: `sum` here is pyspark.sql.functions.sum (imported earlier in the
  # notebook), not the Python builtin.
  order_count = count("o_orderkey").alias("count_orders")
  price_total = sum("o_totalprice").alias("sum_totalprice")

  per_segment = silver.groupBy("c_mktsegment").agg(order_count, price_total)
  return per_segment.withColumn("__insert_date", current_timestamp())