In [0]:
import dlt
from pyspark.sql.functions import col

@dlt.table(
    name="sales_city",
    comment="sales data by city",
    table_properties={"quality": "bronze"},
)
def sales_city():
    """Define the ``sales_city`` pipeline table from the sample sales CSV.

    Loads ``/mnt/input/sales_data_sample.csv`` with the first row treated as
    a header and keeps four columns, renaming two of them:

    * ``ordernumber``     -> ``ord_no``
    * ``quantityordered`` -> ``ord_qnty``
    * ``sales``           (unchanged)
    * ``city``            (unchanged)

    No explicit schema or ``inferSchema`` option is supplied to the reader,
    so column types are whatever the CSV reader produces by default.

    Returns:
        A Spark DataFrame with columns ``ord_no``, ``ord_qnty``, ``sales``
        and ``city``.
    """
    # `spark` is not defined in this cell; it is expected to be the session
    # object supplied by the notebook/pipeline runtime.
    csv_reader = spark.read.format("csv").option("header", True)
    raw_sales = csv_reader.load("/mnt/input/sales_data_sample.csv")

    # Projection for the city-level view of the sales data.
    projected_columns = [
        col("ordernumber").alias("ord_no"),
        col("quantityordered").alias("ord_qnty"),
        col("sales"),
        col("city"),
    ]
    return raw_sales.select(*projected_columns)


In [0]:
import dlt
from pyspark.sql.functions import col

@dlt.table(
    name="sales_country",
    comment="sales data by country",
    table_properties={"quality": "bronze"},
)
def sales_country():
    """Define the ``sales_country`` pipeline table from the sample sales CSV.

    Loads ``/mnt/input/sales_data_sample.csv`` with the first row treated as
    a header and keeps four columns, renaming two of them:

    * ``ordernumber``     -> ``ord_no``
    * ``quantityordered`` -> ``ord_qnty``
    * ``sales``           (unchanged)
    * ``country``         (unchanged)

    No explicit schema or ``inferSchema`` option is supplied to the reader,
    so column types are whatever the CSV reader produces by default.

    Returns:
        A Spark DataFrame with columns ``ord_no``, ``ord_qnty``, ``sales``
        and ``country``.
    """
    # `spark` is not defined in this cell; it is expected to be the session
    # object supplied by the notebook/pipeline runtime.
    csv_reader = spark.read.format("csv").option("header", True)
    raw_sales = csv_reader.load("/mnt/input/sales_data_sample.csv")

    # Projection for the country-level view of the sales data.
    projected_columns = [
        col("ordernumber").alias("ord_no"),
        col("quantityordered").alias("ord_qnty"),
        col("sales"),
        col("country"),
    ]
    return raw_sales.select(*projected_columns)

In [0]:
import dlt
from pyspark.sql.functions import col

@dlt.table
def joined_sales():
    """Define the ``joined_sales`` table by joining city and country sales.

    Reads ``sam_data.sales_city`` and ``sam_data.sales_country``, keeps the
    order number plus the quantity and sales columns from each side (prefixed
    with ``city_`` / ``country_`` so the names do not collide), and inner-joins
    the two on ``ord_no``.

    The ``city`` and ``country`` columns themselves are not selected, so they
    do not appear in the result.

    Returns:
        A Spark DataFrame with columns ``ord_no``, ``city_ord_qnty``,
        ``city_sales``, ``country_ord_qnty`` and ``country_sales``.
    """
    # NOTE(review): both inputs are addressed by their schema-qualified names
    # through the session reader. Confirm that the `sam_data` schema is the
    # intended source in every environment this pipeline is deployed to.
    city_sales = spark.read.table("sam_data.sales_city").selectExpr(
        "ord_no",
        "ord_qnty as city_ord_qnty",
        "sales as city_sales",
    )
    country_sales = spark.read.table("sam_data.sales_country").selectExpr(
        "ord_no",
        "ord_qnty as country_ord_qnty",
        "sales as country_sales",
    )

    # Joining on the column name (rather than an expression) keeps a single
    # `ord_no` column in the output. If `ord_no` repeats within an input,
    # every matching pair of rows is emitted.
    return city_sales.join(country_sales, on="ord_no", how="inner")