In [None]:
import pyspark.sql.functions as F

In [None]:
# Identifiers of the Fabric workspace and lakehouse used to build the table path.
# Both are empty strings here — presumably placeholders to fill in before running
# the cells below (TODO confirm).
workspace_id = ""
lakehouse_id = ""
# ABFSS URI of the lakehouse "Tables" folder on OneLake; the cells below append a
# table name to this prefix when reading and writing Delta tables.
path = f"abfss://{workspace_id}@onelake.dfs.fabric.microsoft.com/{lakehouse_id}/Tables"

## Load example

In [None]:
# Read the Customer Delta table with all of its columns and render it.
df = (
    spark.read
    .format("delta")
    .load(f'{path}/Customer')
)
display(df)

## Selective load

In [None]:
# Load only the identifier and full-name columns of the Customer table,
# renaming them to the short aliases used by the joins further down.
customers = (
    spark.read
    .format("delta")
    .load(f'{path}/Customer')
    .select(
        F.col('cr8dc_customeridentifier').alias('Id'),
        F.col('cr8dc_fullname').alias('FullName'),
    )
)
display(customers)

In [None]:
# Load the Product table, keeping five columns under short aliases.
products = (
    spark.read
    .format("delta")
    .load(f'{path}/Product')
    .select(
        F.col('cr8dc_productname').alias('ProductName'),
        F.col('cr8dc_productidentifier').alias('Id'),
        F.col('cr8dc_unitprice').alias('UnitPrice'),
        F.col('cr8dc_unitprice_base').alias('UnitPriceBase'),
        F.col('cr8dc_ProductCategory').alias('ProductCategory'),
    )
)
display(products)

In [None]:
# Load the Sales table under short aliases; the sale date is cast to a
# timestamp, and ProductID / CustomerID are the columns the joins below match on.
sales = (
    spark.read
    .format("delta")
    .load(f'{path}/Sales')
    .select(
        F.col('cr8dc_saleidentifier').alias('Id'),
        F.col('cr8dc_saledate').cast("Timestamp").alias('SaleDate'),
        F.col('cr8dc_quantitysold').alias('SoldUnits'),
        F.col('cr8dc_productidentifier').alias('ProductID'),
        F.col('cr8dc_customeridentifier').alias('CustomerID'),
    )
)
display(sales)

## Combine and aggregate

In [None]:
# Total units sold per customer name. The left outer join starts from
# `customers`, so customers without any matching sales row are kept.
# NOTE(review): the grouping key is FullName only, so two customers sharing a
# name are summed together — confirm whether Id should be part of the key.
grouped_orders_by_customer = (
    customers
    .join(sales, [customers.Id == sales.CustomerID], 'leftouter')
    .groupBy('FullName')
    .agg(F.sum("SoldUnits").alias("Sold_Units_Aggregated"))
)
display(grouped_orders_by_customer)

In [None]:
# Revenue per (product name, customer name) pair.
#
# Each sales row is enriched with its product and its customer through left
# outer joins, so sales rows without a matching product or customer are kept.
# Revenue for a row is SoldUnits * UnitPrice; the rows are then summed per
# ProductName / FullName pair.
#
# The product is already a Column expression, so it is passed to withColumn
# directly instead of being wrapped in F.lit (which is meant for literals).
#
# NOTE(review): the grouping keys are display names, so products or customers
# that share a name are merged into one row — confirm this is intended.
grouped_orders_by_customer_and_product = (
    sales
    .join(products, [sales.ProductID == products.Id], 'leftouter')
    .join(customers, [sales.CustomerID == customers.Id], 'leftouter')
    .withColumn('Revenue', F.col('SoldUnits') * F.col('UnitPrice'))
    .groupBy('ProductName', 'FullName')
    .agg(F.sum('Revenue').alias('Revenue'))
)
display(grouped_orders_by_customer_and_product)

In [None]:
# Persist the revenue aggregate as a Delta table under the lakehouse Tables
# folder; "overwrite" mode replaces whatever an earlier run stored there.
(
    grouped_orders_by_customer_and_product.write
    .format("delta")
    .mode("overwrite")
    .save(f"{path}/RevenueByProductsAndCustomers")
)

In [None]:
# Read the freshly written Delta table back from storage and render it.
preview_created_data = (
    spark.read
    .format("delta")
    .load(f"{path}/RevenueByProductsAndCustomers")
)
display(preview_created_data)