
# Silver to Gold: Building BI-Ready Tables

In [0]:
import pyspark.sql.functions as F
from pyspark.sql.types import StringType,IntegerType, DateType, TimestampType

# Catalog prefix used for every table reference in this notebook
# (the silver source table and the gold target table).
catalog_name = "ecommerce"

In [0]:
# Load the silver-layer order items that feed the gold fact table.
silver_order_items_table = f"{catalog_name}.silver.slv_order_items"
df = spark.table(silver_order_items_table)

# Preview a handful of rows.
df.limit(5).display()

In [0]:
# Derive the BI measures and keys on top of the silver order items.
# Later columns build on earlier ones, so the order of the chain matters.
df_gold = (
    df
    # gross_amount: quantity times unit price
    .withColumn("gross_amount", F.col("quantity") * F.col("unit_price"))
    # discount_amount: discount_pct percent of gross, rounded up to a whole number
    .withColumn(
        "discount_amount",
        F.ceil(F.col("gross_amount") * (F.col("discount_pct") / 100.0)),
    )
    # sale_amount: gross - discount + tax
    .withColumn(
        "sale_amount",
        F.col("gross_amount") - F.col("discount_amount") + F.col("tax_amount"),
    )
    # date_id: dt formatted as yyyyMMdd and cast to an integer date key
    .withColumn(
        "date_id",
        F.date_format(F.col("dt"), "yyyyMMdd").cast(IntegerType()),
    )
    # coupon_flag: 0 when coupon_code is NULL, otherwise 1
    .withColumn(
        "coupon_flag",
        F.when(F.col("coupon_code").isNull(), F.lit(0)).otherwise(F.lit(1)),
    )
)

display(df_gold.limit(8))

In [0]:
# Currency conversion lookup.
# Fixed FX rates against INR (as of 2025-10-15, per the PBI note); INR itself is 1.00.
fx_rates = dict(
    INR=1.00,
    AED=24.18,
    AUD=57.55,
    CAD=62.93,
    GBP=117.98,
    SGD=68.18,
    USD=88.29,
)

# Turn the mapping into (currency, rate) rows, forcing every rate to float.
rates = []
for currency_code, rate in fx_rates.items():
    rates.append((currency_code, float(rate)))

rates_df = spark.createDataFrame(rates, schema=["currency", "inr_rate"])
rates_df.show()

In [0]:
# Convert the net sale amount to INR using the fixed-rate lookup table.
# The currency code is trimmed and upper-cased before matching. Because this is
# a left join, rows whose currency has no entry in rates_df are kept and end up
# with a NULL sale_amount_inr.
df_gold = (
    df_gold
    .join(
        F.broadcast(rates_df),  # tiny lookup table: hint a broadcast join
        rates_df.currency == F.upper(F.trim(F.col("unit_price_currency"))),
        "left",
    )
    # Converted amount, rounded up to a whole number.
    .withColumn(
        "sale_amount_inr",
        F.ceil(F.col("sale_amount") * rates_df.inr_rate),
    )
    # Drop the lookup columns once they have been used: they are not part of
    # the gold output, and leaving them in df_gold makes a re-run of this cell
    # fail on an ambiguous `inr_rate` reference after the second join.
    .drop(rates_df.currency)
    .drop(rates_df.inr_rate)
)
df_gold.limit(5).display()

In [0]:
# Final projection for the gold fact table, as (source column, output column)
# pairs in output order. Columns whose two names match are passed through
# unchanged; the rest are renamed.
gold_column_map = [
    ("date_id", "date_id"),
    ("dt", "transaction_date"),
    ("order_ts", "transaction_ts"),
    ("order_id", "transaction_id"),
    ("customer_id", "customer_id"),
    ("item_seq", "seq_no"),
    ("product_id", "product_id"),
    ("channel", "channel"),
    ("coupon_code", "coupon_code"),
    ("coupon_flag", "coupon_flag"),
    ("unit_price_currency", "unit_price_currency"),
    ("quantity", "quantity"),
    ("unit_price", "unit_price"),
    ("gross_amount", "gross_amount"),
    ("discount_pct", "discount_percent"),
    ("discount_amount", "discount_amount"),
    ("tax_amount", "tax_amount"),
    ("sale_amount", "net_amount"),
    ("sale_amount_inr", "net_amount_inr"),
]

orders_gold_df = df_gold.select(
    *[
        F.col(source) if source == target else F.col(source).alias(target)
        for source, target in gold_column_map
    ]
)

In [0]:
# Preview the projected gold rows before writing them out.
display(orders_gold_df.limit(5))

In [0]:
# Write the projected order items to the gold layer as a Delta table
# (<catalog_name>.gold.gld_fact_order_items), using overwrite mode with the
# mergeSchema option enabled.
gold_fact_table = f"{catalog_name}.gold.gld_fact_order_items"
(
    orders_gold_df.write
    .format("delta")
    .mode("overwrite")
    .option("mergeSchema", "true")
    .saveAsTable(gold_fact_table)
)

In [0]:
# Sanity check: row count of the gold DataFrame (this triggers a Spark job).
gold_row_count = orders_gold_df.count()
gold_row_count