In [0]:
%fs ls /mnt/silver

path,name,size,modificationTime
dbfs:/mnt/silver/Product/,Product/,0,1747477754000
dbfs:/mnt/silver/Region/,Region/,0,1747477819000
dbfs:/mnt/silver/Reseller/,Reseller/,0,1747478163000
dbfs:/mnt/silver/Sales/,Sales/,0,1747478977000
dbfs:/mnt/silver/Salesperson/,Salesperson/,0,1747479611000
dbfs:/mnt/silver/SalespersonRegion/,SalespersonRegion/,0,1747479661000
dbfs:/mnt/silver/Targets/,Targets/,0,1747479835000


# Sales Revenue by Region and Month

Tables: Sales & Region

In [0]:
from pyspark.sql.functions import date_format, sum

# Read the two silver-layer Delta tables this report is built from.
df_region = spark.read.format("delta").load("/mnt/silver/Region")
df_sales = spark.read.format("delta").load("/mnt/silver/Sales")

# Build the report in one pipeline:
#   1. left join so every sales row is kept, even without a matching territory
#      (those rows end up with a null region),
#   2. derive a "yyyy-MM" bucket from the order date,
#   3. total the sales per region and month,
#   4. sort by month.
# The result stays bound to `df` because the following cell writes `df` out.
df = (
    df_sales.join(df_region, "salesterritorykey", "left")
    .withColumn("month_year", date_format("orderdate", "yyyy-MM"))
    .groupBy("region", "month_year")
    .agg(sum("sales").alias("total_sales"))
    .orderBy("month_year")
)



In [0]:
# Persist the region/month aggregate held in `df` as a table,
# replacing whatever an earlier run stored under the same name.
region_month_writer = df.write.mode("overwrite")
region_month_writer.saveAsTable("gold_sales_by_region_month")


# Salesperson Target Achievement

Tables: Sales, Targets & Salesperson

In [0]:
from pyspark.sql.functions import col, sum, round, month, year

# Silver-layer inputs. Sales gets calendar month and year columns derived from
# the order date so each row can be lined up with a target for that period.
df_targets = spark.read.format("delta").load("/mnt/silver/Targets")
df_salesperson = spark.read.format("delta").load("/mnt/silver/Salesperson")
df_sales = (
    spark.read.format("delta").load("/mnt/silver/Sales")
    .withColumn("month", month("orderdate"))
    .withColumn("year", year("orderdate"))
)

# Join conditions, named so the join chain below reads as a sentence.
# Aliases: s = sales, p = salesperson, t = targets.
same_employee = col("s.employeekey") == col("p.employeekey")
same_employee_and_period = (
    (col("p.employeeid") == col("t.employeeid"))
    & (col("s.month") == col("t.month"))
    & (col("s.year") == col("t.year"))
)

# Both joins are left joins: every sales row is kept, and rows without a
# matching salesperson or target carry nulls in those columns.
df_joined = (
    df_sales.alias("s")
    .join(df_salesperson.alias("p"), same_employee, "left")
    .join(df_targets.alias("t"), same_employee_and_period, "left")
)

# One output row per employee, month, year and target value.
grouping_columns = [
    col("s.employeekey").alias("employee_key"),
    col("p.employeeid").alias("employee_id"),
    col("p.salesperson"),
    col("s.month"),
    col("s.year"),
    col("t.target"),
]
df_totals = df_joined.groupBy(*grouping_columns).agg(
    sum("s.sales").alias("total_sales")
)

# Achievement = total sales as a percentage of the target, rounded to 2 decimals.
achievement_pct = round((col("total_sales") / col("target")) * 100, 2)
df_gold = df_totals.withColumn("target_achievement_pct", achievement_pct)

# Save to the gold layer as a Delta table, replacing any previous contents.
df_gold.write.format("delta").mode("overwrite").saveAsTable("sales_vs_target")


# Top 10 Resellers by Revenue

Tables: Sales & Reseller



In [0]:
from pyspark.sql import Window
from pyspark.sql.functions import sum, rank, col

# Silver-layer inputs.
df_reseller = spark.read.format("delta").load("/mnt/silver/Reseller")
df_sales = spark.read.format("delta").load("/mnt/silver/Sales")

# Attach reseller attributes to each sale (left join keeps unmatched sales),
# then total the sales per reseller.
df_joined = df_sales.join(df_reseller, "resellerkey", "left")
df_grouped = (
    df_joined
    .groupBy("reseller")
    .agg(sum("sales").alias("total_sales"))
)

# Rank resellers from highest to lowest total sales. The window has no
# partitioning, so the ranking is global across all resellers.
revenue_desc = Window.orderBy(col("total_sales").desc())
df_with_rank = df_grouped.withColumn("rank", rank().over(revenue_desc))

# Keep ranks 1-10. rank() gives tied totals the same rank, so ties at the
# cut-off can yield more than ten rows.
df_ranked = df_with_rank.filter("rank <= 10")

# Persist the ranked resellers as a table, replacing any previous contents.
df_ranked.write.mode("overwrite").saveAsTable("top_10_resellers")


# Product Profitability Report

Tables: Sales & Product

In [0]:
# Imports come first so the cell runs on a fresh kernel; previously `col` was
# used before this cell imported it and only worked via earlier cells' imports.
from pyspark.sql.functions import col, sum

# Load data
df_sales = spark.read.format("delta").load("/mnt/silver/Sales")
# Named for what it holds (the Product table), not the reseller table.
df_product = spark.read.format("delta").load("/mnt/silver/Product")

# Attach product attributes to each sale (left join keeps unmatched sales) and
# compute the cost of each sales row as unit standard cost times quantity.
df_joined = (
    df_sales.join(df_product, "productkey", "left")
    .withColumn("total_cost", col("standard_cost") * col("quantity"))
)

# Total revenue and total cost per product.
df = df_joined.groupBy("product").agg(
    sum("sales").alias("total_sales"),
    sum("total_cost").alias("total_cost"),
)

# Profit is revenue minus cost. The previous version divided sales by cost,
# which yields a sales-to-cost ratio rather than a profit amount.
df = df.withColumn("profit", col("total_sales") - col("total_cost"))

# NOTE(review): if the existing table stored the old ratio with a different
# numeric type, this overwrite may need a schema overwrite option — confirm.
df.write.mode("overwrite").saveAsTable("profit_by_product")
