### Imported SparkSession & Functions

In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import *

In [0]:
spark = SparkSession.builder.appName("AnalyticsView").getOrCreate()

### Read Data Form Fact And Dimensions Table

In [0]:
dim_path = "s3://ecommerce-data-pipeline-sanket/warehouse/dimensions/"
fact_path = "s3://ecommerce-data-pipeline-sanket/warehouse/facts/"

dim_customer = spark.read.parquet(dim_path + "dim_customer/")
dim_product = spark.read.parquet(dim_path + "dim_product/")
dim_region  = spark.read.parquet(dim_path + "dim_region/")
dim_date    = spark.read.parquet(dim_path + "dim_date/")
fact_sales  = spark.read.parquet(fact_path + "fact_sales/")

### Performed Joins To All Dimensions Tables

In [0]:
dim_region = dim_region.withColumnRenamed("Region", "region_name")

analytics_df = (fact_sales
    .join(dim_customer, on="customer_id", how="left")
    .join(dim_product, on="product_id", how="left")
    .join(dim_region, fact_sales["Region"] == dim_region["region_name"], "left")
    .join(dim_date, fact_sales["Order_Date"] == dim_date["order_date"], "left")
)

### Performed Operations 

In [0]:
sales_summary = (
    analytics_df
    .groupBy("region_name", "Category", "year", "month")
    .agg(
        round(sum("Sales"), 2).alias("total_sales"),
        round(sum("Profit"), 2).alias("total_profit"),
        sum("Quantity").alias("total_quantity")
    )
)

display(sales_summary)

region_name,Category,year,month,total_sales,total_profit,total_quantity
West,Technology,2023,10,643877.52,411414.32,824
West,Technology,2025,6,800054.08,575710.96,1208
South,Furniture,2025,6,418688.96,135478.72,512
West,Furniture,2024,8,658692.56,464210.56,888
East,Furniture,2025,3,799605.4,521279.66,1199
Central,Office Supplies,2022,5,425170.4,464279.04,760
Central,Furniture,2024,4,297233.76,347391.12,432
East,Office Supplies,2025,7,903572.67,539138.38,858
Central,Office Supplies,2024,2,642133.2,548400.96,768
East,Office Supplies,2025,6,1029484.94,601779.53,1386


### Loaded Data To Analytics 

In [0]:
analytics_path = "s3://ecommerce-data-pipeline-sanket/warehouse/analytics/sales_summary/"
sales_summary.write.mode("overwrite").parquet(analytics_path)