In [0]:
from pyspark.sql.functions import *
from pyspark.sql.window import Window
from pyspark.sql.types import *
from pyspark.sql import *
from datetime import datetime,timedelta
from zoneinfo import ZoneInfo

In [0]:
# Register the access key for the "bmwstorageacc" ADLS Gen2 account on the
# Spark session. The key is read from the "bmwanalytics" Databricks secret
# scope at call time, so it never appears in the notebook source.
spark.conf.set(
    "fs.azure.account.key.bmwstorageacc.dfs.core.windows.net",
    dbutils.secrets.get(scope="bmwanalytics", key="bmwstorevalut"),
)

In [0]:
# Storage layout: the silver ("sliver") folder is this notebook's input and the
# gold folder is its output.
storage = "abfss://bmwstorage@bmwstorageacc.dfs.core.windows.net"
sliver_path = f"{storage}/sliver/bmw_sales/sales"
gold_path = f"{storage}/gold/bmw_sales/sales/"  # keeps its trailing "/": write paths are appended directly

# Resolve which silver version to process: the "V-*" directory whose name
# sorts last. (Assumes version folders are named "V-<yyyymmdd>-<HHMMSS>", so
# lexical order is chronological order -- TODO confirm against the silver job.)
try:
    version = [
        entry
        for entry in dbutils.fs.ls(sliver_path)
        if entry.isDir() and entry.name.startswith("V-")
    ]
except Exception as e:
    # Previously the error was only printed and a freshly generated timestamp
    # was kept as the version; that folder cannot exist in silver, so the read
    # in the next cell failed with an unrelated-looking path error. Fail here,
    # with the real cause attached.
    raise RuntimeError(
        f"Unable to list silver versions under {sliver_path}"
    ) from e

if not version:
    # Nothing to aggregate: stop before attempting to read a missing version.
    raise RuntimeError(
        f"No 'V-*' version directories found under {sliver_path}"
    )

latest = max(version, key=lambda entry: entry.name)
# dbutils.fs.ls reports directory names with a trailing "/"; strip it so the
# label has the plain "V-..." form wherever it is embedded in a path.
PIPELINE_VERSION = latest.name.rstrip("/")

In [0]:
# Read the resolved silver version as a Delta table, then preview it.
sliver = spark.read.load(
    f"{sliver_path}/{PIPELINE_VERSION}",
    format="delta",
)
display(sliver)

In [0]:
# Project the silver DataFrame down to the columns listed here, in this order.
# Re-running the cell is harmless: the selected columns are still present.
sliver = sliver.select(
    [
        "vehicle_id", "model", "year", "region",
        "color", "fuel_type", "transmission",
        "engine_size", "mileage", "price",
        "sales_volume", "sales_classification",
        "revenue_usd", "price_brand",
    ]
)

In [0]:
# Preview the silver DataFrame in the notebook output.
sliver.display()

In [0]:
# Cell output: the list of column names currently on the silver DataFrame.
sliver.columns

In [0]:
# Window spec: partitions rows by model/year/region/fuel_type/transmission and
# orders each partition by sales_volume (ascending, the default).
# NOTE(review): the name `window` shadows pyspark.sql.functions.window brought
# in by the star import, and no visible cell uses it -- confirm it is needed.
window = (
    Window
    .partitionBy("model", "year", "region", "fuel_type", "transmission")
    .orderBy("sales_volume")
)

In [0]:
# Gold aggregate: one row per (model, year, region) carrying summed sales
# volume and revenue plus the mean price, mileage and engine size, ordered by
# year descending.
# `sum` / `avg` here are the pyspark.sql.functions versions pulled in by the
# star import, not the Python builtins.
gold_yearly = (
    sliver
    .groupBy("model", "year", "region")
    .agg(
        sum(col("sales_volume")).alias("total_sales_volume"),
        sum(col("revenue_usd")).alias("total_revenue_usd"),
        avg(col("price")).alias("avg_price_usd"),
        avg(col("mileage")).alias("avg_mileage"),
        avg(col("engine_size")).alias("avg_engine_size"),
    )
    .orderBy(col("year").desc())
)


In [0]:
# Preview the aggregated gold DataFrame in the notebook output.
gold_yearly.display()

In [0]:
# Persist the aggregate as a Delta table under the gold folder, in a
# sub-folder named after the processed version. "overwrite" mode together with
# overwriteSchema replaces both the data and the schema already at that path.
# gold_path already ends with "/", so the sub-path is appended directly.
(
    gold_yearly.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .save(f"{gold_path}sales_and_revenue/{PIPELINE_VERSION}")
)