# In [0]:

from pyspark.sql import SparkSession
from pyspark.sql.functions import col, from_json, to_timestamp, when, lit
import os

# Reuse the active Spark session, or create one if none exists yet.
spark = SparkSession.builder.getOrCreate()

# Cosmos DB connection settings. The endpoint and key are read from the "kv"
# secret scope, so no credentials are hard-coded in this script.
cosmos_endpoint = dbutils.secrets.get(scope="kv", key="COSMOS_ENDPOINT")
cosmos_masterkey = dbutils.secrets.get(scope="kv", key="COSMOS_KEY")

# Source database and container to read from.
cosmos_database, cosmos_container = "BankDB", "Transactions"


# Options handed to the "cosmos.oltp" data source: account endpoint and key,
# the database/container to read, and the schema-inference switch.
cosmos_cfg = {
    "spark.cosmos.accountEndpoint": cosmos_endpoint,
    "spark.cosmos.accountKey": cosmos_masterkey,
    "spark.cosmos.database": cosmos_database,
    "spark.cosmos.container": cosmos_container,
    "spark.cosmos.read.inferSchema.enabled": "true",
}

# Load the configured container into a DataFrame; the data source format and
# every option above are passed through a single load() call.
df = spark.read.load(format="cosmos.oltp", **cosmos_cfg)


# Add typed columns and drop rows whose amount is missing:
#   - TransactionTimestamp: TransactionTime parsed as a timestamp
#   - Amount: TransactionAmount cast to double
# Rows where the cast yields NULL are filtered out.
# NOTE(review): double is a binary floating-point type; confirm that is
# acceptable for monetary values in downstream reporting.
df_clean = df.withColumn(
    "TransactionTimestamp", to_timestamp(col("TransactionTime"))
).withColumn(
    "Amount", col("TransactionAmount").cast("double")
).where(
    col("Amount").isNotNull()
)


from pyspark.sql.functions import sum as _sum, avg as _avg, count as _count, month

# Per-customer summary of the cleaned transactions: total amount, mean
# amount, and number of rows, one output row per CustomerID.
agg = df_clean.groupBy("CustomerID").agg(
    _sum(col("Amount")).alias("TotalSpent"),
    _avg(col("Amount")).alias("AvgTxnAmount"),
    _count("*").alias("TxnCount"),
)


# Synapse connection settings, fetched from the "kv" secret scope in the
# order URL, user, password.
jdbc_url, jdbc_user, jdbc_pass = (
    dbutils.secrets.get("kv", secret_key)
    for secret_key in ("SYNAPSE_JDBC_URL", "SYNAPSE_USER", "SYNAPSE_PASSWORD")
)

# Destination table for the per-customer aggregates.
synapse_table = "dbo.Customer360_Fact"

# Write the per-customer aggregates to the Synapse table through the
# Databricks Synapse connector. mode("overwrite") replaces whatever the
# destination table currently holds.
(
    agg.write
    .format("com.databricks.spark.sqldw")
    .option("url", jdbc_url)
    .option("user", jdbc_user)
    .option("password", jdbc_pass)
    .option("dbtable", synapse_table)
    # The connector documents this option in camelCase. The previous
    # snake_case key ("forward_spark_azure_storage_credentials") is not a
    # documented option name, so credential forwarding was never enabled.
    .option("forwardSparkAzureStorageCredentials", "true")
    # NOTE(review): "<container>" and "<storage>" are unfilled template
    # placeholders; replace them with the real staging container and storage
    # account before running, otherwise the write cannot stage its data.
    .option("tempdir", "wasbs://<container>@<storage>.blob.core.windows.net/tempdir")
    .mode("overwrite")
    .save()
)
