In [0]:
from pyspark.sql.functions import *
from pyspark.sql.window import Window
import json

# Progress marker printed when this first cell runs.
print("Starting Recommendation Engine...")


In [0]:
# Load the source transactions table through the session object `spark`.
# NOTE(review): `spark` is not created in this notebook — presumably injected
# by the runtime (e.g. Databricks); confirm before running elsewhere.
silver_df = spark.table("silver_credit_transactions")

print("Silver table loaded successfully")

# count() is a Spark action: it runs a job over the table just to report its size.
record_count = silver_df.count()
print(f"Total records: {record_count}")


In [0]:
# Keep only transactions whose is_fraud flag equals 0. Rows with a NULL
# is_fraud are dropped too, because the comparison evaluates to NULL.
is_not_fraud = col("is_fraud") == 0
valid_txn_df = silver_df.where(is_not_fraud)


In [0]:
# User spending profile: one row per (cc_num, category) pair.
# `sum` and `count` below are the pyspark.sql.functions versions pulled in by
# the star import in the first cell, not the Python built-ins.
spend_aggregates = [
    sum("amt").alias("total_spent"),   # summed `amt` for the pair
    count("*").alias("txn_count"),     # number of rows in the pair
]

user_category_profile = valid_txn_df.groupBy("cc_num", "category").agg(
    *spend_aggregates
)


In [0]:
# Rank categories per user
# Redundant with the import in the first cell; kept so this cell also runs on its own.
from pyspark.sql.window import Window

# Order each card's categories by spend, highest first. `txn_count` and
# `category` are tie-breakers that make the ordering total and deterministic:
# user_category_profile has one row per (cc_num, category), so within a
# cc_num partition no two rows can compare equal on all three keys.
window_spec = Window.partitionBy("cc_num").orderBy(
    desc("total_spent"),
    desc("txn_count"),
    asc("category"),
)

# row_number() is used instead of rank(): rank() gives tied rows the same value,
# so a `rank <= N` filter could return more than N categories for a card whose
# categories have identical spend. row_number() always yields 1, 2, 3, ... with
# no duplicates. The output column keeps the name "rank" for downstream cells.
ranked_categories = user_category_profile.withColumn(
    "rank", row_number().over(window_spec)
)

In [0]:
# Generate top-N recommendations
# Highest rank value retained per customer.
TOP_N_CATEGORIES = 3

# Output projection; cc_num is exposed under the name customer_id.
recommendation_columns = [
    col("cc_num").alias("customer_id"),
    "category",
    "total_spent",
    "txn_count",
    "rank",
]

recommendations_df = ranked_categories.where(
    col("rank") <= TOP_N_CATEGORIES
).select(*recommendation_columns)

# Preview the first 20 rows without truncating column values.
# NOTE(review): the preview prints cc_num values (as customer_id) — confirm
# these are safe to leave in saved notebook output.
recommendations_df.show(n=20, truncate=False)


In [0]:
# Write recommendations table
# Saved in Delta format; mode "overwrite" replaces whatever the table
# already contains each time this cell is run.
(
    recommendations_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("gold_customer_category_recommendations")
)

print("Recommendation table created successfully!")
