In [0]:
%run ../utils/common_utils

In [0]:
# Pipeline settings come from the shared helpers pulled in by the %run cell.
config = get_pipeline_config()

# Bind the account name once, then authenticate against ADLS with it and the
# configured secret scope.
storage_account = config['storage_account']
authenticate_adls(storage_account, config['secret_scope'])

# Output container, used by later cells when building storage paths.
container_output = config['container_output']

In [0]:
from pyspark.sql import functions as F

# 1. Locations of the Silver Delta tables.
# NOTE(review): assumes an upstream notebook already wrote "silver/train" and
# "silver/test" under the output container -- confirm before running.
train_path = get_path(container_output, storage_account, "silver/train")
test_path = get_path(container_output, storage_account, "silver/test")

# 2. Load both Silver tables through one Delta-format reader.
silver_reader = spark.read.format("delta")
df_train = silver_reader.load(train_path)
df_test = silver_reader.load(test_path)

# 3. Stack train and test into a single frame for a holistic view.
# unionByName aligns columns by name rather than by position.
df_combined = df_train.unionByName(df_test)

# 4. Gold transformation: marketing analysis per (job, education) segment.
# For each segment compute the number of customers, their average balance and
# how many of them converted (y == "yes").
df_gold_analysis = df_combined.groupBy("job", "education").agg(
    # Count rows, not non-null "job" values: groupBy keeps a group for NULL
    # jobs, and count("job") would report 0 customers for it, which in turn
    # makes the conversion-rate division below divide by zero.
    F.count(F.lit(1)).alias("total_customers"),
    F.avg("balance").alias("avg_balance"),
    # Rows where y is anything other than "yes" (including NULL) contribute 0.
    F.sum(F.when(F.col("y") == "yes", 1).otherwise(0)).alias("total_conversions"),
)

# 5. Conversion rate as a percentage of the segment's customers.
# total_customers is a row count, so it is at least 1 for every group that
# exists and the division is always defined.
df_gold_analysis = df_gold_analysis.withColumn(
    "conversion_rate",
    (F.col("total_conversions") / F.col("total_customers")) * 100,
)

# 6. Persist the aggregated result to the Gold layer as a Delta table.
gold_path = get_path(container_output, storage_account, "gold/marketing_analysis")

# mode("overwrite") replaces the data currently stored at gold_path, so
# re-running this cell refreshes the table instead of appending to it.
(
    df_gold_analysis.write
    .format("delta")
    .mode("overwrite")
    .save(gold_path)
)

# The status marker was previously mojibake (the UTF-8 bytes of the check-mark
# emoji decoded as Windows-1252); it is restored to the intended character.
print(f"✅ Gold analysis table created at: {gold_path}")