# 🔍 Report Output Validation Notebook

In [0]:
from pyspark.sql import functions as F

# Source table (Unity Catalog three-level name) that the validation cells read from.
CUSTOMER_ENRICHED_TABLE = "finance.kyc_ml.customer_enriched"
df = spark.table(CUSTOMER_ENRICHED_TABLE)

### ✅ Check Schema and Sample Records

In [0]:
# Print the column names and types, then preview the first 10 rows.
df.printSchema()
df.show(n=10)

### 📅 Validate `step` to `date` Mapping

In [0]:
# De-duplicate the (step, date) pairs first and sort afterwards: distinct()
# shuffles the data, so an orderBy() placed before it does not guarantee that
# the displayed rows come out sorted. `step` is used as a secondary sort key
# so the output stays deterministic in case several steps share one date.
step_date_pairs = df.select("step", "date").distinct().orderBy("date", "step")
step_date_pairs.show(10)

### 📊 Fraud Summary by Date

In [0]:
# Per-date fraud summary: transaction count, fraud count, and fraud rate (%).

# 1 for rows where isFraud == 1, otherwise 0, so summing it counts fraud rows.
is_fraud_flag = F.when(F.col("isFraud") == 1, 1).otherwise(0)

# Fraud share of the date's transactions, as a percentage rounded to 2 decimals.
fraud_rate_expr = F.round(F.col("fraud_count") * 100.0 / F.col("txn_count"), 2)

daily_counts = df.groupBy("date").agg(
    F.count("*").alias("txn_count"),
    F.sum(is_fraud_flag).alias("fraud_count"),
)
summary_df = daily_counts.withColumn("fraud_rate_pct", fraud_rate_expr).orderBy("date")

display(summary_df)

### 📄 Validate Generated Report Table: `kyc_risk_summary`

In [0]:
# Load the generated report table and preview its first 20 rows.
report_df = spark.table("finance.kyc_ml.kyc_risk_summary")
report_df.show(n=20)

### 🏷️ Check Unity Catalog Lineage Tags (for report table)

In [0]:
# List the table properties recorded on the report table, without truncating values.
tbl_properties_df = spark.sql("SHOW TBLPROPERTIES finance.kyc_ml.kyc_risk_summary")
tbl_properties_df.show(truncate=False)