# 20 – Risk Scoring (Rule-based) + MLflow

In [None]:
from pyspark.sql import functions as F
import mlflow

# Fully qualified gold schema; CATALOG and SCHEMA_GOLD must already be
# defined when this cell runs.
GOLD = f"{CATALOG}.{SCHEMA_GOLD}"
features = spark.table(f"{GOLD}.go_entity_features")

# Rule-based score: each rule adds a fixed number of points when its
# condition holds and 0 otherwise, so the total is never NULL.
pep_points = F.when(F.col("pep_flag") == F.lit(True), 40).otherwise(0)
sanctions_points = F.when(F.col("sanctions_hits_30d") > 0, 35).otherwise(0)
geo_points = (
    F.when(F.col("geo_risk") == "HIGH", 15)
    .when(F.col("geo_risk") == "MEDIUM", 5)
    .otherwise(0)
)
flow_points = F.when(F.col("net_flow_30d") > 100000, 10).otherwise(0)

# Maximum attainable score is 40 + 35 + 15 + 10 = 100.
score = pep_points + sanctions_points + geo_points + flow_points

# Map the numeric score onto a band; branches are evaluated top-down, so
# the first matching threshold wins.
band_expr = (
    F.when(F.col("risk_score") >= 60, "HIGH")
    .when(F.col("risk_score") >= 30, "MEDIUM")
    .otherwise("LOW")
)

# `band_expr` reads the `risk_score` column, so that column must be added
# before `band`.
with_risk_score = features.withColumn("risk_score", score)
with_band = with_risk_score.withColumn("band", band_expr)
scored = with_band.withColumn("run_ts", F.current_timestamp())

# Persist the scored entities and record the run (parameters and per-band
# row counts) in MLflow.
target_table = f"{GOLD}.go_risk_scores"

mlflow.set_experiment("/Shared/kyc_reporting_factory")
with mlflow.start_run(run_name="risk_scoring_demo") as run:
    # Log the parameters first so that a run whose write fails still records
    # which rule set was attempted.
    mlflow.log_param("rule_version", "v1.0")
    # NOTE: this string is descriptive only; keep it in sync with the band
    # thresholds used where `scored` is built.
    mlflow.log_param("thresholds", ">=60:HIGH, >=30:MEDIUM")

    scored.write.mode("overwrite").saveAsTable(target_table)

    # Count bands from the table that was just written rather than from
    # `scored`: this avoids executing the scoring plan a second time and
    # guarantees the metrics describe exactly the persisted rows.
    band_rows = spark.table(target_table).groupBy("band").count().collect()

    # Start every band at 0 so an empty band is logged explicitly instead of
    # being absent from the run's metrics.
    dist = {"HIGH": 0, "MEDIUM": 0, "LOW": 0}
    # Use item access: `row.count` would resolve to the tuple method.
    dist.update({row["band"]: row["count"] for row in band_rows})
    mlflow.log_metrics({f"band_{k}": float(v) for k, v in dist.items()})

print("Wrote table:", target_table)