# Gold Layer: Create Risk Assessment Features
Azure Synapse Analytics - Medallion Architecture

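**Pattern**: Risk scoring based on claims history — a base score of 30, plus 10 per claim and a surcharge of 20 (total claim amount above 10,000) or 40 (above 50,000), capped at 100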

In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, when, coalesce, lit, current_timestamp
import logging

In [None]:
# Logging: one module-level logger shared by every cell in this notebook.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Configuration - ADLS Gen2 locations of the Delta tables used below.
# STORAGE_ACCOUNT is a placeholder; replace it with the real account name
# before running.
STORAGE_ACCOUNT = "<storage-account-name>"
TABLES_ROOT = f"abfss://tables@{STORAGE_ACCOUNT}.dfs.core.windows.net"

# Per-layer prefixes under the tables root.
_SILVER_ROOT = f"{TABLES_ROOT}/silver"
_GOLD_ROOT = f"{TABLES_ROOT}/gold"

# Inputs: Silver policies and the Gold claims features.
SILVER_POLICIES_PATH = f"{_SILVER_ROOT}/silver_policies"
GOLD_CLAIMS_FEATURES_PATH = f"{_GOLD_ROOT}/gold_claims_features"

# Output: the Gold risk features table written by main().
GOLD_RISK_PATH = f"{_GOLD_ROOT}/gold_risk_features"

In [None]:
def _add_risk_score(df):
    """Return *df* with ``overall_risk_score`` and ``feature_timestamp`` added.

    The score is ``30 + 10 * total_claims`` plus an amount surcharge (40 when
    ``total_claim_amount`` exceeds 50000, 20 when it exceeds 10000, otherwise
    0), capped at 100 and stored as a double. A null ``total_claims`` counts
    as 0 and a null ``total_claim_amount`` adds no surcharge, so a row where
    both are null scores 30.

    Args:
        df: DataFrame exposing ``total_claims`` and ``total_claim_amount``.

    Returns:
        A new DataFrame with the two added columns; the intermediate
        component columns are dropped before returning.
    """
    with_components = (
        df
        .withColumn(
            "claims_risk_component",
            (coalesce(col("total_claims"), lit(0)) * 10).cast("double"),
        )
        .withColumn(
            "amount_risk_component",
            # A null amount fails both comparisons and falls through to 0.
            when(col("total_claim_amount") > 50000, 40)
            .when(col("total_claim_amount") > 10000, 20)
            .otherwise(0)
            .cast("double"),
        )
    )

    # Build the uncapped score once so the cap test and the fallback value
    # cannot drift apart.
    raw_score = (
        lit(30)
        + col("claims_risk_component")
        + col("amount_risk_component")
    )

    # Base of 30 plus non-negative components, capped at 100.
    return (
        with_components
        .withColumn(
            "overall_risk_score",
            when(raw_score > 100, 100).otherwise(raw_score).cast("double"),
        )
        .withColumn("feature_timestamp", current_timestamp())
        .drop("claims_risk_component", "amount_risk_component")
    )


def main():
    """Build the Gold risk features table from policies and claims features.

    Reads the Silver policies and Gold claims features Delta tables, left-joins
    them on ``customer_id`` (so policies without claims features are kept),
    adds the risk score columns, and overwrites the Delta table at
    ``GOLD_RISK_PATH``.

    Raises:
        Exception: any failure while reading, transforming or writing is
            logged with its traceback and re-raised unchanged.
    """
    spark = SparkSession.builder.getOrCreate()

    try:
        logger.info("Reading policies and claims features")
        df_policies = spark.read.format("delta").load(SILVER_POLICIES_PATH)
        df_claims_features = spark.read.format("delta").load(GOLD_CLAIMS_FEATURES_PATH)

        # count() is a Spark action: it runs a job just to produce this number.
        logger.info("Read %d policies from Silver layer", df_policies.count())

        # Join claims features with policies
        joined = df_policies.join(df_claims_features, on="customer_id", how="left")
        risk_features = _add_risk_score(joined)

        feature_count = risk_features.count()
        logger.info("Created risk features for %d policies", feature_count)

        # Write to Gold
        logger.info("Writing to %s", GOLD_RISK_PATH)
        risk_features.write \
            .format("delta") \
            .mode("overwrite") \
            .option("description", "Gold layer: Risk assessment features for ML") \
            .save(GOLD_RISK_PATH)

        logger.info("✓ Risk features creation completed")

    except Exception as e:
        # logger.exception records the traceback alongside the message.
        logger.exception("✗ Failed to create risk features: %s", e)
        raise

In [None]:
# Run the risk-features pipeline when this cell executes.
main()