In [0]:
from pyspark.sql.functions import current_timestamp, lit, col

# --- Setup ---
# `run_date` is a notebook widget so the same notebook can be re-run for a
# different date; its (string) value is stamped onto every ingested row below.
dbutils.widgets.text("run_date", "2025-12-09")
run_date = dbutils.widgets.get("run_date")

source_table = "final_project.default.crimes_raw"
target_table = "final_project.bronze.crimes_bronze"

try:
    # ==============================================================================
    # STEP 1: Ingestion & Schema Sanitization
    # ==============================================================================
    print(f"Reading from source: {source_table}")
    raw_df = spark.read.table(source_table)

    # Delta Lake does not support spaces in column names by default.
    # We must replace spaces with underscores (e.g., "Case Number" -> "Case_Number")
    print("Sanitizing column names (replacing spaces with underscores)...")
    clean_columns = [c.replace(" ", "_") for c in raw_df.columns]
    # Rename positionally with toDF() instead of col(name).alias(...): col()
    # parses its argument as an expression, so a source column whose name
    # contains a dot would be misread as a nested-field reference.
    sanitized_df = raw_df.toDF(*clean_columns)

    # ==============================================================================
    # STEP 2: Metadata Enrichment (Audit Columns)
    # ==============================================================================
    print("Adding metadata columns...")
    # Adding timestamp and source info for lineage tracking
    bronze_df = (
        sanitized_df
        .withColumn("ingestion_timestamp", current_timestamp())
        .withColumn("source_system", lit(source_table))
        .withColumn("run_date", lit(run_date))
    )

    # ==============================================================================
    # STEP 3: Data Storage (Bronze Layer)
    # ==============================================================================
    print(f"Writing to target: {target_table}")
    # Using 'mergeSchema' to handle potential new columns in future loads
    (
        bronze_df.write
        .format("delta")
        .mode("overwrite")
        .option("mergeSchema", "true")
        .saveAsTable(target_table)
    )

    # Re-read the target table so the reported count reflects what was stored.
    count = spark.read.table(target_table).count()
    print(f"Bronze ingestion successful. Total records: {count}")

except Exception as e:
    print(f"Error during Bronze ingestion: {str(e)}")
    # Re-raise after logging: swallowing the exception here would let the
    # notebook finish normally even though nothing was ingested, hiding the
    # failure from whoever (or whatever) triggered the run.
    raise