In [0]:
import dlt
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.window import Window

# ============================================================================
# DIMENSION 1: dim_date
# ============================================================================

@dlt.table(
    name="dim_date",
    comment="Date dimension for time-based analysis"
)
def dim_date():
    """
    Build the date dimension from the distinct occurrence dates in crime_silver.

    Every distinct, non-NULL calendar date obtained from ``date_occ`` becomes
    one row. ``date_key`` is that date rendered as the integer yyyyMMdd
    (e.g. 2023-01-05 -> 20230105).

    Returns:
        DataFrame with columns date_key, full_date, year, quarter, month,
        month_name, day_of_week, day_name, created_by, created_dt,
        sorted by date_key.
    """
    silver = dlt.read("crime_silver")
    full_date = col("full_date")

    # A NULL date_occ (or one that to_date() turns into NULL) would otherwise
    # produce a dimension row whose date_key and attributes are all NULL.
    dates_df = (
        silver
        .select(to_date(col("date_occ")).alias("full_date"))
        .where(full_date.isNotNull())
        .distinct()
    )

    return (
        dates_df
        # The "yyyyMMdd" pattern emits no separators, so the formatted string
        # can be cast to an integer directly.
        .withColumn("date_key", date_format(full_date, "yyyyMMdd").cast("int"))
        .withColumn("year", year(full_date))
        .withColumn("quarter", quarter(full_date))
        .withColumn("month", month(full_date))
        .withColumn("month_name", date_format(full_date, "MMMM"))
        # Spark's dayofweek() numbers days 1 (Sunday) through 7 (Saturday).
        .withColumn("day_of_week", dayofweek(full_date))
        .withColumn("day_name", date_format(full_date, "EEEE"))
        # Audit columns.
        .withColumn("created_by", lit("SYSTEM"))
        .withColumn("created_dt", current_timestamp())
        .select(
            "date_key",
            "full_date",
            "year",
            "quarter",
            "month",
            "month_name",
            "day_of_week",
            "day_name",
            "created_by",
            "created_dt"
        )
        .orderBy("date_key")
    )

# ============================================================================
# DIMENSION 2: dim_time
# ============================================================================

@dlt.table(
    name="dim_time",
    comment="Time dimension for time-of-day analysis"
)
def dim_time():
    """
    Build the time dimension from the distinct ``time_occ`` values in crime_silver.

    ``time_key`` is a copy of ``time_occ``. ``time_period`` buckets the value:
        600 <= t < 1200   -> "Morning"
        1200 <= t < 1700  -> "Afternoon"
        1700 <= t < 2100  -> "Evening"
        NULL              -> "Unknown"
        anything else     -> "Night"

    NOTE(review): the thresholds suggest time_occ is an HHMM-style number
    (e.g. 1430) -- confirm against the silver schema.

    Returns:
        DataFrame with columns time_key, time_occ, time_period, created_by,
        created_dt, sorted by time_key.
    """
    silver = dlt.read("crime_silver")
    time_occ = col("time_occ")

    time_period = (
        # A NULL time_occ makes every range comparison below evaluate to NULL,
        # so without this branch it would fall through to the "Night" default.
        when(time_occ.isNull(), "Unknown")
        .when((time_occ >= 600) & (time_occ < 1200), "Morning")
        .when((time_occ >= 1200) & (time_occ < 1700), "Afternoon")
        .when((time_occ >= 1700) & (time_occ < 2100), "Evening")
        .otherwise("Night")
    )

    return (
        silver
        .select(time_occ)
        .distinct()
        .withColumn("time_key", time_occ)
        .withColumn("time_period", time_period)
        # Audit columns.
        .withColumn("created_by", lit("SYSTEM"))
        .withColumn("created_dt", current_timestamp())
        .select(
            "time_key",
            "time_occ",
            "time_period",
            "created_by",
            "created_dt"
        )
        .orderBy("time_key")
    )

# ============================================================================
# DIMENSION 3: dim_crime_type
# ============================================================================

@dlt.table(
    name="dim_crime_type",
    comment="Crime type dimension"
)
def dim_crime_type():
    """
    Build the crime-type dimension from crime_silver.

    One row is produced per distinct (crm_cd, crm_cd_desc, part_1_2)
    combination. ``crime_type_key`` is a copy of ``crm_cd``.

    Returns:
        DataFrame with columns crime_type_key, crm_cd, crm_cd_desc, part_1_2,
        created_by, created_dt, sorted by crime_type_key.
    """
    crime_types = (
        dlt.read("crime_silver")
        .select("crm_cd", "crm_cd_desc", "part_1_2")
        .distinct()
    )

    # Project the key, the source attributes and the audit columns in one pass.
    projected = crime_types.select(
        col("crm_cd").alias("crime_type_key"),
        col("crm_cd"),
        col("crm_cd_desc"),
        col("part_1_2"),
        lit("SYSTEM").alias("created_by"),
        current_timestamp().alias("created_dt"),
    )
    return projected.orderBy("crime_type_key")

# ============================================================================
# DIMENSION 4: dim_weapon
# ============================================================================

@dlt.table(
    name="dim_weapon",
    comment="Weapon dimension"
)
def dim_weapon():
    """
    Build the weapon dimension from crime_silver.

    Reads ``weapon_used_cd_clean`` and ``weapon_desc_clean``, exposes them as
    ``weapon_used_cd`` and ``weapon_desc``, and keeps one row per distinct
    pair. ``weapon_key`` is a copy of ``weapon_used_cd``.

    Returns:
        DataFrame with columns weapon_key, weapon_used_cd, weapon_desc,
        created_by, created_dt, sorted by weapon_key.
    """
    output_columns = [
        "weapon_key",
        "weapon_used_cd",
        "weapon_desc",
        "created_by",
        "created_dt",
    ]

    silver = dlt.read("crime_silver")

    # Distinct weapon code/description pairs under their dimension names.
    weapons = silver.select(
        col("weapon_used_cd_clean").alias("weapon_used_cd"),
        col("weapon_desc_clean").alias("weapon_desc"),
    ).distinct()

    keyed = weapons.withColumn("weapon_key", col("weapon_used_cd"))

    # Audit columns.
    audited = (
        keyed
        .withColumn("created_by", lit("SYSTEM"))
        .withColumn("created_dt", current_timestamp())
    )

    return audited.select(*output_columns).orderBy("weapon_key")

# ============================================================================
# DIMENSION 5: dim_victim
# ============================================================================

@dlt.table(
    name="dim_victim",
    comment="Victim demographic dimension (age and sex)"
)
def dim_victim():
    """
    Build the victim dimension as a fixed age-group x sex grid.

    The table does not read from the silver layer: it enumerates every
    combination of the hard-coded age groups and sex categories below.
    ``victim_key`` is numbered from 1 with age group varying slowest and
    sex varying fastest.

    Returns:
        DataFrame with columns victim_key, age_group, sex, created_by,
        created_dt.
    """
    age_groups = (
        "Unknown",
        "0-17",    # Juvenile
        "18-25",   # Young Adult
        "26-35",   # Adult
        "36-50",   # Middle Age
        "51+",     # Senior
    )
    sex_categories = ("M", "F", "X")

    # Age group is the outer loop so keys are assigned age-group-major.
    pairs = ((age_group, sex) for age_group in age_groups for sex in sex_categories)

    # created_dt is a None placeholder here; it is overwritten below.
    rows = [
        (victim_key, age_group, sex, "SYSTEM", None)
        for victim_key, (age_group, sex) in enumerate(pairs, start=1)
    ]

    victim_schema = StructType([
        StructField("victim_key", IntegerType(), False),
        StructField("age_group", StringType(), False),
        StructField("sex", StringType(), False),
        StructField("created_by", StringType(), False),
        StructField("created_dt", TimestampType(), True),
    ])

    victims = spark.createDataFrame(rows, victim_schema)
    return victims.withColumn("created_dt", current_timestamp())

# ============================================================================
# DIMENSION 6: dim_status
# ============================================================================

@dlt.table(
    name="dim_status",
    comment="Crime status dimension"
)
def dim_status():
    """
    Build the status dimension from crime_silver.

    Keeps one row per distinct (status_clean, status_desc) pair, exposing
    ``status_clean`` as ``status_code``. ``status_key`` is a 1-based
    sequential number assigned in (status_code, status_desc) order.

    The key depends on sort position, so it can shift when new status
    values appear in the silver layer.

    Returns:
        DataFrame with columns status_key, status_code, status_desc,
        created_by, created_dt, sorted by status_key.
    """
    silver = dlt.read("crime_silver")

    statuses = silver.select(
        col("status_clean").alias("status_code"),
        col("status_desc")
    ).distinct()

    # Rows are distinct on (status_code, status_desc), so ordering by
    # status_code alone leaves ties whenever one code has several
    # descriptions, and row_number() would number them arbitrarily between
    # runs. Ordering by both columns makes the numbering deterministic.
    # There is no partitionBy: the whole set is numbered as one sequence.
    key_order = Window.orderBy("status_code", "status_desc")

    return (
        statuses
        .withColumn("status_key", row_number().over(key_order))
        # Audit columns.
        .withColumn("created_by", lit("SYSTEM"))
        .withColumn("created_dt", current_timestamp())
        .select(
            "status_key",
            "status_code",
            "status_desc",
            "created_by",
            "created_dt"
        )
        .orderBy("status_key")
    )