In [0]:
from pyspark.sql.functions import row_number, desc, md5, col
from pyspark.sql.window import Window

In [0]:
# Load the bronze-layer major incident table as the source for the dimension.
major_incidents_df = spark.read.table("mta_bronze.major_incident")

In [0]:
# Build the incident-category dimension rows: one row per distinct category,
# with a sequential natural key (ict_nk) and an MD5-based surrogate key (ict_sk).
#
# The numbering comes solely from the window ordering below, so no separate
# DataFrame-level orderBy is needed before applying it.
#
# NOTE: the window has no partitionBy, so Spark evaluates it on a single
# partition. That is fine for a small list of distinct categories.
# NOTE(review): ict_nk is derived from the descending sort position within the
# current set of categories, so values can shift when categories are added or
# removed in bronze -- confirm downstream consumers do not persist ict_nk.
# NOTE(review): md5() of a NULL inc_category yields NULL, so a NULL category in
# bronze would produce a row with a NULL ict_sk -- confirm whether that can occur.
category_desc_window = Window.orderBy(desc("inc_category"))

inc_category_df = (
    major_incidents_df
    .select("inc_category")
    .distinct()
    .withColumn("ict_nk", row_number().over(category_desc_window))
    .withColumn("ict_sk", md5("inc_category"))
)

In [0]:
# Project the dimension columns in their final order (surrogate key, natural
# key, category), renaming inc_category to ict_category.
inc_category_final_df = inc_category_df.select(
    "ict_sk",
    "ict_nk",
    col("inc_category").alias("ict_category"),
)

In [0]:
# Replace the contents of the silver dimension table with the freshly built
# rows, stored as Delta with the mergeSchema write option enabled.
(
    inc_category_final_df.write
    .format("delta")
    .mode("overwrite")
    .option("mergeSchema", "true")
    .saveAsTable("mta_silver.dim_inc_category")
)

In [0]:
%sql
-- Preview the dimension table that was just written.
-- ORDER BY makes the LIMIT deterministic; without it the 10 rows returned are arbitrary.
SELECT *
FROM mta_silver.dim_inc_category
ORDER BY ict_nk
LIMIT 10;

ict_sk,ict_nk,ict_category
92ea731d3af6677905303c88689f5d55,1,Track
97622cf2e8771871841450151d8f6c3b,2,Subway Car
3855e9349c45f9dd73c812e7d4fde893,3,Stations and Structure
cadd2b2ad06d8a0caee658e3c05e615a,4,Signals
5be0ee9a2a4d1ffddc897625771606ab,5,Persons on Trackbed/Police/Medical
6311ae17c1ee52b36e68aaf4ad066387,6,Other


In [0]:
# End the notebook run, returning the string "Success" as its exit value
# (presumably read by an orchestrating notebook or job -- confirm against the caller).
dbutils.notebook.exit("Success")