In [0]:
from pyspark.sql.types import DateType
from pyspark.sql.functions import md5, concat_ws, col, initcap, when

In [0]:
# Read the major-incident source table and the line / incident-category
# dimension tables that it is joined against.
major_incident_df = spark.read.table("mta_bronze.major_incident")

dim_lines_df = spark.read.table("mta_silver.dim_line")

dim_inc_category_df = spark.read.table("mta_silver.dim_inc_category")

In [0]:
# Build the transformed incident DataFrame: attach the dimension rows, derive
# the inc_sk hash and the dte_sk date, and rewrite two descriptive columns.
major_incidents_transform_df = (
    major_incident_df
    # Inner joins: incidents with no matching line or category row are dropped.
    .join(dim_lines_df, major_incident_df["inc_line"] == dim_lines_df["lin_nk"], "inner")
    .join(
        dim_inc_category_df,
        major_incident_df["inc_category"] == dim_inc_category_df["ict_category"],
        "inner",
    )
    # inc_sk = MD5 of the listed columns joined with "_". It is computed here,
    # before inc_division / inc_day_type are rewritten and inc_month is dropped,
    # so the hash is based on the values as they come out of the joins.
    .withColumn(
        "inc_sk",
        md5(concat_ws("_", "inc_month", "inc_division", "inc_line", "inc_day_type", "inc_category")),
    )
    .withColumn("inc_division", initcap(col("inc_division")))
    # dte_sk is inc_month cast to a date; inc_month itself is then removed.
    .withColumn("dte_sk", col("inc_month").cast(DateType()))
    .drop("inc_month")
    # Only the value 1 is labelled "Weekday"; every other value falls through
    # to "Weekend".
    .withColumn(
        "inc_day_type",
        when(col("inc_day_type") == 1, "Weekday").otherwise("Weekend"),
    )
)

In [0]:
# Keep only the columns that make up the fact table, in output order:
# the three *_sk keys from the joins/hash, the date key, then the incident
# count and the ingestion date / source columns.
major_incident_final_df = major_incidents_transform_df.select(
    "inc_sk", "lin_sk", "ict_sk", "dte_sk",
    "inc_count", "inc_ingestion_date", "inc_source",
)

In [0]:
# Overwrite the Delta table mta_silver.fct_major_incident with the final
# DataFrame, partitioned by dte_sk, with the mergeSchema write option enabled.
(
    major_incident_final_df.write
    .format("delta")
    .mode("overwrite")
    .option("mergeSchema", "true")
    .partitionBy("dte_sk")
    .saveAsTable("mta_silver.fct_major_incident")
)

In [0]:
%sql
-- Preview up to 20 rows of mta_silver.fct_major_incident, the table written by the preceding cell.
SELECT * FROM mta_silver.fct_major_incident LIMIT 20;

inc_sk,lin_sk,ict_sk,dte_sk,inc_count,inc_ingestion_date,inc_source
6d8741eaab3385d84a71630652c3b251,e1e1d3d40573127e9ee0480caf1283d6,97622cf2e8771871841450151d8f6c3b,2018-01-01,1,2025-03-09T06:41:37.298Z,data.gov
6bfc27e4ac49b8ee61e93f5a3cc43e45,e1e1d3d40573127e9ee0480caf1283d6,3855e9349c45f9dd73c812e7d4fde893,2018-01-01,1,2025-03-09T06:41:37.298Z,data.gov
4572d0f63e5dc2c96b3b428caff3ec9d,e1e1d3d40573127e9ee0480caf1283d6,cadd2b2ad06d8a0caee658e3c05e615a,2018-01-01,1,2025-03-09T06:41:37.298Z,data.gov
47bdc42ad9153558b19e939cb425e2cc,e1e1d3d40573127e9ee0480caf1283d6,5be0ee9a2a4d1ffddc897625771606ab,2018-01-01,3,2025-03-09T06:41:37.298Z,data.gov
457bf6e2a3e7d7aa952c9909c4ac7944,f09564c9ca56850d4cd6b3319e541aee,cadd2b2ad06d8a0caee658e3c05e615a,2018-01-01,1,2025-03-09T06:41:37.298Z,data.gov
ad6df157c08a8115633cdd3aa79c5dba,f09564c9ca56850d4cd6b3319e541aee,92ea731d3af6677905303c88689f5d55,2018-01-01,1,2025-03-09T06:41:37.298Z,data.gov
51f899816c3fc682831b0c7c65f34bcc,f09564c9ca56850d4cd6b3319e541aee,5be0ee9a2a4d1ffddc897625771606ab,2018-01-01,1,2025-03-09T06:41:37.298Z,data.gov
8d13b70eb01726be726dddcccbbcd4b0,f09564c9ca56850d4cd6b3319e541aee,6311ae17c1ee52b36e68aaf4ad066387,2018-01-01,1,2025-03-09T06:41:37.298Z,data.gov
e30a222edea6f897ff1a2f239a0e04a4,8d9c307cb7f3c4a32822a51922d1ceaa,92ea731d3af6677905303c88689f5d55,2018-01-01,1,2025-03-09T06:41:37.298Z,data.gov
0c0dcbe95a31db04432e8d075b708341,8d9c307cb7f3c4a32822a51922d1ceaa,cadd2b2ad06d8a0caee658e3c05e615a,2018-01-01,1,2025-03-09T06:41:37.298Z,data.gov


In [0]:
# End the notebook run and hand back the string "Success" as its exit value
# (readable by a calling notebook or job, e.g. via dbutils.notebook.run).
dbutils.notebook.exit("Success")