In [0]:
from pyspark.sql.types import IntegerType
from pyspark.sql.functions import col, when, md5, concat_ws

In [0]:
# Load the bronze-layer entrance/exit table as the input DataFrame.
entrance_exit_df = spark.read.table("mta_bronze.entrance_exit")

In [0]:
# Derive the transformed entrance/exit DataFrame from the bronze rows:
#   * eex_complex_id is cast to an integer,
#   * the two "allowed" flags become booleans: True only when the value is
#     exactly "YES", False for anything else (including null),
#   * eex_sk is the MD5 of latitude, longitude and entrance type joined by "_".
# NOTE(review): concat_ws skips null inputs, so a null coordinate or type is
# dropped from the hashed string instead of being represented -- confirm these
# columns are never null in bronze.
# NOTE(review): eex_complex_id is cast here but is not among the columns
# selected for the final DataFrame later in this notebook -- confirm intent.
entrance_exit_trans_df = (
    entrance_exit_df
    .withColumn(
        "eex_complex_id",
        col("eex_complex_id").cast(IntegerType()),
    )
    .withColumn(
        "eex_entry_allowed",
        when(col("eex_entry_allowed") == "YES", True).otherwise(False),
    )
    .withColumn(
        "eex_exit_allowed",
        when(col("eex_exit_allowed") == "YES", True).otherwise(False),
    )
    .withColumn(
        "eex_sk",
        md5(
            concat_ws(
                "_",
                "eex_entrance_latitude",
                "eex_entrance_longitude",
                "eex_entrance_type",
            )
        ),
    )
)
    

In [0]:
# Project the transformed rows down to the columns written to the dimension
# table, in their final order.
entrance_exit_final_df = entrance_exit_trans_df.select(
    # surrogate key and station identifiers
    "eex_sk",
    "eex_station_id",
    "eex_gtfs_stop_id",
    # entrance attributes
    "eex_entrance_type",
    "eex_entry_allowed",
    "eex_exit_allowed",
    "eex_entrance_latitude",
    "eex_entrance_longitude",
    # ingestion metadata
    "eex_ingestion_date",
    "eex_source",
)

In [0]:
# Overwrite the silver dimension table with the rows built above, stored as
# Delta and partitioned by entrance type; mergeSchema is enabled on the write.
(
    entrance_exit_final_df.write
    .format("delta")
    .mode("overwrite")
    .partitionBy("eex_entrance_type")
    .option("mergeSchema", "true")
    .saveAsTable("mta_silver.dim_entrance_exit")
)

In [0]:
%sql
-- Preview ten rows of the silver dimension table; there is no ORDER BY, so
-- which rows come back is not guaranteed.
SELECT *
FROM mta_silver.dim_entrance_exit
LIMIT 10;

eex_sk,eex_station_id,eex_gtfs_stop_id,eex_entrance_type,eex_entry_allowed,eex_exit_allowed,eex_entrance_latitude,eex_entrance_longitude,eex_ingestion_date,eex_source
5a707c192dfeb4f129be90de376c0906,503,S29,Stair,True,True,40.6273214,-74.075095,2025-03-09T14:02:24.58Z,data.gov
1344766b20fc6194f33d13f00f8f2fb1,503,S29,Stair,True,True,40.6286019,-74.0752482,2025-03-09T14:02:24.58Z,data.gov
123c530b3542c9ff5ea11ce80d342eef,504,S28,Stair,True,True,40.6207463,-74.0710103,2025-03-09T14:02:24.58Z,data.gov
03646af8ca1f715f2c55485aabca9bf3,504,S28,Stair,True,True,40.6216363,-74.0717437,2025-03-09T14:02:24.58Z,data.gov
64ef37ef509962ba828a190d0ccec911,504,S28,Stair,True,True,40.6215752,-74.0717062,2025-03-09T14:02:24.58Z,data.gov
71fbd95a207c080b789491614a801801,504,S28,Stair,True,True,40.6218984,-74.0716411,2025-03-09T14:02:24.58Z,data.gov
dc3021ebe6156b6685ebf19585be122d,505,S27,Stair,True,True,40.6036771,-74.0836629,2025-03-09T14:02:24.58Z,data.gov
bbb6aa090e53a938366ad08536e6fd8a,506,S26,Stair,True,True,40.5963353,-74.0874145,2025-03-09T14:02:24.58Z,data.gov
c9bc95127ee55afd823fce4a231fe2fe,506,S26,Stair,True,True,40.5960874,-74.0875644,2025-03-09T14:02:24.58Z,data.gov
e99062fd40419344983cacfa1b31ac05,506,S26,Stair,True,True,40.5961168,-74.087835,2025-03-09T14:02:24.58Z,data.gov


In [0]:
# End the notebook run, passing "Success" as the notebook's exit value.
dbutils.notebook.exit("Success")