In [0]:
from pyspark.sql.types import IntegerType
from pyspark.sql.functions import col, when, md5, concat_ws

In [0]:
# Load the source tables as DataFrames: the bronze entrance/exit feed plus two
# silver tables. Only entrance_exit_df is referenced by the cells visible in
# this notebook; the other two reads are kept as-is.
entrance_exit_df = spark.read.table("mta_bronze.entrance_exit")
dim_borough_df = spark.read.table("mta_silver.dim_borough")
lkp_stations_df = spark.read.table("mta_silver.lkp_station")

In [0]:
# Column expressions applied to the bronze entrance/exit data.
complex_id_expr = col("eex_complex_id").cast(IntegerType())
# Only an exact "YES" becomes True; every other value (including null) falls
# through to the otherwise() branch and becomes False.
entry_allowed_expr = when(col("eex_entry_allowed") == "YES", True).otherwise(False)
exit_allowed_expr = when(col("eex_exit_allowed") == "YES", True).otherwise(False)
# Surrogate key: MD5 of latitude, longitude and entrance type joined with "_".
surrogate_key_expr = md5(
    concat_ws("_", "eex_entrance_latitude", "eex_entrance_longitude", "eex_entrance_type")
)

entrance_exit_trans_df = (
    entrance_exit_df
    .withColumn("eex_complex_id", complex_id_expr)
    .withColumn("eex_entry_allowed", entry_allowed_expr)
    .withColumn("eex_exit_allowed", exit_allowed_expr)
    .withColumn("eex_sk", surrogate_key_expr)
)
    

In [0]:
# Columns projected into the final DataFrame, in output order.
# Note: eex_complex_id (cast in the transform step) is not part of this list.
dim_entrance_exit_columns = [
    "eex_sk",
    "eex_station_id",
    "eex_gtfs_stop_id",
    "eex_entrance_type",
    "eex_entry_allowed",
    "eex_exit_allowed",
    "eex_entrance_latitude",
    "eex_entrance_longitude",
    "eex_ingestion_date",
    "eex_source",
]

entrance_exit_final_df = entrance_exit_trans_df.select(*dim_entrance_exit_columns)

In [0]:
# Save the final DataFrame as the Delta table mta_silver.dim_entrance_exit,
# in overwrite mode and partitioned by entrance type.
# NOTE(review): mergeSchema is set together with overwrite mode; if the intent
# is to replace the table's schema or partitioning on rewrite, Delta usually
# expects overwriteSchema instead — confirm which behavior is wanted.
(
    entrance_exit_final_df.write
    .format("delta")
    .mode("overwrite")
    .option("mergeSchema", "true")
    .partitionBy("eex_entrance_type")
    .saveAsTable("mta_silver.dim_entrance_exit")
)

In [0]:
%sql
-- Spot-check: show up to 10 rows of mta_silver.dim_entrance_exit.
SELECT * FROM mta_silver.dim_entrance_exit LIMIT 10;

eex_sk,eex_station_id,eex_gtfs_stop_id,eex_entrance_type,eex_entry_allowed,eex_exit_allowed,eex_entrance_latitude,eex_entrance_longitude,eex_ingestion_date,eex_source
560b83760f2e88ac515931cf99fc084f,255,F02,Easement - Street,True,True,40.7109796,-73.7927502,2025-03-09T07:17:34.45Z,data.gov
58642c209c8564fd0b7b39dc1fd82e6e,263,G10,Easement - Street,True,True,40.7296038,-73.8617722,2025-03-09T07:17:34.45Z,data.gov
f5115e6d874b2ba61303f188f48436a9,266,G13,Easement - Street,True,True,40.742052,-73.8810756,2025-03-09T07:17:34.45Z,data.gov
2c6d07d8d57034e3c7d6d1e94c680693,267,G14,Easement - Street,True,True,40.7472534,-73.8921113,2025-03-09T07:17:34.45Z,data.gov
550cca4af223001687ca36fbc9163c42,274,F09,Easement - Street,True,True,40.7477378,-73.9446888,2025-03-09T07:17:34.45Z,data.gov
d25de6fe96b3ce8bce240b5d823f5c1a,274,F09,Easement - Street,True,True,40.7473711,-73.9442712,2025-03-09T07:17:34.45Z,data.gov
b40437b6c5be061b88f850de310de780,281,G22,Easement - Street,True,True,40.7468858,-73.9435169,2025-03-09T07:17:34.45Z,data.gov
67cad54532d5164304e88a881aeaf79b,461,718; R09,Easement - Street,True,True,40.750835,-73.939671,2025-03-09T07:17:34.45Z,data.gov
21d49bf52fe3c0660212f8ed454368ce,48,D32,Walkway,False,True,40.6298896,-73.9618253,2025-03-09T07:17:34.45Z,data.gov
0562ee329cd33ac8d6c19a6679639bc4,48,D32,Walkway,False,True,40.6299239,-73.9615842,2025-03-09T07:17:34.45Z,data.gov


In [0]:
# End the notebook run, passing the string "Success" as its exit value.
dbutils.notebook.exit("Success")