In [0]:
from pyspark.sql.types import IntegerType
from pyspark.sql.functions import col, when, md5, concat_ws

In [0]:
# Load the three input tables by name: the bronze entrance/exit table that
# this notebook transforms, plus two silver-layer tables.
# NOTE(review): dim_borough_df and lkp_stations_df are not referenced by any
# later cell visible in this notebook — confirm whether they are still needed.
entrance_exit_df = spark.read.table("mta_bronze.entrance_exit")
dim_borough_df = spark.read.table("mta_silver.dim_borough")
lkp_stations_df = spark.read.table("mta_silver.lkp_station")

In [0]:
# Columns whose values are joined with "_" and MD5-hashed to build eex_sk.
# NOTE(review): concat_ws skips NULL inputs, so a row with a missing
# coordinate or entrance type is hashed over fewer fields — confirm these
# columns are never NULL in the bronze table.
sk_source_cols = [
    "eex_entrance_latitude",
    "eex_entrance_longitude",
    "eex_entrance_type",
]

entrance_exit_trans_df = (
    entrance_exit_df
    # Complex id is cast to an integer column.
    .withColumn("eex_complex_id", col("eex_complex_id").cast(IntegerType()))
    # Flags become strict booleans: True only for the exact string "YES";
    # every other value, including NULL, falls through to False.
    .withColumn(
        "eex_entry_allowed",
        when(col("eex_entry_allowed") == "YES", True).otherwise(False),
    )
    .withColumn(
        "eex_exit_allowed",
        when(col("eex_exit_allowed") == "YES", True).otherwise(False),
    )
    # Surrogate key: MD5 hex digest of "<latitude>_<longitude>_<entrance type>".
    .withColumn("eex_sk", md5(concat_ws("_", *sk_source_cols)))
)
    

In [0]:
# Output columns, in the order they are written to the target table.
final_columns = [
    "eex_sk",
    "eex_station_id",
    "eex_gtfs_stop_id",
    "eex_entrance_type",
    "eex_entry_allowed",
    "eex_exit_allowed",
    "eex_entrance_latitude",
    "eex_entrance_longitude",
    "eex_ingestion_date",
    "eex_source",
]

# Project the transformed frame down to exactly those columns.
entrance_exit_final_df = entrance_exit_trans_df.select(*final_columns)

In [0]:
# Persist the final frame as the Delta table mta_silver.dim_entrance_exit,
# replacing existing data and partitioning by entrance type.
# NOTE(review): combined with mode("overwrite"), mergeSchema only evolves the
# table schema additively; replacing the schema outright would need
# overwriteSchema instead — confirm which behaviour is intended.
(
    entrance_exit_final_df.write
    .format("delta")
    .mode("overwrite")
    .option("mergeSchema", "true")
    .partitionBy("eex_entrance_type")
    .saveAsTable("mta_silver.dim_entrance_exit")
)

In [0]:
%sql
-- Preview up to 20 rows of the table written by the previous cell.
SELECT *
FROM mta_silver.dim_entrance_exit
LIMIT 20;

eex_sk,eex_station_id,eex_gtfs_stop_id,eex_entrance_type,eex_entry_allowed,eex_exit_allowed,eex_entrance_latitude,eex_entrance_longitude,eex_ingestion_date,eex_source
1dd375f0153da976f710858417059601,25,R29,Easement - Street,True,True,40.6920651,-73.9851702,2025-03-09T06:16:21.961Z,data.gov
617dd237c968e87f501fdcd99e6c5f12,26,R30,Easement - Street,True,True,40.6912625,-73.9823804,2025-03-09T06:16:21.961Z,data.gov
7362e91ef628b4cec03d0a173ccafa52,26,R30,Easement - Street,True,True,40.68981,-73.981651,2025-03-09T06:16:21.961Z,data.gov
4745be3a51fc84c2c768c06d7ac9633b,26,R30,Easement - Street,True,True,40.689804,-73.981164,2025-03-09T06:16:21.961Z,data.gov
73a9ce65ba295285b8a0ed2e3fd638a8,26,R30,Easement - Street,True,True,40.68974,-73.981156,2025-03-09T06:16:21.961Z,data.gov
b0ece91fddc5f99f0a9591eaf8560b7d,26,R30,Easement - Street,True,True,40.689817,-73.981082,2025-03-09T06:16:21.961Z,data.gov
17516fb345fd9d9ed15d9a59687eef88,39,R45,Easement - Street,True,True,40.6159342,-74.0308333,2025-03-09T06:16:21.961Z,data.gov
1bce0906054ddd4fa9f57619669c3ab7,40,D24,Easement - Street,True,True,40.6852595,-73.9775209,2025-03-09T06:16:21.961Z,data.gov
0c0c47128504902bd37147a0166009f9,40,D24,Easement - Street,True,True,40.6834124,-73.9765033,2025-03-09T06:16:21.961Z,data.gov
d048b898f25a3a7546db19cfd42cb182,174,A41,Easement - Street,True,True,40.6924743,-73.9875859,2025-03-09T06:16:21.961Z,data.gov


In [0]:
# End the notebook run here and return the string "Success" as its exit value
# (e.g. to a caller that started this notebook via dbutils.notebook.run).
dbutils.notebook.exit("Success")