In [0]:
from pyspark.sql.types import IntegerType
from pyspark.sql.functions import col, when, md5, concat_ws

In [0]:
# Load the source tables for this notebook: the bronze entrance/exit table
# plus two silver tables (borough dimension and station lookup).
# NOTE(review): dim_borough_df and lkp_stations_df are not referenced by the
# other cells visible in this notebook — confirm whether they are still needed.
entrance_exit_df = spark.read.table("mta_bronze.entrance_exit")
dim_borough_df = spark.read.table("mta_silver.dim_borough")
lkp_stations_df = spark.read.table("mta_silver.lkp_station")

In [0]:
# Type-normalise the bronze columns and derive the surrogate key.
entrance_exit_trans_df = (
    entrance_exit_df
    # Complex id is cast to an integer.
    # NOTE(review): eex_complex_id is not among the columns selected for the
    # final frame later in this notebook — confirm whether it should be.
    .withColumn(
        "eex_complex_id",
        col("eex_complex_id").cast(IntegerType()),
    )
    # Flags become booleans: only the exact string "YES" maps to True; every
    # other value, including NULL, falls through to False.
    .withColumn(
        "eex_entry_allowed",
        when(col("eex_entry_allowed") == "YES", True).otherwise(False),
    )
    .withColumn(
        "eex_exit_allowed",
        when(col("eex_exit_allowed") == "YES", True).otherwise(False),
    )
    # Surrogate key: MD5 of latitude, longitude and entrance type joined by "_".
    # NOTE(review): concat_ws omits NULL inputs, so rows that differ only in
    # which of these fields is NULL could hash to the same key — verify the
    # inputs are always populated.
    .withColumn(
        "eex_sk",
        md5(
            concat_ws(
                "_",
                "eex_entrance_latitude",
                "eex_entrance_longitude",
                "eex_entrance_type",
            )
        ),
    )
)
    

In [0]:
# Project the transformed frame down to the columns written to the silver
# table, in the order they should appear.
entrance_exit_final_df = entrance_exit_trans_df.select(
    # Keys / identifiers
    "eex_sk",
    "eex_station_id",
    "eex_gtfs_stop_id",
    # Entrance attributes
    "eex_entrance_type",
    "eex_entry_allowed",
    "eex_exit_allowed",
    "eex_entrance_latitude",
    "eex_entrance_longitude",
    # Lineage columns
    "eex_ingestion_date",
    "eex_source",
)

In [0]:
# Overwrite the Delta table mta_silver.dim_entrance_exit with the final frame,
# partitioned by entrance type and with the mergeSchema option enabled.
(
    entrance_exit_final_df.write
    .format("delta")
    .mode("overwrite")
    .option("mergeSchema", "true")
    .partitionBy("eex_entrance_type")
    .saveAsTable("mta_silver.dim_entrance_exit")
)

In [0]:
%sql
-- Preview up to 20 rows of the table written by the previous cell.
SELECT * FROM mta_silver.dim_entrance_exit LIMIT 20;

In [0]:
# End the notebook run, passing the string "Success" as its exit value.
dbutils.notebook.exit("Success")