In [0]:
from pyspark.sql.functions import coalesce, col, concat_ws, md5
from pyspark.sql.types import BooleanType

In [0]:
# Bronze-layer station records: the rows this notebook conforms.
stations_df = spark.read.table("mta_bronze.station")

# Silver borough dimension, joined below on brh_nk to obtain brh_name.
dim_boroughs_df = spark.read.table("mta_silver.dim_borough")

# Silver station lookup, joined below on lks_abbr to obtain lks_name.
lkp_stations_df = spark.read.table("mta_silver.lkp_station")

In [0]:
# Conform the bronze stations to the silver dimension layout:
#   * resolve the borough value through dim_borough (brh_nk -> brh_name)
#   * resolve the stop name through lkp_station (lks_abbr -> lks_name)
#   * derive the surrogate key stn_sk
#   * cast the three ADA columns to booleans
stations_trans_df = (
    stations_df
    .join(dim_boroughs_df, stations_df.stn_borough == dim_boroughs_df.brh_nk, "left")
    .join(lkp_stations_df, stations_df.stn_stop_name == lkp_stations_df.lks_abbr, "left")
    # Both joins are LEFT joins, so stations without a match are kept. For those
    # rows brh_name / lks_name are NULL; fall back to the bronze value instead of
    # overwriting it with NULL (which would also create a NULL partition value).
    .withColumn("stn_borough", coalesce(col("brh_name"), col("stn_borough")))
    .withColumn("stn_stop_name", coalesce(col("lks_name"), col("stn_stop_name")))
    # Surrogate key: md5 of "<stn_gtfs_stop_id>_<stn_id>". concat_ws skips NULL
    # inputs, so a NULL component is silently dropped from the hashed string.
    .withColumn("stn_sk", md5(concat_ws("_", col("stn_gtfs_stop_id"), col("stn_id"))))
    # NOTE(review): the boolean cast assumes the bronze ADA columns hold values
    # that map cleanly to true/false — confirm against the bronze schema.
    .withColumn("stn_ada", col("stn_ada").cast(BooleanType()))
    .withColumn("stn_ada_northbound", col("stn_ada_northbound").cast(BooleanType()))
    .withColumn("stn_ada_southbound", col("stn_ada_southbound").cast(BooleanType()))
)

In [0]:
# Output layout of the station dimension, in column order. The join helper
# columns (brh_*, lks_*) are intentionally left out, and stn_id is exposed
# as the natural key stn_nk.
dim_station_columns = [
    "stn_sk",
    col("stn_id").alias("stn_nk"),
    "stn_gtfs_stop_id",
    "stn_division",
    "stn_stop_name",
    "stn_borough",
    "stn_structure",
    "stn_gtfs_latitude",
    "stn_gtfs_longitude",
    "stn_north_direction_lbl",
    "stn_south_direction_lbl",
    "stn_ada",
    "stn_ada_northbound",
    "stn_ada_southbound",
    "stn_ada_notes",
    "stn_ingestion_date",
    "stn_source",
]

stations_final_df = stations_trans_df.select(*dim_station_columns)

In [0]:
# Persist the station dimension as a Delta table partitioned by borough,
# replacing the previous contents of mta_silver.dim_station.
(
    stations_final_df.write
    .format("delta")
    .mode("overwrite")
    .option("mergeSchema", "true")
    .partitionBy("stn_borough")
    .saveAsTable("mta_silver.dim_station")
)

In [0]:
%sql
-- Sanity check: inspect the rows now stored in the silver station dimension.
SELECT * FROM mta_silver.dim_station;

stn_sk,stn_nk,stn_gtfs_stop_id,stn_division,stn_stop_name,stn_borough,stn_structure,stn_gtfs_latitude,stn_gtfs_longitude,stn_north_direction_lbl,stn_south_direction_lbl,stn_ada,stn_ada_northbound,stn_ada_southbound,stn_ada_notes,stn_ingestion_date,stn_source
964ea270e973326480e598effb3f8ba4,348,253,IRT,Rockaway Avenue,Brooklyn,Elevated,40.662549,-73.908946,Manhattan,New Lots,False,False,False,,2025-03-09T06:42:58.45Z,data.gov
eaa7e4f695bd04c878732415342cb8d9,120,L08,BMT,Bedford Avenue,Brooklyn,Subway,40.717304,-73.956872,Manhattan,Outbound,True,True,True,,2025-03-09T06:42:58.45Z,data.gov
2db498271c33c0774305a2a86d35a7e7,98,M12,BMT,Flushing Avenue,Brooklyn,Elevated,40.70026,-73.941126,Outbound,Manhattan,True,True,True,,2025-03-09T06:42:58.45Z,data.gov
33b0de4e2555802b483e0a7cec3d425a,359,247,IRT,Flatbush Avenue-Brooklyn College,Brooklyn,Subway,40.632836,-73.947642,Manhattan,Last Stop,True,True,True,,2025-03-09T06:42:58.45Z,data.gov
3c74dbfa4eb174de84539cb85a9b6dd2,338,235,IRT,Atlantic Avenue-Barclays Center,Brooklyn,Subway,40.684359,-73.977666,Manhattan,Outbound,True,True,True,,2025-03-09T06:42:58.45Z,data.gov
29288e754b63712fa94ed919e92f559d,71,N02,BMT,8th Avenue,Brooklyn,Open Cut,40.635064,-74.011719,Manhattan,Coney Island,True,True,True,,2025-03-09T06:42:58.45Z,data.gov
9f88794f2c91d81d090e9f53ff4ee9e6,178,A45,IND,Franklin Avenue,Brooklyn,Subway,40.68138,-73.956848,Manhattan,Outbound,True,True,True,,2025-03-09T06:42:58.45Z,data.gov
4890fb21632eecfb35983401267c4f26,132,L22,BMT,Broadway Junction,Brooklyn,Elevated,40.678856,-73.90324,Manhattan,Outbound,False,False,False,,2025-03-09T06:42:58.45Z,data.gov
d7cb075bc8532b2315d762c9eaf14a9f,88,J21,BMT,Norwood Avenue,Brooklyn,Elevated,40.68141,-73.880039,Outbound,Manhattan,False,False,False,,2025-03-09T06:42:58.45Z,data.gov
461830520da3e82f3608b21d47f87248,56,D41,BMT,Ocean Parkway,Brooklyn,Elevated,40.576312,-73.968501,Manhattan,Coney Island,False,False,False,,2025-03-09T06:42:58.45Z,data.gov


In [0]:
# End the notebook run and hand the string "Success" back as its exit value
# (presumably consumed by an orchestrating job or parent notebook — confirm).
dbutils.notebook.exit("Success")