In [0]:
from pyspark.sql.functions import md5, concat_ws, col, when

In [0]:
# Inputs for the wifi-location fact build.
# Bronze source rows (one row per wifi location record).
wifi_location_df = spark.read.table("mta_bronze.wifi_location")

# Borough dimension; matched on brh_name further down to obtain brh_sk.
dim_boroughs_df = spark.read.table("mta_silver.dim_borough")

# Station lookup; matched on lks_abbr further down (inner join, so it also filters).
lkp_stations_df = spark.read.table("mta_silver.lkp_station")

# Station dimension; matched on GTFS longitude/latitude further down to obtain stn_sk.
stations_df = spark.read.table("mta_silver.dim_station")

In [0]:
# Text columns that carry "Yes" for true; anything else (including NULL) becomes False.
yes_no_columns = [
    "wfi_is_historical",
    "wfi_wifi_available",
    "wfi_att",
    "wfi_sprint",
    "wfi_tmobile",
    "wfi_verizon",
]

# Boolean flags that are added up (as 0/1) into wfi_connectivity_score.
# wfi_is_historical is converted above but is not part of the score.
score_columns = [
    "wfi_wifi_available",
    "wfi_att",
    "wfi_sprint",
    "wfi_tmobile",
    "wfi_verizon",
]

# A wifi row is tied to a station when both coordinates match exactly.
# NOTE(review): exact equality on coordinates leaves stn_sk NULL whenever the
# two sources differ in precision -- confirm this is acceptable.
station_match = (
    (wifi_location_df.wfi_longitude == stations_df.stn_gtfs_longitude)
    & (wifi_location_df.wfi_latitude == stations_df.stn_gtfs_latitude)
)

# Step 1: keep rows with a station, resolve borough and station lookup (both
# inner joins drop unmatched rows), then optionally attach the station dimension.
wifi_location_joined_df = (
    wifi_location_df
    .where("wfi_station is not null")
    .join(dim_boroughs_df, wifi_location_df.wfi_borough == dim_boroughs_df.brh_name, "inner")
    .join(lkp_stations_df, wifi_location_df.wfi_station == lkp_stations_df.lks_abbr, "inner")
    .join(stations_df, station_match, "left")
)

# Step 2: surrogate key = md5 of station, station complex and lines joined by "_".
# NOTE(review): concat_ws skips NULL inputs, so rows that differ only in which
# of the three parts is NULL can produce the same wfi_sk -- confirm.
wifi_location_keyed_df = wifi_location_joined_df.withColumn(
    "wfi_sk",
    md5(concat_ws("_", col("wfi_station"), col("wfi_station_complex"), col("wfi_lines"))),
)

# Step 3: turn every "Yes"/other text flag into a boolean, one column at a time.
wifi_location_flagged_df = wifi_location_keyed_df
for flag_name in yes_no_columns:
    wifi_location_flagged_df = wifi_location_flagged_df.withColumn(
        flag_name,
        when(col(flag_name) == "Yes", True).otherwise(False),
    )

# Step 4: connectivity score = number of True flags among score_columns.
connectivity_score = col(score_columns[0]).cast("int")
for flag_name in score_columns[1:]:
    connectivity_score = connectivity_score + col(flag_name).cast("int")

# Step 5: add the score and keep a single row per surrogate key.
# NOTE(review): if the left join above fans a row out to several stations,
# dropDuplicates keeps an arbitrary one of them -- confirm this is intended.
wifi_location_transform_df = (
    wifi_location_flagged_df
    .withColumn("wfi_connectivity_score", connectivity_score)
    .dropDuplicates(["wfi_sk"])
)


In [0]:
# Columns that make up the fact table, in output order:
# surrogate keys first, then the boolean flags and score, then lineage columns.
fact_columns = [
    "wfi_sk",
    "brh_sk",
    "stn_sk",
    "wfi_wifi_available",
    "wfi_att",
    "wfi_sprint",
    "wfi_tmobile",
    "wfi_verizon",
    "wfi_connectivity_score",
    "wfi_ingestion_date",
    "wfi_source",
]

# Project the transformed frame down to exactly those columns.
wifi_location_final_df = wifi_location_transform_df.select(*fact_columns)

In [0]:
# Persist the fact table as a Delta table, replacing its contents on every run.
# NOTE(review): with mode("overwrite"), Delta documents `overwriteSchema` as the
# option for replacing an existing schema; `mergeSchema` only evolves it --
# confirm that is the intended behaviour.
(
    wifi_location_final_df.write
    .format("delta")
    .mode("overwrite")
    .option("mergeSchema", "true")
    .saveAsTable("mta_silver.fct_wifi_location")
)

In [0]:
%sql
-- Preview the silver fact table to eyeball the written rows.
SELECT
  *
FROM
  mta_silver.fct_wifi_location;

wfi_sk,brh_sk,stn_sk,wfi_wifi_available,wfi_att,wfi_sprint,wfi_tmobile,wfi_verizon,wfi_connectivity_score,wfi_ingestion_date,wfi_source
aed5060b271313610349a1a5d6f556f7,69691c7bdcc3ce6d5d8a1361f22d04ac,5250fa53ec8a4a9ab0bff2a85bc79ab2,False,False,True,False,False,1,2025-03-09T06:46:23.488Z,data.gov
ba697363cfc0a8da78d3da51fed21d24,69691c7bdcc3ce6d5d8a1361f22d04ac,,False,True,True,True,True,4,2025-03-09T06:46:23.488Z,data.gov
ad85c0c097c601c641760bf86e4b5337,69691c7bdcc3ce6d5d8a1361f22d04ac,ba52fd5950b5da54590e28f94b4c643a,False,False,True,False,False,1,2025-03-09T06:46:23.488Z,data.gov
e48ac383cf8feea7a1e01b9bfd8f6f58,69691c7bdcc3ce6d5d8a1361f22d04ac,f9b69c04a62ca9a82d1db7ab2cf4b38e,False,True,True,True,True,4,2025-03-09T06:46:23.488Z,data.gov
adef4acc7b2591a9a63707aede05f874,69691c7bdcc3ce6d5d8a1361f22d04ac,bf6194f5fa763f187367eb2eb5ab573c,False,False,True,False,False,1,2025-03-09T06:46:23.488Z,data.gov
de40bab4d8c83381fcb73043a483ea9a,69691c7bdcc3ce6d5d8a1361f22d04ac,a3f2302c797e5415e26722ed5bc9cec7,False,False,True,False,False,1,2025-03-09T06:46:23.488Z,data.gov
12029b21b92eff67a1c2d2a822be5181,69691c7bdcc3ce6d5d8a1361f22d04ac,77c47ce92ce9d71771795af337dc5917,False,True,True,True,True,4,2025-03-09T06:46:23.488Z,data.gov
1e57aff4eb475ba9108c44adadd325a2,69691c7bdcc3ce6d5d8a1361f22d04ac,a67ebaa92593889589c917ab6e423781,False,False,True,False,False,1,2025-03-09T06:46:23.488Z,data.gov
f50b831576d5fccf367dca326b404291,69691c7bdcc3ce6d5d8a1361f22d04ac,0ce2e82ea41d08c001a3fc3576d14b53,False,True,True,True,True,4,2025-03-09T06:46:23.488Z,data.gov
c50c9ca66d09a1e08310718557563388,69691c7bdcc3ce6d5d8a1361f22d04ac,,False,True,True,True,True,4,2025-03-09T06:46:23.488Z,data.gov


In [0]:
# End the notebook run here and hand the string "Success" back as its exit
# value (what a calling notebook/job receives from this run).
dbutils.notebook.exit("Success")