In [0]:
%run "../utils/custom_functions"

In [0]:
from pyspark.sql.functions import md5, concat_ws, col, when

In [0]:
# Load the four input tables by name; each read is independent of the others.
wifi_location_df = spark.read.table("mta_bronze.wifi_location")
dim_boroughs_df = spark.read.table("mta_silver.dim_borough")
lkp_stations_df = spark.read.table("mta_silver.lkp_station")
stations_df = spark.read.table("mta_silver.dim_station")

In [0]:
# "Yes"/other text columns that are recoded to booleans below.
wfi_yes_no_columns = [
    "wfi_is_historical",
    "wfi_wifi_available",
    "wfi_att",
    "wfi_sprint",
    "wfi_tmobile",
    "wfi_verizon",
]

# Boolean flags that are added up into wfi_connectivity_score.
wfi_score_columns = [
    "wfi_wifi_available",
    "wfi_att",
    "wfi_sprint",
    "wfi_tmobile",
    "wfi_verizon",
]

# A station row matches when both coordinates are exactly equal.
station_coordinates_match = (
    (wifi_location_df.wfi_longitude == stations_df.stn_gtfs_longitude)
    & (wifi_location_df.wfi_latitude == stations_df.stn_gtfs_latitude)
)

# Keep rows with a station, attach borough and station lookup (inner joins),
# then attach the station dimension (left join, so unmatched rows survive).
wifi_location_transform_df = (
    wifi_location_df
    .where("wfi_station is not null")
    .join(dim_boroughs_df, wifi_location_df.wfi_borough == dim_boroughs_df.brh_name, "inner")
    .join(lkp_stations_df, wifi_location_df.wfi_station == lkp_stations_df.lks_abbr, "inner")
    .join(stations_df, station_coordinates_match, "left")
)

# Surrogate key: md5 over station, station complex and lines joined with "_".
wifi_location_transform_df = wifi_location_transform_df.withColumn(
    "wfi_sk",
    md5(concat_ws("_", col("wfi_station"), col("wfi_station_complex"), col("wfi_lines"))),
)

# Recode each flag in place: "Yes" -> True, anything else (including null) -> False.
for flag_name in wfi_yes_no_columns:
    wifi_location_transform_df = wifi_location_transform_df.withColumn(
        flag_name,
        when(col(flag_name) == "Yes", True).otherwise(False),
    )

# Connectivity score = number of True flags (each boolean cast to int and summed).
connectivity_score = col(wfi_score_columns[0]).cast("int")
for flag_name in wfi_score_columns[1:]:
    connectivity_score = connectivity_score + col(flag_name).cast("int")

# Keep a single row per surrogate key.
wifi_location_transform_df = (
    wifi_location_transform_df
    .withColumn("wfi_connectivity_score", connectivity_score)
    .dropDuplicates(["wfi_sk"])
)


In [0]:
# Columns kept for the fact table, in output order.
fct_wifi_location_columns = [
    "wfi_sk",
    "brh_sk",
    "stn_sk",
    "wfi_wifi_available",
    "wfi_att",
    "wfi_sprint",
    "wfi_tmobile",
    "wfi_verizon",
    "wfi_connectivity_score",
    "wfi_ingestion_date",
    "wfi_source",
]

wifi_location_final_df = wifi_location_transform_df.select(*fct_wifi_location_columns)

In [0]:
# Create the Delta table on the first run; otherwise hand the batch to
# merge_delta_data.
#
# The existence check uses the public spark.catalog.tableExists API instead of
# reaching through the private spark._jsparkSession JVM handle, which is not a
# supported interface and is not available on Spark Connect sessions.
# NOTE(review): passing a "db.table" qualified name relies on PySpark 3.4+
# behaviour - confirm the cluster runtime.
if not spark.catalog.tableExists("mta_silver.fct_wifi_location"):
    # First load: write the batch as a new Delta table partitioned by
    # wfi_ingestion_date.
    (
        wifi_location_final_df.write
        .mode("overwrite")
        .format("delta")
        .partitionBy("wfi_ingestion_date")
        .option("mergeSchema", "true")
        .saveAsTable("mta_silver.fct_wifi_location")
    )
else:
    # Table already exists: delegate to merge_delta_data, matching source and
    # target rows on the surrogate key.
    # NOTE(review): merge_delta_data is not defined in this notebook; presumably
    # it comes from the %run'd custom_functions notebook - confirm its contract there.
    merge_delta_data(
        wifi_location_final_df,
        db_name="mta_silver",
        table_name="fct_wifi_location",
        merge_condition="tgt.wfi_sk = src.wfi_sk",
        partition_column="wfi_ingestion_date"
    )

In [0]:
%sql
-- Preview ten rows of mta_silver.fct_wifi_location as a quick sanity check.
SELECT * FROM mta_silver.fct_wifi_location LIMIT 10;

wfi_sk,brh_sk,stn_sk,wfi_wifi_available,wfi_att,wfi_sprint,wfi_tmobile,wfi_verizon,wfi_connectivity_score,wfi_ingestion_date,wfi_source
00a2b29e1211cdedf218905bb04e1374,69691c7bdcc3ce6d5d8a1361f22d04ac,57b34d206a27be9b82be83eba7f59e74,False,True,True,False,True,3,2025-03-09T07:19:09.019Z,data.gov
02d809f20f61da3c34c7da30a2dd48ed,69691c7bdcc3ce6d5d8a1361f22d04ac,9f9646a27e60ac1c2060dfa761905216,False,True,True,True,True,4,2025-03-09T07:19:09.019Z,data.gov
030bd69c59dc33a2bbdd3cb981364bf8,69691c7bdcc3ce6d5d8a1361f22d04ac,355a45687e4d90030e80e3785c3d0381,False,True,True,True,True,4,2025-03-09T07:19:09.019Z,data.gov
033b83800e3d395ffff037bdc8963b91,c64a5dae48f04e16deab001d4b9dbd80,317a5d87ce142d46def1294db194daf7,False,True,True,False,True,3,2025-03-09T07:19:09.019Z,data.gov
033d08d64cd3b13c2f364099244dc81f,c64a5dae48f04e16deab001d4b9dbd80,22cf8f810d2d2ab5c81a14b1da753bec,False,True,True,False,True,3,2025-03-09T07:19:09.019Z,data.gov
03fcb27ad916dd7b79180fcdc7e4dabe,69691c7bdcc3ce6d5d8a1361f22d04ac,f744f598228967b3fddd45959fff2119,False,False,True,False,False,1,2025-03-09T07:19:09.019Z,data.gov
050cc9136c40efe0053d4b72c8da0362,c64a5dae48f04e16deab001d4b9dbd80,a9e263f1430f8a2ab5376a6cc7dea694,False,False,False,False,False,0,2025-03-09T07:19:09.019Z,data.gov
0574de353d8ac4321b6bed4b7bcdec8b,c64a5dae48f04e16deab001d4b9dbd80,f523bf78dfd57229f45c64d3a8624897,False,False,False,False,False,0,2025-03-09T07:19:09.019Z,data.gov
05d152a8ca51a3944d8f7a90a06c6cc1,69691c7bdcc3ce6d5d8a1361f22d04ac,0e36fb16b83fcfd99df4659120ccdfe2,False,True,True,True,True,4,2025-03-09T07:19:09.019Z,data.gov
0637b73c1f625330f1416508db51a3b2,69691c7bdcc3ce6d5d8a1361f22d04ac,f6033621ef759c209be196d036ef6b96,False,True,True,True,True,4,2025-03-09T07:19:09.019Z,data.gov


In [0]:
# End the notebook run, passing "Success" back as its exit value.
dbutils.notebook.exit("Success")