In [0]:
from pyspark.sql.functions import md5, concat_ws, col, when

In [0]:
# Wi-Fi location records read from the mta_bronze schema.
wifi_location_df = spark.read.table("mta_bronze.wifi_location")

# mta_silver tables that the transform cell joins against the Wi-Fi records.
dim_boroughs_df = spark.read.table("mta_silver.dim_borough")
lkp_stations_df = spark.read.table("mta_silver.lkp_station")
stations_df = spark.read.table("mta_silver.dim_station")

In [0]:
# Join the Wi-Fi records to the borough table, the station lookup and the
# station table. Rows without a station are dropped first; the two inner joins
# keep only records whose borough name and station abbreviation find a match,
# while the coordinate match against stations_df is optional (left join).
wifi_joined_df = (
    wifi_location_df
    .where("wfi_station is not null")
    .join(dim_boroughs_df, wifi_location_df.wfi_borough == dim_boroughs_df.brh_name, "inner")
    .join(lkp_stations_df, wifi_location_df.wfi_station == lkp_stations_df.lks_abbr, "inner")
    .join(
        stations_df,
        (wifi_location_df.wfi_longitude == stations_df.stn_gtfs_longitude)
        & (wifi_location_df.wfi_latitude == stations_df.stn_gtfs_latitude),
        "left",
    )
)

# Surrogate key: MD5 of station, station complex and lines joined with "_".
# NOTE(review): concat_ws skips NULL inputs, so rows that differ only in which
# key part is NULL can hash to the same wfi_sk - confirm this is acceptable.
wifi_keyed_df = wifi_joined_df.withColumn(
    "wfi_sk",
    md5(concat_ws("_", col("wfi_station"), col("wfi_station_complex"), col("wfi_lines"))),
)

# Convert the "Yes"/other text flags to booleans. Anything that is not exactly
# "Yes" (including NULL) becomes False.
yes_no_flag_names = [
    "wfi_is_historical",
    "wfi_wifi_available",
    "wfi_att",
    "wfi_sprint",
    "wfi_tmobile",
    "wfi_verizon",
]
wifi_flagged_df = wifi_keyed_df
for flag_name in yes_no_flag_names:
    wifi_flagged_df = wifi_flagged_df.withColumn(
        flag_name,
        when(col(flag_name) == "Yes", True).otherwise(False),
    )

# Connectivity score: number of True flags among Wi-Fi availability and the
# four carrier columns (wfi_is_historical is not counted).
scored_flag_names = [
    "wfi_wifi_available",
    "wfi_att",
    "wfi_sprint",
    "wfi_tmobile",
    "wfi_verizon",
]
connectivity_score = None
for flag_name in scored_flag_names:
    flag_as_int = col(flag_name).cast("int")
    connectivity_score = flag_as_int if connectivity_score is None else connectivity_score + flag_as_int

# NOTE(review): no ordering is applied before dropDuplicates, so when several
# joined rows share a wfi_sk the surviving row appears to be arbitrary.
wifi_location_transform_df = (
    wifi_flagged_df
    .withColumn("wfi_connectivity_score", connectivity_score)
    .dropDuplicates(["wfi_sk"])
)


In [0]:
# Columns kept for the fact table, in output order.
# brh_sk / stn_sk are presumably the keys contributed by the borough and
# station joins - verify against those tables' schemas.
fact_column_names = [
    "wfi_sk",
    "brh_sk",
    "stn_sk",
    "wfi_wifi_available",
    "wfi_att",
    "wfi_sprint",
    "wfi_tmobile",
    "wfi_verizon",
    "wfi_connectivity_score",
    "wfi_ingestion_date",
    "wfi_source",
]

wifi_location_final_df = wifi_location_transform_df.select(*fact_column_names)

In [0]:
# Replace the contents of mta_silver.fct_wifi_location with this run's result,
# written as a Delta table with the mergeSchema option enabled.
(
    wifi_location_final_df.write
    .mode("overwrite")
    .format("delta")
    .option("mergeSchema", "true")
    .saveAsTable("mta_silver.fct_wifi_location")
)

In [0]:
%sql
-- Display the rows of the fact table written by the previous cell.
SELECT * FROM mta_silver.fct_wifi_location;

In [0]:
# End the notebook run, passing the string "Success" to dbutils.notebook.exit.
dbutils.notebook.exit("Success")