In [0]:
%run "../utils/custom_functions"

In [0]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, DoubleType, BooleanType
from pyspark.sql.functions import lit

In [0]:
# Column layout declared for the raw MTA Wi-Fi locations CSV.
# Every field is marked non-nullable (third argument False); only the two
# coordinate columns are doubles, everything else is kept as a string.
wifi_location_schema = (
    StructType()
    .add("STATION", StringType(), False)
    .add("STATION_COMPLEX", StringType(), False)
    .add("LINES", StringType(), False)
    .add("IS_HISTORICAL", StringType(), False)
    .add("BOROUGH", StringType(), False)
    .add("COUNTY", StringType(), False)
    .add("LATITUDE", DoubleType(), False)
    .add("LONGITUDE", DoubleType(), False)
    .add("WIFI_AVAILABLE", StringType(), False)
    .add("ATT", StringType(), False)
    .add("SPRINT", StringType(), False)
    .add("TMOBILE", StringType(), False)
    .add("VERIZON", StringType(), False)
    .add("Location", StringType(), False)
    .add("Georeference", StringType(), False)
)

In [0]:
# Load the raw Wi-Fi locations CSV, typed by wifi_location_schema.
# The schema must be handed to DataFrameReader.schema(); passing it through
# .option("schema", ...) merely sets an unrecognised string option, so the CSV
# reader ignored it and every column (LATITUDE/LONGITUDE included) was read as
# a string.
# NOTE(review): with an explicit schema and header=True, Spark's default
# enforceSchema behaviour applies the schema by column position — confirm the
# file's column order matches wifi_location_schema.
# NOTE(review): a Delta table previously written from the all-string frame will
# hold string latitude/longitude; the first overwrite after this change may
# need the table's schema replaced — confirm before running against it.
wifi_location_df = spark.read \
    .option("header", True) \
    .schema(wifi_location_schema) \
    .csv("/mnt/mtasubwaydl/raw/mta_wi-fi_locations.csv")

In [0]:
# Raw CSV column -> bronze column name; every bronze column carries the
# "wfi_" prefix. Insertion order is the order the renames are applied in.
wifi_column_renames = {
    "STATION": "wfi_station",
    "STATION_COMPLEX": "wfi_station_complex",
    "LINES": "wfi_lines",
    "IS_HISTORICAL": "wfi_is_historical",
    "BOROUGH": "wfi_borough",
    "COUNTY": "wfi_county",
    "LATITUDE": "wfi_latitude",
    "LONGITUDE": "wfi_longitude",
    "WIFI_AVAILABLE": "wfi_wifi_available",
    "ATT": "wfi_att",
    "SPRINT": "wfi_sprint",
    "TMOBILE": "wfi_tmobile",
    "VERIZON": "wfi_verizon",
    "Location": "wfi_location",
    "Georeference": "wfi_georeference",
}

# Start from the untouched input frame each time so re-running the cell is
# idempotent, then apply one rename per mapping entry.
wifi_location_renamed_df = wifi_location_df
for raw_name, bronze_name in wifi_column_renames.items():
    wifi_location_renamed_df = wifi_location_renamed_df.withColumnRenamed(raw_name, bronze_name)

In [0]:
# add_ingestion_date comes from the %run'd ../utils/custom_functions notebook;
# presumably it appends the wfi_ingestion_date column seen in the sample
# output — confirm against that notebook.
wifi_location_dated_df = add_ingestion_date(wifi_location_renamed_df, alias="wfi")

# Tag every row with a constant marker for where the data came from.
wifi_location_final_df = wifi_location_dated_df.withColumn("wfi_source", lit("data.gov"))

In [0]:
# Persist the final frame as the bronze Delta dataset, partitioned by borough.
# This is a full refresh (mode "overwrite"), so the incoming frame is also
# allowed to replace the stored schema: without overwriteSchema, Delta rejects
# the write whenever a column's type differs from what an earlier run stored.
wifi_location_final_df.write \
    .mode("overwrite") \
    .format("delta") \
    .option("overwriteSchema", "true") \
    .partitionBy("wfi_borough") \
    .save("/mnt/mtasubwaydl/bronze/wifi_location")

# Register the Delta location as a catalog table only when it is not already
# registered; subsequent runs just rewrite the files at the same path.
if not spark.catalog.tableExists("mta_bronze.wifi_location"):
    spark.catalog.createTable(
        tableName="mta_bronze.wifi_location",
        path="/mnt/mtasubwaydl/bronze/wifi_location",
        source="delta",
    )

In [0]:
%sql
-- Spot-check: preview up to 10 rows of the registered bronze table.
SELECT * FROM mta_bronze.wifi_location LIMIT 10;

wfi_station,wfi_station_complex,wfi_lines,wfi_is_historical,wfi_borough,wfi_county,wfi_latitude,wfi_longitude,wfi_wifi_available,wfi_att,wfi_sprint,wfi_tmobile,wfi_verizon,wfi_location,wfi_georeference,wfi_ingestion_date,wfi_source
135th St,"135 St (B,C)",BC,No,Manhattan,New York County,40.817894,-73.947649,No,Yes,Yes,Yes,Yes,"(40.817894, -73.947649)",POINT (-73.947649 40.817894),2025-03-09T07:19:09.019Z,data.gov
7th Ave,"7 Av (B,D,E)",BDE,No,Manhattan,New York County,40.762862,-73.981637,No,Yes,Yes,Yes,Yes,"(40.762862, -73.981637)",POINT (-73.981637 40.762862),2025-03-09T07:19:09.019Z,data.gov
Prince St,"Prince St (N,R)",R,No,Manhattan,New York County,40.724329,-73.997702,No,No,Yes,No,No,"(40.724329, -73.997702)",POINT (-73.997702 40.724329),2025-03-09T07:19:09.019Z,data.gov
Essex St,"Delancey St (F)/Essex St (J,M,Z)",JMZ,No,Manhattan,New York County,40.718315,-73.987437,No,Yes,Yes,Yes,Yes,"(40.718315, -73.987437)",POINT (-73.987437 40.718315),2025-03-09T07:19:09.019Z,data.gov
103rd St,103 St (6),6,No,Manhattan,New York County,40.7906,-73.947478,No,Yes,Yes,Yes,Yes,"(40.7906, -73.947478)",POINT (-73.947478 40.7906),2025-03-09T07:19:09.019Z,data.gov
Broadway-Nassau,"Fulton St (J,M,Z,2,3,4,5)/Bway-Nassau St (A,C)",AC,No,Manhattan,New York County,40.710197,-74.007691,No,Yes,Yes,Yes,Yes,"(40.710197, -74.007691)",POINT (-74.007691 40.710197),2025-03-09T07:19:09.019Z,data.gov
34th St,"34 St-Penn Station (1,2,3)",123,No,Manhattan,New York County,40.750373,-73.991057,No,No,No,No,No,"(40.750373, -73.991057)",POINT (-73.991057 40.750373),2025-03-09T07:19:09.019Z,data.gov
103rd St,"103 St (B,C)",BC,No,Manhattan,New York County,40.796092,-73.961454,No,Yes,Yes,Yes,Yes,"(40.796092, -73.961454)",POINT (-73.961454 40.796092),2025-03-09T07:19:09.019Z,data.gov
5th Ave,"42 St-Bryant Pk (B,D,F,M)/5 Av (7)",7,No,Manhattan,New York County,40.753821,-73.981963,No,Yes,Yes,Yes,Yes,"(40.753821, -73.981963)",POINT (-73.981963 40.753821),2025-03-09T07:19:09.019Z,data.gov
145th St,145 St (3),23,Yes,Manhattan,New York County,40.820421,-73.936245,No,Yes,Yes,Yes,Yes,"(40.820421, -73.936245)",POINT (-73.936245 40.820421),2025-03-09T07:19:09.019Z,data.gov


In [0]:
# End the notebook run, passing a status string to dbutils.notebook.exit.
notebook_exit_status = "Success"
dbutils.notebook.exit(notebook_exit_status)