In [0]:
%run "../utils/custom_functions"

In [0]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, DoubleType, BooleanType
from pyspark.sql.functions import lit

In [0]:
# Column layout declared for the raw Wi-Fi locations CSV, in file order.
# Everything is text except the two coordinate columns, and every field is
# declared non-nullable.
wifi_location_schema = StructType([
    StructField(column_name, column_type, False)
    for column_name, column_type in (
        ("STATION", StringType()),
        ("STATION_COMPLEX", StringType()),
        ("LINES", StringType()),
        ("IS_HISTORICAL", StringType()),
        ("BOROUGH", StringType()),
        ("COUNTY", StringType()),
        ("LATITUDE", DoubleType()),
        ("LONGITUDE", DoubleType()),
        ("WIFI_AVAILABLE", StringType()),
        ("ATT", StringType()),
        ("SPRINT", StringType()),
        ("TMOBILE", StringType()),
        ("VERIZON", StringType()),
        ("Location", StringType()),
        ("Georeference", StringType()),
    )
])

In [0]:
# Load the raw Wi-Fi locations CSV using the explicit wifi_location_schema.
# The schema must be supplied through DataFrameReader.schema(); passing it as
# .option("schema", ...) only sets an unrecognised CSV option that Spark
# ignores, which leaves every column (LATITUDE/LONGITUDE included) as a string.
# With an explicit schema the header row is still skipped, and column names and
# types come from the schema, applied by position.
# NOTE(review): a Delta table already written from the all-string version of
# this DataFrame will have string coordinate columns; overwriting it with the
# typed data needs a schema overwrite on the write side — confirm before rerun.
wifi_location_df = spark.read \
    .option("header", True) \
    .schema(wifi_location_schema) \
    .csv("/mnt/mtasubwaydl/raw/mta_wi-fi_locations.csv")

In [0]:
# Give every source column its bronze-layer name: the lower-cased original
# name carrying a "wfi_" prefix.
wifi_location_renamed_df = (
    wifi_location_df
    # station identity
    .withColumnRenamed("STATION", "wfi_station")
    .withColumnRenamed("STATION_COMPLEX", "wfi_station_complex")
    .withColumnRenamed("LINES", "wfi_lines")
    .withColumnRenamed("IS_HISTORICAL", "wfi_is_historical")
    # geography
    .withColumnRenamed("BOROUGH", "wfi_borough")
    .withColumnRenamed("COUNTY", "wfi_county")
    .withColumnRenamed("LATITUDE", "wfi_latitude")
    .withColumnRenamed("LONGITUDE", "wfi_longitude")
    # connectivity flags
    .withColumnRenamed("WIFI_AVAILABLE", "wfi_wifi_available")
    .withColumnRenamed("ATT", "wfi_att")
    .withColumnRenamed("SPRINT", "wfi_sprint")
    .withColumnRenamed("TMOBILE", "wfi_tmobile")
    .withColumnRenamed("VERIZON", "wfi_verizon")
    # combined coordinate representations
    .withColumnRenamed("Location", "wfi_location")
    .withColumnRenamed("Georeference", "wfi_georeference")
)

In [0]:
# Attach audit columns before persisting.
# add_ingestion_date is not defined in this notebook (presumably it comes from
# the %run'd custom_functions notebook — verify); it is called with the "wfi"
# alias. The data source is then recorded as a constant column.
wifi_location_final_df = (
    add_ingestion_date(wifi_location_renamed_df, alias="wfi")
    .withColumn("wfi_source", lit("data.gov"))
)

In [0]:
# Full-refresh write of the bronze Wi-Fi locations data, partitioned by borough.
# overwriteSchema lets the overwrite replace the stored table schema together
# with the data; without it Delta rejects the write whenever the DataFrame's
# column types differ from what an earlier run stored at this path.
wifi_location_final_df.write \
    .mode("overwrite") \
    .format("delta") \
    .option("overwriteSchema", "true") \
    .partitionBy("wfi_borough") \
    .save("/mnt/mtasubwaydl/bronze/wifi_location")

# Register the Delta path in the catalog only if the table is not there yet;
# on later runs the write above is all that is needed.
if not spark.catalog.tableExists("mta_bronze.wifi_location"):
    spark.catalog.createTable(
        tableName="mta_bronze.wifi_location",
        path="/mnt/mtasubwaydl/bronze/wifi_location",
        source="delta",
    )

In [0]:
%sql
-- Sanity check: preview up to 20 rows of the registered bronze table.
SELECT * FROM mta_bronze.wifi_location LIMIT 20;

wfi_station,wfi_station_complex,wfi_lines,wfi_is_historical,wfi_borough,wfi_county,wfi_latitude,wfi_longitude,wfi_wifi_available,wfi_att,wfi_sprint,wfi_tmobile,wfi_verizon,wfi_location,wfi_georeference,wfi_ingestion_date,wfi_source
149th S. Grand Concourse (LL),"149 St-Grand Concourse (2,4,5)",25,Yes,Bronx,Bronx County,40.81841,-73.926718,No,Yes,Yes,No,Yes,"(40.81841, -73.926718)",POINT (-73.926718 40.81841),2025-03-09T06:46:23.488Z,data.gov
Tremont Ave,"Tremont Av (B,D)",BD,No,Bronx,Bronx County,40.85041,-73.905227,No,Yes,Yes,Yes,Yes,"(40.85041, -73.905227)",POINT (-73.905227 40.85041),2025-03-09T06:46:23.488Z,data.gov
205th St,Norwood-205 St (D),D,No,Bronx,Bronx County,40.874811,-73.878855,No,Yes,Yes,Yes,Yes,"(40.874811, -73.878855)",POINT (-73.878855 40.874811),2025-03-09T06:46:23.488Z,data.gov
170th St,"170 St (B,D)",BD,No,Bronx,Bronx County,40.839306,-73.9134,No,Yes,Yes,Yes,Yes,"(40.839306, -73.9134)",POINT (-73.9134 40.839306),2025-03-09T06:46:23.488Z,data.gov
Bedford Park Blvd,"Bedford Park Blvd (B,D)",BD,No,Bronx,Bronx County,40.873244,-73.887138,No,Yes,Yes,Yes,Yes,"(40.873244, -73.887138)",POINT (-73.887138 40.873244),2025-03-09T06:46:23.488Z,data.gov
Cypress Avenue,Cypress Av (6),6,No,Bronx,Bronx County,40.805368,-73.914042,No,Yes,Yes,Yes,Yes,"(40.805368, -73.914042)",POINT (-73.914042 40.805368),2025-03-09T06:46:23.488Z,data.gov
149th St,"149 St-Grand Concourse (2,4,5)",4,No,Bronx,Bronx County,40.818375,-73.927351,No,Yes,Yes,Yes,Yes,"(40.818375, -73.927351)",POINT (-73.927351 40.818375),2025-03-09T06:46:23.488Z,data.gov
161st St,"161 St-Yankee Stadium (B,D,4)",BD,No,Bronx,Bronx County,40.827905,-73.925651,No,Yes,Yes,Yes,Yes,"(40.827905, -73.925651)",POINT (-73.925651 40.827905),2025-03-09T06:46:23.488Z,data.gov
167th St,"167 St (B,D)",BD,No,Bronx,Bronx County,40.833773,-73.918438,No,Yes,Yes,Yes,Yes,"(40.833773, -73.918438)",POINT (-73.918438 40.833773),2025-03-09T06:46:23.488Z,data.gov
Brook Avenue,Brook Av (6),6,No,Bronx,Bronx County,40.807566,-73.91924,No,Yes,Yes,Yes,Yes,"(40.807566, -73.91924)",POINT (-73.91924 40.807566),2025-03-09T06:46:23.488Z,data.gov


In [0]:
# End the notebook run and hand the string "Success" back as its exit value.
dbutils.notebook.exit("Success")