In [0]:
%run "../utils/custom_functions"

In [0]:
from pyspark.sql.types import StructType, StructField, StringType
from pyspark.sql.functions import col, lit

In [0]:
# Expected layout of the station-abbreviation lookup file: two string columns,
# both declared non-nullable.
lkp_station_schema = (
    StructType()
    .add("abbreviation", StringType(), nullable=False)
    .add("name", StringType(), nullable=False)
)

In [0]:
# Read the raw station-abbreviation lookup CSV and rename its two columns with
# the "lks_" prefix.
lkp_station_df = (
    spark.read
    .option("header", "true")
    # With an explicit schema Spark ignores the header row by default; turning
    # enforceSchema off makes it validate the header names against the schema,
    # so a renamed or reordered column fails the read instead of being mislabeled.
    .option("enforceSchema", "false")
    # Apply the declared schema (previously defined but never passed to the reader).
    .schema(lkp_station_schema)
    .csv("/mnt/mtasubwaydl/raw/station_abbreviation.csv")
    .select(
        col("abbreviation").alias("lks_abbr"),
        col("name").alias("lks_name"),
    )
)

In [0]:
# Enrich the lookup rows with audit columns before writing them out.
# add_ingestion_date comes from ../utils/custom_functions; presumably it adds an
# "<alias>_ingestion_date" timestamp column -- TODO confirm against the helper.
lkp_station_final_df = (
    add_ingestion_date(lkp_station_df, alias="lks")
    # Constant column recording where the lookup data originated.
    .withColumn("lks_source", lit("data.gov"))
)

In [0]:
# Replace the contents of the bronze Delta location with the freshly built lookup.
(
    lkp_station_final_df.write
    .format("delta")
    .mode("overwrite")
    .save("/mnt/mtasubwaydl/bronze/lkp_station")
)

# Register the Delta location in the catalog only when the table is not there yet;
# on later runs the existing table already points at the overwritten path.
if not spark.catalog.tableExists("mta_bronze.lkp_station"):
    spark.catalog.createTable(
        "mta_bronze.lkp_station",
        path="/mnt/mtasubwaydl/bronze/lkp_station",
        source="delta",
    )

In [0]:
%sql
-- Preview a handful of rows from the registered bronze lookup table.
SELECT *
FROM mta_bronze.lkp_station
LIMIT 10;

lks_abbr,lks_name,lks_ingestion_date,lks_source
1 Av,1st Avenue,2025-03-09T07:17:58.534Z,data.gov
103 St,103rd Street,2025-03-09T07:17:58.534Z,data.gov
103 St-Corona Plaza,103rd Street-Corona Plaza,2025-03-09T07:17:58.534Z,data.gov
103rd St,103rd Street,2025-03-09T07:17:58.534Z,data.gov
104 St,104th Street,2025-03-09T07:17:58.534Z,data.gov
110 St,110th Street,2025-03-09T07:17:58.534Z,data.gov
110th St,110th Street,2025-03-09T07:17:58.534Z,data.gov
110th St/Central Park N,110th Street/Central Park North,2025-03-09T07:17:58.534Z,data.gov
110thSt,110th Street,2025-03-09T07:17:58.534Z,data.gov
111 St,111th Street,2025-03-09T07:17:58.534Z,data.gov


In [0]:
# End the notebook run, handing the string "Success" back as its exit value
# (per Databricks docs, this is what a caller using dbutils.notebook.run receives).
dbutils.notebook.exit("Success")