In [None]:
import dlt
from pyspark.sql import functions as F
from pyspark.sql.types import StructType, StructField, StringType, DecimalType, TimestampType

# Environment-specific settings, read from the Spark configuration.
environment = spark.conf.get("environment")
region = spark.conf.get("region")
catalog_name = f"citibike_{environment}"

# Source data location: the landing CSV file under the environment's catalog.
src_data_path = (
    f"/Volumes/{catalog_name}"
    "/00_landing/source_citibike_data"
    "/JC-202503-citibike-tripdata.csv"
)

# Explicit schema applied when reading the trip CSV, so column types are
# declared here instead of being inferred. Every column is nullable.
schema = StructType([
    StructField(column_name, column_type, True)
    for column_name, column_type in (
        ("ride_id", StringType()),
        ("rideable_type", StringType()),
        ("started_at", TimestampType()),
        ("ended_at", TimestampType()),
        ("start_station_name", StringType()),
        ("start_station_id", StringType()),
        ("end_station_name", StringType()),
        ("end_station_id", StringType()),
        ("start_lat", DecimalType(10, 7)),
        ("start_lng", DecimalType(10, 7)),
        ("end_lat", DecimalType(10, 7)),
        ("end_lng", DecimalType(10, 7)),
        ("member_casual", StringType()),
    )
])

@dlt.table(
    name="bronze_jc_citibike",
    comment="Bronze layer: raw Citi Bike data with ingest metadata",
)
def bronze_jc_citibike():
    """Define the bronze table: the raw trip CSV plus ingest metadata.

    Reads the CSV at ``src_data_path`` with the explicit ``schema`` (the first
    line of the file is treated as a header) and appends two metadata columns
    so the table matches what its comment advertises:

    * ``source_file_path`` - path of the file each row was read from.
    * ``ingested_at`` - timestamp at which the read was evaluated.

    Returns:
        A batch DataFrame with every source column, in schema order, followed
        by the two metadata columns.
    """
    raw_trips = (
        spark.read
        .schema(schema)
        .csv(src_data_path, header=True)
    )

    # "*" expands to the declared data columns only; the hidden `_metadata`
    # file column has to be selected explicitly to appear in the output.
    return raw_trips.select(
        "*",
        F.col("_metadata.file_path").alias("source_file_path"),
        F.current_timestamp().alias("ingested_at"),
    )