In [0]:
from pyspark.sql import functions as F

# Source tables read from the `project.taxi_silver` schema:
#   trips   - individual trip records
#   zones   - zone dimension, joined on location_id
#   weather - weather rows keyed by `date`
trips, zones, weather = (
    spark.table("project.taxi_silver.trips"),
    spark.table("project.taxi_silver.dim_zone"),
    spark.table("project.taxi_silver.weather_daily"),
)

# Grouping keys: the output holds one row per distinct combination of these.
grain_cols = ["pickup_date", "year", "month", "borough", "zone", "service_zone"]

# Metrics computed for every group of trips.
kpi_exprs = [
    F.count("*").alias("trips"),
    F.sum("total_amount").alias("revenue"),
    F.avg("fare_amount").alias("avg_fare"),
    F.avg("tip_amount").alias("avg_tip"),
    F.avg("trip_distance").alias("avg_distance"),
    F.avg("trip_minutes").alias("avg_duration"),
]

# Weather attributes carried through to the output (besides the join key).
weather_cols = ["min_temp_c", "max_temp_c", "rain_mm", "is_rain", "is_snow"]

# Left join: trips whose pickup_location_id has no match in `zones` are kept,
# with null zone attributes, and are aggregated like any other group.
trips_by_zone_day = (
    trips
    .join(zones, on=trips["pickup_location_id"] == zones["location_id"], how="left")
    .groupBy(*grain_cols)
    .agg(*kpi_exprs)
)

daily_weather = weather.select("date", *weather_cols)

# Weather is attached after aggregation, so the join runs on the grouped rows
# rather than on individual trips. Left join: groups with no weather row for
# their pickup_date keep null weather columns.
zone_daily = (
    trips_by_zone_day
    .join(daily_weather, on=weather["date"] == F.col("pickup_date"), how="left")
    .select(
        # Only pickup_date is exposed as `date`; the weather-side `date`
        # column is not selected.
        F.col("pickup_date").alias("date"),
        "borough",
        "zone",
        "service_zone",
        "trips",
        "revenue",
        "avg_fare",
        "avg_tip",
        "avg_distance",
        "avg_duration",
        *weather_cols,
        "year",
        "month",
    )
)

# Publish `zone_daily` as the Delta table project.taxi_gold.zone_daily_kpis,
# partitioned by year and month.
#
# The table is replaced with a single Delta overwrite rather than
# DROP TABLE + re-create. Dropping first leaves a window in which the table
# does not exist for readers, and if the subsequent write fails the table
# (and its Delta history) is gone. `overwriteSchema=true` lets the overwrite
# also replace the existing schema and partitioning, which is what the
# explicit drop was otherwise needed for.
(
    zone_daily.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .partitionBy("year", "month")
    .saveAsTable("project.taxi_gold.zone_daily_kpis")
)
