In [0]:
from pyspark.sql import functions as F

# Silver-layer inputs: the trip records and the per-day weather table.
trips = spark.read.table("project.taxi_silver.trips")
weather = spark.read.table("project.taxi_silver.weather_daily")

# Trip KPIs per pickup date. `year` and `month` are part of the grouping key
# so they are carried through the aggregation into the output.
daily_trip_stats = trips.groupBy("pickup_date", "year", "month").agg(
    F.count("*").alias("trips"),
    F.sum("total_amount").alias("revenue"),
    F.avg("fare_amount").alias("avg_fare"),
    F.avg("tip_amount").alias("avg_tip"),
    F.avg("trip_distance").alias("avg_distance"),
    F.avg("trip_minutes").alias("avg_duration"),
)

# Only the weather columns that are needed for the join and the output.
daily_weather = weather.select(
    "date",
    "min_temp_c",
    "max_temp_c",
    "rain_mm",
    "is_rain",
    "is_snow",
)

# Left join: every aggregated trip day is kept; weather columns are null for
# days without a matching weather row.
trips_with_weather = daily_trip_stats.join(
    daily_weather,
    F.col("pickup_date") == daily_weather["date"],
    "left",
)

# The output `date` column comes from the trips side (`pickup_date`), not from
# the weather table's own `date` column, which is dropped here.
city_daily = trips_with_weather.select(
    F.col("pickup_date").alias("date"),
    "min_temp_c",
    "max_temp_c",
    "trips",
    "revenue",
    "avg_fare",
    "avg_tip",
    "avg_distance",
    "avg_duration",
    "rain_mm",
    "is_rain",
    "is_snow",
    "year",
    "month",
)

# Make sure the gold schema exists before writing into it.
spark.sql("CREATE SCHEMA IF NOT EXISTS project.taxi_gold")

# Replace the gold table in place with a single Delta overwrite instead of
# DROP TABLE + write. Dropping first leaves a window in which the table does
# not exist (and no table at all if the write fails), and discards the
# table's Delta history. `overwriteSchema` lets the overwrite also replace
# the schema/partitioning, which is what the previous drop was providing.
# NOTE(review): year/month partitioning is kept as-is; for a one-row-per-day
# table this may produce very small partitions — confirm it is intended.
(
    city_daily.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .partitionBy("year", "month")
    .saveAsTable("project.taxi_gold.city_daily_kpis")
)
