# Gold Layer - Aggregate for Analytics


In [None]:
from pyspark.sql.functions import to_date, sum

# Load the Silver-layer table (expected to exist as a managed Delta table).
df_silver = spark.read.table("nyc_taxi_silver")

# Source column -> name of its daily total in the Gold table.
# Insertion order determines the output column order.
daily_totals = {
    "total_amount": "total_daily_revenue",
    "trip_distance": "total_daily_distance",
    "passenger_count": "total_daily_passengers",
    "fare_amount": "total_daily_fare",
}

# NOTE: `sum` here is pyspark.sql.functions.sum (imported above), not the
# Python builtin.
daily_aggregates = [
    sum(source_col).alias(total_col)
    for source_col, total_col in daily_totals.items()
]

# Build the Gold layer: derive the pickup date from the pickup timestamp,
# total each metric per date, and sort the result by date.
df_gold = (
    df_silver
    .withColumn("pickup_date", to_date("pickup_datetime"))
    .groupBy("pickup_date")
    .agg(*daily_aggregates)
    .orderBy("pickup_date")
)

# Persist the result as a Delta table, overwriting any existing contents.
gold_table_name = "gold_green_taxi_daily_metrics"
df_gold.write.format("delta").mode("overwrite").saveAsTable(gold_table_name)

