# Formula 1 DLT Gold Layer
This notebook creates gold-layer aggregated tables for Formula 1 analytics.

In [None]:
import dlt
from pyspark.sql.functions import col, sum, count, when, desc, rank, current_timestamp
from pyspark.sql.window import Window

## Race Results
Join the silver tables to create a comprehensive race results table.

In [None]:
@dlt.table(
    comment="Gold layer: comprehensive race results joining drivers, constructors, races, and circuits"
)
def gold_race_results():
    """Build the race results table from the silver tables.

    Reads the silver results, drivers, constructors, circuits and races
    tables, renames columns to descriptive names, joins everything onto
    the results rows, and returns the selected columns together with a
    ``created_date`` column holding the current timestamp.
    """
    # The results-side race key is renamed so that the by-name selection of
    # "race_id" further down can only match the column coming from races.
    results = dlt.read("silver_results").withColumnRenamed("race_id", "result_race_id")

    drivers = dlt.read("silver_drivers")
    for old_name, new_name in (
        ("number", "driver_number"),
        ("name", "driver_name"),
        ("nationality", "driver_nationality"),
    ):
        drivers = drivers.withColumnRenamed(old_name, new_name)

    constructors = dlt.read("silver_constructors").withColumnRenamed("name", "team")

    circuits = dlt.read("silver_circuits").withColumnRenamed("location", "circuit_location")

    races = dlt.read("silver_races")
    races = races.withColumnRenamed("name", "race_name")
    races = races.withColumnRenamed("race_timestamp", "race_date")

    # Races enriched with the circuit location; only the columns needed
    # downstream are kept.
    same_circuit = races.circuit_id == circuits.circuit_id
    race_circuits = races.join(circuits, same_circuit, "inner").select(
        races.race_id,
        races.race_year,
        races.race_name,
        races.race_date,
        circuits.circuit_location,
    )

    # Join predicates, each bound explicitly to its source DataFrames.
    same_race = results.result_race_id == race_circuits.race_id
    same_driver = results.driver_id == drivers.driver_id
    same_constructor = results.constructor_id == constructors.constructor_id

    joined = results.join(race_circuits, same_race)
    joined = joined.join(drivers, same_driver)
    joined = joined.join(constructors, same_constructor)

    output_columns = [
        "race_id",
        "race_year",
        "race_name",
        "race_date",
        "circuit_location",
        "driver_name",
        "driver_number",
        "driver_nationality",
        "team",
        "grid",
        "fastest_lap",
        col("time").alias("race_time"),
        "points",
        "position",
    ]
    selected = joined.select(*output_columns)
    return selected.withColumn("created_date", current_timestamp())

## Driver Standings
Aggregate race results by driver with rankings.

In [None]:
@dlt.table(
    comment="Gold layer: driver standings aggregated by race year"
)
def gold_driver_standings():
    """Aggregate ``gold_race_results`` into per-year driver standings.

    Groups by race year, driver name and driver nationality, sums the
    points, counts the rows whose position equals 1 as wins, and adds a
    ``rank`` column computed within each race year.
    """
    # `when` without an otherwise yields null for non-matching rows, and
    # `count` skips nulls, so only position == 1 rows are counted.
    finished_first = col("position") == 1

    totals = (
        dlt.read("gold_race_results")
        .groupBy("race_year", "driver_name", "driver_nationality")
        .agg(
            sum("points").alias("total_points"),
            count(when(finished_first, True)).alias("wins"),
        )
    )

    # Rank within a year: most points first, then most wins.
    per_year = Window.partitionBy("race_year")
    ranking_window = per_year.orderBy(desc("total_points"), desc("wins"))

    return totals.withColumn("rank", rank().over(ranking_window))

## Constructor Standings
Aggregate race results by constructor with rankings.

In [None]:
@dlt.table(
    comment="Gold layer: constructor standings aggregated by race year"
)
def gold_constructor_standings():
    """Aggregate ``gold_race_results`` into per-year constructor standings.

    Groups by race year and team, sums the points, counts the rows whose
    position equals 1 as wins, and adds a ``rank`` column computed within
    each race year.
    """
    # `when` without an otherwise yields null for non-matching rows, and
    # `count` skips nulls, so only position == 1 rows are counted.
    finished_first = col("position") == 1

    totals = (
        dlt.read("gold_race_results")
        .groupBy("race_year", "team")
        .agg(
            sum("points").alias("total_points"),
            count(when(finished_first, True)).alias("wins"),
        )
    )

    # Rank within a year: most points first, then most wins.
    per_year = Window.partitionBy("race_year")
    ranking_window = per_year.orderBy(desc("total_points"), desc("wins"))

    return totals.withColumn("rank", rank().over(ranking_window))