### Gold Layer: Driver Standings

Calculate per-season driver standings (total points, number of wins, and rank) from the race results.

In [None]:
# Declare the "p_data_source" text widget (default: empty string) and read
# its current value so the notebook can be parameterised by the caller.
# NOTE(review): v_data_source is not referenced in the cells visible here --
# confirm whether it is still needed.
dbutils.widgets.text("p_data_source", "")
v_data_source = dbutils.widgets.get("p_data_source")

In [None]:
from formula1.formula1_constants import presentation_folder_path

##### Step 1 - Read race results from gold layer

In [None]:
# Load the "race_results" Parquet dataset located under
# presentation_folder_path into a DataFrame.
race_results_df = spark.read.parquet(f"{presentation_folder_path}/race_results")

##### Step 2 - Aggregate data to calculate points and wins

In [None]:
from pyspark.sql.functions import sum, when, count, col

# One output row per (race_year, driver_name, driver_nationality, team).
# NOTE(review): "team" is part of the grouping key, so a driver with results
# for more than one team in a season produces one row per team -- confirm
# that this is the intended granularity.
driver_standings_df = (
    race_results_df
    .groupBy("race_year", "driver_name", "driver_nationality", "team")
    .agg(
        # Season points total for the group.
        sum("points").alias("total_points"),
        # when() without otherwise() yields NULL for non-matching rows and
        # count() ignores NULLs, so this counts the rows with position == 1.
        count(when(col("position") == 1, True)).alias("wins"),
    )
)

In [None]:
# Preview the aggregated standings for the 2020 season only.
display(driver_standings_df.filter("race_year = 2020"))

##### Step 3 - Rank drivers by points and wins

In [None]:
from pyspark.sql.window import Window
from pyspark.sql.functions import desc, rank, asc

# Rank inside each season: highest total_points first, with wins (descending)
# as the tie-breaker.
driver_rank_spec = (
    Window
    .partitionBy("race_year")
    .orderBy(desc("total_points"), desc("wins"))
)

# rank() assigns the same rank to rows tied on both ordering columns and
# leaves a gap after the tied group.
final_df = driver_standings_df.withColumn(
    "rank",
    rank().over(driver_rank_spec),
)

In [None]:
#display(final_df.filter("race_year = 2020"))

##### Step 4 - Write to gold/presentation layer

In [None]:
# Persist the ranked standings as Parquet under
# presentation_folder_path/driver_standings; "overwrite" mode replaces any
# data already present at that path.
final_df.write.mode("overwrite").parquet(f"{presentation_folder_path}/driver_standings")