##### Produce driver standings

In [None]:
# Notebook parameter: the file date of the load to process. The widget is
# declared with a default so the notebook also runs interactively, then its
# current value is read back into v_file_date.
dbutils.widgets.text(name="p_file_date", defaultValue="2021-03-28")
v_file_date = dbutils.widgets.get("p_file_date")

In [None]:
from src.formula1.formula1_constants import *
from src.formula1.formula1_utils import *

Find race years for which the data is to be reprocessed

In [None]:
from pyspark.sql.functions import col

# Race results that belong to the load identified by the p_file_date parameter.
# The predicate is a Column expression with the parameter bound as a literal
# value instead of being spliced into a SQL string, so a value containing a
# quote can neither break nor alter the filter.
race_results_df = spark.read.table("f1_gold.race_results").filter(
    col("file_date") == v_file_date
)

In [None]:
# Collect the race_year values found in the rows for this file date; these are
# the seasons recomputed below.
# NOTE(review): df_column_to_list is a project helper (formula1_utils) —
# presumably it returns the distinct column values as a Python list; confirm.
race_year_list = df_column_to_list(race_results_df, "race_year")

In [None]:
from pyspark.sql.functions import col

# Re-read the results table, this time keeping every row of the affected
# seasons (not just the rows of the current file date), so the standings are
# recomputed from each season's full set of results.
race_results_df = spark.table("f1_gold.race_results").where(
    col("race_year").isin(race_year_list)
)

In [None]:
from pyspark.sql.functions import col, count, when

# Imported under an alias so Python's builtin sum() is not shadowed for the
# rest of the notebook.
from pyspark.sql.functions import sum as spark_sum

# One row per (race_year, driver_name, driver_nationality) with the driver's
# point total and number of wins for that season.
driver_standings_df = race_results_df.groupBy(
    "race_year", "driver_name", "driver_nationality"
).agg(
    spark_sum("points").alias("total_points"),
    # when() without otherwise() yields NULL for non-matching rows and count()
    # ignores NULLs, so this counts only the rows where position == 1.
    count(when(col("position") == 1, True)).alias("wins"),
)

In [None]:
from pyspark.sql.window import Window
from pyspark.sql.functions import desc, rank, asc

# Rank drivers within each season: highest point total first, with the number
# of wins as the tie-breaker. rank() gives equal rows the same rank.
driver_rank_spec = (
    Window.partitionBy("race_year")
    .orderBy(desc("total_points"), desc("wins"))
)
final_df = driver_standings_df.select(
    "*",
    rank().over(driver_rank_spec).alias("rank"),
)

In [None]:
# Persist the recomputed standings. final_df only contains the race years
# selected for reprocessing, so a plain overwrite would replace the whole table
# and drop every other season. Dynamic partition overwrite replaces only the
# race_year partitions present in final_df and leaves the rest untouched.
# NOTE(review): Delta's dynamic partition overwrite requires Delta Lake 2.0+ /
# Databricks Runtime 11.1+ — confirm the cluster runtime. An upsert through the
# project's merge_delta_data helper (keyed on driver_name and race_year) was
# previously sketched here as an alternative.
(
    final_df.write.format("delta")
    .mode("overwrite")
    .option("partitionOverwriteMode", "dynamic")
    .partitionBy("race_year")
    .saveAsTable("f1_gold.driver_standings")
)

In [None]:
%sql
-- Spot-check of the written table: show the stored standings rows for the
-- 2021 season (the year is hard-coded, not derived from p_file_date).
SELECT * FROM f1_gold.driver_standings WHERE race_year = 2021;