# Produce Constructor Standings

In [0]:
%run "../includes/configuration"

In [0]:
# Load the race results dataset from the presentation layer; it is the sole input
# for the constructor standings computed in the cells below.
race_results_df = (
    spark.read
    .format("parquet")
    .load(f"{presentation_folder_path}/race_results.parquet")
)

In [0]:
# Print the first five rows as a quick check of the loaded columns and values.
race_results_df.show(n=5)

+---------+------------------+----------+----------------+---------------+-------------+------------------+--------+----+-----------+-----------+------+--------+
|race_year|         race_name| race_date|circuit_location|    driver_name|driver_number|driver_nationality|    team|grid|fastest_lap|  race_time|points|position|
+---------+------------------+----------+----------------+---------------+-------------+------------------+--------+----+-----------+-----------+------+--------+
|     2021|Bahrain Grand Prix|2021-03-28|          Sakhir| Lewis Hamilton|           44|           British|Mercedes|   2|         44|1:32:03.897|  25.0|       1|
|     2021|Bahrain Grand Prix|2021-03-28|          Sakhir| Max Verstappen|           33|             Dutch|Red Bull|   1|         41|     +0.745|  18.0|       2|
|     2021|Bahrain Grand Prix|2021-03-28|          Sakhir|Valtteri Bottas|           77|           Finnish|Mercedes|   3|         56|    +37.383|  16.0|       3|
|     2021|Bahrain Grand Pri

In [0]:
from pyspark.sql.window import Window
from pyspark.sql import functions as f

In [0]:
# Collapse the race results to one row per (race_year, team):
#   total_points - sum of the points column
#   wins         - number of rows with position == 1; f.when() without otherwise()
#                  yields NULL for non-matching rows, and f.count() skips NULLs
constructor_standings_df = (
    race_results_df
    .groupBy("race_year", "team")
    .agg(
        f.sum("points").alias("total_points"),
        f.count(f.when(f.col("position") == 1, True)).alias("wins"),
    )
)

In [0]:
# Spot-check the aggregated (not yet ranked) standings for the 2020 season.
display(
    constructor_standings_df.filter("race_year = 2020")
)

race_year,team,total_points,wins


In [0]:
# Ranking window: one partition per season, ordered by points (highest first),
# with the number of wins as the tie-breaker.
constructor_rank_spec = (
    Window
    .partitionBy("race_year")
    .orderBy(
        f.desc("total_points"),
        f.desc("wins"),
    )
)

In [0]:
# Attach each team's position within its season. rank() gives tied teams the same
# value and leaves a gap after the tie.
final_df = constructor_standings_df.withColumn(
    "rank",
    f.rank().over(constructor_rank_spec),
)

In [0]:
# Spot-check the ranked standings for the 2020 season.
display(
    final_df.filter("race_year = 2020")
)

race_year,team,total_points,wins,rank


In [0]:
# Persist the constructor standings via the project's Delta merge helper.
#
# final_df carries exactly the columns race_year, team, total_points, wins and rank
# (see the groupBy/agg and rank cells above), so a row is identified by (team, race_year).
# The previous condition matched on driver_name and race_id, neither of which exists in
# final_df, so it could not be resolved against the source rows.
#
# NOTE(review): merge_delta_data is not defined in this notebook; presumably it is
# provided by an included notebook - confirm that "../includes/configuration" (or an
# additional %run) brings it into scope.
# NOTE(review): the last argument is assumed to be the partition column of the target
# table, and "tgt"/"src" the aliases the helper assigns - confirm against the helper.
merge_condition = "tgt.team = src.team AND tgt.race_year = src.race_year"
merge_delta_data(final_df, 'f1_presentation', 'constructor_standings', presentation_folder_path, merge_condition, 'race_year')