##### Produce constructor standings

In [0]:
%run "../includes/configuration"

In [0]:
# Load the race results dataset (Parquet) that the standings are derived from.
# NOTE(review): `presentation_folder_path` is presumably defined by the
# configuration notebook pulled in via %run above — confirm.
race_results_df = spark.read.parquet(
    f"{presentation_folder_path}/race_results"
)

In [0]:
from pyspark.sql.functions import sum, when, count, col

In [0]:
# Aggregate race results into one row per (race_year, team):
#   total_points - sum of the "points" column for that team in that year
#   wins         - number of rows whose "position" equals 1
# `when(...)` without an `otherwise` yields NULL for non-matching rows, and
# `count` skips NULLs, so only first-place finishes are counted.
# Note: `sum` here is pyspark.sql.functions.sum (imported above), not the
# Python builtin.
constructor_standings_df = (
    race_results_df
    .groupBy("race_year", "team")
    .agg(
        sum("points").alias("total_points"),
        count(when(col("position") == 1, True)).alias("wins"),
    )
)

In [0]:
# Spot-check the aggregated standings for a single season (2020).
display(
    constructor_standings_df.filter("race_year = 2020")
)

race_year,team,total_points,wins
2020,Haas F1 Team,3.0,0
2020,McLaren,202.0,0
2020,Ferrari,131.0,0
2020,Mercedes,573.0,13
2020,AlphaTauri,107.0,1
2020,Williams,0.0,0
2020,Red Bull,319.0,2
2020,Alfa Romeo,8.0,0
2020,Racing Point,210.0,1
2020,Renault,181.0,0


In [0]:
from pyspark.sql.window import Window
from pyspark.sql.functions import desc, rank, asc

In [0]:
# Rank teams within each race_year: highest total_points first, with the
# number of wins (descending) as the tie-breaker. `rank()` assigns the same
# rank to rows that tie on both columns and leaves a gap after them.
constructor_rank_spec = (
    Window
    .partitionBy("race_year")
    .orderBy(desc("total_points"), desc("wins"))
)
final_df = constructor_standings_df.withColumn(
    "rank",
    rank().over(constructor_rank_spec),
)

In [0]:
# Spot-check the ranked standings for a single season (2020).
display(
    final_df.filter("race_year = 2020")
)

race_year,team,total_points,wins,rank
2020,Mercedes,573.0,13,1
2020,Red Bull,319.0,2,2
2020,Racing Point,210.0,1,3
2020,McLaren,202.0,0,4
2020,Renault,181.0,0,5
2020,Ferrari,131.0,0,6
2020,AlphaTauri,107.0,1,7
2020,Alfa Romeo,8.0,0,8
2020,Haas F1 Team,3.0,0,9
2020,Williams,0.0,0,10


In [0]:
# Persist the ranked standings as Parquet. "overwrite" mode replaces any
# existing data at the target path, so re-running the notebook is safe.
(
    final_df.write
    .mode("overwrite")
    .parquet(f"{presentation_folder_path}/constructor_standings")
)