# Produce Constructor Standings

In [0]:
%run "../includes/configuration"

In [0]:
# Load the race results dataset from the presentation layer as the input
# for the constructor standings computed below.
race_results_df = (
    spark.read
    .format("parquet")
    .load(f"{presentation_folder_path}/race_results.parquet")
)

In [0]:
# Print the first five rows to sanity-check the columns that were loaded.
race_results_df.show(n=5)

+---------+--------------------+---------+----------------+----------------+-------------+------------------+----------+----+-----------+-----------+------+--------+
|race_year|           race_name|race_date|circuit_location|     driver_name|driver_number|driver_nationality|      team|grid|fastest_lap|  race_time|points|position|
+---------+--------------------+---------+----------------+----------------+-------------+------------------+----------+----+-----------+-----------+------+--------+
|     1990|Hungarian Grand Prix|     null|        Budapest| Thierry Boutsen|         null|           Belgian|  Williams|   1|       null|1:49:30.597|   9.0|       1|
|     1990|Hungarian Grand Prix|     null|        Budapest|    Ayrton Senna|         null|         Brazilian|   McLaren|   4|       null|     +0.288|   6.0|       2|
|     1990|Hungarian Grand Prix|     null|        Budapest|   Nelson Piquet|         null|         Brazilian|  Benetton|   9|       null|    +27.893|   4.0|       3|
|   

In [0]:
from pyspark.sql.window import Window
from pyspark.sql import functions as f

In [0]:
# Aggregate race results to one row per (race_year, team):
#   total_points - sum of the points column within the group
#   wins         - number of rows in the group with position == 1
constructor_standings_df = (
    race_results_df
    .groupBy("race_year", "team")
    .agg(
        f.sum("points").alias("total_points"),
        # when() without otherwise() yields NULL for non-matching rows and
        # count() ignores NULLs, so only first-place rows are counted.
        f.count(f.when(f.col("position") == 1, True)).alias("wins"),
    )
)

In [0]:
# Spot-check the aggregated (not yet ranked) standings for a single season.
display(
    constructor_standings_df.where("race_year = 2020")
)

race_year,team,total_points,wins
2020,Haas F1 Team,3.0,0
2020,McLaren,202.0,0
2020,Ferrari,131.0,0
2020,Mercedes,573.0,13
2020,AlphaTauri,107.0,1
2020,Williams,0.0,0
2020,Red Bull,319.0,2
2020,Alfa Romeo,8.0,0
2020,Racing Point,210.0,1
2020,Renault,181.0,0


In [0]:
# Ranking window: rows are ranked independently within each race_year,
# ordered by total_points (highest first) with wins as the tie-breaker.
constructor_rank_spec = (
    Window
    .partitionBy("race_year")
    .orderBy(
        f.col("total_points").desc(),
        f.col("wins").desc(),
    )
)

In [0]:
# Add a "rank" column by evaluating rank() over the per-season window spec.
final_df = constructor_standings_df.withColumn(
    "rank",
    f.rank().over(constructor_rank_spec),
)

In [0]:
# Spot-check the ranked standings for a single season.
display(
    final_df.where("race_year = 2020")
)

race_year,team,total_points,wins,rank
2020,Mercedes,573.0,13,1
2020,Red Bull,319.0,2,2
2020,Racing Point,210.0,1,3
2020,McLaren,202.0,0,4
2020,Renault,181.0,0,5
2020,Ferrari,131.0,0,6
2020,AlphaTauri,107.0,1,7
2020,Alfa Romeo,8.0,0,8
2020,Haas F1 Team,3.0,0,9
2020,Williams,0.0,0,10


In [0]:
# Persist the ranked standings to the presentation layer as parquet,
# replacing whatever a previous run left at this path.
final_df.write.parquet(
    f"{presentation_folder_path}/constructor_standings.parquet",
    mode="overwrite",
)