In [0]:
%run "../includes/configuration"

In [0]:
# Load the race_results dataset (parquet) from the presentation folder.
# NOTE(review): presentation_folder_path is presumably defined by the
# configuration notebook pulled in via %run above -- confirm.
race_results_df = spark.read.parquet(
    f"{presentation_folder_path}/race_results"
)

In [0]:
# Preview the first 10 rows to sanity-check the columns that were loaded.
race_results_df.show(n=10)

+---------+--------------------+-------------------+----------------+---------------+-------------+------------------+------------+----+-----------+---------+------+--------+--------------------+
|race_year|           race_name|          race_date|circuit_location|    driver_name|driver_number|driver_nationality|        team|grid|fastest_lap|race_time|points|position|        created_date|
+---------+--------------------+-------------------+----------------+---------------+-------------+------------------+------------+----+-----------+---------+------+--------+--------------------+
|     2018|Australian Grand ...|2018-03-25 05:10:00|       Melbourne|Sergey Sirotkin|           35|           Russian|    Williams|  19|          3|       \N|   0.0|    null|2023-09-30 14:34:...|
|     2018|Australian Grand ...|2018-03-25 05:10:00|       Melbourne|Marcus Ericsson|            9|           Swedish|      Sauber|  17|          4|       \N|   0.0|    null|2023-09-30 14:34:...|
|     2018|Australia

In [0]:
from pyspark.sql.functions import sum, count, col, when

In [0]:
# Build one row per (race_year, team) with two aggregates:
#   wins         - number of rows whose position equals 1; when() without an
#                  otherwise() yields null for every other row, and count()
#                  ignores nulls.
#   total_points - sum of the points column.
# Note: `sum` here is pyspark.sql.functions.sum (imported above), not the
# Python builtin.
constructor_standings_df = (
    race_results_df
    .groupBy("race_year", "team")
    .agg(
        count(when(col("position") == 1, True)).alias("wins"),
        sum("points").alias("total_points"),
    )
)

In [0]:
# Preview 10 aggregated rows (no ordering has been applied at this point).
constructor_standings_df.show(n=10)

+---------+--------------+----+------------+
|race_year|          team|wins|total_points|
+---------+--------------+----+------------+
|     2019|    Alfa Romeo|   0|        57.0|
|     2012|        Sauber|   0|       126.0|
|     2014|    Toro Rosso|   0|        30.0|
|     2016|       McLaren|   0|        76.0|
|     2015|Manor Marussia|   0|         0.0|
|     2012|      Caterham|   0|         0.0|
|     2017|   Force India|   0|       187.0|
|     2012|      Marussia|   0|         0.0|
|     2015|      Williams|   0|       257.0|
|     2017|      Mercedes|  12|       668.0|
+---------+--------------+----+------------+
only showing top 10 rows



In [0]:
# Spot-check the aggregation for a single season (2020).
display(
    constructor_standings_df.filter("race_year = 2020")
)

race_year,team,wins,total_points
2020,Haas F1 Team,0,3.0
2020,McLaren,0,202.0
2020,Ferrari,0,131.0
2020,Mercedes,13,573.0
2020,AlphaTauri,1,107.0
2020,Williams,0,0.0
2020,Red Bull,2,319.0
2020,Alfa Romeo,0,8.0
2020,Racing Point,1,210.0
2020,Renault,0,181.0


In [0]:
from pyspark.sql.window import Window
from pyspark.sql.functions import rank, desc, asc

In [0]:
# Rank teams within each season: highest total_points first, with wins
# (descending) as the tie-breaker.
constructor_rank_spec = (
    Window
    .partitionBy("race_year")
    .orderBy(desc("total_points"), desc("wins"))
)

# rank() assigns the same rank to rows that tie on both ordering columns.
final_df = constructor_standings_df.withColumn(
    "rank",
    rank().over(constructor_rank_spec),
)

In [0]:
# Preview 10 ranked rows to verify the new rank column.
final_df.show(n=10)

+---------+------------+----+------------+----+
|race_year|        team|wins|total_points|rank|
+---------+------------+----+------------+----+
|     1950|  Alfa Romeo|   6|        89.0|   1|
|     1950|     Ferrari|   0|        21.0|   2|
|     1950| Talbot-Lago|   0|        20.0|   3|
|     1950|Kurtis Kraft|   1|        14.0|   4|
|     1950|    Maserati|   0|        11.0|   5|
|     1950|       Deidt|   0|        10.0|   6|
|     1950|       Simca|   0|         3.0|   7|
|     1950|      Milano|   0|         0.0|   8|
|     1950|         Rae|   0|         0.0|   8|
|     1950|     Langley|   0|         0.0|   8|
+---------+------------+----+------------+----+
only showing top 10 rows



In [0]:
# Spot-check the ranked standings for a single season (2020).
display(
    final_df.filter("race_year = 2020")
)

race_year,team,wins,total_points,rank
2020,Mercedes,13,573.0,1
2020,Red Bull,2,319.0,2
2020,Racing Point,1,210.0,3
2020,McLaren,0,202.0,4
2020,Renault,0,181.0,5
2020,Ferrari,0,131.0,6
2020,AlphaTauri,1,107.0,7
2020,Alfa Romeo,0,8.0,8
2020,Haas F1 Team,0,3.0,9
2020,Williams,0,0.0,10


In [0]:
# Persist the ranked standings as parquet under the presentation folder.
# mode("overwrite") replaces any data already at the target path, so the
# cell can be re-run.
(
    final_df.write
    .mode("overwrite")
    .parquet(f"{presentation_folder_path}/constructor_standings")
)