In [0]:
%run "../includes/configuration"

In [0]:
# Load the race results dataset (Parquet) that the standings are computed from.
# presentation_folder_path is presumably defined by the configuration notebook run above — confirm.
race_results_path = f"{presentation_folder_path}/race_results"
race_results_df = spark.read.parquet(race_results_path)

In [0]:
# Preview the first 10 rows of the loaded race results.
race_results_df.show(n=10)

+---------+--------------------+-------------------+----------------+---------------+-------------+------------------+------------+----+-----------+---------+------+--------+--------------------+
|race_year|           race_name|          race_date|circuit_location|    driver_name|driver_number|driver_nationality|        team|grid|fastest_lap|race_time|points|position|        created_date|
+---------+--------------------+-------------------+----------------+---------------+-------------+------------------+------------+----+-----------+---------+------+--------+--------------------+
|     2018|Australian Grand ...|2018-03-25 05:10:00|       Melbourne|Sergey Sirotkin|           35|           Russian|    Williams|  19|          3|       \N|   0.0|    null|2023-09-30 14:34:...|
|     2018|Australian Grand ...|2018-03-25 05:10:00|       Melbourne|Marcus Ericsson|            9|           Swedish|      Sauber|  17|          4|       \N|   0.0|    null|2023-09-30 14:34:...|
|     2018|Australia

In [0]:
from pyspark.sql.functions import sum, count, col, when

In [0]:
# Aggregate race results into one standings row per (season, nationality, driver, team):
#   wins         - number of results where the driver finished in position 1
#   total_points - sum of points scored
# when() without an otherwise() yields NULL for non-matching rows, and count()
# skips NULLs, so only first-place finishes are counted.
# NOTE(review): because "team" is a grouping key, a driver with results for more
# than one team in a season gets a separate row per team — confirm this is intended.
is_race_win = when(col("position") == 1, True)

driver_standings_df = (
    race_results_df
    .groupBy("race_year", "driver_nationality", "driver_name", "team")
    .agg(
        count(is_race_win).alias("wins"),
        sum("points").alias("total_points"),
    )
)

In [0]:
# Preview the first 10 aggregated standings rows (no ordering is applied here).
driver_standings_df.show(n=10)

+---------+------------------+---------------+------------+----+------------+
|race_year|driver_nationality|    driver_name|        team|wins|total_points|
+---------+------------------+---------------+------------+----+------------+
|     2018|           Spanish|Fernando Alonso|     McLaren|   0|        50.0|
|     2015|            Danish|Kevin Magnussen|     McLaren|   0|         0.0|
|     2012|           British| Lewis Hamilton|     McLaren|   4|       190.0|
|     2016|            Danish|Kevin Magnussen|     Renault|   0|         7.0|
|     2013|           Finnish|Valtteri Bottas|    Williams|   0|         4.0|
|     2012|           Spanish|Fernando Alonso|     Ferrari|   3|       278.0|
|     2015|            German|Nico Hülkenberg| Force India|   0|        58.0|
|     2013|         Brazilian|   Felipe Massa|     Ferrari|   0|       112.0|
|     2017|         Brazilian|   Felipe Massa|    Williams|   0|        43.0|
|     2016|            French|Romain Grosjean|Haas F1 Team|   0|

In [0]:
# Spot-check the aggregation for a single season (2020).
standings_2020_df = driver_standings_df.where("race_year = 2020")
display(standings_2020_df)

race_year,driver_nationality,driver_name,team,wins,total_points
2020,French,Romain Grosjean,Haas F1 Team,0,2.0
2020,British,George Russell,Williams,0,0.0
2020,Danish,Kevin Magnussen,Haas F1 Team,0,1.0
2020,Thai,Alexander Albon,Red Bull,0,105.0
2020,French,Pierre Gasly,AlphaTauri,1,75.0
2020,Finnish,Kimi Räikkönen,Alfa Romeo,0,4.0
2020,German,Nico Hülkenberg,Racing Point,0,10.0
2020,Brazilian,Pietro Fittipaldi,Haas F1 Team,0,0.0
2020,Mexican,Sergio Pérez,Racing Point,1,125.0
2020,British,Jack Aitken,Williams,0,0.0


In [0]:
from pyspark.sql.window import Window
from pyspark.sql.functions import rank, desc, asc

In [0]:
# Rank drivers within each season: most points first, with wins as the tie-breaker.
# rank() gives rows that tie on both columns the same rank and leaves a gap after them.
driver_rank_spec = (
    Window
    .partitionBy("race_year")
    .orderBy(desc("total_points"), desc("wins"))
)
season_rank = rank().over(driver_rank_spec)
final_df = driver_standings_df.withColumn("rank", season_rank)

In [0]:
# Preview the first 10 ranked standings rows.
final_df.show(n=10)

+---------+------------------+---------------+------------+----+------------+----+
|race_year|driver_nationality|    driver_name|        team|wins|total_points|rank|
+---------+------------------+---------------+------------+----+------------+----+
|     1950|           Italian|    Nino Farina|  Alfa Romeo|   3|        30.0|   1|
|     1950|           Italian|  Luigi Fagioli|  Alfa Romeo|   0|        28.0|   2|
|     1950|         Argentine|    Juan Fangio|  Alfa Romeo|   3|        27.0|   3|
|     1950|            French|   Louis Rosier| Talbot-Lago|   0|        13.0|   4|
|     1950|           Italian| Alberto Ascari|     Ferrari|   0|        11.0|   5|
|     1950|          American|Johnnie Parsons|Kurtis Kraft|   1|         9.0|   6|
|     1950|          American|   Bill Holland|       Deidt|   0|         6.0|   7|
|     1950|              Thai|    Prince Bira|    Maserati|   0|         5.0|   8|
|     1950|        Monegasque|   Louis Chiron|    Maserati|   0|         4.0|   9|
|   

In [0]:
# Spot-check the ranked standings for a single season (2020).
ranked_2020_df = final_df.where("race_year = 2020")
display(ranked_2020_df)

race_year,driver_nationality,driver_name,team,wins,total_points,rank
2020,British,Lewis Hamilton,Mercedes,11,347.0,1
2020,Finnish,Valtteri Bottas,Mercedes,2,223.0,2
2020,Dutch,Max Verstappen,Red Bull,2,214.0,3
2020,Mexican,Sergio Pérez,Racing Point,1,125.0,4
2020,Australian,Daniel Ricciardo,Renault,0,119.0,5
2020,Thai,Alexander Albon,Red Bull,0,105.0,6
2020,Spanish,Carlos Sainz,McLaren,0,105.0,6
2020,Monegasque,Charles Leclerc,Ferrari,0,98.0,8
2020,British,Lando Norris,McLaren,0,97.0,9
2020,French,Pierre Gasly,AlphaTauri,1,75.0,10


In [0]:
# Persist the ranked standings as Parquet, replacing any data already at the target path.
driver_standings_path = f"{presentation_folder_path}/driver_standings"
final_df.write.parquet(driver_standings_path, mode="overwrite")