# Constructor Standings
1. Read Data
2. Aggregate & Rank
3. Write Data

In [0]:
# Import Modules
from pyspark.sql.window import Window
from pyspark.sql.functions import col, desc, when, count, sum, rank, dense_rank

In [0]:
%run "../../01-Setup/09-Global-Variables"

In [0]:
%run "../../01-Setup/10-Global-Functions"

### Read Data

In [0]:
# Load the curated race-results Delta table; it is the input for the
# constructor (team) standings computed below.
race_results_df = (
    spark.read
    .format('delta')
    .load(f'{curated_delta_database_folder_path}/race_results_delta')
)

# Preview the loaded rows
display(race_results_df)

race_id,race_year,race_name,race_date,circuit_location,driver_id,driver_name,driver_number,driver_nationality,team,grid,fastest_lap,race_time,points,position,created_date
356,1989,Brazilian Grand Prix,,Rio de Janeiro,95,Nigel Mansell,,British,Ferrari,6,,1:38:58.744,9.0,1.0,2023-08-22T18:55:32.566+0000
356,1989,Brazilian Grand Prix,,Rio de Janeiro,117,Alain Prost,,French,McLaren,5,,+7.809,6.0,2.0,2023-08-22T18:55:32.566+0000
356,1989,Brazilian Grand Prix,,Rio de Janeiro,127,Maurício Gugelmin,,Brazilian,March,12,,+9.370,4.0,3.0,2023-08-22T18:55:32.566+0000
356,1989,Brazilian Grand Prix,,Rio de Janeiro,65,Johnny Herbert,,British,Benetton,10,,+10.493,3.0,4.0,2023-08-22T18:55:32.566+0000
356,1989,Brazilian Grand Prix,,Rio de Janeiro,118,Derek Warwick,,British,Arrows,8,,+17.866,2.0,5.0,2023-08-22T18:55:32.566+0000
356,1989,Brazilian Grand Prix,,Rio de Janeiro,145,Alessandro Nannini,,Italian,Benetton,11,,+18.241,1.0,6.0,2023-08-22T18:55:32.566+0000
356,1989,Brazilian Grand Prix,,Rio de Janeiro,92,Bertrand Gachot,,Belgian,Onyx,0,,\N,0.0,,2023-08-22T18:55:32.566+0000
356,1989,Brazilian Grand Prix,,Rio de Janeiro,140,Stefan Johansson,,Swedish,Onyx,0,,\N,0.0,,2023-08-22T18:55:32.566+0000
356,1989,Brazilian Grand Prix,,Rio de Janeiro,88,Aguri Suzuki,,Japanese,Zakspeed,0,,\N,0.0,,2023-08-22T18:55:32.566+0000
356,1989,Brazilian Grand Prix,,Rio de Janeiro,164,Joachim Winkelhock,,German,AGS,0,,\N,0.0,,2023-08-22T18:55:32.566+0000


### Aggregate & Rank

In [0]:
# Aggregate results per season and team.
# NOTE: `sum` and `count` here are the pyspark.sql.functions versions imported
# at the top of the notebook, not the Python builtins.
team_standing_df = (
    race_results_df
    .groupBy('race_year', 'team')
    .agg(
        # Season points total for the team
        sum('points').alias('total_points'),
        # `when` without `otherwise` yields NULL for non-winning rows and
        # `count` skips NULLs, so this counts only rows with position == 1
        count(when(col('position') == 1, True)).alias('total_wins'),
    )
)

# Preview the aggregated standings
display(team_standing_df)

race_year,team,total_points,total_wins
1990,Benetton,71.0,2
2007,Ferrari,204.0,9
2007,Honda,6.0,0
2019,Alfa Romeo,57.0,0
2012,Sauber,126.0,0
1990,Larrousse,11.0,0
1953,Turner,0.0,0
2014,Toro Rosso,30.0,0
2016,McLaren,76.0,0
1954,Schroeder,0.0,0


In [0]:
# Ranking window: one partition per season, ordered by points and then by
# wins (both descending), so wins act as the tie-breaker on equal points.
team_rank_spec = (
    Window
    .partitionBy('race_year')
    .orderBy(desc('total_points'), desc('total_wins'))
)

# Add both ranking flavours over the same window:
#   rank       - leaves gaps after ties (e.g. 7, 7, 7, 10)
#   dense_rank - no gaps after ties     (e.g. 7, 7, 7, 8)
# The chain is wrapped in parentheses instead of using backslash
# continuations; the original ended with a dangling `\` after the last
# `.withColumn(...)`, which only parsed because a blank line followed it.
team_standing_ranked_df = (
    team_standing_df
    .withColumn('rank', rank().over(team_rank_spec))
    .withColumn('dense_rank', dense_rank().over(team_rank_spec))
)

# Preview the ranked standings
display(team_standing_ranked_df)

race_year,team,total_points,total_wins,rank,dense_rank
1950,Alfa Romeo,89.0,6,1,1
1950,Ferrari,21.0,0,2,2
1950,Talbot-Lago,20.0,0,3,3
1950,Kurtis Kraft,14.0,1,4,4
1950,Maserati,11.0,0,5,5
1950,Deidt,10.0,0,6,6
1950,Simca,3.0,0,7,7
1950,Rae,0.0,0,8,8
1950,Langley,0.0,0,8,8
1950,Lesovsky,0.0,0,8,8


### Write Data

In [0]:
# Write the ranked standings to the file system in Delta format,
# replacing whatever is already stored at the target path.
# NOTE(review): this cell targets `curated_delta_folder_path`, whereas the
# read and merge cells in this notebook use
# `curated_delta_database_folder_path` - confirm the different location is
# intentional.
(
    team_standing_ranked_df.write
    .mode('overwrite')
    .format('delta')
    .save(f'{curated_delta_folder_path}/constructor_standings_delta')
)

In [0]:
# Merge the ranked standings into the curated Delta table via the shared
# helper; source and target rows are matched on season + team.
# NOTE(review): 'race_year' is passed to `merge_partitioned_delta_data`,
# presumably as the partition column - verify against the helper's
# definition in the Global-Functions notebook.
merge_condition = 'tgt.race_year = src.race_year and tgt.team = src.team'
merge_partitioned_delta_data(
    team_standing_ranked_df,
    'formula1_curated_delta',
    'constructor_standings_delta',
    curated_delta_database_folder_path,
    'race_year',
    merge_condition,
)

In [0]:
# Read the constructor standings back from the Delta folder to check
# what was persisted.
df = (
    spark.read
    .format('delta')
    .load(f'{curated_delta_database_folder_path}/constructor_standings_delta')
)

# Preview the persisted rows
display(df)

race_year,team,total_points,total_wins,rank,dense_rank
1960,Cooper-Climax,102.0,6,1,1
1960,Team Lotus,52.0,2,2,2
1960,Ferrari,43.0,1,3,3
1960,Watson,14.0,1,4,4
1960,BRM,8.0,0,5,5
1960,Epperly,4.0,0,6,6
1960,Phillips,3.0,0,7,7
1960,Cooper-Maserati,3.0,0,7,7
1960,Cooper-Castellotti,3.0,0,7,7
1960,Lesovsky,2.0,0,10,8


In [0]:
%sql
-- Read the constructor standings back through the table name
SELECT *
FROM formula1_curated_delta.constructor_standings_delta;

race_year,team,total_points,total_wins,rank,dense_rank
1960,Cooper-Climax,102.0,6,1,1
1960,Team Lotus,52.0,2,2,2
1960,Ferrari,43.0,1,3,3
1960,Watson,14.0,1,4,4
1960,BRM,8.0,0,5,5
1960,Epperly,4.0,0,6,6
1960,Phillips,3.0,0,7,7
1960,Cooper-Maserati,3.0,0,7,7
1960,Cooper-Castellotti,3.0,0,7,7
1960,Lesovsky,2.0,0,10,8


In [0]:
# Notebook exit output: end the run and hand this status string back to the
# caller. The message now names this notebook's subject (constructor
# standings); it previously reported "Driver Ranking Successful".
dbutils.notebook.exit("Constructor Ranking Successful")