# 2025 Championship Analysis

In this notebook I will take a look at the 2025 WDC championship mainly.

The idea is: McLaren easily won WCC but lost (or almost lost) the WDC. I'm starting the work on this before the championship actually finished.

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from pathlib import Path
import os
from manim import *

In [None]:
def find_project_root(start: Path) -> Path:
    """Return the project root for a notebook running in ``start``.

    The root is the closest directory (``start`` itself or one of its
    ancestors) that contains ``src/modules``, the package imported by this
    notebook. Searching for that marker instead of always climbing two levels
    keeps the cell idempotent: re-running it after the working directory has
    already been switched to the root returns the same root again.

    If no ancestor contains the marker, fall back to the original assumption
    that the notebook lives two levels below the root.
    """
    for candidate in (start, *start.parents):
        if (candidate / 'src' / 'modules').is_dir():
            return candidate
    ancestors = start.parents
    return ancestors[1] if len(ancestors) > 1 else start


# Absolute path of the project root (the notebook lives in /notebooks/<name>)
project_root = find_project_root(Path.cwd())
print("Diretório atual:", Path.cwd())
os.chdir(project_root)

# Project-local modules, addressed by their path from the project root.
# NOTE(review): the star imports hide where helpers such as plot_wcc, plot_wdc,
# graf_barras_padrao and plot_chapter_cards come from — presumably the two
# plotter modules below; consider importing them by name.
from src.modules.data_processing.db_reader import DbReader
from src.analysis.championship.plotter import *
from src.modules.animation.manim_constructors import LineChampionshipChart
from src.analysis.data_viz.plotter import *
# Reader used to run the SQL query files in this notebook
f1_db = DbReader()

In [None]:
GRAFS_DIR = 'notebooks/2025_championship/grafs/'

In [None]:
# Drivers Standings

df_wdc = f1_db.run_query_file('data/db_queries/drivers_standings.sql')

In [None]:
df_wdc

In [None]:
# Constructors' standings for every season, plus an independent copy of the 2025 rows
df_wcc = f1_db.run_query_file('data/db_queries/constructors_standings.sql')
df_wcc_2025 = df_wcc.loc[df_wcc['year'].eq(2025)].copy()

In [None]:
df_wcc

In [None]:
# Race results for every season, plus an independent copy of the 2025 rows
df_races = f1_db.run_query_file('data/db_queries/race_results_report.sql')
df_races_2025 = df_races.loc[df_races['year'].eq(2025)].copy()

In [None]:
df_races

In [None]:
# Adding info to wdc df that will be useful later:
# each standings row gets the driver's race result for that same round.

race_result_renames = {
    'finishing_position': 'finishing_position_at_round',
    'points_scored': 'points_scored_at_round',
}

df_wdc = (
    df_wdc
    # Drop columns left by a previous run of this cell; without this, running
    # the cell twice would produce duplicated "*_at_round" column names.
    .drop(columns=list(race_result_renames.values()), errors='ignore')
    .merge(
        df_races[['round_id', 'driver_id', *race_result_renames]],
        how='left',
        on=['round_id', 'driver_id'],
    )
    .rename(columns=race_result_renames)
)

df_wdc_2025 = df_wdc[df_wdc['year']==2025].copy()

df_wdc_2025

## Championship Evolution: 

### WCC

McLaren easily won the WCC. Let's take a look:

In [None]:
plot_wcc(
    df_campeonato=df_wcc_2025,
    )


In [None]:
# Same chart, this time passing four team names as `times_destaque`
wcc_highlight_kwargs = dict(
    df_campeonato=df_wcc_2025,
    times_destaque=['McLaren', 'Red Bull', 'Ferrari', 'Mercedes'],
)
plot_wcc(**wcc_highlight_kwargs)

Above we can see the evolution of the 2025 WCC.

By that we can see that McLaren clearly had the advantage along the year.

The only other teams fighting each other for the remaining top-3 positions were Mercedes, Red Bull and Ferrari (somehow).

### Manim

Animated WCC chart:

#### Adjustments for animated chart:

In [None]:
# Rename constructors so that their names match the keys of the color dictionary

dict_fix_constructor_names = dict([
    ('RB F1 Team', 'VCARB'),
    ('Haas F1 Team', 'Haas'),
    ('Alpine F1 Team', 'Alpine'),
])

df_wcc_2025['constructor_name'] = df_wcc_2025['constructor_name'].replace(
    to_replace=dict_fix_constructor_names
)

In [None]:
# Hex color used for each constructor in the charts
dict_team_colors = {
    'Red Bull': '#3671C6',       # classic blue
    'Mercedes': '#27F4D2',       # Petronas green/cyan (stands out on dark backgrounds)
    'Ferrari': '#E80020',        # rosso corsa
    'McLaren': '#FF8000',        # papaya orange
    'Aston Martin': '#229971',   # British racing green
    'Alpine': '#0090FF',         # Alpine blue (note: the BWT pink is sometimes used)
    'Williams': '#64C4FF',       # sky blue
    'VCARB': '#6692FF',          # "Visa Cash App" blue (lighter than Red Bull's)
    'Sauber': '#52E252',         # neon green (Kick/Stake branding)
    'Haas': '#B6BABD',           # gray/white (neutral)
    'Cooper-Climax': '#004225',  # dark green (classic)
}

In [None]:
# Race names in round order (one entry per round), with "Grand Prix" shortened to "GP"
rounds_in_order = (
    df_wcc_2025[['round_id', 'race_name']]
    .drop_duplicates()
    .sort_values('round_id')
)

race_list = [
    full_name.replace('Grand Prix', 'GP')
    for full_name in rounds_in_order['race_name'].tolist()
]

In [None]:
# Axis extents for the animated WCC chart
round_ids_2025 = df_wcc_2025['round_id']

y_max = 10 + df_wcc_2025['points'].max()                 # highest points total plus 10
x_max = 1 + round_ids_2025.max() - round_ids_2025.min()  # span of round ids, inclusive

# One group per constructor, consumed by the chart class
df_wcc_2025_grouped = df_wcc_2025.groupby('constructor_name')

In [None]:
# Axis options handed to the animated charts as `axis_config`
dict_axis_config = dict(
    include_numbers=True,
    color=GRAY,
    include_ticks=False,
    include_tip=False,
    font_size=20,
)

In [None]:
# Logo file name for each team: lower-cased team name, spaces replaced by
# underscores, ".png" appended (e.g. "Aston Martin" -> "aston_martin.png").
team_file_map = {
    team: team.lower().replace(" ", "_") + ".png"
    for team in (
        "Red Bull",
        "Ferrari",
        "McLaren",
        "Mercedes",
        "Aston Martin",
        "Alpine",
        "Williams",
        "VCARB",
        "Sauber",
        "Haas",
    )
}

In [None]:
class WCC_Animated_Chart(LineChampionshipChart):
    """Animated line chart of the 2025 constructors' championship.

    Thin wrapper around ``LineChampionshipChart`` that pre-fills its arguments
    with the notebook-level data (grouped standings, race list, colors, axis
    limits, logo files). Extra keyword arguments are forwarded unchanged.
    """

    def __init__(self, **kwargs):
        # The logo directory is passed to the base class as a plain string
        logos_path_str = str(project_root / 'notebooks/2025_championship/assets/team_logos/')

        chart_settings = dict(
            # --- data ---
            df_grouped=df_wcc_2025_grouped,
            race_list=race_list,
            team_colors=dict_team_colors,
            x_max=x_max,
            y_max=y_max,
            # --- customization: axis options dictionary ---
            axis_config=dict_axis_config,
            # --- logos ---
            logos_dir=logos_path_str,
            logo_map=team_file_map,
            # --- video controls ---
            static_mode=False,  # False = render the animated video
            show_gap=True,      # draw the gap line
            # --- style ---
            color_highlight=dict_team_colors.get('McLaren', "#FF8000"),
            chart_title='F1 2025 World Constructors Championship',
        )

        super().__init__(**chart_settings, **kwargs)

In [None]:
# Manim settings applied only while the WCC chart is rendered
config_wcc = dict(
    media_dir="./media_wcc",
    transparent=True,
    quality="high_quality",
    preview=False,
    verbosity="WARNING",
)

# Flag so that the chart doesn't get generated every time the notebook runs
generate_chart = True

if generate_chart:
    # The 'with' block applies the settings temporarily and forces the paths
    # to be recomputed, so the scene is instantiated and rendered INSIDE it.
    with tempconfig(config_wcc):
        scene = WCC_Animated_Chart()
        scene.render()

        # Report where the file was saved (inside the context, `config` reflects the tempconfig)
        print(f"Vídeo salvo em: {os.path.abspath(config.output_file)}")

### Let's take a look at points in final round:

In [None]:
# Standings after the last 2025 round, with each constructor's share of all points
last_round_id_2025 = df_wcc_2025['round_id'].max()
df_wcc_2025_final_round = df_wcc_2025.loc[df_wcc_2025['round_id'].eq(last_round_id_2025)].copy()

total_points_2025 = df_wcc_2025_final_round['points'].sum()
df_wcc_2025_final_round['points_percentage'] = (
    df_wcc_2025_final_round['points'].div(total_points_2025).mul(100)
)

df_wcc_2025_final_round

In [None]:
# Bar chart: points per constructor after the final round (figure is saved to GRAFS_DIR)
final_points_chart = dict(
    df_dados=df_wcc_2025_final_round,
    x_col='constructor_name',
    y_col='points',
    hue_col=None,
    cores_map=dict_team_colors,
    titulo="Points by Constructor in Final Round 2025",
    xlabel="Constructor",
    ylabel="Points",
    save_fig=True,
    save_path=GRAFS_DIR,
    dodge=False,
)
graf_barras_padrao(**final_points_chart)

In [None]:
# Bar chart: share of all points per constructor after the final round
final_percentage_chart = dict(
    df_dados=df_wcc_2025_final_round,
    x_col='constructor_name',
    y_col='points_percentage',
    hue_col=None,
    cores_map=dict_team_colors,
    titulo="Points Percentage by Constructor in Final Round 2025",
    xlabel="Constructor",
    ylabel="Points Percentage (%)",
    fmt_rotulo='%.2f%%',
    save_fig=True,
    save_path=GRAFS_DIR,
    dodge=False,
)
graf_barras_padrao(**final_percentage_chart)

Above we can see that McLaren scored way more points than any other team in terms of percentage, and also how tight the battle was for second place between Mercedes, Red Bull and Ferrari.

And also, I feel kinda sorry for Alpine.

Let's look how this McLaren "domination" looks like if we compare it to historical values:

In [None]:
# First, rank the highest historical shares of points in the WCC

# Last round of each season, stored on every row of that season
df_wcc['last_round_id_in_year'] = df_wcc.groupby('year')['round_id'].transform('max')

is_season_finale = df_wcc['round_id'].eq(df_wcc['last_round_id_in_year'])
df_wcc_final_round = df_wcc.loc[is_season_finale].copy()

# Each constructor's share of all points in its season's final standings
df_wcc_final_round['total_points_score_in_year'] = (
    df_wcc_final_round.groupby('year')['points'].transform('sum')
)
df_wcc_final_round['points_percentage'] = (
    df_wcc_final_round['points']
    .div(df_wcc_final_round['total_points_score_in_year'])
    .mul(100)
)
df_wcc_final_round['highest_percentage_in_year'] = (
    df_wcc_final_round.groupby('year')['points_percentage'].transform('max')
)

# Keep only the constructor(s) holding the highest share in each season
is_season_best = df_wcc_final_round['points_percentage'].eq(
    df_wcc_final_round['highest_percentage_in_year']
)
df_wcc_final_round_highest_pctgs = df_wcc_final_round.loc[is_season_best].copy()

# "Constructor (year)" label used on the chart axis
df_wcc_final_round_highest_pctgs['constructor_name_year'] = (
    df_wcc_final_round_highest_pctgs['constructor_name']
    + ' ('
    + df_wcc_final_round_highest_pctgs['year'].astype(str)
    + ')'
)

df_wcc_final_round_highest_pctgs = df_wcc_final_round_highest_pctgs.sort_values(
    'highest_percentage_in_year', ascending=False
)

df_wcc_final_round_highest_pctgs

In [None]:
df_wcc_final_round_highest_pctgs['constructor_name_year'].head(10).unique()

In [None]:
# Color for each "Constructor (year)" label: a team's color applies when the
# team name occurs inside the label; if several teams match, the last one in
# dict_team_colors wins. Labels without any match get no entry.
dict_map_cores_highest_pctgs = {}

for name_year in df_wcc_final_round_highest_pctgs['constructor_name_year'].unique():
    matching_colors = [cor for equipe, cor in dict_team_colors.items() if equipe in name_year]
    if matching_colors:
        dict_map_cores_highest_pctgs[name_year] = matching_colors[-1]

top10_all_time = df_wcc_final_round_highest_pctgs.head(10)

graf_barras_padrao(
    df_dados=top10_all_time,
    x_col='constructor_name_year',
    y_col='points_percentage',
    hue_col=None,
    cores_map=dict_map_cores_highest_pctgs,
    titulo="Top 10 Highest Points Percentage by Constructor in a Season (WCC History)",
    xlabel="Constructor (Year)",
    ylabel="Points Percentage (%)",
    fmt_rotulo='%.2f%%',
    save_fig=True,
    save_path=GRAFS_DIR,
    dodge=False,
)

Historically speaking, there have been more dominant performances, but all from completely different eras of F1.

If we take a look at only the past 20 years:

In [None]:
# Same ranking, restricted to seasons after 2004
since_2005 = df_wcc_final_round_highest_pctgs['year'].gt(2004)
top10_since_2005 = df_wcc_final_round_highest_pctgs.loc[since_2005].head(10)

graf_barras_padrao(
    df_dados=top10_since_2005,
    x_col='constructor_name_year',
    y_col='points_percentage',
    hue_col=None,
    cores_map=dict_map_cores_highest_pctgs,
    titulo="Top 10 Highest Points Percentage by Constructor in a Season (WCC Past 20 Years)",
    xlabel="Constructor (Year)",
    ylabel="Points Percentage (%)",
    fmt_rotulo='%.2f%%',
    save_fig=True,
    save_path=GRAFS_DIR,
    dodge=False,
)

Still not in the top 10, but not that far off. But anyways, despite not being one of the most dominant performances historically speaking (*coff* could have been considering some stuff that happened in the season *coff*) it was still a breeze.

If we take 2007 out (spygate):

In [None]:
# Same "past 20 years" window as the previous chart (seasons after 2004), with
# only the 2007 season removed. Filtering on `year > 2007` would silently drop
# 2005 and 2006 as well, which is not what the title and the text describe.
seasons_ex_2007 = df_wcc_final_round_highest_pctgs[
    (df_wcc_final_round_highest_pctgs['year'] > 2004)
    & (df_wcc_final_round_highest_pctgs['year'] != 2007)
]

graf_barras_padrao(
    df_dados=seasons_ex_2007.head(10),
    x_col='constructor_name_year',
    y_col='points_percentage',
    hue_col=None,
    cores_map=dict_map_cores_highest_pctgs,
    titulo="Top 10 Highest Points Percentage by Constructor in a Season (WCC Past 20 Years - Ex 2007)",
    xlabel="Constructor (Year)",
    ylabel="Points Percentage (%)",
    fmt_rotulo='%.2f%%',
    save_fig=True,
    save_path=GRAFS_DIR,
    dodge=False
)

Well well... If we take 2007 out of the list (which makes sense because there was a disqualification), then McLaren shows up in the top 10. And I find that absolutely hilarious. If you don't understand why I find this hilarious, I suggest searching for what happened in the 2007 WCC after watching this video.

And certainly McLaren could be higher up in this list if it wasn't for some "questionable" actions/misfortunes along the championship.

While there’s plenty to unpack in the Constructors' Championship, the real story of the year was the Drivers' Championship. As we dive deeper into that, I’ll circle back to some of the questionable moves and misfortunes that defined the season.

### WDC

In contrast, if we take a look at the WDC

In [None]:
# Driver colors, declared team by team and then flattened into a single
# surname -> hex color dictionary (declaration order is preserved).
_driver_colors_by_group = [
    ('Red Bull Racing', {'Verstappen': "#1D1A7F", 'Perez': "#7878FF", 'Lawson': "#7878FF"}),
    ('McLaren', {'Norris': "#FF8000", 'Piastri': "#E0C52D"}),
    ('Ferrari', {'Leclerc': "#DC0000", 'Hamilton': "#E81A17"}),
    ('Mercedes', {'Russell': "#00D2BE", 'Antonelli': "#399A92"}),
    ('Aston Martin', {'Alonso': "#006F62", 'Stroll': "#00A391"}),
    ('Williams', {'Sainz': "#0090FF", 'Albon': "#041E42"}),
    ('Alpine', {'Gasly': "#005BA9", 'Doohan': "#FF87BC", 'Colapinto': "#156FAB"}),
    ('Sauber (Audi prelude)', {'Hülkenberg': "#52E252", 'Bortoleto': "#1E1E1E"}),
    ('Haas', {'Ocon': "#B6BABD", 'Bearman': "#F94040"}),
    ('RB (VCARB)', {'Tsunoda': "#6692FF", 'Hadjar': "#FFFFFF"}),
    # Drivers from past seasons
    ('Historical', {'Vettel': '#243E94', 'Hunt': '#111111', 'Hill': '#002F6C', 'Ickx': '#8B0000'}),
]

dict_cores_pilotos = {
    surname: color
    for _group, drivers in _driver_colors_by_group
    for surname, color in drivers.items()
}

In [None]:
# Drivers' standings rows belonging to the last 2025 round
is_last_round_2025 = df_wdc_2025['round_id'].eq(df_wdc_2025['round_id'].max())
df_wdc_2025_final_round = df_wdc_2025.loc[is_last_round_2025]

In [None]:
# Bar chart of the final 2025 drivers' standings, one bar per driver surname.
# The title previously read "Stadings"; the typo is fixed here.
graf_barras_padrao(
    df_dados=df_wdc_2025_final_round,
    x_col='driver_surname',
    y_col='points',
    cores_map=dict_cores_pilotos,
    barlabel_fontsize=20,
    axislabel_fontsize=20,
    xlabel='Driver',
    ylabel='Points',
    titulo='2025 WDC F1 Standings at Season End',
    title_fontsize=22,
    tick_fontsize=18
)

In [None]:
plot_wdc(df_campeonato=df_wdc_2025)

In [None]:
# WDC evolution with the three title contenders passed as `pilotos_destaque`.
# The figure is saved to GRAFS_DIR, the same output folder used by every other
# saved chart in this notebook (a separate hard-coded 'grafs/2025_championship'
# path was used here before).
plot_wdc(
    df_campeonato=df_wdc_2025,
    pilotos_destaque=[
        'Max Verstappen',
        'Lando Norris',
        'Oscar Piastri'
    ],
    save_fig=True,
    save_path=GRAFS_DIR
    )

In contrast to the WCC, the WDC was a tight battle between Verstappen, Piastri and the champion Norris. The battle can be divided into three different phases in my view:

- Start -- Emilia Romagna: Tighter battle between the three, with no clear advantage between the three of them. But Oscar was starting to take the lead of the championship.

- Emilia Romagna -- Dutch GP: RBR's downfall and a lead well maintained by Oscar

- From the Dutch GP: Rise of Verstappen and Norris and Oscar downfall

#### Manim Animated Chart for WDC:

In [None]:
# Axis extents for the animated WDC chart
wdc_round_ids = df_wdc_2025['round_id']

y_max_wdc = 10 + df_wdc_2025['points'].max()            # highest points total plus 10
x_max_wdc = 1 + wdc_round_ids.max() - wdc_round_ids.min()  # span of round ids, inclusive

# Only the three title contenders, one group per driver
title_contenders = ['Verstappen', 'Norris', 'Piastri']
df_wdc_2025_vpn = df_wdc_2025.loc[df_wdc_2025['driver_surname'].isin(title_contenders)]
df_wdc_2025_vpn_grouped = df_wdc_2025_vpn.groupby('driver_surname')


In [None]:
# Picture file name for each driver: lower-cased surname plus ".png"
faces_file_map = {
    surname: f"{surname.lower()}.png"
    for surname in ("Verstappen", "Norris", "Piastri")
}

In [None]:
class WDC_Animated_Chart(LineChampionshipChart):
    """Animated line chart of the 2025 drivers' championship (top 3 drivers).

    Thin wrapper around ``LineChampionshipChart`` that pre-fills its arguments
    with the notebook-level data (grouped standings of the three contenders,
    race list, driver colors, axis limits, driver pictures). Extra keyword
    arguments are forwarded unchanged.
    """

    def __init__(self, **kwargs):
        # The pictures directory is passed to the base class as a plain string
        faces_path_str = str(project_root / 'notebooks/2025_championship/assets/drivers_pics/')

        chart_settings = dict(
            # --- data ---
            df_grouped=df_wdc_2025_vpn_grouped,
            race_list=race_list,
            team_colors=dict_cores_pilotos,
            x_max=x_max_wdc,
            y_max=y_max_wdc,
            # --- customization: axis options dictionary ---
            axis_config=dict_axis_config,
            # --- driver pictures (passed through the logo parameters) ---
            logos_dir=faces_path_str,
            logo_map=faces_file_map,
            # --- video controls ---
            static_mode=False,  # False = render the animated video
            show_gap=False,     # gap line turned off for this chart
            # --- style ---
            color_highlight=dict_team_colors.get('McLaren', "#FF8000"),
            chart_title='F1 2025 World Drivers Championship - Top 3',
        )

        super().__init__(**chart_settings, **kwargs)

In [None]:
# Manim settings applied only while the WDC chart is rendered
config_wdc = dict(
    media_dir="./media_wdc",
    transparent=True,
    quality="high_quality",
    preview=False,
    verbosity="WARNING",
)

# Flag so that the chart doesn't get generated every time the notebook runs
generate_chart = True

if generate_chart:
    # The 'with' block applies the settings temporarily and forces the paths
    # to be recomputed, so the scene is instantiated and rendered INSIDE it.
    with tempconfig(config_wdc):
        scene_2 = WDC_Animated_Chart()
        scene_2.render()

        # Report where the file was saved (inside the context, `config` reflects the tempconfig)
        print(f"Vídeo salvo em: {os.path.abspath(config.output_file)}")

## WDC Deep dive:

In [None]:
# Creating columns and preparing datasets for analysis

# Points of the championship leader after each round. The maximum is taken on
# the 2025 frame itself, so the result no longer depends on the all-seasons
# frame sharing the same row index as the 2025 copy.
df_wdc_2025['points_for_leader'] = df_wdc_2025.groupby('round_id')['points'].transform('max')

# How far each driver is behind the leader after each round (0 for the leader)
df_wdc_2025['gap_to_leader'] = df_wdc_2025['points_for_leader'] - df_wdc_2025['points']

# Independent copy with only the three title contenders
df_wdc_2025_vpn = df_wdc_2025[df_wdc_2025['driver_surname'].isin(['Verstappen', 'Norris', 'Piastri'])].copy()

# Rounds in ascending order; within a round, the largest gap first
df_wdc_2025_vpn = df_wdc_2025_vpn.sort_values(['round_id', 'gap_to_leader'], ascending=[True, False])

df_wdc_2025_vpn

In [None]:
df_wdc_2025_vpn[df_wdc_2025_vpn['race_name']=='Las Vegas Grand Prix']

In [None]:
# Gap to the leader per race for the three contenders; rows with a zero gap
# (the leader of each round) are left out.
drivers_behind_leader = df_wdc_2025_vpn.loc[df_wdc_2025_vpn['gap_to_leader'].gt(0)]

graf_barras_padrao(
    df_dados=drivers_behind_leader,
    x_col='race_name',
    y_col='gap_to_leader',
    hue_col='driver_surname',
    titulo='Points Gap to Leader',
    ylabel='Points Gap',
    xlabel='Round',
    titulo_legenda='Driver',
    dodge=True,
    barlabel_fontsize=14,
    cores_map=dict_cores_pilotos,
)

In the chart above, we can see a little bit clearer what I was referring to:

- We can see that until Emilia Romagna, the championship didn't have a clear leader. The points gaps were still building and we were having a tight battle;
- After Emilia Romagna, the story changes: Piastri stays in the lead for several rounds and Verstappen's gap to the lead increases immensely.
- Then, with the Dutch GP, another shift in the championship battle starts. Verstappen starts making a historic comeback (more on that later) and Norris starts to close in on Piastri, securing the lead in the final rounds and ultimately winning the championship.

### WDC Chapters:

And, to make the story easier to unpack, I'm going to separate the championship between these different phases, looking at stats and important events that happened in each chapter. Focusing only, of course, in the battle for the championship between the 3 drivers:

In [None]:
# Adding the chapters to the dataframe:

# Round ids (hard-coded) at which a new chapter of the season starts
em_rom_round_id = 1132    # first round of the "Mid Season" chapter

dutch_gp_round_id = 1140  # first round of the "Late Season" chapter

def add_chapter_to_df(df, round_id_col='round_id', mid_season_start=None, late_season_start=None):
    """Return a copy of ``df`` with a categorical ``season_chapter`` column.

    Rounds are labelled by their round id:

    - ``'Early Season'``: round id below ``mid_season_start``
    - ``'Mid Season'``: from ``mid_season_start`` up to (not including) ``late_season_start``
    - ``'Late Season'``: ``late_season_start`` and later

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the round ids. It is not modified.
    round_id_col : str
        Name of the column with the round ids.
    mid_season_start, late_season_start : int, optional
        Chapter boundaries. When omitted, the module-level
        ``em_rom_round_id`` and ``dutch_gp_round_id`` are used.

    Returns
    -------
    pandas.DataFrame
        A new frame with the extra ``season_chapter`` column.
    """
    if mid_season_start is None:
        mid_season_start = em_rom_round_id
    if late_season_start is None:
        late_season_start = dutch_gp_round_id

    choices = ['Early Season', 'Mid Season', 'Late Season']

    # Work on a copy so the caller's frame is never changed behind its back
    labelled = df.copy()
    # Bins are right-inclusive, hence the "- 1": a boundary round opens the next chapter
    labelled['season_chapter'] = pd.cut(
        labelled[round_id_col],
        bins=[-float('inf'), mid_season_start - 1, late_season_start - 1, float('inf')],
        labels=choices,
    )
    return labelled

df_wdc_2025_vpn = add_chapter_to_df(df_wdc_2025_vpn)

df_wdc_2025_vpn

In [None]:
# Per chapter and driver: gap to the leader, finishing position and points scored.
# `season_chapter` is categorical, so `observed` is passed explicitly: this
# avoids the pandas FutureWarning about its changing default and keeps the
# result identical across pandas versions.
df_wdc_chapters = df_wdc_2025_vpn.groupby(
    [
        'season_chapter',
        'driver_surname'
    ],
    observed=True
).agg(
    {
        'gap_to_leader': ['max', 'mean'],
        'finishing_position_at_round': ['mean', 'min'],
        'points_scored_at_round': ['sum', 'mean']
    }
).reset_index()

# Flatten the two-level column names as "<column>_<aggregation>"; the grouping
# keys have an empty second level and therefore end with an underscore
# ("season_chapter_", "driver_surname_").
df_wdc_chapters.columns = ['_'.join(col).strip() for col in df_wdc_chapters.columns.values]

df_wdc_chapters

In [None]:
# Average finishing position per chapter, one bar per driver
chapters_by_avg_position = df_wdc_chapters.sort_values(
    ['season_chapter_', 'finishing_position_at_round_mean'], ascending=True
)

graf_barras_padrao(
    df_dados=chapters_by_avg_position,
    x_col='season_chapter_',
    y_col='finishing_position_at_round_mean',
    hue_col='driver_surname_',
    fmt_rotulo='%.2f',
    dodge=True,
    xlabel='Season Chapter',
    ylabel='Average Finishing Position',
    titulo='Average Finishing Position by Round',
    titulo_legenda='Driver',
    cores_map=dict_cores_pilotos,
)

Above we can see that in each chapter of the championship, the average finishing position changes quite substantially for each driver. This can also be seen when we look at the average points gained:

In [None]:
# Average points scored per round in each chapter; rows keep the same ordering
# (chapter, then average finishing position) as the previous chart.
chapters_in_position_order = df_wdc_chapters.sort_values(
    ['season_chapter_', 'finishing_position_at_round_mean'], ascending=True
)

graf_barras_padrao(
    df_dados=chapters_in_position_order,
    x_col='season_chapter_',
    y_col='points_scored_at_round_mean',
    hue_col='driver_surname_',
    fmt_rotulo='%.2f',
    dodge=True,
    xlabel='Season Chapter',
    ylabel='Average points gained by Round',
    titulo='Average points gained by Round by Season Chapter',
    titulo_legenda='Driver',
    cores_map=dict_cores_pilotos,
)

This also helps defining the three different phases of the championship but doesn't answer how it happened. Well, let's try to take a look...

### Diving a bit into each chapter:

In [None]:
df_wdc_2025_vpn

In [None]:
# Chapter 1: from the first round in the data up to the round before the Emilia Romagna boundary
early_chapter = dict(
    start_round=df_wdc_2025_vpn['round_id'].min(),
    end_round=em_rom_round_id - 1,
)
plot_chapter_cards(df_dados=df_wdc_2025_vpn, cores_map=dict_cores_pilotos, **early_chapter)

In [None]:
# Chapter 2: from the Emilia Romagna boundary up to the round before the Dutch GP boundary
mid_chapter = dict(
    start_round=em_rom_round_id,
    end_round=dutch_gp_round_id - 1,
)
plot_chapter_cards(df_dados=df_wdc_2025_vpn, cores_map=dict_cores_pilotos, **mid_chapter)

In [None]:
# Chapter 3: from the Dutch GP boundary to the last round in the data
late_chapter = dict(
    start_round=dutch_gp_round_id,
    end_round=df_wdc_2025_vpn['round_id'].max(),
)
plot_chapter_cards(df_dados=df_wdc_2025_vpn, cores_map=dict_cores_pilotos, **late_chapter)

Max's comeback in this part of the championship is simply amazing: a podium in every single one of the last 10 races, including 6 wins.

## Analysis of Max Comeback:

In [None]:
df_wdc_2025_vpn

In [None]:
# Verstappen's rows from the Dutch GP boundary onwards
is_verstappen = df_wdc_2025_vpn['driver_surname'].eq('Verstappen')
is_late_season = df_wdc_2025_vpn['round_id'].ge(dutch_gp_round_id)
df_plot = df_wdc_2025_vpn.loc[is_verstappen & is_late_season].copy()

cor_verstappen = dict_cores_pilotos['Verstappen']

# Bars are keyed by race name, so every race is mapped to Verstappen's color
cores_por_corrida = dict.fromkeys(df_plot['race_name'].unique(), cor_verstappen)

graf_barras_padrao(
    df_dados=df_plot,
    x_col='race_name',
    y_col='gap_to_leader',
    hue_col=None,
    cores_map=cores_por_corrida,
    titulo="Verstappen Gap to Championship Lead - Late Season 2025",
    xlabel="Round",
    ylabel="Points Gap to Leader",
)

As we have already established, this was an absolutely amazing comeback. But how does that compare historically?

### Comparison against historic comebacks:

I thought of a few different ways to do this but I think I've arrived at the best one.

I heard the feedback from my first video when a few people said that I could have looked at the points comparison in terms of percentage and that would help compare current era points with older era points, when the points system was different.

But I think I have arrived at a better solution. What I can do is take the current era's points system and apply it historically, based on race position. That is better especially now that we are analyzing a comeback, because previous points systems punished 2nd place more than the current one does, besides the points awarded in each race also being different.

And, another thing I am going to do is normalize by number of rounds also, so that in any season in history, the same amount of points would be achieved no matter the amount of races that happened on a given season.

So, I created with the help of Gemini Code Assist, a helper class called `PointsNormalizer` that is in this codebase and I'm going to use that, along with WDC historical data to see just how historical this comeback from Verstappen was.

In [None]:
df_wdc

In [None]:
# Number of distinct rounds in every season, repeated on each row of that
# season; this will be needed for the PointsNormalizer.

rounds_per_year = df_wdc.groupby('year')['round_id'].nunique()
df_wdc['number_of_rounds_in_season'] = df_wdc['year'].map(rounds_per_year)

df_wdc

In [None]:
df_wdc['number_of_rounds_in_season'].max()

In [None]:
from src.analysis.championship.points_normalizer import PointsNormalizer

# Normalizer configured with the SCORING_MODERN_25 system; the target is the
# largest number of rounds found in any season of the data.
longest_season_rounds = df_wdc['number_of_rounds_in_season'].max()

pt_norm = PointsNormalizer(
    scoring_system=PointsNormalizer.SCORING_MODERN_25,
    target_rounds=longest_season_rounds,
)

In [None]:
# Points each result would be worth under the modern system, for all races in history:
finishing_positions = df_wdc['finishing_position_at_round']
df_wdc['modern_points_scored_at_round'] = pt_norm.apply_scoring_pandas(finishing_positions)

# Then normalize every value by the number of races held in that season:
df_wdc['modern_points_scored_at_round_normalized'] = [
    pt_norm.normalize_points_by_number_of_rounds(round_points, rounds_in_season)
    for round_points, rounds_in_season in zip(
        df_wdc['modern_points_scored_at_round'],
        df_wdc['number_of_rounds_in_season'],
    )
]

In [None]:
df_wdc

Now that I have a comparable points system across my entire historical dataset, I can try to do some calculations to identify large point gaps that were reduced (i.e. comebacks). Let's go; I will be commenting my code below:

In [None]:
# The comeback metric is built on the comparable (modern, round-normalized)
# points computed earlier in the notebook, not on the raw championship
# `points`, whose scale depends on the scoring rules of each era.
points_col = 'modern_points_scored_at_round_normalized'
round_keys = ['round_id', 'driver_id']

# Points counted for each row: a missing result counts as zero, and when a
# driver has several rows for the same round only the best one is counted
# (on the first of those rows), so the running total never double counts.
round_points = df_wdc[points_col].fillna(0)
best_round_points = round_points.groupby([df_wdc[key] for key in round_keys]).transform('max')
counted_points = best_round_points.where(~df_wdc.duplicated(round_keys), 0)

# Running total per driver and season, accumulated in round order. The stable
# sort keeps rows of the same round in their original order, so the row that
# carries the counted points stays ahead of its duplicates.
chronological = (
    df_wdc[['year', 'round_id', 'driver_id']]
    .assign(counted_points=counted_points)
    .sort_values(['year', 'round_id'], kind='stable')
)
df_wdc['modern_points_cumulative'] = (
    chronological.groupby(['year', 'driver_id'])['counted_points'].cumsum()
)

# First, I need the points of the leader in each round historically, for each row of my dataframe:

df_wdc['points_for_leader_at_round'] = df_wdc.groupby('round_id')['modern_points_cumulative'].transform('max')

# Then, what I do is calculate the gap:

df_wdc['gap_to_leader_at_round'] = df_wdc['points_for_leader_at_round'] - df_wdc['modern_points_cumulative']

# Then I can identify the biggest gap a given driver had at a given round:

df_wdc['biggest_gap_to_leader_for_driver_in_season'] = df_wdc.groupby(['driver_id', 'year'])['gap_to_leader_at_round'].transform('max')

# After that, I calculate the gap at the end of the year (if that driver ended up on top, the gap will be zero at the end of the year)

# For that I need to identify the last round of each season:
df_wdc['last_round_id_in_season'] = df_wdc.groupby('year')['round_id'].transform('max')
# Then I can filter the dataframe to make my life easier, since I will only be looking at the point gap in the final round
df_wdc_final_rounds = df_wdc[
    (df_wdc['round_id'] == df_wdc['last_round_id_in_season'])
].copy()

# I can calculate the difference between the biggest gap in season, and the gap in the final round
df_wdc_final_rounds['gap_to_leader_reduction'] = df_wdc_final_rounds['biggest_gap_to_leader_for_driver_in_season'] - df_wdc_final_rounds['gap_to_leader_at_round']

In [None]:
df_wdc_final_rounds.sort_values('gap_to_leader_reduction', ascending=False)

In [None]:
# "Surname (year)" label for every row, used on the chart axis
df_wdc_final_rounds['driver_year_id'] = [
    f"{surname} ({year})"
    for surname, year in zip(df_wdc_final_rounds['driver_surname'], df_wdc_final_rounds['year'])
]

In [None]:
df_wdc_final_rounds

In [None]:
# Ten largest reductions of the gap to the championship leader
top_recoveries = df_wdc_final_rounds.sort_values('gap_to_leader_reduction', ascending=False).head(10)

# The bars are keyed by "Surname (year)", while the driver colors are keyed by
# surname only, so the colors are re-keyed by the bar labels. Drivers without
# a configured color get no entry.
cores_recuperacoes = {
    label: dict_cores_pilotos[surname]
    for label, surname in zip(top_recoveries['driver_year_id'], top_recoveries['driver_surname'])
    if surname in dict_cores_pilotos
}

# The x axis shows the drivers and the y axis the points recovered; the two
# axis labels were swapped before.
graf_barras_padrao(
    df_dados=top_recoveries,
    x_col='driver_year_id',
    y_col='gap_to_leader_reduction',
    cores_map=cores_recuperacoes,
    xlabel='Driver (Year)',
    ylabel='Point Reductions to Leader',
    titulo='Biggest Points Recovery in F1 History'
)