In [None]:
# | echo: false

%matplotlib inline

import itertools
import warnings
import logging
import fastf1

from tqdm import tqdm
from itables import show

# Suppress UserWarning and FutureWarning messages for every cell that runs after this one.
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

import arviz as az
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

# Global plot styling. These calls mutate process-wide matplotlib/seaborn state,
# so their relative order matters.
sns.set_theme()
sns.set_style(style="darkgrid", rc={"axes.facecolor": ".9", "grid.color": ".8"})
sns.set_palette(palette="deep")
# Keep the "deep" palette colours available under a module-level name.
sns_c = sns.color_palette(palette="deep")

# Default figure size (width, height) for matplotlib figures.
plt.rcParams["figure.figsize"] = [10, 7]

# Applied last. NOTE(review): presumably overrides any overlapping seaborn
# settings made above - confirm this is the intended precedence.
az.style.use("arviz-docgrid")

In [None]:
# | echo: false

# Seasons whose race weekends are downloaded and scored.
SEASONS = [2022, 2023]

# Maps the result column names selected below (GP_COLS / QUALI_COLS) to the
# snake_case names used in the rest of the notebook.
RENAME_MAP = {
    "Abbreviation": "driver",
    "TeamId": "constructor",
    "ClassifiedPosition": "classified_position",
    "GridPosition": "grid_position",
    "Points": "points",
    "Position": "position",
    "Status": "status",
}

# Columns taken from the race session results.
GP_COLS = [
    "Abbreviation",
    "TeamId",
    "Position",
    "ClassifiedPosition",
    "GridPosition",
    "Points",
    "Status",
]
# Columns taken from the qualifying session results.
QUALI_COLS = ["Abbreviation", "Position"]
# Final column selection and order of the frame returned by get_gp_data.
GP_DATA_COLS = [
    "season",
    "round",
    "circuit",
    "driver",
    "constructor",
    "qualifying_position",
    "grid_position",
    "classified_position",
    "position",
    "points",
    "fastest_lap",
    "status",
]

In [None]:
# | echo: false


def get_gp_data(season, gp):
    """Build the per-driver results table for a single Grand Prix.

    Loads the qualifying and race sessions of the given round, joins each
    driver's qualifying position onto the race results, and adds the
    fastest-lap flag plus the circuit / round / season identifiers.

    Args:
        season: Championship year passed to ``fastf1.get_session``.
        gp: Schedule row exposing ``RoundNumber`` and ``Location`` attributes
            (as produced by ``itertuples`` in ``get_season_data``).

    Returns:
        The joined results restricted to, and ordered by, ``GP_DATA_COLS``.
    """
    round_number = gp.RoundNumber

    quali_session = fastf1.get_session(
        year=season, gp=round_number, identifier="Qualifying"
    )
    quali_session.load(telemetry=False, laps=False, weather=True)

    race_session = fastf1.get_session(
        year=season, gp=round_number, identifier="Race"
    )
    # Lap data is needed for the race only, to find the fastest lap.
    race_session.load(telemetry=False, laps=True, weather=True)

    # Qualifying positions keyed by driver, ready to be joined onto the race results.
    quali_positions = (
        quali_session.results[QUALI_COLS]
        .rename(columns=RENAME_MAP)
        .set_index("driver")
    )

    race_results = race_session.results[GP_COLS].rename(columns=RENAME_MAP)

    # Both frames carry a "position" column; the qualifying one receives the
    # "_qualifying" suffix in the join and is then given its final name.
    merged = race_results.join(
        quali_positions, on="driver", rsuffix="_qualifying"
    ).rename(columns={"position_qualifying": "qualifying_position"})

    fastest_driver = race_session.laps.pick_fastest().Driver

    enriched = merged.assign(
        fastest_lap=merged["driver"] == fastest_driver,
        circuit=gp.Location,
        round=round_number,
        season=season,
    )
    return enriched[GP_DATA_COLS]


def get_season_data(season):
    """Collect the results of every (non-testing) event of one season.

    Args:
        season: Championship year passed to ``fastf1.get_event_schedule``.

    Returns:
        The concatenation of ``get_gp_data`` for each scheduled event, in
        schedule order. The index is not reset here.
    """
    schedule = fastf1.get_event_schedule(season, include_testing=False)
    # Materialise the rows first so tqdm can report the total number of events.
    events = list(schedule[["RoundNumber", "Location"]].itertuples())

    return pd.concat([get_gp_data(season, event) for event in tqdm(events)])


def get_seasons_data(seasons):
    """Load several seasons and stack them into one frame.

    Args:
        seasons: Iterable of championship years.

    Returns:
        All per-season frames concatenated, with a fresh 0..n-1 index.
    """
    per_season = [get_season_data(year) for year in seasons]
    combined = pd.concat(per_season)
    return combined.reset_index(drop=True)

In [None]:
# | output: false
# | echo: false

# Mute all logging (CRITICAL and below) while the sessions are downloaded.
# The try/finally guarantees logging is switched back on even when the load
# fails; otherwise the kernel would stay silenced for every later cell.
logging.disable(logging.CRITICAL)
try:
    data_df = get_seasons_data(SEASONS)
finally:
    logging.disable(logging.NOTSET)

100%|████████████████████████████████████████████████| 22/22 [03:07<00:00,  8.51s/it]
100%|████████████████████████████████████████████████| 22/22 [00:16<00:00,  1.30it/s]


In [None]:
# | echo: false

# NOTE(review): not referenced anywhere in the visible part of this notebook -
# confirm whether it is still needed.
DATASET_PATH = {"gp": "results", "quali": "qualifying"}
# Result statuses that the scoring functions treat as "finished the race".
# NOTE(review): only up to two laps down is listed; if the data can contain
# statuses such as "+3 Laps", those drivers are scored as non-finishers - confirm.
STATUS_FINISHED = ["Finished", "+1 Lap", "+2 Laps"]
    

def _teammate_value(df: pd.DataFrame, column: str) -> pd.Series:
    """Return, row by row, the value of ``column`` held by the driver's teammate.

    Rows are grouped per season, circuit and constructor, and the values inside
    each group are reversed. With the usual two drivers per team every row thus
    receives the other driver's value; a lone driver is paired with themselves.
    """
    # NOTE(review): assumes (season, circuit) identifies exactly one race -
    # confirm if a season can visit the same location twice.
    # transform() always returns a Series aligned to df.index, whereas
    # groupby.apply() with a Series-returning function yields a wide DataFrame
    # when the frame contains a single group. to_numpy() is required so that the
    # reversed values are assigned positionally instead of being re-aligned.
    return df.groupby(["season", "circuit", "constructor"])[column].transform(
        lambda values: values.to_numpy()[::-1]
    )


def add_scoring_cols(df: pd.DataFrame, pit_lane_grid: int = 20) -> pd.DataFrame:
    """Add the helper columns the fantasy scoring functions rely on.

    Args:
        df: Results frame with at least the columns ``season``, ``circuit``,
            ``constructor``, ``qualifying_position``, ``grid_position``,
            ``position``, ``status`` and ``fastest_lap``.
        pit_lane_grid: Grid slot substituted for a grid position of 0
            (pit lane start). Defaults to 20.

    Returns:
        A copy of ``df`` with ``grid_position`` adjusted and the columns
        ``positions_gained``, ``has_pole``, ``beat_teammate_quali``,
        ``beat_teammate_race`` and ``has_fastest_lap`` added.
    """
    # A grid value of 0 indicates a pit lane start; replace it with
    # `pit_lane_grid` so that positions gained are measured from that slot
    # rather than from a meaningless 0.
    with_grid = df.assign(
        grid_position=lambda x: x["grid_position"].where(
            x["grid_position"] != 0, pit_lane_grid
        )
    )

    return with_grid.assign(
        # Positions gained compared to the starting grid position; scoring doesn't
        # care about positions lost, so anything below 0 is clipped to 0.
        positions_gained=lambda x: np.maximum(x["grid_position"] - x["position"], 0),
        # Whether the driver won pole position.
        has_pole=lambda x: x["qualifying_position"] == 1,
        # Whether the driver out-qualified their teammate (smaller = better).
        beat_teammate_quali=lambda x: x["qualifying_position"]
        < _teammate_value(x, "qualifying_position"),
        # Same comparison for the race result, but only credited to drivers
        # whose status counts as finished.
        beat_teammate_race=lambda x: (
            x["position"] < _teammate_value(x, "position")
        )
        & x["status"].isin(STATUS_FINISHED),
        has_fastest_lap=lambda x: x["fastest_lap"] == 1,
    )


def score_driver(x):
    """Compute each driver's fantasy points for the rows of ``x``.

    The qualifying part (10 for pole, 5 for out-qualifying the teammate) always
    counts. The race part (championship points, 2 per position gained, 5 for
    beating the teammate) only counts when the driver's status is listed in
    ``STATUS_FINISHED``; otherwise it is replaced by 0.

    Args:
        x: Frame carrying the columns produced by ``add_scoring_cols`` plus
            ``points`` and ``status``.

    Returns:
        A float Series of fantasy points aligned to ``x``.
    """
    finished = x["status"].isin(STATUS_FINISHED)

    quali_part = pd.Series(
        x["has_pole"] * 10 + x["beat_teammate_quali"] * 5, dtype=float
    )
    race_part = pd.Series(
        x["points"] + x["positions_gained"] * 2 + x["beat_teammate_race"] * 5,
        dtype=float,
    )

    # Non-finishers keep their qualifying score but get nothing for the race.
    return quali_part + race_part.where(finished, 0)


def score_constructor(x):
    """Compute one constructor's fantasy points for a single race.

    Args:
        x: The rows of one constructor in one race, with the columns
            ``status``, ``points`` and ``positions_gained``.

    Returns:
        Sum of the championship points of all rows, plus the positions gained
        by rows whose status is in ``STATUS_FINISHED``, plus a finishing bonus:
        5 when exactly two cars finished, 2 when exactly one did, 0 otherwise.
    """
    is_finisher = x["status"].isin(STATUS_FINISHED)

    # Bonus keyed by the number of finishing cars; any other count earns nothing.
    bonus_by_finishers = {2: 5, 1: 2}
    finish_bonus = bonus_by_finishers.get(int(is_finisher.sum()), 0)

    team_points = x["points"].sum()
    gained_by_finishers = x.loc[is_finisher, "positions_gained"].sum()

    return team_points + gained_by_finishers + finish_bonus


def score_fantasy_points(df: pd.DataFrame) -> pd.DataFrame:
    """Attach driver and constructor fantasy points to every result row.

    Args:
        df: Frame produced by ``add_scoring_cols``.

    Returns:
        A copy of ``df`` with ``driver_fantasy_points`` (from ``score_driver``)
        and ``constructor_fantasy_points`` (from ``score_constructor``) added.
    """
    team_keys = ["season", "circuit", "constructor"]

    def _constructor_points(frame):
        # One score per race and constructor ...
        per_team = (
            frame.groupby(team_keys)
            .apply(score_constructor)
            .rename("constructor_points")
        )
        # ... joined back on the grouping columns so that each driver row of a
        # team carries the same team score.
        return frame.join(per_team, on=team_keys)["constructor_points"]

    return df.assign(
        driver_fantasy_points=score_driver,
        constructor_fantasy_points=_constructor_points,
    )

In [None]:
# | echo: false


# Score every result row, then order the table: latest season first, rounds in
# ascending order within a season, and finishing order within each round.
score_df = (
    data_df
    .pipe(add_scoring_cols)
    .pipe(score_fantasy_points)
    .sort_values(
        by=["season", "round", "position"],
        ascending=[False, True, True],
    )
)

In [None]:
# | echo: false

# Columns of score_df shown in the rendered table, in display order.
DISPLAY_COLS = [
    "season",
    "round",
    "circuit",
    "driver",
    "constructor",
    "qualifying_position",
    "position",
    "status",
    "driver_fantasy_points",
    "constructor_fantasy_points",
]

In [None]:
# | echo: false 
# | title: Fantasy GP points for the 2022 and 2023 Formula 1 seasons

# Render the scored results, limited to DISPLAY_COLS, as an interactive table.
# NOTE(review): the keyword arguments are forwarded to itables; maxBytes=0
# presumably disables itables' size-based downsampling so that every row is
# embedded - confirm against the itables documentation.
show(
    score_df[DISPLAY_COLS],
    scrollX=True,
    pageLength=20,
    buttons=["csv", "colvis"],
    showIndex=False,
    lengthMenu=[20,30,40],
    footer=True,
    search={"regex": True, "caseInsensitive": True},
    maxBytes=0
)

season,round,circuit,driver,constructor,qualifying_position,position,status,driver_fantasy_points,constructor_fantasy_points
Loading... (need help?),,,,,,,,,
season,round,circuit,driver,constructor,qualifying_position,position,status,driver_fantasy_points,constructor_fantasy_points


In [None]:
# | echo: false
# Ad-hoc inspection: load the 2022 "Saudi Arabia" race session and display its
# track status as the cell output.
# NOTE(review): `s` is not used by the scoring pipeline above; this looks like
# leftover exploration - confirm whether the cell should stay in the notebook.
s = fastf1.get_session(2022, "Saudi Arabia", "Race")
s.load(telemetry=False, laps=True, weather=True)
s.track_status