# Downloading data from fastf1

This notebook contains methods to download data from Formula One races using the "fastf1" package.

In [53]:
from pathlib import Path
import pandas as pd
import fastf1
from warnings import filterwarnings
# Suppress every Python warning for the remainder of the notebook.
# NOTE(review): this also hides deprecation warnings from fastf1/pandas -
# confirm that blanket suppression is intended.
filterwarnings("ignore")

## Auxiliary methods



In [54]:
def add_session_information(df, session, **kwargs):
    """Tag every row of ``df`` with information identifying ``session``.

    Each keyword argument becomes a column holding that value on every row.
    Afterwards the columns ``session_start_time`` and ``t0_date`` are filled
    from the attributes of the same name on ``session``.

    ``df`` is modified in place and the very same object is returned.
    """
    for column in kwargs:
        df[column] = kwargs[column]
    # Session-level attributes are written last, so they win over a keyword
    # argument that happens to use the same column name.
    for attribute in ("session_start_time", "t0_date"):
        df[attribute] = getattr(session, attribute)
    return df

## Methods from fastf1

The following subsections encapsulate methods to get specific types of information.

### Year schedule

This method will fetch the schedule of races for a given season.

In [55]:
def fetch_year_schedule(year):
    """Return the fastf1 event schedule of ``year`` with two extra columns.

    ``year`` repeats the requested season on every row and ``is_testing``
    stores whatever the schedule's own ``is_testing()`` call returns.
    """
    season = fastf1.get_event_schedule(year)
    season["year"] = year
    testing_flags = season.is_testing()
    season["is_testing"] = testing_flags
    return season

### Session

In [56]:
def fetch_session(year, round_number, session_type, test_number=1):
    """Return the fastf1 session object for one session of a season.

    The session is only created here; ``load()`` is left to the caller.

    Parameters
    ----------
    year
        Championship season.
    round_number
        ``0`` selects a pre-season testing event; any positive value is
        forwarded to ``fastf1.get_session`` as the round of the season.
    session_type
        Session identifier, forwarded unchanged to fastf1 (e.g. ``"FP1"``).
    test_number
        Which testing event to use when ``round_number`` is ``0``. Defaults
        to ``1``, the value that was previously hard-coded.

    Raises
    ------
    ValueError
        If ``round_number`` is neither zero nor positive. Previously this
        case fell through both branches and surfaced as an
        ``UnboundLocalError`` on the return statement.
    """
    if round_number == 0:
        return fastf1.get_testing_session(year, test_number, session_type)
    if round_number > 0:
        return fastf1.get_session(year, round_number, session_type)
    raise ValueError(
        f"round_number must be 0 (testing) or a positive round, got {round_number!r}"
    )

### Drivers that participated in a session

In [57]:
def fetch_session_drivers(year, round_number, session_type):
    """Load a session and return the driver records of its participants.

    ``session.get_driver`` is queried for every entry of ``session.drivers``;
    the records are placed side by side and transposed so that each driver
    ends up as one row. The index is then moved into a regular column and
    the session-identifying columns are appended.

    ``pandas.concat`` raises ``ValueError`` when the session lists no drivers.
    """
    session = fetch_session(year, round_number, session_type)
    session.load()
    per_driver = []
    for driver_number in session.drivers:
        per_driver.append(pd.DataFrame(session.get_driver(driver_number)))
    drivers = pd.concat(per_driver, axis=1).T.reset_index()
    return add_session_information(
        drivers,
        session,
        year=year,
        round_number=round_number,
        session_type=session_type,
    )

### Laps driven in a session

In [58]:
def fetch_session_laps(year, round_number, session_type):
    """Load a session and return its ``laps`` as a plain DataFrame.

    The original index is moved into a regular column and the
    session-identifying columns are appended.
    """
    session = fetch_session(year, round_number, session_type)
    session.load()
    laps = pd.DataFrame(session.laps).reset_index()
    return add_session_information(
        laps,
        session,
        year=year,
        round_number=round_number,
        session_type=session_type,
    )

### Weather in a session

In [59]:
def fetch_session_weather(year, round_number, session_type):
    """Load a session and return its ``weather_data`` as a plain DataFrame.

    The original index is moved into a regular column and the
    session-identifying columns are appended.
    """
    session = fetch_session(year, round_number, session_type)
    session.load()
    weather = pd.DataFrame(session.weather_data).reset_index()
    return add_session_information(
        weather,
        session,
        year=year,
        round_number=round_number,
        session_type=session_type,
    )

### Race control messages in a session

In [60]:
def fetch_session_race_control(year, round_number, session_type):
    """Load a session and return its ``race_control_messages`` as a DataFrame.

    The original index is moved into a regular column and the
    session-identifying columns are appended.
    """
    session = fetch_session(year, round_number, session_type)
    session.load()
    messages = pd.DataFrame(session.race_control_messages).reset_index()
    return add_session_information(
        messages,
        session,
        year=year,
        round_number=round_number,
        session_type=session_type,
    )

### Results of a session

In [61]:
def fetch_session_results(year, round_number, session_type):
    """Load a session and return its ``results`` as a plain DataFrame.

    The original index is moved into a regular column and the
    session-identifying columns are appended.
    """
    session = fetch_session(year, round_number, session_type)
    session.load()
    results = pd.DataFrame(session.results).reset_index()
    return add_session_information(
        results,
        session,
        year=year,
        round_number=round_number,
        session_type=session_type,
    )

### Status of a session

In [62]:
def fetch_session_status(year, round_number, session_type):
    """Load a session and return its ``session_status`` as a plain DataFrame.

    The original index is moved into a regular column and the
    session-identifying columns are appended.
    """
    session = fetch_session(year, round_number, session_type)
    session.load()
    status = pd.DataFrame(session.session_status).reset_index()
    return add_session_information(
        status,
        session,
        year=year,
        round_number=round_number,
        session_type=session_type,
    )

### Telemetry of a session

In [63]:
def fetch_session_telemetry(year, round_number, session_type):
    """Load a session and return the car data of every lap of every driver.

    For each entry of ``session.drivers`` the car data of all of that
    driver's laps is collected, tagged with ``LapNumber`` and
    ``DriverNumber`` and sorted by ``Date``. Drivers without laps, and
    drivers for which a ``KeyError`` or ``ValueError`` is raised while
    collecting, are skipped silently.

    ``pandas.concat`` raises ``ValueError`` when no driver yields any data.
    """

    def _driver_car_data(session, driver_number):
        """Return one driver's per-lap car data sorted by date, or ``None``."""
        per_lap = []
        for _, lap in session.laps.pick_driver(driver_number).iterlaps():
            car_data = lap.get_car_data()
            car_data["LapNumber"] = lap["LapNumber"]
            per_lap.append(car_data)
        if not per_lap:
            return None
        return pd.concat(per_lap).sort_values(by=["Date"])

    session = fetch_session(year, round_number, session_type)
    session.load()
    per_driver = []
    for driver_number in session.drivers:
        try:
            car_data = _driver_car_data(session, driver_number)
            tagged = (
                None
                if car_data is None
                else car_data.assign(DriverNumber=driver_number)
            )
        except (KeyError, ValueError):
            # Best effort: a driver whose data cannot be assembled is dropped.
            continue
        if tagged is not None:
            per_driver.append(tagged)
    telemetry = pd.concat(per_driver).reset_index()
    return add_session_information(
        telemetry,
        session,
        year=year,
        round_number=round_number,
        session_type=session_type,
    )

### Position data (GPS) for a session

In [64]:
def fetch_session_positions(year, round_number, session_type):
    """Load a session and return the position data of every lap of every driver.

    For each entry of ``session.drivers`` the position data of all of that
    driver's laps is collected, tagged with ``LapNumber`` and
    ``DriverNumber`` and sorted by ``Date``. Drivers without laps, and
    drivers for which a ``KeyError`` or ``ValueError`` is raised while
    collecting, are skipped silently.

    ``pandas.concat`` raises ``ValueError`` when no driver yields any data.
    """

    def _driver_pos_data(session, driver_number):
        """Return one driver's per-lap position data sorted by date, or ``None``."""
        per_lap = []
        for _, lap in session.laps.pick_driver(driver_number).iterlaps():
            pos_data = lap.get_pos_data()
            pos_data["LapNumber"] = lap["LapNumber"]
            per_lap.append(pos_data)
        if not per_lap:
            return None
        return pd.concat(per_lap).sort_values(by=["Date"])

    session = fetch_session(year, round_number, session_type)
    session.load()
    per_driver = []
    for driver_number in session.drivers:
        try:
            pos_data = _driver_pos_data(session, driver_number)
            tagged = (
                None
                if pos_data is None
                else pos_data.assign(DriverNumber=driver_number)
            )
        except (KeyError, ValueError):
            # Best effort: a driver whose data cannot be assembled is dropped.
            continue
        if tagged is not None:
            per_driver.append(tagged)
    positions = pd.concat(per_driver).reset_index()
    return add_session_information(
        positions,
        session,
        year=year,
        round_number=round_number,
        session_type=session_type,
    )

## Run methods

The following subsections encapsulate methods that save information locally.

### Year schedule

In [65]:
def download_year_schedule(year, parent_folder, force=False):
    """Save the schedule of ``year`` as ``<year>_schedule.parquet``.

    ``parent_folder`` is created when missing. An existing file is left
    untouched unless ``force`` is true.

    Returns a list holding the written path, or an empty list when the
    download was skipped.
    """
    target_dir = Path(parent_folder)
    target_dir.mkdir(parents=True, exist_ok=True)
    target = target_dir / f"{year:04d}_schedule.parquet"
    if not force and target.exists():
        return []
    fetch_year_schedule(year=year).to_parquet(target)
    return [target]

### Single session

This will download and save all information about a given session.

In [66]:
def download_single_session(
    year, round_number, session_type, parent_folder, force=False
):
    """Fetch every dataset of one session and store each as a parquet file.

    Files are named ``<year><round><session_type>_<suffix>.parquet`` inside
    ``parent_folder``, which is created when missing. A dataset whose file
    already exists is not fetched again unless ``force`` is true.

    Returns the list of paths written by this call, in plan order.
    """
    target_dir = Path(parent_folder)
    target_dir.mkdir(parents=True, exist_ok=True)
    id_string = f"{year:04d}{round_number:02d}{session_type}"
    fetch_kwargs = {
        "year": year,
        "round_number": round_number,
        "session_type": session_type,
    }

    # One entry per dataset: the file-name suffix and the function fetching it.
    plan = [
        {"suffix": "drivers", "method": fetch_session_drivers},
        {"suffix": "laps", "method": fetch_session_laps},
        {"suffix": "weather", "method": fetch_session_weather},
        {"suffix": "race_control", "method": fetch_session_race_control},
        {"suffix": "results", "method": fetch_session_results},
        {"suffix": "status", "method": fetch_session_status},
        {"suffix": "telemetry", "method": fetch_session_telemetry},
        {"suffix": "positions", "method": fetch_session_positions},
    ]
    written = []
    for step in plan:
        target = target_dir / f"{id_string}_{step['suffix']}.parquet"
        if not force and target.exists():
            continue
        step["method"](**fetch_kwargs).to_parquet(target)
        written.append(target)
    return written

### Single weekend

This will download and save all information about all sessions in a given weekend.

In [67]:
def download_single_weekend(year, round_number, parent_folder, force=False):
    """Download every session type of one round and return the written paths.

    Each session type is attempted in turn via ``download_single_session``.
    A ``ValueError`` or ``fastf1.core.DataNotLoadedError`` raised for one
    session type is reported with ``print`` and that session type then
    contributes no paths; the remaining session types are still attempted.
    """
    saved = []
    for session_type in ("FP1", "FP2", "FP3", "Q", "R", "S", "SS", "SQ"):
        try:
            new_files = download_single_session(
                year=year,
                round_number=round_number,
                session_type=session_type,
                parent_folder=parent_folder,
                force=force,
            )
        except ValueError as error:
            print(f"Session not found: {year:04d}{round_number:02d}{session_type}")
            print(error)
        except fastf1.core.DataNotLoadedError as error:
            print(f"Session not loaded: {year:04d}{round_number:02d}{session_type}")
            print(error)
        else:
            saved.extend(new_files)
    return saved

## Examples

Some code snippets that use the functions above to download and save data.

In [68]:
# Collects the paths returned by the download_* calls in the cells below.
DOWNLOADED = []
# Passed as ``parent_folder`` to the download_* functions below.
SAVE_FOLDER = "../data/raw/"
# Folder handed to fastf1's cache in the next cell.
CACHE_FOLDER = "../.cache/"


In [69]:
# Make sure the cache folder exists, then point fastf1's cache at it.
path_cache = Path(CACHE_FOLDER)
path_cache.mkdir(parents=True, exist_ok=True)
fastf1.Cache.enable_cache(path_cache)

In [70]:
# Walk the seasons from 2024 back to 1950, newest first; schedules that are
# already on disk are skipped by download_year_schedule.
for year in reversed(range(1950, 2025)):
    DOWNLOADED.extend(download_year_schedule(year=year, parent_folder=SAVE_FOLDER))

In [79]:
# range(1) yields only round 0, the value fetch_session maps to a
# pre-season testing event.
for year in range(2020, 2025):
    for round_number in range(1):
        DOWNLOADED.extend(
            download_single_weekend(
                year=year, round_number=round_number, parent_folder=SAVE_FOLDER
            )
        )

core           INFO 	Loading data for Pre-Season Test 1 - Practice 1 [v2.3.0]


core           INFO 	Finished loading data for 0 drivers: []
core           INFO 	Loading data for Pre-Season Test 1 - Practice 2 [v2.3.0]
core           INFO 	Finished loading data for 0 drivers: []
core           INFO 	Loading data for Pre-Season Test 1 - Practice 3 [v2.3.0]
core           INFO 	Finished loading data for 0 drivers: []
core           INFO 	Loading data for Pre-Season Test - Practice 1 [v2.3.0]
core           INFO 	Finished loading data for 0 drivers: []
core           INFO 	Loading data for Pre-Season Test - Practice 2 [v2.3.0]
core           INFO 	Finished loading data for 0 drivers: []
core           INFO 	Loading data for Pre-Season Test - Practice 3 [v2.3.0]
core           INFO 	Finished loading data for 0 drivers: []


Session not found: 202000FP1
No objects to concatenate
Session not found: 202000FP2
No objects to concatenate
Session not found: 202000FP3
No objects to concatenate
Session not found: 202000Q
Session type 'Q' does not exist for this event
Session not found: 202000R
Session type 'R' does not exist for this event
Session not found: 202000S
Session type 'S' does not exist for this event
Session not found: 202000SS
Invalid session type 'SS'
Session not found: 202000SQ
Session type 'SQ' does not exist for this event
Session not found: 202100FP1
No objects to concatenate
Session not found: 202100FP2
No objects to concatenate
Session not found: 202100FP3
No objects to concatenate


core           INFO 	Loading data for Pre-Season Track Session - Practice 1 [v2.3.0]
core           INFO 	Finished loading data for 0 drivers: []
core           INFO 	Loading data for Pre-Season Track Session - Practice 2 [v2.3.0]
core           INFO 	Finished loading data for 0 drivers: []
core           INFO 	Loading data for Pre-Season Track Session - Practice 3 [v2.3.0]
core           INFO 	Finished loading data for 0 drivers: []


Session not found: 202100Q
Session type 'Q' does not exist for this event
Session not found: 202100R
Session type 'R' does not exist for this event
Session not found: 202100S
Session type 'S' does not exist for this event
Session not found: 202100SS
Invalid session type 'SS'
Session not found: 202100SQ
Session type 'SQ' does not exist for this event
Session not found: 202200FP1
No objects to concatenate
Session not found: 202200FP2
No objects to concatenate
Session not found: 202200FP3
No objects to concatenate
Session not found: 202200Q
Session type 'Q' does not exist for this event
Session not found: 202200R
Session type 'R' does not exist for this event
Session not found: 202200S
Session type 'S' does not exist for this event
Session not found: 202200SS
Invalid session type 'SS'
Session not found: 202200SQ
Session type 'SQ' does not exist for this event
Session not found: 202300Q
Session type 'Q' does not exist for this event
Session not found: 202300R
Session type 'R' does not exis

core           INFO 	Loading data for Pre-Season Testing - Practice 2 [v2.3.0]
api            INFO 	No cached data found for driver_info. Loading data...
api            INFO 	Fetching driver list...


Session not found: 202300SS
Invalid session type 'SS'
Session not found: 202300SQ
Session type 'SQ' does not exist for this event


api            INFO 	No cached data found for timing_data. Loading data...
api            INFO 	Fetching timing data...
api            INFO 	No cached data found for car_data. Loading data...
api            INFO 	Fetching car data...
api            INFO 	No cached data found for weather_data. Loading data...
api            INFO 	Fetching weather data...
api            INFO 	No cached data found for race_control_messages. Loading data...
api            INFO 	Fetching race control messages...
core           INFO 	Finished loading data for 0 drivers: []
core           INFO 	Loading data for Pre-Season Testing - Practice 3 [v2.3.0]
api            INFO 	No cached data found for driver_info. Loading data...
api            INFO 	Fetching driver list...


Session not found: 202400FP2
No objects to concatenate


api            INFO 	No cached data found for timing_data. Loading data...
api            INFO 	Fetching timing data...
api            INFO 	No cached data found for car_data. Loading data...
api            INFO 	Fetching car data...
api            INFO 	No cached data found for weather_data. Loading data...
api            INFO 	Fetching weather data...
api            INFO 	No cached data found for race_control_messages. Loading data...
api            INFO 	Fetching race control messages...
core           INFO 	Finished loading data for 0 drivers: []


Session not found: 202400FP3
No objects to concatenate
Session not found: 202400Q
Session type 'Q' does not exist for this event
Session not found: 202400R
Session type 'R' does not exist for this event
Session not found: 202400S
Session type 'S' does not exist for this event
Session not found: 202400SS
Invalid session type 'SS'
Session not found: 202400SQ
Session type 'SQ' does not exist for this event


In [80]:
# List, in sorted order, every path collected in DOWNLOADED during this run.
for fl in sorted(DOWNLOADED):
    print(fl)

../data/raw/202300FP1_drivers.parquet
../data/raw/202300FP1_laps.parquet
../data/raw/202300FP1_positions.parquet
../data/raw/202300FP1_race_control.parquet
../data/raw/202300FP1_results.parquet
../data/raw/202300FP1_status.parquet
../data/raw/202300FP1_telemetry.parquet
../data/raw/202300FP1_weather.parquet
../data/raw/202300FP2_drivers.parquet
../data/raw/202300FP2_laps.parquet
../data/raw/202300FP2_positions.parquet
../data/raw/202300FP2_race_control.parquet
../data/raw/202300FP2_results.parquet
../data/raw/202300FP2_status.parquet
../data/raw/202300FP2_telemetry.parquet
../data/raw/202300FP2_weather.parquet
../data/raw/202300FP3_drivers.parquet
../data/raw/202300FP3_laps.parquet
../data/raw/202300FP3_positions.parquet
../data/raw/202300FP3_race_control.parquet
../data/raw/202300FP3_results.parquet
../data/raw/202300FP3_status.parquet
../data/raw/202300FP3_telemetry.parquet
../data/raw/202300FP3_weather.parquet
../data/raw/202400FP1_drivers.parquet
../data/raw/202400FP1_laps.parquet
