# Downloading data from fastf1

This notebook contains methods to download data from Formula One races using the "fastf1" package.

Before running it, adjust the following parameters to choose what is downloaded and where it is stored.

In [1]:
# Directory that receives the downloaded parquet files
SAVE_FOLDER = "../data/raw/"

# Directory used by the fastf1 cache, which improves download performance
CACHE_FOLDER = "../.cache/"

# Races to download: one {'year', 'round_number'} entry per combination.
# Round 0 is included on purpose; fetch_session handles it as a testing session.
RACES = [
    dict(year=year, round_number=rd)
    for year in (2023,)
    for rd in range(30)
]

In [2]:
from pathlib import Path
import pandas as pd
import fastf1
from warnings import filterwarnings
filterwarnings("ignore")

## Auxiliary methods



In [3]:
def add_session_information(df, session, **kwargs):
    """Attach identifying session columns to ``df``.

    Every keyword argument becomes a column named after the keyword and
    filled with its value. Afterwards the ``session_start_time`` and
    ``t0_date`` attributes of ``session`` are copied into columns of the
    same name.

    The frame is modified in place and also returned.
    """
    for column, value in kwargs.items():
        df[column] = value
    # Session attributes are written last, so they win over same-named kwargs.
    for attribute in ("session_start_time", "t0_date"):
        df[attribute] = getattr(session, attribute)
    return df

## Methods from fastf1

The following subsections encapsulate methods to get specific types of information.

### Year schedule

This method will fetch the schedule of races for a given season.

In [4]:
def fetch_year_schedule(year):
    """Return the fastf1 event schedule for ``year`` with two extra columns.

    ``year`` repeats the requested season on every row and ``is_testing``
    stores the result of the schedule's own ``is_testing()`` call.
    """
    season = fastf1.get_event_schedule(year)
    season["year"] = year
    testing_flags = season.is_testing()
    season["is_testing"] = testing_flags
    return season

### Session

In [5]:
def fetch_session(year, round_number, session_type):
    """Return the (not yet loaded) fastf1 session for one event of a season.

    Parameters
    ----------
    year : int
        Season of the event.
    round_number : int
        ``0`` requests a session of testing event number 1 of the season;
        any positive value is passed to ``fastf1.get_session`` as the round.
    session_type : str
        Session identifier forwarded to fastf1 (e.g. ``"FP1"``, ``"Q"``, ``"R"``).

    Raises
    ------
    ValueError
        If ``round_number`` is negative. Previously this case fell through
        both branches and failed with ``UnboundLocalError``, which callers
        that skip missing sessions via ``ValueError`` did not handle.
    """
    if round_number == 0:
        return fastf1.get_testing_session(year, 1, session_type)
    if round_number > 0:
        return fastf1.get_session(year, round_number, session_type)
    raise ValueError(f"Invalid round: {round_number}")

### Drivers that participated in a session

In [6]:
def fetch_session_drivers(year, round_number, session_type):
    """Return one row per driver of the session, plus session info columns.

    The session is loaded, ``session.get_driver`` is called for every entry
    of ``session.drivers``, and the per-driver results are combined so that
    each driver ends up as a row. The index is reset before the identifying
    session columns are added.
    """
    session = fetch_session(year, round_number, session_type)
    session.load()

    per_driver = []
    for driver_number in session.drivers:
        per_driver.append(pd.DataFrame(session.get_driver(driver_number)))

    # Each driver is one column after the concat; transpose to get rows.
    drivers = pd.concat(per_driver, axis=1).T.reset_index()
    return add_session_information(
        drivers,
        session,
        year=year,
        round_number=round_number,
        session_type=session_type,
    )

### Laps driven in a session

In [7]:
def fetch_session_laps(year, round_number, session_type):
    """Return ``session.laps`` as a DataFrame with session info columns.

    The session is loaded first; the index is reset before the identifying
    columns are added.
    """
    session = fetch_session(year, round_number, session_type)
    session.load()
    laps = pd.DataFrame(session.laps).reset_index()
    return add_session_information(
        laps,
        session,
        year=year,
        round_number=round_number,
        session_type=session_type,
    )

### Weather in a session

In [8]:
def fetch_session_weather(year, round_number, session_type):
    """Return ``session.weather_data`` as a DataFrame with session info columns.

    The session is loaded first; the index is reset before the identifying
    columns are added.
    """
    session = fetch_session(year, round_number, session_type)
    session.load()
    weather = pd.DataFrame(session.weather_data).reset_index()
    return add_session_information(
        weather,
        session,
        year=year,
        round_number=round_number,
        session_type=session_type,
    )

### Race control messages in a session

In [9]:
def fetch_session_race_control(year, round_number, session_type):
    """Return ``session.race_control_messages`` as a DataFrame with session info columns.

    The session is loaded first; the index is reset before the identifying
    columns are added.
    """
    session = fetch_session(year, round_number, session_type)
    session.load()
    messages = pd.DataFrame(session.race_control_messages).reset_index()
    return add_session_information(
        messages,
        session,
        year=year,
        round_number=round_number,
        session_type=session_type,
    )

### Results of a session

In [10]:
def fetch_session_results(year, round_number, session_type):
    """Return ``session.results`` as a DataFrame with session info columns.

    The session is loaded first; the index is reset before the identifying
    columns are added.
    """
    session = fetch_session(year, round_number, session_type)
    session.load()
    results = pd.DataFrame(session.results).reset_index()
    return add_session_information(
        results,
        session,
        year=year,
        round_number=round_number,
        session_type=session_type,
    )

### Status of a session

In [11]:
def fetch_session_status(year, round_number, session_type):
    """Return ``session.session_status`` as a DataFrame with session info columns.

    The session is loaded first; the index is reset before the identifying
    columns are added.
    """
    session = fetch_session(year, round_number, session_type)
    session.load()
    status = pd.DataFrame(session.session_status).reset_index()
    return add_session_information(
        status,
        session,
        year=year,
        round_number=round_number,
        session_type=session_type,
    )

### Telemetry of a session

In [12]:
def fetch_session_telemetry(year, round_number, session_type):
    """Return the car data of every lap of every driver in a session.

    For each driver the per-lap car data is tagged with ``LapNumber``,
    concatenated and sorted by ``Date``; a ``DriverNumber`` column is then
    added. Drivers without laps, or whose data raises ``KeyError`` or
    ``ValueError``, are skipped. The combined frame gets its index reset and
    the identifying session columns appended.
    """

    def _get_by_driver(session, driver_number):
        # Car data of all laps of one driver, or None when there are no laps.
        lap_frames = []
        driver_laps = session.laps.pick_driver(driver_number)
        for _, lap in driver_laps.iterlaps():
            car_data = lap.get_car_data()
            car_data["LapNumber"] = lap["LapNumber"]
            lap_frames.append(car_data)
        if not lap_frames:
            return None
        return pd.concat(lap_frames).sort_values(by=["Date"])

    session = fetch_session(year, round_number, session_type)
    session.load()

    collected = []
    for driver_number in session.drivers:
        try:
            telemetry = _get_by_driver(session, driver_number)
            if telemetry is None:
                continue
            telemetry = telemetry.assign(DriverNumber=driver_number)
        except (KeyError, ValueError):
            # Best effort: a driver whose data cannot be built is left out.
            continue
        collected.append(telemetry)

    combined = pd.concat(collected).reset_index()
    return add_session_information(
        combined,
        session,
        year=year,
        round_number=round_number,
        session_type=session_type,
    )

### Position data (GPS) for a session

In [13]:
def fetch_session_positions(year, round_number, session_type):
    """Return the position data of every lap of every driver in a session.

    For each driver the per-lap position data is tagged with ``LapNumber``,
    concatenated and sorted by ``Date``; a ``DriverNumber`` column is then
    added. Drivers without laps, or whose data raises ``KeyError`` or
    ``ValueError``, are skipped. The combined frame gets its index reset and
    the identifying session columns appended.
    """

    def _get_by_driver(session, driver_number):
        # Position data of all laps of one driver, or None when there are no laps.
        lap_frames = []
        driver_laps = session.laps.pick_driver(driver_number)
        for _, lap in driver_laps.iterlaps():
            pos_data = lap.get_pos_data()
            pos_data["LapNumber"] = lap["LapNumber"]
            lap_frames.append(pos_data)
        if not lap_frames:
            return None
        return pd.concat(lap_frames).sort_values(by=["Date"])

    session = fetch_session(year, round_number, session_type)
    session.load()

    collected = []
    for driver_number in session.drivers:
        try:
            positions = _get_by_driver(session, driver_number)
            if positions is None:
                continue
            positions = positions.assign(DriverNumber=driver_number)
        except (KeyError, ValueError):
            # Best effort: a driver whose data cannot be built is left out.
            continue
        collected.append(positions)

    combined = pd.concat(collected).reset_index()
    return add_session_information(
        combined,
        session,
        year=year,
        round_number=round_number,
        session_type=session_type,
    )

## Run methods

The following subsections encapsulate methods that save information locally.

### Year schedule

In [14]:
def download_year_schedule(year, parent_folder, force=False):
    """Save the schedule of ``year`` as ``<year>_schedule.parquet``.

    ``parent_folder`` is created if needed. The schedule is fetched and
    written only when the file does not exist yet or ``force`` is true.

    Returns a list with the written path, or an empty list when nothing was
    written.
    """
    target_dir = Path(parent_folder)
    target_dir.mkdir(parents=True, exist_ok=True)
    path = target_dir / f"{year:04d}_schedule.parquet"
    if not force and path.exists():
        return []
    fetch_year_schedule(year=year).to_parquet(path)
    return [path]

### Single session

This will download and save all information about a given session.

In [15]:
def download_single_session(
    year, round_number, session_type, parent_folder, force=False
):
    """Save every kind of data of one session as parquet files.

    Files are named ``<year><round><session_type>_<suffix>.parquet`` inside
    ``parent_folder``, which is created if needed. A file is fetched and
    written only when it does not exist yet or ``force`` is true.

    Returns the list of paths written by this call.
    """
    target_dir = Path(parent_folder)
    target_dir.mkdir(parents=True, exist_ok=True)
    prefix = f"{year:04d}{round_number:02d}{session_type}"
    fetch_args = {
        "year": year,
        "round_number": round_number,
        "session_type": session_type,
    }

    # One entry per output file: file name suffix and the function producing it.
    plan = [
        {"suffix": "drivers", "method": fetch_session_drivers},
        {"suffix": "laps", "method": fetch_session_laps},
        {"suffix": "weather", "method": fetch_session_weather},
        {"suffix": "race_control", "method": fetch_session_race_control},
        {"suffix": "results", "method": fetch_session_results},
        {"suffix": "status", "method": fetch_session_status},
        {"suffix": "telemetry", "method": fetch_session_telemetry},
        {"suffix": "positions", "method": fetch_session_positions},
    ]

    written = []
    for step in plan:
        path = target_dir / f"{prefix}_{step['suffix']}.parquet"
        if not force and path.exists():
            continue
        step["method"](**fetch_args).to_parquet(path)
        written.append(path)
    return written

### Single weekend

This will download and save all information about all sessions in a given weekend.

In [16]:
def download_single_weekend(year, round_number, parent_folder, force=False):
    """Download every known session type of one race weekend.

    Each session type is attempted in turn. A ``ValueError`` or a fastf1
    ``DataNotLoadedError`` for a session is reported on stdout and that
    session is skipped.

    Returns the list of paths written across all sessions.
    """
    downloaded = []
    for session_type in ("FP1", "FP2", "FP3", "Q", "R", "S", "SS", "SQ"):
        try:
            new_files = download_single_session(
                year=year,
                round_number=round_number,
                session_type=session_type,
                parent_folder=parent_folder,
                force=force,
            )
        except ValueError as error:
            print(f"Session not found: {year:04d}{round_number:02d}{session_type}")
            print(error)
        except fastf1.core.DataNotLoadedError as error:
            print(f"Session not loaded: {year:04d}{round_number:02d}{session_type}")
            print(error)
        else:
            downloaded.extend(new_files)
    return downloaded

## Examples

The cells below run the download for the races configured at the top of the notebook.

In [17]:
# Collects the paths returned by the download cells below
DOWNLOADED = []

In [18]:
# Create the cache directory if needed, then enable the fastf1 cache on it
path_cache = Path(CACHE_FOLDER)
path_cache.mkdir(parents=True, exist_ok=True)
fastf1.Cache.enable_cache(path_cache)

In [19]:
# Download the schedule once per distinct year listed in RACES
years = {race['year'] for race in RACES}
for year in sorted(years):
    DOWNLOADED.extend(
        download_year_schedule(year=year, parent_folder=SAVE_FOLDER)
    )

In [20]:
# Download all sessions of every configured race weekend
for race in RACES:
    DOWNLOADED.extend(
        download_single_weekend(
            year=race['year'],
            round_number=race['round_number'],
            parent_folder=SAVE_FOLDER,
        )
    )

Session not found: 202300Q
Session type 'Q' does not exist for this event
Session not found: 202300R
Session type 'R' does not exist for this event
Session not found: 202300S
Session type 'S' does not exist for this event
Session not found: 202300SS
Invalid session type 'SS'
Session not found: 202300SQ
Session type 'SQ' does not exist for this event
Session not found: 202301S
Session type 'S' does not exist for this event
Session not found: 202301SS
Invalid session type 'SS'
Session not found: 202301SQ
Session type 'SQ' does not exist for this event
Session not found: 202302S
Session type 'S' does not exist for this event
Session not found: 202302SS
Invalid session type 'SS'
Session not found: 202302SQ
Session type 'SQ' does not exist for this event
Session not found: 202303S
Session type 'S' does not exist for this event
Session not found: 202303SS
Invalid session type 'SS'
Session not found: 202303SQ
Session type 'SQ' does not exist for this event
Session not found: 202304FP2
Session

core           INFO 	Loading data for Austrian Grand Prix - Practice 1 [v2.3.0]
api            INFO 	Using cached data for driver_info
api            INFO 	Using cached data for timing_data
api            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
api            INFO 	Using cached data for session_status_data
api            INFO 	Using cached data for track_status_data
api            INFO 	Using cached data for car_data
api            INFO 	No cached data found for position_data. Loading data...
api            INFO 	Fetching position data...
api            INFO 	Using cached data for weather_data
api            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '63', '55', '44', '31', '27', '24', '23', '22', '21', '20', '18', '16', '14', '11', '10', '4', '2', '77', '81']


Session not loaded: 202309FP1
The data you are trying to access has not been loaded yet. See `Session.load`
Session not found: 202309FP2
Session type 'FP2' does not exist for this event
Session not found: 202309FP3
Session type 'FP3' does not exist for this event
Session not found: 202309SS
Invalid session type 'SS'
Session not found: 202310S
Session type 'S' does not exist for this event
Session not found: 202310SS
Invalid session type 'SS'
Session not found: 202310SQ
Session type 'SQ' does not exist for this event
Session not found: 202311S
Session type 'S' does not exist for this event
Session not found: 202311SS
Invalid session type 'SS'
Session not found: 202311SQ
Session type 'SQ' does not exist for this event
Session not found: 202312FP2
Session type 'FP2' does not exist for this event
Session not found: 202312FP3
Session type 'FP3' does not exist for this event
Session not found: 202312SS
Invalid session type 'SS'
Session not found: 202313S
Session type 'S' does not exist for t

core           INFO 	Loading data for Qatar Grand Prix - Race [v2.3.0]
api            INFO 	Using cached data for driver_info
api            INFO 	Using cached data for timing_data
api            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
api            INFO 	Using cached data for session_status_data
api            INFO 	Using cached data for track_status_data
api            INFO 	Using cached data for car_data
api            INFO 	No cached data found for position_data. Loading data...
api            INFO 	Fetching position data...
api            INFO 	Parsing position data...
api            INFO 	Data has been written to cache!
api            INFO 	No cached data found for weather_data. Loading data...
api            INFO 	Fetching weather data...
api            INFO 	Data has been written to cache!
api            INFO 	No cached data found for race_control_messages. Loading data...
api            INFO 	Fetching race control messages...

Session not found: 202317SS
Invalid session type 'SS'


api            INFO 	Using cached data for car_data
api            INFO 	Using cached data for position_data
api            INFO 	Using cached data for weather_data
api            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '1', '4', '63', '44', '55', '23', '14', '10', '77', '22', '16', '18', '20', '24', '27', '31', '11', '2', '40']
core           INFO 	Loading data for Qatar Grand Prix - Sprint [v2.3.0]
api            INFO 	Using cached data for driver_info
api            INFO 	Using cached data for timing_data
api            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
api            INFO 	Using cached data for session_status_data
api            INFO 	Using cached data for track_status_data
api            INFO 	Using cached data for car_data
api            INFO 	Using cached data for position_data
api            INFO 	Using cached data for weather_data
api            

Session not found: 202318FP2
Session type 'FP2' does not exist for this event
Session not found: 202318FP3
Session type 'FP3' does not exist for this event


api            INFO 	Data has been written to cache!
api            INFO 	No cached data found for timing_data. Loading data...
api            INFO 	Fetching timing data...
api            INFO 	Parsing timing data...
api            INFO 	Data has been written to cache!
api            INFO 	No cached data found for timing_app_data. Loading data...
api            INFO 	Fetching timing app data...
api            INFO 	Data has been written to cache!
core           INFO 	Processing timing data...
api            INFO 	No cached data found for session_status_data. Loading data...
api            INFO 	Fetching session status data...
api            INFO 	Data has been written to cache!
api            INFO 	No cached data found for track_status_data. Loading data...
api            INFO 	Fetching track status data...
api            INFO 	Data has been written to cache!
api            INFO 	No cached data found for car_data. Loading data...
api            INFO 	Fetching car data...
api           

Session not found: 202318SS
Invalid session type 'SS'


api            INFO 	Using cached data for car_data
api            INFO 	Using cached data for position_data
api            INFO 	Using cached data for weather_data
api            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '44', '16', '4', '11', '55', '10', '63', '23', '81', '31', '3', '14', '22', '27', '77', '24', '20', '2', '18']
core           INFO 	Loading data for United States Grand Prix - Sprint [v2.3.0]
api            INFO 	Using cached data for driver_info
api            INFO 	Using cached data for timing_data
api            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
api            INFO 	Using cached data for session_status_data
api            INFO 	Using cached data for track_status_data
api            INFO 	Using cached data for car_data
api            INFO 	Using cached data for position_data
api            INFO 	Using cached data for weather_data
api     

Session not found: 202319S
Session type 'S' does not exist for this event
Session not found: 202319SS
Invalid session type 'SS'
Session not found: 202319SQ
Session type 'SQ' does not exist for this event
Session not found: 202320FP2
Session type 'FP2' does not exist for this event
Session not found: 202320FP3
Session type 'FP3' does not exist for this event
Session not found: 202320SS
Invalid session type 'SS'
Session not found: 202321S
Session type 'S' does not exist for this event
Session not found: 202321SS
Invalid session type 'SS'
Session not found: 202321SQ
Session type 'SQ' does not exist for this event
Session not found: 202322S
Session type 'S' does not exist for this event
Session not found: 202322SS
Invalid session type 'SS'
Session not found: 202322SQ
Session type 'SQ' does not exist for this event
Session not found: 202323FP1
Invalid round: 23
Session not found: 202323FP2
Invalid round: 23
Session not found: 202323FP3
Invalid round: 23
Session not found: 202323Q
Invalid ro

In [21]:
# Print every path collected in DOWNLOADED, in sorted order
for fl in sorted(DOWNLOADED):
    print(fl)

../data/raw/202317R_drivers.parquet
../data/raw/202317R_laps.parquet
../data/raw/202317R_positions.parquet
../data/raw/202317R_race_control.parquet
../data/raw/202317R_results.parquet
../data/raw/202317R_status.parquet
../data/raw/202317R_telemetry.parquet
../data/raw/202317R_weather.parquet
../data/raw/202317SQ_drivers.parquet
../data/raw/202317SQ_laps.parquet
../data/raw/202317SQ_positions.parquet
../data/raw/202317SQ_race_control.parquet
../data/raw/202317SQ_results.parquet
../data/raw/202317SQ_status.parquet
../data/raw/202317SQ_telemetry.parquet
../data/raw/202317SQ_weather.parquet
../data/raw/202317S_drivers.parquet
../data/raw/202317S_laps.parquet
../data/raw/202317S_positions.parquet
../data/raw/202317S_race_control.parquet
../data/raw/202317S_results.parquet
../data/raw/202317S_status.parquet
../data/raw/202317S_telemetry.parquet
../data/raw/202317S_weather.parquet
../data/raw/202318FP1_drivers.parquet
../data/raw/202318FP1_laps.parquet
../data/raw/202318FP1_positions.parquet
