In [None]:
import os 
import fastf1
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# enable cache to locally store data, so that when load data don't need to take them every time from the api 
cache_dir = os.path.expanduser('~/fastf1_cache')
os.makedirs(cache_dir, exist_ok=True)
fastf1.Cache.enable_cache(cache_dir)

# I don't know if this is needed considering what we are doing now 

In [None]:
def fetch_and_append_event(year, event_name, 
                           session_types=["FP1", "FP2", "FP3", "Q", "R"], 
                           csv_path="data/f1_laps.csv"):
    df_list = []
    
    # Fetch sessions for the specified event
    for session_type in session_types:
        try:
            session = fastf1.get_session(year, event_name, session_type)
            session.load(laps=True)
            laps = session.laps.copy()
            results = session.results.copy()

            # Add some columns to keep track of the event
            
            laps["Session"] = session_type
            laps["Year"] = year
            laps["Round"] = session.event.RoundNumber
            laps["EventName"] = session.event.EventName

            df_list.append(laps)
        except Exception as e:
            print(f"Skipping {year} - {event_name} - {session_type} due to error: {e}")

    if not df_list:
        # No data collected, so just return without writing anything
        return

    # Combine all laps for this event
    event_df = pd.concat(df_list, ignore_index=True)

    # Check if the CSV file already exists in your data folder
    if not os.path.exists(csv_path):
        # If the file doesn't exist, create a new one (header included)
        event_df.to_csv(csv_path, index=False)
        print(f"Created new CSV at {csv_path} with data for {event_name} {year}.")
    else:
        # If the file does exist, append data (no header this time)
        event_df.to_csv(csv_path, mode='a', header=False, index=False)
        print(f"Appended data for {event_name} {year} to existing CSV at {csv_path}.")




In [4]:
# add 2025 events until now 
fetch_and_append_event(2025, "Australian Grand Prix")
fetch_and_append_event(2025, "Chinese Grand Prix", session_types=["FP1", "SQ", "SR",  "Q", "R"])

core           INFO 	Loading data for Australian Grand Prix - Practice 1 [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '5', '6', '7', '10', '12', '14', '16', '18', '22', '23', '27', '30', '31', '44', '55', '63', '81', '87']
core           INFO 	Loading data for Australian Grand Prix - Practice 2 [v3.5.3]
req            INFO 	Usin

Created new CSV at data/f1_laps.csv with data for Australian Grand Prix 2025.


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '5', '6', '7', '10', '12', '14', '16', '18', '22', '23', '27', '30', '31', '44', '55', '63', '81', '87']
core           INFO 	Loading data for Chinese Grand Prix - Sprint Qualifying [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position

Skipping 2025 - Chinese Grand Prix - SR due to error: Invalid session type 'SR'


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '63', '4', '1', '44', '16', '6', '12', '22', '23', '31', '27', '14', '18', '55', '10', '87', '7', '5', '30']
core           INFO 	Loading data for Chinese Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req        

Appended data for Chinese Grand Prix 2025 to existing CSV at data/f1_laps.csv.


In [None]:
# consider doing some work on data before actually appending it to the csv -> take from 3 practice session the fastest lap, 
# try to estimate the average time during a stint in practice 
# also add to dataset the Results data -> using SessionResults

In [None]:
def fetch_and_append_event(year, event_name, 
                           session_types=["FP1", "FP2", "FP3", "Q", "R"], 
                           csv_path="data/f1_laps.csv"):
    df_list = []
    
    # load data for all sessions 
    fp1_session = fastf1.get_session(year, event_name, "FP1")
    fp1_session.load(laps=True)
    fp1_laps = fp1_session.laps.copy()
    fp1_results = fp1_session.results.copy()
    fp1_laps["Session"] = "FP1"

    fp1 = fp1_results.copy()
    # get fastest lap for each driver in the session 
    fastest_laps = fp1_laps[fp1_laps["IsPersonalBest"] == True]
    # get average lap time for best stint of each driver -> include also variation between first and last lap and number of laps of the stint (maybe better consider stint with more laps)
    
    # merge all in one single dataframe 
    fp1 = fp1.merge(fastest_laps, on="Driver", how="left", suffixes=("", "_Fastest"))




    fp2_session = fastf1.get_session(year, event_name, "FP2")
    fp2_session.load(laps=True)
    fp2_laps = fp2_session.laps.copy()
    fp2_results = fp2_session.results.copy()

    fp3_session = fastf1.get_session(year, event_name, "FP3")
    fp3_session.load(laps=True)
    fp3_laps = fp3_session.laps.copy()
    fp3_results = fp3_session.results.copy()

    quali_session = fastf1.get_session(year, event_name, "Q")
    quali_session.load(laps=True)
    quali_laps = quali_session.laps.copy()
    quali_results = quali_session.results.copy()

    race_session = fastf1.get_session(year, event_name, "R")
    race_session.load(laps=True)
    race_results = race_session.results.copy()



    # Fetch sessions for the specified event
    for session_type in session_types:
        try:
            session = fastf1.get_session(year, event_name, session_type)
            session.load(laps=True)
            laps = session.laps.copy()
            results = session.results.copy()

            # Add some columns to keep track of the event
            
            laps["Session"] = session_type
            laps["Year"] = year
            laps["Round"] = session.event.RoundNumber
            laps["EventName"] = session.event.EventName

            df_list.append(laps)
        except Exception as e:
            print(f"Skipping {year} - {event_name} - {session_type} due to error: {e}")

    if not df_list:
        # No data collected, so just return without writing anything
        return

    # Combine all laps for this event
    event_df = pd.concat(df_list, ignore_index=True)

    # Check if the CSV file already exists in your data folder
    if not os.path.exists(csv_path):
        # If the file doesn't exist, create a new one (header included)
        event_df.to_csv(csv_path, index=False)
        print(f"Created new CSV at {csv_path} with data for {event_name} {year}.")
    else:
        # If the file does exist, append data (no header this time)
        event_df.to_csv(csv_path, mode='a', header=False, index=False)
        print(f"Appended data for {event_name} {year} to existing CSV at {csv_path}.")


