In [1]:
import fastf1
import numpy as np
import pandas as pd 
import os 
import matplotlib.pyplot as plt 
import sklearn 
import time 

In [4]:
# enable cache to locally store data, so that when load data don't need to take them every time from the api 
cache_dir = os.path.expanduser('~/fastf1_cache')
os.makedirs(cache_dir, exist_ok=True)
fastf1.Cache.enable_cache(cache_dir)

In [7]:
# load data from one event 
df = []

session_types = ["FP1", "FP2", "FP3", "Q", "R"]
for session_type in session_types:
    session = fastf1.get_session(2025, "Australia", session_type)
    session.load(laps=True)

    # laps data
    laps = session.laps.copy()
    laps['Session'] = session_type
    

    df.append(laps)

# concatenate all laps data
df = pd.concat(df, ignore_index=True)


core           INFO 	Loading data for Australian Grand Prix - Practice 1 [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '5', '6', '7', '10', '12', '14', '16', '18', '22', '23', '27', '30', '31', '44', '55', '63', '81', '87']
core           INFO 	Loading data for Australian Grand Prix - Practice 2 [v3.5.3]
req            INFO 	Usin

In [8]:
df.info()

<class 'fastf1.core.Laps'>
RangeIndex: 2549 entries, 0 to 2548
Data columns (total 32 columns):
 #   Column              Non-Null Count  Dtype          
---  ------              --------------  -----          
 0   Time                2549 non-null   timedelta64[ns]
 1   Driver              2549 non-null   object         
 2   DriverNumber        2549 non-null   object         
 3   LapTime             2126 non-null   timedelta64[ns]
 4   LapNumber           2549 non-null   float64        
 5   Stint               2549 non-null   float64        
 6   PitOutTime          452 non-null    timedelta64[ns]
 7   PitInTime           444 non-null    timedelta64[ns]
 8   Sector1Time         2506 non-null   timedelta64[ns]
 9   Sector2Time         2528 non-null   timedelta64[ns]
 10  Sector3Time         2258 non-null   timedelta64[ns]
 11  Sector1SessionTime  2505 non-null   timedelta64[ns]
 12  Sector2SessionTime  2528 non-null   timedelta64[ns]
 13  Sector3SessionTime  2258 non-null   timedelt

In [9]:
df.head()

Unnamed: 0,Time,Driver,DriverNumber,LapTime,LapNumber,Stint,PitOutTime,PitInTime,Sector1Time,Sector2Time,...,Team,LapStartTime,LapStartDate,TrackStatus,Position,Deleted,DeletedReason,FastF1Generated,IsAccurate,Session
0,0 days 00:17:15.002000,VER,1,0 days 00:01:58.029000,1.0,1.0,0 days 00:15:28.184000,NaT,0 days 00:00:50.194000,0 days 00:00:19.878000,...,Red Bull Racing,0 days 00:15:28.184000,2025-03-14 01:30:56.193,1,,False,,False,False,FP1
1,0 days 00:18:34.773000,VER,1,0 days 00:01:19.771000,2.0,1.0,NaT,NaT,0 days 00:00:27.951000,0 days 00:00:17.793000,...,Red Bull Racing,0 days 00:17:15.002000,2025-03-14 01:32:43.011,1,,False,,False,True,FP1
2,0 days 00:20:58.688000,VER,1,0 days 00:02:23.915000,3.0,1.0,NaT,NaT,0 days 00:00:49.204000,0 days 00:00:37.267000,...,Red Bull Racing,0 days 00:18:34.773000,2025-03-14 01:34:02.782,1,,False,,False,True,FP1
3,0 days 00:22:17.837000,VER,1,0 days 00:01:19.149000,4.0,1.0,NaT,NaT,0 days 00:00:27.343000,0 days 00:00:17.455000,...,Red Bull Racing,0 days 00:20:58.688000,2025-03-14 01:36:26.697,1,,False,,False,True,FP1
4,0 days 00:24:46.613000,VER,1,0 days 00:02:28.776000,5.0,1.0,NaT,NaT,0 days 00:00:51.751000,0 days 00:00:43.081000,...,Red Bull Racing,0 days 00:22:17.837000,2025-03-14 01:37:45.846,1,,False,,False,True,FP1


In [10]:
df = df[df["Deleted"] != True]
df.drop(columns=["Deleted", "DeletedReason", "FastF1Generated", "DriverNumber", "Team"], inplace=True)

In [11]:
df.info()

<class 'fastf1.core.Laps'>
Index: 2538 entries, 0 to 2548
Data columns (total 27 columns):
 #   Column              Non-Null Count  Dtype          
---  ------              --------------  -----          
 0   Time                2538 non-null   timedelta64[ns]
 1   Driver              2538 non-null   object         
 2   LapTime             2115 non-null   timedelta64[ns]
 3   LapNumber           2538 non-null   float64        
 4   Stint               2538 non-null   float64        
 5   PitOutTime          451 non-null    timedelta64[ns]
 6   PitInTime           443 non-null    timedelta64[ns]
 7   Sector1Time         2495 non-null   timedelta64[ns]
 8   Sector2Time         2517 non-null   timedelta64[ns]
 9   Sector3Time         2247 non-null   timedelta64[ns]
 10  Sector1SessionTime  2494 non-null   timedelta64[ns]
 11  Sector2SessionTime  2517 non-null   timedelta64[ns]
 12  Sector3SessionTime  2247 non-null   timedelta64[ns]
 13  SpeedI1             2398 non-null   float64      

In [4]:
# FOR NOW LOAD DATA 2025, NEED TO SOLVE ISSUE OF TOO MANY API CALLS WHEN TRYING TO ACCESS EVERYTHING
# List to store session DataFrames
dfs = []

# Define the session types you want to load (adjust as needed)
session_types = ['FP1', 'FP2', 'FP3', 'Q', 'R']

# Loop over each year from 2018 to 2025 (inclusive)
for year in range(2018, 2026):
    # Get the event schedule for the year; this returns a DataFrame
    schedule = fastf1.get_event_schedule(year)
    for _, event in schedule.iterrows():
        event_name = event['EventName']  # Adjust key if necessary
        for session_type in session_types:
            try:
                # Get and load the session data
                time.sleep(5)
                session = fastf1.get_session(year, event_name, session_type)
                session.load()  # Loads the session data

                # Get the laps data (or any other data you need)
                laps_df = session.laps.copy()
                # Add metadata for later identification
                laps_df['Year'] = year
                laps_df['EventName'] = event_name
                laps_df['SessionType'] = session_type
                
                dfs.append(laps_df)
            except Exception as e:
                # If a session isn't available (e.g., a missing practice session), skip it
                print(f"Skipping {session_type} for {event_name} in {year}: {e}")

# Combine all the individual DataFrames into one
all_sessions_df = pd.concat(dfs, ignore_index=True)

# Now all_sessions_df contains the data from every session from 2018 to 2025.
print(all_sessions_df.head())


core           INFO 	Loading data for Australian Grand Prix - Practice 1 [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	No cached data found for position_data. Loading data...
_api           INFO 	Fetching position data...
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['2', '3', '5', '7', '8', '9', '10', '11', '14', '16', '18', '20', '27', '28', '31', '33', '35', '44', '55', '77']
core           INFO 	Loading data for Aus

Skipping FP3 for United States Grand Prix in 2018: The data you are trying to access has not been loaded yet. See `Session.load`


core           INFO 	Loading data for United States Grand Prix - Qualifying [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
req            INFO 	No cached data found for _extended_timing_data. Loading data...
_api           INFO 	Fetching timing data...
req            INFO 	No cached data found for car_data. Loading data...
_api           INFO 	Fetching car data...
req            INFO 	No cached data found for weather_data. Loading data...
_api           INFO 	Fetching weather data...
req            INFO 	No cached dat

Skipping Q for United States Grand Prix in 2018: The data you are trying to access has not been loaded yet. See `Session.load`


core           INFO 	Loading data for United States Grand Prix - Race [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	No cached data found for lap_count. Loading data...
_api           INFO 	Fetching lap count data...
req            INFO 	No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
req            INFO 	No cached data found for _extended_timing_data. Loading data...
_api           INFO 	Fetching timing data...
req            INFO 	No cached data found for car_data. Loading data...
_api           INFO 	Fetching car data...
req            INFO 	No cached data found

Skipping R for United States Grand Prix in 2018: The data you are trying to access has not been loaded yet. See `Session.load`


core           INFO 	Loading data for Mexican Grand Prix - Practice 1 [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
req            INFO 	No cached data found for _extended_timing_data. Loading data...
_api           INFO 	Fetching timing data...
req            INFO 	No cached data found for car_data. Loading data...
_api           INFO 	Fetching car data...
req            INFO 	No cached data found for weather_data. Loading data...
_api           INFO 	Fetching weather data...
req            INFO 	No cached data foun

Skipping FP1 for Mexican Grand Prix in 2018: The data you are trying to access has not been loaded yet. See `Session.load`


core           INFO 	Loading data for Mexican Grand Prix - Practice 2 [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
req            INFO 	No cached data found for _extended_timing_data. Loading data...
_api           INFO 	Fetching timing data...
req            INFO 	No cached data found for car_data. Loading data...
_api           INFO 	Fetching car data...
req            INFO 	No cached data found for weather_data. Loading data...
_api           INFO 	Fetching weather data...
req            INFO 	No cached data foun

Skipping FP2 for Mexican Grand Prix in 2018: The data you are trying to access has not been loaded yet. See `Session.load`


core           INFO 	Loading data for Mexican Grand Prix - Practice 3 [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
req            INFO 	No cached data found for _extended_timing_data. Loading data...
_api           INFO 	Fetching timing data...
req            INFO 	No cached data found for car_data. Loading data...
_api           INFO 	Fetching car data...
req            INFO 	No cached data found for weather_data. Loading data...
_api           INFO 	Fetching weather data...
req            INFO 	No cached data foun

Skipping FP3 for Mexican Grand Prix in 2018: The data you are trying to access has not been loaded yet. See `Session.load`


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...


Skipping Q for Mexican Grand Prix in 2018: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...


Skipping R for Mexican Grand Prix in 2018: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...


Skipping FP1 for Brazilian Grand Prix in 2018: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...


Skipping FP2 for Brazilian Grand Prix in 2018: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...


Skipping FP3 for Brazilian Grand Prix in 2018: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...


Skipping Q for Brazilian Grand Prix in 2018: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...


Skipping R for Brazilian Grand Prix in 2018: Failed to load any schedule data.


KeyboardInterrupt: 

In [4]:
all_sessions_df.info()

<class 'fastf1.core.Laps'>
RangeIndex: 4387 entries, 0 to 4386
Data columns (total 34 columns):
 #   Column              Non-Null Count  Dtype          
---  ------              --------------  -----          
 0   Time                4387 non-null   timedelta64[ns]
 1   Driver              4387 non-null   object         
 2   DriverNumber        4387 non-null   object         
 3   LapTime             3736 non-null   timedelta64[ns]
 4   LapNumber           4387 non-null   float64        
 5   Stint               4387 non-null   float64        
 6   PitOutTime          653 non-null    timedelta64[ns]
 7   PitInTime           645 non-null    timedelta64[ns]
 8   Sector1Time         4169 non-null   timedelta64[ns]
 9   Sector2Time         4365 non-null   timedelta64[ns]
 10  Sector3Time         4089 non-null   timedelta64[ns]
 11  Sector1SessionTime  4165 non-null   timedelta64[ns]
 12  Sector2SessionTime  4365 non-null   timedelta64[ns]
 13  Sector3SessionTime  4089 non-null   timedelt

In [14]:
all_sessions_df.head()

Unnamed: 0,Time,Driver,DriverNumber,LapTime,LapNumber,Stint,PitOutTime,PitInTime,Sector1Time,Sector2Time,...,LapStartDate,TrackStatus,Position,Deleted,DeletedReason,FastF1Generated,IsAccurate,Year,EventName,SessionType


In [13]:
all_sessions_df = all_sessions_df[all_sessions_df["Deleted"] != "True"]
all_sessions_df.info()

<class 'fastf1.core.Laps'>
Index: 0 entries
Data columns (total 34 columns):
 #   Column              Non-Null Count  Dtype          
---  ------              --------------  -----          
 0   Time                0 non-null      timedelta64[ns]
 1   Driver              0 non-null      object         
 2   DriverNumber        0 non-null      object         
 3   LapTime             0 non-null      timedelta64[ns]
 4   LapNumber           0 non-null      float64        
 5   Stint               0 non-null      float64        
 6   PitOutTime          0 non-null      timedelta64[ns]
 7   PitInTime           0 non-null      timedelta64[ns]
 8   Sector1Time         0 non-null      timedelta64[ns]
 9   Sector2Time         0 non-null      timedelta64[ns]
 10  Sector3Time         0 non-null      timedelta64[ns]
 11  Sector1SessionTime  0 non-null      timedelta64[ns]
 12  Sector2SessionTime  0 non-null      timedelta64[ns]
 13  Sector3SessionTime  0 non-null      timedelta64[ns]
 14  SpeedI