In [1]:
import fastf1
import numpy as np
import pandas as pd 
import os 
import matplotlib.pyplot as plt 
import sklearn 


In [2]:
# Enable FastF1's on-disk cache so that data which was already downloaded is
# read from local storage instead of being requested from the API on every load.
cache_dir = os.path.expanduser('~/fastf1_cache')
# Create the cache folder up front (no error if it is already there).
os.makedirs(cache_dir, exist_ok=True)
fastf1.Cache.enable_cache(cache_dir)

In [14]:
# Load the data of a single session to understand what kind of data we are working with.

# First practice ("FP1") of the 2025 "Australia" event
session = fastf1.get_session(2025, "Australia", "FP1")
# Fetch the session's data (from the API, or from the local cache when present)
# so that attributes such as `session.laps` can be read afterwards.
session.load()


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...


ValueError: Failed to load any schedule data.

In [5]:
# Three-letter driver abbreviations (20 entries), listed alphabetically.
drivers = {
    "ALB", "ALO", "ANT", "BEA", "BOR",
    "DOO", "GAS", "HAD", "HAM", "HUL",
    "LAW", "LEC", "NOR", "OCO", "PIA",
    "RUS", "SAI", "STR", "TSU", "VER",
}

In [6]:
# Lap-by-lap table of whichever session `session` currently refers to;
# preview the first rows to see the available columns.
laps = session.laps
laps.head()

Unnamed: 0,Time,Driver,DriverNumber,LapTime,LapNumber,Stint,PitOutTime,PitInTime,Sector1Time,Sector2Time,...,FreshTyre,Team,LapStartTime,LapStartDate,TrackStatus,Position,Deleted,DeletedReason,FastF1Generated,IsAccurate
0,0 days 00:17:15.002000,VER,1,0 days 00:01:58.029000,1.0,1.0,0 days 00:15:28.184000,NaT,0 days 00:00:50.194000,0 days 00:00:19.878000,...,True,Red Bull Racing,0 days 00:15:28.184000,2025-03-14 01:30:56.193,1,,False,,False,False
1,0 days 00:18:34.773000,VER,1,0 days 00:01:19.771000,2.0,1.0,NaT,NaT,0 days 00:00:27.951000,0 days 00:00:17.793000,...,True,Red Bull Racing,0 days 00:17:15.002000,2025-03-14 01:32:43.011,1,,False,,False,True
2,0 days 00:20:58.688000,VER,1,0 days 00:02:23.915000,3.0,1.0,NaT,NaT,0 days 00:00:49.204000,0 days 00:00:37.267000,...,True,Red Bull Racing,0 days 00:18:34.773000,2025-03-14 01:34:02.782,1,,False,,False,True
3,0 days 00:22:17.837000,VER,1,0 days 00:01:19.149000,4.0,1.0,NaT,NaT,0 days 00:00:27.343000,0 days 00:00:17.455000,...,True,Red Bull Racing,0 days 00:20:58.688000,2025-03-14 01:36:26.697,1,,False,,False,True
4,0 days 00:24:46.613000,VER,1,0 days 00:02:28.776000,5.0,1.0,NaT,NaT,0 days 00:00:51.751000,0 days 00:00:43.081000,...,True,Red Bull Racing,0 days 00:22:17.837000,2025-03-14 01:37:45.846,1,,False,,False,True


In [15]:
# FOR NOW ONLY THE 2025 SEASON IS LOADED: requesting every season at once
# triggers too many API calls. Widen `years` (e.g. range(2018, 2026)) once
# that issue is solved.
years = range(2025, 2026)

# Session identifiers to request for every event (adjust as needed)
session_types = ['FP1', 'FP2', 'FP3', 'Q', 'R']

# One laps DataFrame per successfully loaded session
dfs = []

for year in years:
    # Leave testing events out of the schedule: they are not addressed through
    # get_session() (FastF1 provides get_testing_session() for them), so keeping
    # them only costs extra requests and risks labelling laps with the wrong event.
    schedule = fastf1.get_event_schedule(year, include_testing=False)
    for event_name in schedule['EventName']:
        for session_type in session_types:
            try:
                # Get and load the session data
                session = fastf1.get_session(year, event_name, session_type)
                session.load()

                # Copy the laps so that the metadata columns below are not
                # written into the session object's own table
                laps_df = session.laps.copy()
                # Add metadata for later identification
                laps_df['Year'] = year
                laps_df['EventName'] = event_name
                laps_df['SessionType'] = session_type

                dfs.append(laps_df)
            except Exception as e:
                # Deliberately best-effort: a session that is not available
                # (e.g. a missing practice session) is reported and skipped
                print(f"Skipping {session_type} for {event_name} in {year}: {e}")

# Fail with an actionable message instead of pandas' "No objects to concatenate"
# when not a single session could be loaded.
if not dfs:
    raise ValueError(
        "No session could be loaded; see the 'Skipping ...' messages above."
    )

# Combine all the individual DataFrames into one
all_sessions_df = pd.concat(dfs, ignore_index=True)

# all_sessions_df now holds the laps of every session that loaded for `years`.
all_sessions_df.head()


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...


ValueError: Failed to load any schedule data.

In [4]:
# Summarise the combined laps table: number of rows, columns, non-null counts and dtypes
all_sessions_df.info()

<class 'fastf1.core.Laps'>
RangeIndex: 4387 entries, 0 to 4386
Data columns (total 34 columns):
 #   Column              Non-Null Count  Dtype          
---  ------              --------------  -----          
 0   Time                4387 non-null   timedelta64[ns]
 1   Driver              4387 non-null   object         
 2   DriverNumber        4387 non-null   object         
 3   LapTime             3736 non-null   timedelta64[ns]
 4   LapNumber           4387 non-null   float64        
 5   Stint               4387 non-null   float64        
 6   PitOutTime          653 non-null    timedelta64[ns]
 7   PitInTime           645 non-null    timedelta64[ns]
 8   Sector1Time         4169 non-null   timedelta64[ns]
 9   Sector2Time         4365 non-null   timedelta64[ns]
 10  Sector3Time         4089 non-null   timedelta64[ns]
 11  Sector1SessionTime  4165 non-null   timedelta64[ns]
 12  Sector2SessionTime  4365 non-null   timedelta64[ns]
 13  Sector3SessionTime  4089 non-null   timedelt

In [14]:
# Preview the first rows of the combined laps table
all_sessions_df.head()

Unnamed: 0,Time,Driver,DriverNumber,LapTime,LapNumber,Stint,PitOutTime,PitInTime,Sector1Time,Sector2Time,...,LapStartDate,TrackStatus,Position,Deleted,DeletedReason,FastF1Generated,IsAccurate,Year,EventName,SessionType


In [13]:
# Drop deleted laps. `Deleted` holds booleans (the laps preview shows False),
# not strings, so a comparison against the string "True" is true for every row
# and filters nothing. `.eq(True)` marks only laps explicitly flagged as
# deleted; rows with a missing flag are kept.
is_deleted = all_sessions_df["Deleted"].eq(True)
all_sessions_df = all_sessions_df[~is_deleted]
all_sessions_df.info()

<class 'fastf1.core.Laps'>
Index: 0 entries
Data columns (total 34 columns):
 #   Column              Non-Null Count  Dtype          
---  ------              --------------  -----          
 0   Time                0 non-null      timedelta64[ns]
 1   Driver              0 non-null      object         
 2   DriverNumber        0 non-null      object         
 3   LapTime             0 non-null      timedelta64[ns]
 4   LapNumber           0 non-null      float64        
 5   Stint               0 non-null      float64        
 6   PitOutTime          0 non-null      timedelta64[ns]
 7   PitInTime           0 non-null      timedelta64[ns]
 8   Sector1Time         0 non-null      timedelta64[ns]
 9   Sector2Time         0 non-null      timedelta64[ns]
 10  Sector3Time         0 non-null      timedelta64[ns]
 11  Sector1SessionTime  0 non-null      timedelta64[ns]
 12  Sector2SessionTime  0 non-null      timedelta64[ns]
 13  Sector3SessionTime  0 non-null      timedelta64[ns]
 14  SpeedI