In [62]:
import fastf1
import pandas as pd
import time

In [14]:
# Get historical data. Doesn't include 2025.
# One frame per season is collected and concatenated once: growing a DataFrame
# with pd.concat inside the loop re-copies every earlier row on each pass and
# starts from an empty frame, which pandas' concat treats specially.
season_frames = []
for season in range(2022, 2025):
    schedule = fastf1.get_event_schedule(season)
    # pd.DataFrame(...) gives a plain DataFrame (the previous concat onto an
    # empty pd.DataFrame also produced one); .assign adds the season column on a
    # new object instead of writing into a column slice.
    season_frames.append(
        pd.DataFrame(
            schedule.loc[:, ["RoundNumber", "Country", "Location", "EventFormat"]]
        ).assign(Year=season)
    )
meetings_df = pd.concat(season_frames, axis=0)

# Keep only rows whose EventFormat is not "testing".
meetings_df = meetings_df[meetings_df["EventFormat"] != "testing"]


In [3]:
# Display the 2022-2024 schedule with the "testing" rows filtered out.
meetings_df

Unnamed: 0,RoundNumber,Country,Location,EventFormat,Year
2,1,Bahrain,Sakhir,conventional,2022
3,2,Saudi Arabia,Jeddah,conventional,2022
4,3,Australia,Melbourne,conventional,2022
5,4,Italy,Imola,sprint,2022
6,5,United States,Miami,conventional,2022
...,...,...,...,...,...
20,20,Mexico,Mexico City,conventional,2024
21,21,Brazil,São Paulo,sprint_qualifying,2024
22,22,United States,Las Vegas,conventional,2024
23,23,Qatar,Lusail,sprint_qualifying,2024


In [63]:
# Get current year (2025) data. Test data
year = 2025

schedule_2025 = fastf1.get_event_schedule(year)
# A single season needs no concat: convert the column subset to a plain
# DataFrame and add the season column with .assign (no write into a slice,
# no concatenation onto an empty frame).
meetings_2025_df = pd.DataFrame(
    schedule_2025.loc[:, ["RoundNumber", "Country", "Location", "EventFormat"]]
).assign(Year=year)

# Keep only rows whose EventFormat is not "testing".
meetings_2025_df = meetings_2025_df[meetings_2025_df["EventFormat"] != "testing"]

In [64]:
# Display the 2025 schedule with the "testing" rows filtered out.
meetings_2025_df

Unnamed: 0,RoundNumber,Country,Location,EventFormat,Year
1,1,Australia,Melbourne,conventional,2025
2,2,China,Shanghai,sprint_qualifying,2025
3,3,Japan,Suzuka,conventional,2025
4,4,Bahrain,Sakhir,conventional,2025
5,5,Saudi Arabia,Jeddah,conventional,2025
6,6,United States,Miami,sprint_qualifying,2025
7,7,Italy,Imola,conventional,2025
8,8,Monaco,Monaco,conventional,2025
9,9,Spain,Barcelona,conventional,2025
10,10,Canada,Montréal,conventional,2025


In [65]:
def get_event_info(year,round_num,event_format):
    """Build one row per driver combining qualifying, race and sprint results.

    Parameters
    ----------
    year, round_num
        Passed unchanged to ``fastf1.get_session`` to select the event.
    event_format : str
        One of ``"conventional"``, ``"sprint"``, ``"sprint_shootout"`` or
        ``"sprint_qualifying"``.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: DriverNumber, Qual_Position, BroadcastName,
        TeamName, Race_Position, Sector1Time, Sector2Time, Sector3Time,
        SpeedST, Stint, Sprint_Qual_Position, Sprint_Race_Position, Event_Type.
        Sector times are per-driver means in seconds, SpeedST is the per-driver
        mean and Stint the per-driver maximum over the race laps. For
        conventional events the two sprint columns are NaN.

    Raises
    ------
    ValueError
        If ``event_format`` is not one of the values above. The check runs
        before any session is fetched, so a bad format costs no downloads.
    """
    sprint_grid_formats = ("sprint_shootout", "sprint_qualifying")
    if event_format not in ("conventional", "sprint", *sprint_grid_formats):
        raise ValueError(f"Unknown event format: {event_format}")

    qual = fastf1.get_session(year, round_num, 'Q')
    qual.load()
    race = fastf1.get_session(year, round_num, 'R')
    race.load()

    sprint = None
    if event_format != "conventional":
        sprint = fastf1.get_session(year, round_num, 'S')
        sprint.load()

    # For Qualifying
    # data from session.results
    qual_results_df = qual.results.loc[:, ["DriverNumber", "Position"]].rename(
        columns={"Position": "Qual_Position"}
    )

    # For Race
    # data from session.results
    race_results_df = race.results.loc[:, ["DriverNumber", "BroadcastName", "TeamName", "Position"]]

    # data from session.laps
    sector_columns = ["Sector1Time", "Sector2Time", "Sector3Time"]
    # Explicit copy: the sector columns are overwritten below and must not be
    # written through a slice of race.laps.
    race_laps_df = race.laps.loc[:, ["DriverNumber", *sector_columns, "SpeedST", "Stint"]].copy()
    one_second = pd.Timedelta(seconds=1)
    for sector in sector_columns:
        # Timedelta / Timedelta -> float number of seconds, so "mean" works.
        race_laps_df[sector] = race_laps_df[sector] / one_second

    lap_aggregations = {sector: "mean" for sector in sector_columns}
    lap_aggregations.update({"SpeedST": "mean", "Stint": "max"})
    final_race_laps_df = race_laps_df.groupby("DriverNumber").agg(lap_aggregations).reset_index()

    # outer join dfs --> complete data for a single race session
    merged_race_df = pd.merge(race_results_df, final_race_laps_df, on="DriverNumber", how="outer")

    # Merge qual and race data
    merged_qual_race_df = pd.merge(
        qual_results_df, merged_race_df, on="DriverNumber", how="outer"
    ).rename(columns={"Position": "Race_Position"})

    if event_format == "conventional":
        # NaN rather than None keeps these placeholder columns float64, the same
        # dtype the sprint events produce, so stacking events with pd.concat
        # does not depend on how pandas treats all-NA object columns.
        merged_qual_race_df["Sprint_Qual_Position"] = float("nan")
        merged_qual_race_df["Sprint_Race_Position"] = float("nan")
        merged_qual_race_df["Event_Type"] = "Race"

    elif event_format == "sprint":
        # Get Sprint data
        sprint_results_df = sprint.results.loc[:, ["DriverNumber", "Position"]].rename(
            columns={"Position": "Sprint_Race_Position"}
        )

        # Merge Qual + Sprint + Race data.
        # The sprint grid is taken to be the qualifying result for this format
        # (presumably because qualifying set the sprint grid - verify).
        merged_qual_race_df["Sprint_Qual_Position"] = merged_qual_race_df["Qual_Position"]
        merged_qual_race_df = pd.merge(merged_qual_race_df, sprint_results_df, on="DriverNumber", how="outer")

        merged_qual_race_df["Event_Type"] = "Sprint"

    else:
        # "sprint_shootout" / "sprint_qualifying": the sprint grid comes from
        # the sprint session's own GridPosition column.
        sprint_results_df = sprint.results.loc[:, ["DriverNumber", "GridPosition", "Position"]].rename(
            columns={"GridPosition": "Sprint_Qual_Position", "Position": "Sprint_Race_Position"}
        )

        # Merge Qual + Sprint + Race data
        merged_qual_race_df = pd.merge(merged_qual_race_df, sprint_results_df, on="DriverNumber", how="outer")

        merged_qual_race_df["Event_Type"] = "Sprint"

    return merged_qual_race_df

In [45]:
# Get data for all meetings in meetings_df
def get_data(meetings_df):
    """Collect per-driver event data for every row of ``meetings_df``.

    Parameters
    ----------
    meetings_df : pandas.DataFrame
        Must provide the columns RoundNumber, Country, Location, EventFormat
        and Year; each row is one event passed to ``get_event_info``.

    Returns
    -------
    pandas.DataFrame
        The per-event frames stacked row-wise (each keeps its own index), with
        the event identifiers added and the columns in a fixed order. An empty
        ``meetings_df`` yields an empty DataFrame.
    """
    column_order = ["DriverNumber","BroadcastName","TeamName","Round_Number","Country","Location","Year","Event_Type",
                    "Sprint_Qual_Position","Sprint_Race_Position","Qual_Position","Sector1Time","Sector2Time","Sector3Time",
                    "SpeedST","Stint","Race_Position"]

    # Frames are gathered in a list and concatenated once at the end; calling
    # pd.concat inside the loop re-copies all earlier rows on every iteration
    # and begins by concatenating onto an empty frame.
    per_meeting_frames = []
    for index, row in meetings_df.iterrows():
        round_num = row["RoundNumber"]
        country = row["Country"]
        location = row["Location"]
        EventFormat = row["EventFormat"]
        year = row["Year"]

        print(f"{index}: {round_num}, {country}, {location}, {EventFormat}, {year}")

        meeting_data_df = get_event_info(year, round_num, EventFormat).assign(
            Round_Number=round_num,
            Country=country,
            Location=location,
            Year=year,
        )

        # reorganize columns
        per_meeting_frames.append(meeting_data_df[column_order])

    if not per_meeting_frames:
        # pd.concat raises on an empty list; return the empty frame instead.
        return pd.DataFrame()

    return pd.concat(per_meeting_frames, axis=0)

In [10]:
#d = get_data(meetings_2025_df)
#d.to_csv("test_data_2025.csv")

In [15]:
# Pre-load meeting data to cache. Returns all races that didn't load properly
def preload_cache(df, min_round=5, delay=5):
    """Load the sessions of each event in ``df`` and report the ones that came back empty.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns RoundNumber, Country, Location, EventFormat
        and Year.
    min_round : int, default 5
        Rows with ``RoundNumber`` below this value are skipped.
    delay : float, default 5
        Seconds to sleep before loading each processed event. Skipped rows no
        longer incur the delay.

    Returns
    -------
    list of str
        One ``"{round}-{location}-{event format}-{year}"`` label per event for
        which the race, qualifying or (non-conventional events only) sprint
        results are empty.
    """
    bad = []
    # count numbers every row of df, including skipped ones.
    for count, (_, row) in enumerate(df.iterrows()):
        round_num = row["RoundNumber"]
        country = row["Country"]
        location = row["Location"]
        EventFormat = row["EventFormat"]
        year = row["Year"]

        if round_num < min_round:
            continue

        time.sleep(delay)
        print(f"{count}: {round_num}, {country}, {location}, {EventFormat}, {year}")

        qual = fastf1.get_session(year, round_num, 'Q')
        qual.load()
        race = fastf1.get_session(year, round_num, 'R')
        race.load()

        # Reset per event so a sprint session from an earlier row is never reused.
        sprint = None
        if EventFormat != "conventional":
            sprint = fastf1.get_session(year, round_num, 'S')
            sprint.load()

        missing_results = (
            len(race.results) == 0
            or len(qual.results) == 0
            or (sprint is not None and len(sprint.results) == 0)
        )
        if missing_results:
            # Label with the event format. Interpolating the sprint session
            # object here failed for conventional events (unbound name) or
            # picked up the session left over from a previous row.
            bad.append(f"{round_num}-{location}-{EventFormat}-{year}")

    return bad

# Rows of the historical schedule whose Year is 2024 (input for preload_cache).
is_2024 = meetings_df["Year"] == 2024
just2024 = meetings_df[is_2024]
#preload_cache(just2024)

In [25]:
# Combine training data sets
trn_data_2022 = pd.read_csv("train_data_2022.csv")
trn_data_2023 = pd.read_csv("train_data_2023.csv")
trn_data_2024 = pd.read_csv("train_data_2024.csv")

# Stack the three seasons in a single concat. Chaining concats that start from
# an empty DataFrame copies the accumulated rows repeatedly and relies on how
# pandas treats empty entries in concat.
trn_data = pd.concat(
    [trn_data_2022, trn_data_2023, trn_data_2024],
    axis=0,
    ignore_index=True,
)
# Drop the 'Unnamed: 0' column (presumably the row index saved by an earlier
# to_csv call without index=False - verify against how the CSVs were written).
trn_data = trn_data.drop(columns=['Unnamed: 0'])

In [31]:
# Write the combined training set to train_data.csv; index=False leaves the row index out of the file.
trn_data.to_csv("train_data.csv",index=False)

In [None]:
# Strip the stray 'Unnamed: 0' column from the 2025 test set and save the
# result as test_data.csv without writing the row index.
test_data = pd.read_csv("test_data_2025.csv").drop(columns=['Unnamed: 0'])
test_data.to_csv("test_data.csv", index=False)

### Code To Generate New Test Data for predicting new races

In [77]:
# Load the test set from test_data.csv (the file written by the index-cleaning cell).
test_data = pd.read_csv("test_data.csv")

In [None]:
# Select one event from the 2025 schedule by zero-based row position; the
# output shown for position 5 is round 6 (Miami).
new_race = meetings_2025_df.iloc[5,:] # edit which race you want to add here 
new_race

RoundNumber                    6
Country            United States
Location                   Miami
EventFormat    sprint_qualifying
Year                        2025
Name: 6, dtype: object

In [79]:
# Unpack the selected event's fields into the names used by the following cells.
round_number, country, location, EventFormat, year = (
    new_race[field]
    for field in ("RoundNumber", "Country", "Location", "EventFormat", "Year")
)

In [None]:
# Build pre-race rows for the selected event: qualifying (and, for sprint
# weekends, sprint) results are filled in, race-derived columns stay empty.

# Reject an unknown format before any session data is fetched.
if EventFormat not in ("conventional", "sprint", "sprint_shootout", "sprint_qualifying"):
    raise ValueError(f"Unknown event format: {EventFormat}")

qual = fastf1.get_session(year, round_number, 'Q')
qual.load()

if EventFormat != "conventional":
    sprint = fastf1.get_session(year, round_number, 'S')
    sprint.load()

# For Qualifying
# data from session.results
qual_results_df = qual.results.loc[:, ["DriverNumber", "BroadcastName", "TeamName", "Position"]].rename(
    columns={"Position": "Qual_Position"}
)

# No data from race yet.
# NaN rather than None keeps these placeholder columns float64 instead of
# object, so appending them to numeric columns with pd.concat does not depend
# on pandas' deprecated handling of all-NA object columns. .assign also avoids
# writing into a column slice of qual.results.
no_value = float("nan")
qual_results_df = qual_results_df.assign(
    Sector1Time=no_value,
    Sector2Time=no_value,
    Sector3Time=no_value,
    SpeedST=no_value,
    Stint=no_value,
    Race_Position=no_value,
)

if EventFormat == "conventional":
    qual_results_df["Sprint_Qual_Position"] = no_value
    qual_results_df["Sprint_Race_Position"] = no_value
    qual_results_df["Event_Type"] = "Race"

elif EventFormat == "sprint":
    # Get Sprint data
    sprint_results_df = sprint.results.loc[:, ["DriverNumber", "Position"]].rename(
        columns={"Position": "Sprint_Race_Position"}
    )

    # Merge Qual + Sprint data; the sprint grid is taken from Qual_Position.
    qual_results_df["Sprint_Qual_Position"] = qual_results_df["Qual_Position"]
    qual_results_df = pd.merge(qual_results_df, sprint_results_df, on="DriverNumber", how="outer")

    qual_results_df["Event_Type"] = "Sprint"

else:
    # "sprint_shootout" / "sprint_qualifying": the sprint grid comes from the
    # sprint session's GridPosition column.
    sprint_results_df = sprint.results.loc[:, ["DriverNumber", "GridPosition", "Position"]].rename(
        columns={"GridPosition": "Sprint_Qual_Position", "Position": "Sprint_Race_Position"}
    )

    # Merge Qual + Sprint data
    qual_results_df = pd.merge(qual_results_df, sprint_results_df, on="DriverNumber", how="outer")

    qual_results_df["Event_Type"] = "Sprint"

core           INFO 	Loading data for Miami Grand Prix - Qualifying [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '12', '81', '63', '55', '23', '16', '31', '22', '6', '44', '5', '7', '30', '27', '14', '10', '18', '87']
core           INFO 	Loading data for Miami Grand Prix - Sprint [v3.5.3]
req            INFO 	Using cached data 

In [87]:
# Display the pre-race rows built for the selected event.
qual_results_df

Unnamed: 0,DriverNumber,BroadcastName,TeamName,Qual_Position,Sector1Time,Sector2Time,Sector3Time,SpeedST,Stint,Race_Position,Sprint_Qual_Position,Sprint_Race_Position,Event_Type
0,1,M VERSTAPPEN,Red Bull Racing,1.0,,,,,,,4.0,17.0,Sprint
1,10,P GASLY,Alpine,18.0,,,,,,,13.0,8.0,Sprint
2,12,K ANTONELLI,Mercedes,3.0,,,,,,,1.0,7.0,Sprint
3,14,F ALONSO,Aston Martin,17.0,,,,,,,10.0,18.0,Sprint
4,16,C LECLERC,Ferrari,8.0,,,,,,,6.0,20.0,Sprint
5,18,L STROLL,Aston Martin,19.0,,,,,,,16.0,5.0,Sprint
6,22,Y TSUNODA,Red Bull Racing,10.0,,,,,,,20.0,6.0,Sprint
7,23,A ALBON,Williams,7.0,,,,,,,8.0,11.0,Sprint
8,27,N HULKENBERG,Kick Sauber,16.0,,,,,,,11.0,9.0,Sprint
9,30,L LAWSON,Racing Bulls,15.0,,,,,,,14.0,13.0,Sprint


In [88]:
# reorganize columns
new_col_order = ["DriverNumber","BroadcastName","TeamName","Round_Number","Country","Location","Year","Event_Type",
                "Sprint_Qual_Position","Sprint_Race_Position","Qual_Position","Sector1Time","Sector2Time","Sector3Time",
                "SpeedST","Stint","Race_Position"]

# Attach the event identifiers, then apply the fixed column order.
qual_results_df = qual_results_df.assign(
    Round_Number=round_number,
    Country=country,
    Location=location,
    Year=year,
)[new_col_order]

# A plain-DataFrame copy is all that is needed here; concatenating onto an
# empty DataFrame adds no rows and leans on how pandas treats empty entries
# in concat.
final_df = pd.DataFrame(qual_results_df).copy()

    

In [91]:
# Append the new event's rows to the existing test set and renumber the index.
# NOTE(review): the line echoed in this cell's output looks like the tail of a
# pandas warning - possibly the all-NA placeholder columns in final_df; confirm.
test_data_with_new_race = pd.concat([test_data,final_df],axis=0,ignore_index=True)

  test_data_with_new_race = pd.concat([test_data,final_df],axis=0,ignore_index=True)


In [94]:
# Show the last 25 rows to check that the new event was appended after the existing data.
test_data_with_new_race.tail(25)

Unnamed: 0,DriverNumber,BroadcastName,TeamName,Round_Number,Country,Location,Year,Event_Type,Sprint_Qual_Position,Sprint_Race_Position,Qual_Position,Sector1Time,Sector2Time,Sector3Time,SpeedST,Stint,Race_Position
95,6,I HADJAR,Racing Bulls,5,Saudi Arabia,Jeddah,2025,Race,,,14.0,36.547224,30.3754,31.65172,309.9,2.0,10.0
96,63,G RUSSELL,Mercedes,5,Saudi Arabia,Jeddah,2025,Race,,,3.0,36.171102,30.07406,31.6125,305.04,2.0,5.0
97,7,J DOOHAN,Alpine,5,Saudi Arabia,Jeddah,2025,Race,,,17.0,36.986604,30.833408,31.773286,306.408163,3.0,17.0
98,81,O PIASTRI,McLaren,5,Saudi Arabia,Jeddah,2025,Race,,,2.0,35.84551,30.03194,31.44368,301.22,2.0,1.0
99,87,O BEARMAN,Haas F1 Team,5,Saudi Arabia,Jeddah,2025,Race,,,15.0,36.60798,30.68156,31.51304,305.38,2.0,13.0
100,1,M VERSTAPPEN,Red Bull Racing,6,United States,Miami,2025,Sprint,4.0,17.0,1.0,,,,,,
101,10,P GASLY,Alpine,6,United States,Miami,2025,Sprint,13.0,8.0,18.0,,,,,,
102,12,K ANTONELLI,Mercedes,6,United States,Miami,2025,Sprint,1.0,7.0,3.0,,,,,,
103,14,F ALONSO,Aston Martin,6,United States,Miami,2025,Sprint,10.0,18.0,17.0,,,,,,
104,16,C LECLERC,Ferrari,6,United States,Miami,2025,Sprint,6.0,20.0,8.0,,,,,,


In [95]:
# Save the extended test set to new_race_data.csv; index=False leaves the row index out of the file.
test_data_with_new_race.to_csv("new_race_data.csv",index=False)

### Code To Generate New Test Data including new races