Test

In [1]:
import fastf1
import pandas as pd

Factors for ML Model to consider:

- Starting positions
- Gap to pole
- Teammate starting positions
- Weather (Wind, Rain, Average Track temp and air temp throughout full race)
- Retired
- Downforce index (may have to hardcode this) // for later
- Tyre wear index (may have to hardcode this) // for later

In [2]:
# Calculate average wind speed
def calculate_wind_speed(race):
    """Return the mean of the race's "WindSpeed" weather samples.

    Args:
        race: Object whose ``weather_data["WindSpeed"]`` is a sized iterable
            of numeric samples.

    Returns:
        The arithmetic mean of the samples, rounded to five decimal places.
    """
    samples = race.weather_data["WindSpeed"]
    total = sum(samples)
    return round(total / len(samples), 5)

In [3]:
# Calculate average air temp
def calculate_air_temp(race):
    """Return the mean of the race's "AirTemp" weather samples.

    Args:
        race: Object whose ``weather_data["AirTemp"]`` is a sized iterable
            of numeric samples.

    Returns:
        The arithmetic mean of the samples, rounded to five decimal places.
    """
    samples = race.weather_data["AirTemp"]
    total = sum(samples)
    return round(total / len(samples), 5)

In [4]:
# Calculate average track temp
def calculate_track_temp(race):
    """Return the mean of the race's "TrackTemp" weather samples.

    Args:
        race: Object whose ``weather_data["TrackTemp"]`` is a sized iterable
            of numeric samples.

    Returns:
        The arithmetic mean of the samples, rounded to five decimal places.
    """
    samples = race.weather_data["TrackTemp"]
    total = sum(samples)
    return round(total / len(samples), 5)

In [5]:
# Calculate rain percentage
def calculate_rain_percentage(race):
    """Return the share of "Rainfall" weather samples that are truthy.

    Args:
        race: Object whose ``weather_data["Rainfall"]`` is a sized iterable
            of truthy/falsy samples.

    Returns:
        The percentage (0-100) of truthy samples, rounded to five decimal
        places.
    """
    rainfall_flags = race.weather_data["Rainfall"]
    wet_samples = sum(1 for is_raining in rainfall_flags if is_raining)
    return round(wet_samples / len(rainfall_flags) * 100, 5)

In [6]:
# Get gap to pole for each driver
def calculate_gap_to_pole_position(qualifying):
    """Map each driver number to the gap between their qualifying time and pole.

    The pole time is the fastest lap of the latest qualifying segment that
    produced one: Q3 when it was run, otherwise Q2, otherwise Q1. Each
    driver's own time is taken from ``qualifying.results``, using the last
    segment in which they set a time (Q3, then Q2, then Q1).

    Args:
        qualifying: Qualifying session object exposing
            ``laps.split_qualifying_sessions()`` and a ``results`` frame with
            "DriverNumber", "Q1", "Q2" and "Q3" columns.

    Returns:
        dict: "DriverNumber" -> gap to pole. The value is ``pd.NaT`` when the
        driver has no Q1/Q2/Q3 time, when the gap would be negative, or when
        no pole time could be determined at all.
    """
    # split_qualifying_sessions() yields (Q1, Q2, Q3); a segment that was not
    # run is None, so walk backwards until one provides a usable fastest lap.
    pole_time = None
    for segment_laps in reversed(qualifying.laps.split_qualifying_sessions()):
        if segment_laps is None:
            continue
        fastest_lap = segment_laps.pick_fastest()
        if fastest_lap is not None and not pd.isna(fastest_lap["LapTime"]):
            pole_time = fastest_lap["LapTime"]
            break

    pole_gaps = {}

    for _, driver in qualifying.results.iterrows():
        gap = pd.NaT
        if pole_time is not None:
            # Use the time from the last segment the driver reached.
            for segment in ("Q3", "Q2", "Q1"):
                if not pd.isna(driver[segment]):
                    gap = driver[segment] - pole_time
                    break

        # A time from an earlier segment can beat the pole lap; such a
        # negative gap is reported as missing rather than as a number.
        if pd.isna(gap) or gap.total_seconds() < 0:
            gap = pd.NaT

        pole_gaps[driver["DriverNumber"]] = gap

    return pole_gaps

In [7]:
# Get teammate qualifying position for each driver
def get_teammate_qualifying_position(qualifying):
    """Map each driver number to a teammate's qualifying position.

    For every row of ``qualifying.results`` the teammate is the first other
    row (in frame order) with the same "TeamId". Drivers without such a row
    are left out of the result.

    Args:
        qualifying: Object whose ``results`` frame has "DriverNumber",
            "TeamId" and "Position" columns.

    Returns:
        dict: "DriverNumber" -> the teammate's "Position".
    """
    entries = [row for _, row in qualifying.results.iterrows()]
    positions_by_driver = {}

    for entry in entries:
        teammate = next(
            (
                other
                for other in entries
                if other["DriverNumber"] != entry["DriverNumber"]
                and other["TeamId"] == entry["TeamId"]
            ),
            None,
        )
        if teammate is not None:
            positions_by_driver[entry["DriverNumber"]] = teammate["Position"]

    return positions_by_driver

In [8]:
# Get drivers own qualifying positions for each driver
def get_qualifying_positions(qualifying):
    """Map each driver number to that driver's own qualifying position.

    Args:
        qualifying: Object whose ``results`` frame has "DriverNumber" and
            "Position" columns.

    Returns:
        dict: "DriverNumber" -> "Position", one entry per results row.
    """
    return {
        entry["DriverNumber"]: entry["Position"]
        for _, entry in qualifying.results.iterrows()
    }

In [9]:
# Get drivers own race finishing positions for each driver
def get_race_finishing_positions(race):
    """Map each driver number to that driver's race finishing position.

    Args:
        race: Object whose ``results`` frame has "DriverNumber" and
            "Position" columns.

    Returns:
        dict: "DriverNumber" -> "Position", one entry per results row.
    """
    return {
        entry["DriverNumber"]: entry["Position"]
        for _, entry in race.results.iterrows()
    }

CSV File columns:

- DriverNumber
- FullName
- TeamName (numeric team id)
- RoundNumber
- Season
- RacesInGEEra
- Location
- Rain
- WindSpeed
- AirTemp
- TrackTemp
- QualifyingPos
- StartingPos
- GapToPole
- TeammateQualifyingPos
- Retired (True or False)
- FinishingPos

In [31]:
import csv
import pandas as pd
import numpy as np
from datetime import date
from pathlib import Path

# Export one CSV per race weekend with the per-driver columns listed in
# `fieldnames`. Only the event whose EventDate equals today's date is written.
date_today = date.today()
fieldnames = [
    "DriverNumber", "FullName", "TeamName", "Season", "RoundNumber", "RacesInGEEra", "Location", "Rain", "WindSpeed", "AirTemp",
    "TrackTemp", "QualifyingPos", "StartingPos", "GapToPole", "TeammateQualifyingPos",
    "Retired", "FinishingPos"
]

# Numeric code written to the "TeamName" column. Names that share a code
# (e.g. "AlphaTauri", "RB" and "Racing Bulls") end up with the same value.
team_ids = {
    "Red Bull Racing": 1,
    "Mercedes": 2,
    "Ferrari": 3,
    "McLaren": 4,
    "Alpine": 5,
    "AlphaTauri": 6,
    "Alfa Romeo": 7,
    "Haas F1 Team": 8,
    "Williams": 9,
    "Aston Martin": 10,
    "RB": 6,
    "Kick Sauber": 7,
    "Racing Bulls": 6,
}

# Running 1-based index of non-testing events, counted from the first season
# in the loop below.
races_in_ge_era = 1

for season in range(2022, 2026):
    for _event_label, event in fastf1.get_event_schedule(season).iterrows():

        if event["EventFormat"] == "testing":
            continue

        # Take this event's index and advance the counter BEFORE the date
        # filter, so events skipped below still count towards RacesInGEEra.
        # (Incrementing only after a successful export left the value at 1.)
        race_index_in_ge_era = races_in_ge_era
        races_in_ge_era += 1

        if event["EventDate"] != pd.Timestamp(date_today):
            continue

        # Select the sessions by round number: this is an exact lookup, while
        # passing an event name goes through FastF1's fuzzy name matching.
        round_number = int(event["RoundNumber"])
        qualifying = fastf1.get_session(season, round_number, "Q")
        qualifying.load(weather=True)
        race = fastf1.get_session(season, round_number, "R")
        race.load(weather=True)

        rain_percentage = calculate_rain_percentage(race)
        average_wind_speed = calculate_wind_speed(race)
        average_air_temp = calculate_air_temp(race)
        average_track_temp = calculate_track_temp(race)
        driver_finishing_pos = get_race_finishing_positions(race)
        driver_qualifying_pos = get_qualifying_positions(qualifying)
        pole_gaps = calculate_gap_to_pole_position(qualifying)
        teammate_qualifying_pos = get_teammate_qualifying_position(qualifying)
        location = event["Location"]

        # Keep the file name and the Location column ASCII-only for these two venues.
        if location == "Montréal":
            location = "Montreal"
        elif location == "São Paulo":
            location = "Sao Paulo"

        # Write all data in a CSV file
        # The per-season directory is created on demand so a fresh checkout works.
        output_dir = Path(str(season))
        output_dir.mkdir(parents=True, exist_ok=True)

        # Explicit UTF-8: driver names contain non-ASCII characters and the
        # platform default encoding differs between operating systems.
        with open(output_dir / f"{location}_race.csv", "w", newline="", encoding="utf-8") as file:
            writer = csv.DictWriter(file, fieldnames=fieldnames)
            writer.writeheader()
            for _driver_label, driver in race.results.iterrows():
                driver_number = driver["DriverNumber"]

                # A driver counts as a finisher when the status is "Finished",
                # "Lapped", or a laps-down status such as "+1 Lap"; anything
                # else is treated as a retirement.
                status = str(driver["Status"])
                is_classified = status in ("Finished", "Lapped") or status.startswith("+")

                data = {
                    "DriverNumber": int(driver_number),
                    "FullName": driver["FullName"],
                    "TeamName": team_ids[driver["TeamName"]],
                    "Season": season,
                    "RoundNumber": round_number,
                    "RacesInGEEra": race_index_in_ge_era,
                    "Location": location,
                    "Rain": rain_percentage,
                    "WindSpeed": average_wind_speed,
                    "AirTemp": average_air_temp,
                    "TrackTemp": average_track_temp,
                    "QualifyingPos": driver_qualifying_pos[driver_number],
                    "StartingPos": driver["GridPosition"],
                    "GapToPole": pd.to_timedelta(pole_gaps[driver_number]).total_seconds(),
                    # 0 marks a driver with no teammate in the qualifying results.
                    "TeammateQualifyingPos": teammate_qualifying_pos.get(driver_number, 0),
                    "Retired": not is_classified,
                    "FinishingPos": driver_finishing_pos[driver_number],
                }

                writer.writerow(data)


core           INFO 	Loading data for Belgian Grand Prix - Qualifying [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '81', '16', '1', '23', '63', '22', '6', '30', '5', '31', '87', '10', '27', '55', '44', '43', '12', '14', '18']
core           INFO 	Loading data for Belgian Grand Prix - Race [v3.6.0]
req            INFO 	No cached data 

In [18]:
# Scratch check of a single gap-to-pole value. `pole_gaps` and `driver` are
# not defined in this cell; in the visible notebook they are only assigned
# inside the CSV export loop, so that cell must have been run first.
print(pole_gaps[driver["DriverNumber"]].total_seconds())

0.0


In [28]:
# Fetch the 2025 round 13 race session directly by round number instead of
# looking up the event's official name and matching on that string again.
test = fastf1.get_session(2025, 13, "R")

In [29]:
# Bare expression so the notebook displays the session's repr.
test

2025 Season Round 13: Belgian Grand Prix - Race