In [6]:
import fastf1

In [7]:
track_locations = {
    "sakhir": {
        "latitude": 26.0325,
        "longitude": 50.5106,
        "elevation_m": 7
    },
    "jeddah": {
        "latitude": 21.6319,
        "longitude": 39.1044,
        "elevation_m": 3
    },
    "melbourne": {
        "latitude": -37.8497,
        "longitude": 144.9683,
        "elevation_m": 9
    },
    "imola": {
        "latitude": 44.3439,
        "longitude": 11.7167,
        "elevation_m": 47
    },
    "miami": {
        "latitude": 25.9588,
        "longitude": -80.2389,
        "elevation_m": 2
    },
    "barcelona": {
        "latitude": 41.57,
        "longitude": 2.2611,
        "elevation_m": 90
    },
    "monaco": {
        "latitude": 43.7347,
        "longitude": 7.4206,
        "elevation_m": 52
    },
    "baku": {
        "latitude": 40.3725,
        "longitude": 49.8533,
        "elevation_m": -28
    },
    "montreal": {
        "latitude": 45.5,
        "longitude": -73.5225,
        "elevation_m": 10
    },
    "silverstone": {
        "latitude": 52.07,
        "longitude": -1.016,
        "elevation_m": 140
    },
    "spielberg": {
        "latitude": 47.2197,
        "longitude": 15.7644,
        "elevation_m": 660
    },
    "le castellet": {
        "latitude": 43.2506,
        "longitude": 5.7903,
        "elevation_m": 400
    },
    "budapest": {
        "latitude": 47.5833,
        "longitude": 19.2486,
        "elevation_m": 171
    },
    "spa-francorchamps": {
        "latitude": 50.4372,
        "longitude": 5.9714,
        "elevation_m": 443
    },
    "zandvoort": {
        "latitude": 52.3882,
        "longitude": 4.5407,
        "elevation_m": 9
    },
    "monza": {
        "latitude": 45.6156,
        "longitude": 9.2811,
        "elevation_m": 162
    },
    "marina bay": {
        "latitude": 1.2914,
        "longitude": 103.8641,
        "elevation_m": 15
    },
    "suzuka": {
        "latitude": 34.8431,
        "longitude": 136.5411,
        "elevation_m": 50
    },
    "austin": {
        "latitude": 30.1328,
        "longitude": -97.6411,
        "elevation_m": 180
    },
    "mexico city": {
        "latitude": 19.4042,
        "longitude": -99.0901,
        "elevation_m": 2250
    },
    "sao paulo": {
        "latitude": -23.7036,
        "longitude": -46.6997,
        "elevation_m": 750
    },
    "yas island": {
        "latitude": 24.4672,
        "longitude": 54.6031,
        "elevation_m": 10
    },
    "las vegas": {
        "latitude": 36.1416,
        "longitude": -115.1719,
        "elevation_m": 610
    },
    "lusail": {
        "latitude": 25.49,
        "longitude": 51.4542,
        "elevation_m": 5
    },
    "shanghai": {
        "latitude": 31.3389,
        "longitude": 121.2197,
        "elevation_m": 5
    }
}

Factors for ML Model to consider:

- Starting positions
- Gap to pole
- Teammate starting positions
- Weather (Wind, Rain, Average Track temp and air temp throughout full race)
- Retired
- Downforce index (may have to hardcode this) // for later
- Tyre wear index (may have to hardcode this) // for later

In [8]:
# Calculate average wind direction
def calculate_wind_speed(weather, time):

    wind_speed_data = weather["wind_speed_10m"]
    
    wind_speed_total = 0
    for i in range(time.hour, time.hour + 4):
        wind_speed_total += wind_speed_data[i]

    return wind_speed_total / 4

In [9]:
# Calculate average air temp
def calculate_apparent_temp(weather, time):

    apparent_temperature_data = weather["apparent_temperature"]
    
    apparent_temperature_total = 0
    for i in range(time.hour, time.hour + 4):
        apparent_temperature_total += apparent_temperature_data[i]

    return apparent_temperature_total / 4

In [10]:
# Calculate average track temp
def calculate_relative_humidity(weather, time):
    relative_humidity_data = weather["relative_humidity_2m"]
    
    relative_humidity_total = 0
    for i in range(time.hour, time.hour + 4):
        relative_humidity_total += relative_humidity_data[i]

    return relative_humidity_total / 4

In [11]:
# Calculate rain amount before race in mm
def calculate_rain_amount_before(weather, time):
    rain_data = weather["rain"]
    
    rain_amount = 0
    for i in range(time.hour, time.hour - 5, -1):
        rain_amount += rain_data[i]

    return rain_amount

In [12]:
def calculate_rain_amount_during(weather, time):
    rain_data = weather["rain"]
    
    rain_amount = 0
    for i in range(time.hour, time.hour + 4):
        rain_amount += rain_data[i]

    return rain_amount

In [13]:
# Get gap to pole for each driver
def calculate_gap_to_pole_position(qualifying):
    
    q1,q2,q3 = qualifying.laps.split_qualifying_sessions()
    q1_pole_time = q1.pick_fastest()["LapTime"]
    q2_pole_time = q2.pick_fastest()["LapTime"]
    q3_pole_time = q3.pick_fastest()["LapTime"]

    pole_gaps = {}

    for driver in qualifying.results.iterrows():
        driver = driver[1]
        if pd.isna(driver["Q3"]): 
            if pd.isna(driver["Q2"]):
                if pd.isna(driver["Q1"]):
                    pole_gaps[driver["DriverNumber"]] = q3_pole_time
                else:
                    gap = driver["Q1"] - q3_pole_time
                    if gap.total_seconds() < 0:
                        gap = driver["Q1"] - q1_pole_time
                    pole_gaps[driver["DriverNumber"]] = gap
            else:
                gap = driver["Q2"] - q3_pole_time
                if gap.total_seconds() < 0:
                    gap = driver["Q2"] - q2_pole_time
                pole_gaps[driver["DriverNumber"]] = gap
        else:
            gap = driver["Q3"] - q3_pole_time
            pole_gaps[driver["DriverNumber"]] = gap
    
    return pole_gaps

In [14]:
# Get teammate qualifying position for each driver
def get_teammate_qualifying_position(qualifying):

    teammate_qualifying_pos = {}

    for driver in qualifying.results.iterrows():
        driver = driver[1]
        team_id = driver["TeamId"]
        for teammate in qualifying.results.iterrows():
            teammate = teammate[1]
            if driver["DriverNumber"] == teammate["DriverNumber"]:
                continue

            if teammate["TeamId"] == team_id:
                teammate_qualifying_pos[driver["DriverNumber"]] = teammate["Position"]
                break

    return teammate_qualifying_pos

In [15]:
# Get drivers own qualifying positions for each driver
def get_qualifying_positions(qualifying):

    driver_qualifying_pos = {}

    for driver in qualifying.results.iterrows():
        driver = driver[1]

        driver_qualifying_pos[driver["DriverNumber"]] = driver["Position"]

    return driver_qualifying_pos

In [16]:
# Get drivers own race finishing positions for each driver
def get_race_finishing_positions(race):

    driver_finishing_pos = {}

    for driver in race.results.iterrows():
        driver = driver[1]

        driver_finishing_pos[driver["DriverNumber"]] = driver["Position"]
    
    return driver_finishing_pos

In [None]:
def get_date_and_time(event):
    date_and_time = ""
    for i in range(1, 7):
        if event[f"Session{i}"] == "Race":
            date_and_time = event[f"Session{i}DateUtc"]
            break
    
    date = date_and_time.date()
    time = date_and_time.time()

    return (date, time)

In [None]:
import requests
from datetime import timedelta

def get_hourly_weather_info(event, date, time):
    track_info = track_locations[event["Location"].lower()]

    url = f"https://historical-forecast-api.open-meteo.com/v1/forecast?latitude={track_info["latitude"]}&longitude={track_info["longitude"]}&start_date={date - timedelta(days=1)}&end_date={date + timedelta(days=1)}&hourly=rain,relative_humidity_2m,apparent_temperature,wind_speed_10m"
    results = requests.get(url).json()
    
    return results["hourly"]

CSV File columns:

- FullName
- RoundNumber
- Season
- Location
- Rain
- WindSpeed
- AirTemp
- TrackTemp
- QualifyingPos
- StartingPos
- GapToPole
- TeammateQualifyingPos
- Retired (True or false)

In [27]:
import csv
import pandas as pd
import numpy as np
from datetime import date

date_today = date.today()
fieldnames = [
    "DriverId", "TeamId", "Season", "RoundNumber", "RacesInGEEra",
    "LocationId", "RainBefore", "RainDuring", "WindSpeed", "ApparentTemp", 
    "RelativeHumidity", "StartingPos", "QualifyingPos", "GapToPole", 
    "TeammateQualifyingPos", "Retired", "FinishingPos"
]

team_ids = {
    "Red Bull Racing" : 1,
    "Mercedes" : 2,
    "Ferrari" : 3,
    "McLaren" : 4,
    "Alpine" : 5,
    "AlphaTauri" : 6,
    "RB" : 6,
    "Racing Bulls" : 6,
    "Alfa Romeo" : 7,
    "Kick Sauber" : 7,
    "Haas F1 Team" : 8,
    "Williams" : 9,
    "Aston Martin" : 10
}

driver_ids = {}
driver_id_count = 1

location_ids = {}
location_id_count = 1

races_in_ge_era = 1

for season in range(2022, 2026):
    for event in fastf1.get_event_schedule(season).iterrows():
        event = event[1]
        if event["EventFormat"] == "testing":
            continue

        if event["EventDate"] > pd.Timestamp(date_today):
            continue
        
        location = event["Location"]

        if location == "Montréal":
            location = "Montreal"
        elif location == "São Paulo":
            location = "Sao Paulo"

        event["Location"] = location
        
        date, time = get_date_and_time(event)
        weather = get_hourly_weather_info(event, date, time)

        qualifying = fastf1.get_session(season, event["OfficialEventName"], "Q")
        qualifying.load(weather=True)
        race = fastf1.get_session(season, event["OfficialEventName"], "R")
        race.load(weather=True)

        rain_amount_before = calculate_rain_amount_before(weather, time)
        rain_amount_during = calculate_rain_amount_during(weather, time)
        average_wind_speed = calculate_wind_speed(weather, time)
        average_apparent_temp = calculate_apparent_temp(weather, time)
        average_relative_humidity = calculate_relative_humidity(weather, time)
        
        driver_finishing_pos = get_race_finishing_positions(race)
        driver_qualifying_pos = get_qualifying_positions(qualifying)
        pole_gaps = calculate_gap_to_pole_position(qualifying)
        teammate_qualifying_pos = get_teammate_qualifying_position(qualifying)

        location_id = -1

        if location in location_ids:
            location_id = location_ids[location]
        else:
            location_ids[location] = location_id_count
            location_id = location_id_count
            location_id_count += 1

        # Write all data in a CSV file
        
        with open(f"{season}/{location}_race.csv", "w", newline="") as file:
            writer = csv.DictWriter(file, fieldnames=fieldnames)
            writer.writeheader()
            for driver in race.results.iterrows():
                driver = driver[1]

                driver_id = -1
                if driver["FullName"] in driver_ids:
                    driver_id = driver_ids[driver["FullName"]]
                else:
                    driver_ids[driver["FullName"]] = driver_id_count
                    driver_id = driver_id_count
                    driver_id_count += 1

                data = {
                    "DriverId" : driver_id,
                    "TeamId" : team_ids[driver["TeamName"]],
                    "Season" : season,
                    "RoundNumber" : event["RoundNumber"],
                    "RacesInGEEra" : races_in_ge_era,
                    "LocationId" : location_id,
                    "RainBefore" : rain_amount_before,
                    "RainDuring" : rain_amount_during,
                    "WindSpeed" : average_wind_speed,
                    "ApparentTemp" : average_apparent_temp,
                    "RelativeHumidity" : average_relative_humidity,
                    "StartingPos" : driver["GridPosition"],
                    "QualifyingPos" : driver_qualifying_pos[driver["DriverNumber"]],
                    "GapToPole" : pd.to_timedelta(pole_gaps[driver["DriverNumber"]]).total_seconds(),
                    "TeammateQualifyingPos" : teammate_qualifying_pos[driver["DriverNumber"]] if driver["DriverNumber"] in teammate_qualifying_pos else 0,
                    "Retired" : not (driver["Status"] in ["Finished", "Lapped"]),
                    "FinishingPos" : driver_finishing_pos[driver["DriverNumber"]]
                }

                writer.writerow(data)
        
        races_in_ge_era += 1


core           INFO 	Loading data for Bahrain Grand Prix - Qualifying [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


KeyboardInterrupt: 

In [3]:
driver_ids

NameError: name 'driver_ids' is not defined

Code to get qualifying results from most recent qualifying session (before the race)


In [22]:
date, time = get_date_and_time(event)

import requests
from datetime import timedelta

def predict_hourly_weather_info(event, date, time):
    track_info = track_locations[event["Location"].lower()]
    print(track_info)

    url = f"https://api.open-meteo.com/v1/forecast?latitude={track_info["latitude"]}&longitude={track_info["longitude"]}&hourly=rain,relative_humidity_2m,apparent_temperature,wind_speed_10m&forecast_days=2"
    results = requests.get(url).json()
    
    return results["hourly"]

In [30]:
driver_ids = {
    'Charles Leclerc': 1,
    'Carlos Sainz': 2,
    'Lewis Hamilton': 3,
    'George Russell': 4,
    'Kevin Magnussen': 5,
    'Valtteri Bottas': 6,
    'Esteban Ocon': 7,
    'Yuki Tsunoda': 8,
    'Fernando Alonso': 9,
    'Guanyu Zhou': 10,
    'Mick Schumacher': 11,
    'Lance Stroll': 12,
    'Alexander Albon': 13,
    'Daniel Ricciardo': 14,
    'Lando Norris': 15,
    'Nicholas Latifi': 16,
    'Nico Hulkenberg': 17,
    'Sergio Perez': 18,
    'Max Verstappen': 19,
    'Pierre Gasly': 20,
    'Sebastian Vettel': 21,
    'Nyck De Vries': 22,
    'Logan Sargeant': 23,
    'Oscar Piastri': 24,
    'Liam Lawson': 25,
    'Oliver Bearman': 26,
    'Franco Colapinto': 27,
    'Jack Doohan': 28,
    'Andrea Kimi Antonelli': 29,
    'Gabriel Bortoleto': 30,
    'Isack Hadjar': 31,
    'Kimi Antonelli': 32
}

In [31]:
import csv
import pandas as pd
qualifying = fastf1.get_session(2025, "Hungary", "Q")
qualifying.load()
event = fastf1.get_event_schedule(2025).get_event_by_round(14)

date, time = get_date_and_time(event)
weather = predict_hourly_weather_info(event, date, time)
rain_amount_before = calculate_rain_amount_before(weather, time)
rain_amount_during = calculate_rain_amount_during(weather, time)
average_wind_speed = calculate_wind_speed(weather, time)
average_apparent_temp = calculate_apparent_temp(weather, time)
average_relative_humidity = calculate_relative_humidity(weather, time)

pole_gaps = calculate_gap_to_pole_position(qualifying)
teammate_qualifying_pos = get_teammate_qualifying_position(qualifying)

fieldnames = [
    "DriverId", "TeamId", "Season", "RoundNumber", "RacesInGEEra",
    "LocationId", "RainBefore", "RainDuring", "WindSpeed", "ApparentTemp", 
    "RelativeHumidity", "StartingPos", "QualifyingPos", "GapToPole", 
    "TeammateQualifyingPos", "Retired"
]

with open("2025/Hungary_race.csv", "w", newline="") as file:
    
    writer = csv.DictWriter(file, fieldnames=fieldnames)
    writer.writeheader()

    for driver in qualifying.results.iterrows():
        driver = driver[1]

        data = {
            "DriverId" : driver_ids[driver["FullName"]],
            "TeamId" : team_ids[driver["TeamName"]],
            "Season" : season,
            "RoundNumber" : 14,
            "RacesInGEEra" : 82,
            "LocationId" : 10,
            "RainBefore" : rain_amount_before,
            "RainDuring" : rain_amount_during,
            "WindSpeed" : average_wind_speed,
            "ApparentTemp" : average_apparent_temp,
            "RelativeHumidity" : average_relative_humidity,
            "StartingPos" : driver["Position"],
            "QualifyingPos" : driver["Position"],
            "GapToPole" : pd.to_timedelta(pole_gaps[driver["DriverNumber"]]).total_seconds(),
            "TeammateQualifyingPos" : teammate_qualifying_pos[driver["DriverNumber"]] if driver["DriverNumber"] in teammate_qualifying_pos else 0,
            "Retired" : False
        }

        writer.writerow(data)

core           INFO 	Loading data for Hungarian Grand Prix - Qualifying [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '81', '4', '63', '14', '18', '5', '1', '30', '6', '87', '44', '55', '43', '12', '22', '10', '31', '27', '23']


{'latitude': 47.5833, 'longitude': 19.2486, 'elevation_m': 171}


In [34]:
weather["rain"]

[0.0,
 0.0,
 1.1,
 0.0,
 0.7,
 0.4,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.1,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0]

In [29]:
driver_ids

{}