In [None]:
import fastf1
import numpy as np
import pandas as pd
import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed

# --- LOAD SESSION --- #
year = 2025

schedule = fastf1.get_event_schedule(year)
# Keep only Grand Prix weekends whose event date has already passed, so a race
# that has not been run yet is never requested.
# (A previous unfiltered assignment to gp_events was dead code and was removed.)
gp_events = schedule[
    schedule['EventName'].str.contains("Grand Prix")
    & (schedule['EventDate'] <= datetime.datetime.now())
]['EventName'].tolist()

# Maps event name -> loaded race session; populated by the loading code below.
sessions = {}

def load_session(event):
    """Load the race session for one event of the configured season.

    Args:
        event: Event name, as listed in the season schedule.

    Returns:
        ``(event, session)`` with laps, telemetry and weather loaded, or
        ``None`` if the session could not be fetched or loaded.
    """
    try:
        session = fastf1.get_session(year, event, 'R')
        session.load(telemetry=True, laps=True, weather=True)
    except Exception as e:
        # Best-effort loading: report the cause so the failure can be
        # diagnosed, then let the caller skip this event.
        print(f"Cannot load {event}: {e!r}")
        return None
    return event, session

# Fetch all race sessions concurrently. Each worker yields (event, session)
# on success or None on failure; only successes are stored in `sessions`.
with ThreadPoolExecutor(max_workers=4) as executor:
    futures = dict(
        (executor.submit(load_session, name), name) for name in gp_events
    )
    for future in as_completed(futures):
        result = future.result()
        if result is None:
            continue
        event, session = result
        sessions[event] = session

core           INFO 	Loading data for Australian Grand Prix - Race [v3.6.0]
core           INFO 	Loading data for Chinese Grand Prix - Race [v3.6.0]
core           INFO 	Loading data for Japanese Grand Prix - Race [v3.6.0]
core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.6.0]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for session_info. Loading data...
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	Using cached data for session_info
_api           INFO 	Fetching session info data...
req            INFO 	Using cached data for driver_info
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	Data has been writ

req            INFO 	No cached data found for position_data. Loading data...
_api           INFO 	Fetching position data...
req            INFO 	Data has been written to cache!
--- Logging error ---
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/logging/__init__.py", line 1163, in emit
    stream.write(msg + self.terminator)
  File "/Users/divyanshjain/Library/Python/3.12/lib/python/site-packages/ipykernel/iostream.py", line 694, in write
    self._schedule_flush()
  File "/Users/divyanshjain/Library/Python/3.12/lib/python/site-packages/ipykernel/iostream.py", line 590, in _schedule_flush
    self.pub_thread.schedule(_schedule_in_thread)
  File "/Users/divyanshjain/Library/Python/3.12/lib/python/site-packages/ipykernel/iostream.py", line 267, in schedule
    self._event_pipe.send(b"")
  File "/Users/divyanshjain/Library/Python/3.12/lib/python/site-packages/zmq/sugar/socket.py", line 701, in send
    return super().send(data,

Cannot load British Grand Prix.Cannot load Belgian Grand Prix.


req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	Data has been written to cache!
--- Logging error ---
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/logging/__init__.py", line 1163, in emit
    stream.write(msg + self.terminator)
  File "/Users/divyanshjain/Library/Python/3.12/lib/python/site-packages/ipykernel/iostream.py", line 694, in write
    self._schedule_flush()
  File "/Users/divyanshjain/Library/Python/3.12/lib/python/site-packages/ipykernel/iostream.py", line 590, in _schedule_flush
    self.pub_thread.schedule(_schedule_in_thread)
  File "/Users/divyanshjain/Library/Python/3.12/lib/python/site-packages/ipykernel/iostream.py", line 267, in schedule
    self._event_pipe.send(b"")
  File "/Users/divyanshjain/Library/Python/3.12/lib/python/site-packages/zmq/

Cannot load Hungarian Grand Prix.

req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for lap_count. Loading data...
_api           INFO 	Fetching lap count data...
req            INFO 	Data has been written to cache!
--- Logging error ---
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/logging/__init__.py", line 1163, in emit
    stream.write(msg + self.terminator)
  File "/Users/divyanshjain/Library/Python/3.12/lib/python/site-packages/ipykernel/iostream.py", line 694, in write
    self._schedule_flush()
  File "/Users/divyanshjain/Library/Python/3.12/lib/python/site-packages/ipykernel/iostream.py", line 590, in _schedule_flush
    self.pub_thread.schedule(_schedule_in_thread)
  File "/Users/divyanshjain/Library/Python/3.12/lib/python/site-packages/ipykernel/iostream.py", line 267, in schedule
    self._event_pipe.send(b"")
  File "/Users/divyanshjain/Library/Python/3.12/lib/python/site-packages/zmq/sugar/socket.py

In [None]:
year = 2025

schedule = fastf1.get_event_schedule(year)
# Grand Prix events whose event date is not in the future.
# (A previous unfiltered assignment to gp_events was dead code and was removed.)
gp_events = schedule[
    schedule['EventName'].str.contains("Grand Prix")
    & (schedule['EventDate'] <= datetime.datetime.now())
]['EventName'].tolist()

# `df` is a placeholder that is replaced once the feature rows are collected;
# `rows` accumulates one dict per (race, driver).
df = pd.DataFrame()
rows = []

def pick_median(laps):
    """Return the lap whose lap time is closest to the median lap time.

    Args:
        laps: DataFrame-like collection of laps with a timedelta ``LapTime``
            column. Rows with a missing ``LapTime`` are ignored.

    Returns:
        The row of the lap nearest to the median time (the earliest such row
        when several are equally close), or ``None`` if no lap has a time.
    """
    valid_laps = laps.dropna(subset=['LapTime']).reset_index(drop=True)
    if valid_laps.empty:
        return None

    times = valid_laps['LapTime'].dt.total_seconds()
    distance = (times - np.median(times)).abs()
    # argmin is O(n) and always picks the first of several equally close laps,
    # unlike a full argsort whose default sort is not stable.
    closest = distance.to_numpy().argmin()
    return valid_laps.iloc[closest]

import requests
import pandas as pd
from bs4 import BeautifulSoup

# Scrape per-race circuit length, race distance and lap count into `track_info`,
# keyed by the race name shown in the first table column.
url = "https://formulapedia.com/how-long-are-f1-races/"
track_info = {}

# Use a timeout so a stalled connection cannot hang the notebook, and fail
# loudly on HTTP errors instead of parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()

soup = BeautifulSoup(response.content, "html.parser")

tbl = soup.find("tbody")
if tbl is None:
    raise ValueError(f"No table body found at {url}; the page layout may have changed.")
row = tbl.find_all('tr')

for r in row:
    raceName = r.find('td', class_="column-1")
    circuitLength = r.find('td', class_="column-3")
    raceDistance = r.find('td', class_="column-4")
    numLaps = r.find('td', class_="column-5")

    # Skip rows that lack any of the expected cells rather than crashing
    # on `.text` of None.
    cells = (raceName, circuitLength, raceDistance, numLaps)
    if any(cell is None for cell in cells):
        continue

    # Strip surrounding whitespace so keys match event names exactly.
    track_info[raceName.text.strip()] = {
        "circuitLength": circuitLength.text.strip(),
        "raceDistance": raceDistance.text.strip(),
        "numLaps": numLaps.text.strip(),
    }


{'Monaco Grand Prix': {'circuitLength': '3.337km', 'raceDistance': '260.286km', 'numLaps': '78'}, 'Dutch Grand Prix': {'circuitLength': '4.259km', 'raceDistance': '306.648km', 'numLaps': '72'}, 'Austrian Grand Prix': {'circuitLength': '4.318km', 'raceDistance': '306.452km', 'numLaps': '71'}, 'Mexican Grand Prix': {'circuitLength': '4.304km', 'raceDistance': '305.354km', 'numLaps': '71'}, 'Brazilian Grand Prix': {'circuitLength': '4.309km', 'raceDistance': '305.879km', 'numLaps': '71'}, 'Canadian Grand Prix': {'circuitLength': '4.361km', 'raceDistance': '305.270km', 'numLaps': '70'}, 'Hungarian Grand Prix': {'circuitLength': '4.381km', 'raceDistance': '306.630km', 'numLaps': '70'}, 'Spanish Grand Prix': {'circuitLength': '4.655km', 'raceDistance': '307.104km', 'numLaps': '66'}, 'Singapore Grand Prix': {'circuitLength': '5.063km', 'raceDistance': '308.706km', 'numLaps': '61'}, 'Australian Grand Prix': {'circuitLength': '5.303km', 'raceDistance': '307.574km', 'numLaps': '58'}, 'Bahrain Gr

In [None]:
# Build one feature row per (race, driver) and export the table to CSV.
# Start from an empty list so re-running this cell does not append duplicates.
rows = []

for event in gp_events:
    # Races that failed to load are absent from `sessions`; skip them instead
    # of raising KeyError.
    session = sessions.get(event)
    if session is None:
        print(f"Skipping {event}: session was not loaded.")
        continue

    results = session.results
    # The scraped table may name an event differently from the schedule; fall
    # back to None rather than aborting the whole export with a KeyError.
    track_length = track_info.get(event, {}).get("circuitLength")

    for num in session.drivers:
        driver_info = session.get_driver(num)
        driver = driver_info['Abbreviation']
        team = driver_info['TeamName']
        position = results.loc[results['Abbreviation'] == driver, 'Position'].values[0]

        # pick_drivers replaces the deprecated pick_driver.
        driver_laps_all = session.laps.pick_drivers(driver)
        if driver_laps_all.empty:
            continue

        #TODO filter out wet weather laps
        # INCLUDES laps done under green flag conditions, EXCLUDES pit in,
        # pit out laps, as well as the first lap.
        driver_laps_race = driver_laps_all[(driver_laps_all['TrackStatus'] == '1')
                                        & (driver_laps_all['PitInTime'].isna())
                                        & (driver_laps_all['PitOutTime'].isna())
                                        & (driver_laps_all['LapNumber'] > 1)].dropna(subset='LapTime').reset_index(drop=True)

        lap_median = pick_median(driver_laps_race)
        if lap_median is None:
            continue
        lap_fastest = driver_laps_race.pick_fastest()
        if lap_fastest is None:
            continue
        tel_median = lap_median.get_car_data()

        # Top speed over the selected laps, overall and restricted to laps on
        # which the DRS channel never reached 2 or above.
        top_speed = 0
        top_speed_no_drs = 0
        for _, lap in driver_laps_race.iterlaps():
            tel = lap.get_car_data()
            # Empty telemetry would make the max() calls below meaningless.
            if tel.empty or 'Speed' not in tel:
                continue

            lap_top_speed = tel['Speed'].max()
            if lap_top_speed > top_speed:
                top_speed = lap_top_speed

            if tel['DRS'].max() < 2 and lap_top_speed > top_speed_no_drs:
                top_speed_no_drs = lap_top_speed

        fastest_time = lap_fastest['LapTime']
        rows.append(
            {
                "Driver": driver,
                "Team": team,
                "Position": position,
                "TyreCompound": lap_fastest['Compound'] if 'Compound' in lap_fastest else None,
                # NOTE: despite the column name, this is the fastest of the
                # filtered race laps, not a qualifying lap.
                "QualifyingTime": fastest_time.total_seconds() if pd.notna(fastest_time) else None,
                "TopSpeed": top_speed,
                # NOTE(review): holds the top speed on laps without DRS; the
                # column name is kept so the CSV schema does not change.
                "TopSpeedNoDelta": top_speed_no_drs,
                "TrackLength": track_length,
                "ThrottleMean": tel_median['Throttle'].mean(),
                "ThrottleStdDev": tel_median['Throttle'].std(),
                "BrakeMean": tel_median['Brake'].mean(),
                "BrakeStdDev": tel_median['Brake'].std(),
                "DRSMean": tel_median['DRS'].mean(),
            }
        )

df = pd.DataFrame(rows)

df.to_csv('f1.csv', index = False)