# Testing

In [1]:
import duckdb
import plotly.io as pio
import os

pio.renderers.default = "notebook" 

project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
os.chdir(project_root)


In [2]:
circuits = duckdb.sql("""SELECT * FROM 'data/*_circuit_profiles.csv'""").df()
circuits_skipped = duckdb.sql("""SELECT * FROM 'data/*_circuit_skipped.csv'""").df()

In [3]:
drivers = duckdb.sql("""SELECT * FROM 'data/*_driver_profiles.csv'""").df()
drivers_skipped = duckdb.sql("""SELECT * FROM 'data/*_driver_skipped.csv'""").df()

In [4]:
#circuits.loc[circuits.year==2025]#.event.unique()

In [5]:
drivers

Unnamed: 0,grand_prix,location,driver,max_throttle_ratio,compound,tyre_age,is_fresh_tyre,avg_rainfall,avg_track_temp,avg_air_temp,braking_events,session_uid,degradation_slope,brake_max_g,brake_avg_g,drs_activations,year,session,event,session_date
0,Bahrain Grand Prix,Sakhir,VER,0.672572,MEDIUM,4.0,True,0.314404,34.443352,19.874515,1,2022_Sakhir_FP1,1.351709,5.096840,3.720912,4,2022,FP1,Bahrain Grand Prix,2022-03-18 12:00:00
1,Bahrain Grand Prix,Sakhir,RIC,0.656020,MEDIUM,9.0,False,0.216327,34.615646,19.884354,3,2022_Sakhir_FP1,2.795683,4.704502,3.037434,4,2022,FP1,Bahrain Grand Prix,2022-03-18 12:00:00
2,Bahrain Grand Prix,Sakhir,NOR,0.670590,MEDIUM,9.0,False,0.392663,34.600000,19.900000,4,2022_Sakhir_FP1,-0.950770,6.724997,3.628876,4,2022,FP1,Bahrain Grand Prix,2022-03-18 12:00:00
3,Bahrain Grand Prix,Sakhir,LAT,0.641292,SOFT,2.0,True,0.000000,34.185475,19.965363,6,2022_Sakhir_FP1,3.588000,5.781138,3.414592,3,2022,FP1,Bahrain Grand Prix,2022-03-18 12:00:00
4,Bahrain Grand Prix,Sakhir,GAS,0.672562,SOFT,2.0,True,0.000000,33.855890,19.755890,3,2022_Sakhir_FP1,1.515045,4.640641,3.431127,4,2022,FP1,Bahrain Grand Prix,2022-03-18 12:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7687,British Grand Prix,Silverstone,SAI,0.624334,MEDIUM,11.0,True,0.000000,24.528468,19.523844,1,2025_Silverstone_R,-2.311845,7.078944,3.712361,0,2025,R,British Grand Prix,2025-07-06 14:00:00
7688,British Grand Prix,Silverstone,HAD,0.534634,INTERMEDIATE,9.0,True,0.000000,24.770817,19.200000,1,2025_Silverstone_R,4.736600,5.309208,4.073941,0,2025,R,British Grand Prix,2025-07-06 14:00:00
7689,British Grand Prix,Silverstone,RUS,0.680683,HARD,13.0,True,0.000000,24.496254,19.448127,1,2025_Silverstone_R,-2.116248,4.028186,2.663007,0,2025,R,British Grand Prix,2025-07-06 14:00:00
7690,British Grand Prix,Silverstone,PIA,0.671654,MEDIUM,8.0,True,0.000000,24.400000,19.343852,0,2025_Silverstone_R,-2.835950,3.282056,2.922746,0,2025,R,British Grand Prix,2025-07-06 14:00:00


In [6]:
#drivers.loc[(drivers.grand_prix=='British Grand Prix')&(drivers.year==2025)&(drivers.driver.isin(['LEC','HAM','RUS','ANT']))&(drivers.compound=='MEDIUM')]

In [7]:
circuits_skipped

Unnamed: 0,year,event,session,reason
0,2023,Austrian Grand Prix,FP1,ValueError: Failed to compute corner profile: ...


In [8]:
drivers_skipped

Unnamed: 0,year,event,session,reason
0,2022,Dutch Grand Prix,FP2,no driver features returned
1,2023,Austrian Grand Prix,FP1,no driver features returned
2,2023,Hungarian Grand Prix,FP1,no driver features returned


# Driver minisectors/sectors

In [35]:
import os
import fastf1 as ff1
import pandas as pd
from scipy.spatial import cKDTree


def get_corner_area(session: ff1.session.Session, max_attempts: int = 5) -> dict[int, float]:
    """
    Compute apex distances for every corner on the circuit.

    Finds a single valid lap with positional data, merges its X/Y track
    coordinates with the lap’s distance timeline, and then uses a KD-tree
    to snap each corner (from circuit info) to its nearest telemetry point.

    Parameters:
        session: A loaded FastF1 session.
        max_attempts: How many fastest laps to try before giving up.

    Returns:
        A dict mapping `corner_index` → `apex_distance_m` along the lap.

    Raises:
        RuntimeError: If no lap with valid position data is found.
    """
    # 1) Find a lap with position data
    fast_laps = session.laps.pick_quicklaps().sort_values("LapTime")
    valid_lap = None
    for i, lap in enumerate(fast_laps.itertuples()):
        if i >= max_attempts:
            break
        try:
            _ = session.pos_data[lap.DriverNumber]
            valid_lap = session.laps.loc[lap.Index]
            break
        except Exception:
            continue
    if valid_lap is None:
        raise RuntimeError("No lap with valid position data found.")

    # 2) Merge pos (X, Y) with distance timeline
    pos = valid_lap.get_pos_data().copy()
    car = valid_lap.get_car_data().add_distance().copy()
    pos["t"] = pos["Time"].dt.total_seconds()
    car["t"] = car["Time"].dt.total_seconds()

    merged = pd.merge_asof(
        pos[["t", "X", "Y"]].sort_values("t"),
        car[["t", "Distance"]].sort_values("t"),
        on="t",
        direction="nearest"
    ).dropna(subset=["X", "Y", "Distance"])

    # 3) KD-tree corners → nearest telemetry point → distance
    tree = cKDTree(merged[["X", "Y"]].values)
    corners = (
        session
        .get_circuit_info()
        .corners
        .dropna(subset=["X", "Y"])
        .reset_index()
    )
    coords = corners[["X", "Y"]].values
    _, idxs = tree.query(coords, k=1)

    apex_distances = merged.iloc[idxs]["Distance"].to_numpy()
    corner_indices = corners["index"].to_numpy()
    return dict(zip(corner_indices, apex_distances))


def get_detailed_lap_telemetry(
    lap: fastf1.laps.Lap,
    corner_dists: dict[int, float],
    corner_window: float = 100.0
) -> pd.DataFrame:
    """
    Tag a single lap’s telemetry with sector and corner numbers.

    Retrieves raw telemetry, computes distance & relative distance,
    labels each sample with sector (1–3) and corner (0 if none),
    and adds driver & lap identifiers.

    Parameters:
        lap: A FastF1 Lap object.
        corner_dists: Mapping of corner_index → apex_distance_m.
        corner_window: Range (±m) around each apex to flag as in-corner.

    Returns:
        A DataFrame with columns:
            DriverNumber, LapNumber, Time, RPM, nGear, Throttle, Brake,
            DRS, Distance, RelativeDistance, Sector, Corner
    """
    tel = lap.get_telemetry().add_distance().add_relative_distance()

    # Sector boundaries
    t0 = lap["LapStartTime"]
    t_s1 = t0 + lap["Sector1Time"]
    t_s2 = t_s1 + lap["Sector2Time"]

    s1_dist = tel.loc[tel["Time"] <= t_s1, "Distance"].max()
    s2_dist = tel.loc[tel["Time"] <= t_s2, "Distance"].max()

    # Label sectors
    tel["Sector"] = 3
    tel.loc[tel["Distance"] <= s1_dist, "Sector"] = 1
    tel.loc[
        (tel["Distance"] > s1_dist) & (tel["Distance"] <= s2_dist),
        "Sector"
    ] = 2

    # Label corners
    tel["Corner"] = 0
    for corner_idx, apex_dist in corner_dists.items():
        in_corner = tel["Distance"].between(
            apex_dist - corner_window, apex_dist + corner_window
        )
        tel.loc[in_corner, "Corner"] = int(corner_idx)

    # Add identifiers
    tel["DriverNumber"] = lap.DriverNumber
    tel["LapNumber"] = lap["LapNumber"]

    return tel[[
        "DriverNumber", "LapNumber", "Time", "RPM", "nGear",
        "Throttle", "Brake", "DRS", "Distance", "RelativeDistance",
        "Sector", "Corner"
    ]]


def get_detailed_telemetry(session: ff1.session.Session) -> pd.DataFrame:
    """
    Aggregate telemetry for all valid laps in a session.

    Excludes out-laps and pit-laps, tags each sample with sector & corner.

    Parameters:
        session: A loaded FastF1 session.

    Returns:
        A concatenated DataFrame of detailed telemetry for every lap.
    """
    corner_dists = get_corner_area(session)
    valid_laps = session.laps.pick_wo_box()
    all_frames = [
        get_detailed_lap_telemetry(lap, corner_dists)
        for _, lap in valid_laps.iterlaps()
    ]
    return pd.concat(all_frames, ignore_index=True)


In [48]:
pd.set_option('display.max_columns', None)

cache_dir = "data/.fastf1_cache"
os.makedirs(cache_dir, exist_ok=True)
ff1.Cache.enable_cache(cache_dir)

session = ff1.get_session(2025, 'Austrian Grand Prix', 'FP1')
session.load(telemetry=True, laps=True)

df = get_detailed_telemetry(session)

core           INFO 	Loading data for Austrian Grand Prix - Practice 1 [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '5', '6', '10', '12', '14', '18', '22', '23', '27', '30', '31', '38', '43', '44', '55', '63', '81', '87', '89']


In [51]:
df.loc[(df.DriverNumber=='81')&(df.LapNumber==2)]#.Corner.unique()

Unnamed: 0,DriverNumber,LapNumber,Time,RPM,nGear,Throttle,Brake,DRS,Distance,RelativeDistance,Sector,Corner
227518,81,2.0,0 days 00:00:00,11453.028571,7,99.0,False,12,0.000000,0.000000,1,0
227519,81,2.0,0 days 00:00:00.212000,11506.785649,7,99.0,False,12,16.968412,0.003937,1,0
227520,81,2.0,0 days 00:00:00.272000,11522.000000,7,99.0,False,12,21.785079,0.005055,1,0
227521,81,2.0,0 days 00:00:00.373000,11585.124960,7,99.0,False,12,29.910845,0.006940,1,0
227522,81,2.0,0 days 00:00:00.432000,11622.000000,7,99.0,False,12,34.663623,0.008043,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...
228035,81,2.0,0 days 00:01:08.992000,11045.000000,7,99.0,False,12,4258.484374,0.988068,1,0
228036,81,2.0,0 days 00:01:09.152000,11166.142857,7,99.0,False,12,4270.833581,0.990933,1,0
228037,81,2.0,0 days 00:01:09.272000,11257.000000,7,99.0,False,12,4280.166914,0.993099,1,0
228038,81,2.0,0 days 00:01:09.472000,11370.000000,7,99.0,False,12,4295.889136,0.996746,1,0


In [56]:
import fastf1 as ff1

In [57]:
sched   = ff1.get_event_schedule(2025, backend="fastf1")
#row_df  = sched[sched["EventName"] == ev_name]

In [60]:
sched

Unnamed: 0,RoundNumber,Country,Location,OfficialEventName,EventDate,EventName,EventFormat,Session1,Session1Date,Session1DateUtc,Session2,Session2Date,Session2DateUtc,Session3,Session3Date,Session3DateUtc,Session4,Session4Date,Session4DateUtc,Session5,Session5Date,Session5DateUtc,F1ApiSupport
0,0,Bahrain,Sakhir,FORMULA 1 ARAMCO PRE-SEASON TESTING 2025,2025-02-28,Pre-Season Testing,testing,Practice 1,2025-02-26 10:00:00+03:00,2025-02-26 07:00:00,Practice 2,2025-02-27 10:00:00+03:00,2025-02-27 07:00:00,Practice 3,2025-02-28 10:00:00+03:00,2025-02-28 07:00:00,,NaT,NaT,,NaT,NaT,True
1,1,Australia,Melbourne,FORMULA 1 LOUIS VUITTON AUSTRALIAN GRAND PRIX ...,2025-03-16,Australian Grand Prix,conventional,Practice 1,2025-03-14 12:30:00+11:00,2025-03-14 01:30:00,Practice 2,2025-03-14 16:00:00+11:00,2025-03-14 05:00:00,Practice 3,2025-03-15 12:30:00+11:00,2025-03-15 01:30:00,Qualifying,2025-03-15 16:00:00+11:00,2025-03-15 05:00:00,Race,2025-03-16 15:00:00+11:00,2025-03-16 04:00:00,True
2,2,China,Shanghai,FORMULA 1 HEINEKEN CHINESE GRAND PRIX 2025,2025-03-23,Chinese Grand Prix,sprint_qualifying,Practice 1,2025-03-21 11:30:00+08:00,2025-03-21 03:30:00,Sprint Qualifying,2025-03-21 15:30:00+08:00,2025-03-21 07:30:00,Sprint,2025-03-22 11:00:00+08:00,2025-03-22 03:00:00,Qualifying,2025-03-22 15:00:00+08:00,2025-03-22 07:00:00,Race,2025-03-23 15:00:00+08:00,2025-03-23 07:00:00,True
3,3,Japan,Suzuka,FORMULA 1 LENOVO JAPANESE GRAND PRIX 2025,2025-04-06,Japanese Grand Prix,conventional,Practice 1,2025-04-04 11:30:00+09:00,2025-04-04 02:30:00,Practice 2,2025-04-04 15:00:00+09:00,2025-04-04 06:00:00,Practice 3,2025-04-05 11:30:00+09:00,2025-04-05 02:30:00,Qualifying,2025-04-05 15:00:00+09:00,2025-04-05 06:00:00,Race,2025-04-06 14:00:00+09:00,2025-04-06 05:00:00,True
4,4,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2025,2025-04-13,Bahrain Grand Prix,conventional,Practice 1,2025-04-11 14:30:00+03:00,2025-04-11 11:30:00,Practice 2,2025-04-11 18:00:00+03:00,2025-04-11 15:00:00,Practice 3,2025-04-12 15:30:00+03:00,2025-04-12 12:30:00,Qualifying,2025-04-12 19:00:00+03:00,2025-04-12 16:00:00,Race,2025-04-13 18:00:00+03:00,2025-04-13 15:00:00,True
5,5,Saudi Arabia,Jeddah,FORMULA 1 STC SAUDI ARABIAN GRAND PRIX 2025,2025-04-20,Saudi Arabian Grand Prix,conventional,Practice 1,2025-04-18 16:30:00+03:00,2025-04-18 13:30:00,Practice 2,2025-04-18 20:00:00+03:00,2025-04-18 17:00:00,Practice 3,2025-04-19 16:30:00+03:00,2025-04-19 13:30:00,Qualifying,2025-04-19 20:00:00+03:00,2025-04-19 17:00:00,Race,2025-04-20 20:00:00+03:00,2025-04-20 17:00:00,True
6,6,United States,Miami,FORMULA 1 CRYPTO.COM MIAMI GRAND PRIX 2025,2025-05-04,Miami Grand Prix,sprint_qualifying,Practice 1,2025-05-02 12:30:00-04:00,2025-05-02 16:30:00,Sprint Qualifying,2025-05-02 16:30:00-04:00,2025-05-02 20:30:00,Sprint,2025-05-03 12:00:00-04:00,2025-05-03 16:00:00,Qualifying,2025-05-03 16:00:00-04:00,2025-05-03 20:00:00,Race,2025-05-04 16:00:00-04:00,2025-05-04 20:00:00,True
7,7,Italy,Imola,FORMULA 1 AWS GRAN PREMIO DEL MADE IN ITALY E ...,2025-05-18,Emilia Romagna Grand Prix,conventional,Practice 1,2025-05-16 13:30:00+02:00,2025-05-16 11:30:00,Practice 2,2025-05-16 17:00:00+02:00,2025-05-16 15:00:00,Practice 3,2025-05-17 12:30:00+02:00,2025-05-17 10:30:00,Qualifying,2025-05-17 16:00:00+02:00,2025-05-17 14:00:00,Race,2025-05-18 15:00:00+02:00,2025-05-18 13:00:00,True
8,8,Monaco,Monaco,FORMULA 1 TAG HEUER GRAND PRIX DE MONACO 2025,2025-05-25,Monaco Grand Prix,conventional,Practice 1,2025-05-23 13:30:00+02:00,2025-05-23 11:30:00,Practice 2,2025-05-23 17:00:00+02:00,2025-05-23 15:00:00,Practice 3,2025-05-24 12:30:00+02:00,2025-05-24 10:30:00,Qualifying,2025-05-24 16:00:00+02:00,2025-05-24 14:00:00,Race,2025-05-25 15:00:00+02:00,2025-05-25 13:00:00,True
9,9,Spain,Barcelona,FORMULA 1 ARAMCO GRAN PREMIO DE ESPAÑA 2025,2025-06-01,Spanish Grand Prix,conventional,Practice 1,2025-05-30 13:30:00+02:00,2025-05-30 11:30:00,Practice 2,2025-05-30 17:00:00+02:00,2025-05-30 15:00:00,Practice 3,2025-05-31 12:30:00+02:00,2025-05-31 10:30:00,Qualifying,2025-05-31 16:00:00+02:00,2025-05-31 14:00:00,Race,2025-06-01 15:00:00+02:00,2025-06-01 13:00:00,True


In [63]:
sched["EventFormat"].unique()

array(['testing', 'conventional', 'sprint_qualifying'], dtype=object)