In [1]:
import fastf1 as ff1
from fastf1 import plotting
from fastf1.core import Laps

import pandas as pd
import numpy as np

In [2]:
# Enable the cache by providing the name of the cache folder, speed up
ff1.Cache.enable_cache('cache')

# Setup plotting, setup the plot (bg: black, ...)
plotting.setup_mpl()

In [5]:
ff1.Cache.offline_mode(True)

In [3]:
FILE_CIRCUITS = 'f1_unique_circuits_complete.csv'

circuit_info = pd.read_csv(FILE_CIRCUITS, delimiter=",")

In [6]:
session = ff1.get_session(2022, 'Suzuka', 'Q')
session.load()

core           INFO 	Loading data for Japanese Grand Prix - Qualifying [v3.6.1]


req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '16', '55', '11', '31', '44', '14', '63', '5', '4', '3', '77', '22', '24', '47', '23', '10', '20', '18', '6']


In [None]:
driver = 'VER'
# fastest lap of the driver
fastest_lap = session.laps.pick_drivers(driver).pick_fastest()
telemetry_driver = fastest_lap.get_telemetry().add_distance()


In [6]:
max(telemetry_driver['Speed'])

348.0

In [7]:
def calculate_physics(physics_df):
    # Convert speed from km/h to m/s and add it as a new column
    physics_df['v_m/s'] = physics_df['Speed'] / 3.6

    # Convert Time to seconds (float) and add it as a new column
    physics_df['time_s'] = physics_df['Time'] / np.timedelta64(1, 's')

    # Calculate longitudinal (forward/backward) acceleration (ax)
    # This is the derivative of velocity (v) with respect to time (t), or a = dv/dt
    # np.gradient() is a standard way to compute this from arrays of data.
    physics_df['ax_m/s^2'] = np.gradient(physics_df['v_m/s']) / np.gradient(physics_df['time_s'])

    # Smooth the acceleration data.
    # Telemetry data is often "noisy" (spiky), which makes derivatives very unstable.
    # np.convolve() applies a simple 3-point moving average to smooth out the spikes,
    # giving a more realistic acceleration value.
    physics_df['ax_smooth_m/s^2'] = np.convolve(
        physics_df['ax_m/s^2'], 
        np.ones((3,))/3, 
        mode='same'
    )

    # --- MODEL I: STATIC WEIGHT ONLY ---
    m = 798    # kg (Mass)
    g = 9.81   # m/s^2 (Gravity)
    staticWeightTot = m * g
    fractionWeightFront = 0.46

    # Calculate the static weight (in Newtons) on the front and rear axles
    staticWeightFront = staticWeightTot * fractionWeightFront
    staticWeightRear = staticWeightTot - staticWeightFront

    # Add the static loads to the DataFrame
    physics_df['loadFront_static_N'] = staticWeightFront
    physics_df['loadRear_static_N'] = staticWeightRear

    # --- MODEL II: INERTIAL LOAD TRANSFER ADDED ---
    CoGheight = 0.25    # m (Center of Gravity height)
    wheelbase = 3.6     # m (Wheelbase)

    # Calculate the *change* in load (deltaLoad) caused by longitudinal acceleration (ax)
    # This is the core "load transfer" equation.
    # If ax_smooth is positive (accelerating), deltaLoad becomes negative.
    physics_df['deltaLoad_Inertial_N'] = -CoGheight / wheelbase * m * physics_df['ax_smooth_m/s^2']

    # Add the inertial load to the static load
    # Calculate the new, dynamic load on the front and rear axles
    # Front Axle:
    #   - If accelerating (deltaLoad < 0), the front load *decreases*.
    physics_df['loadFront_Inertial_N'] = physics_df['loadFront_static_N'] + physics_df['deltaLoad_Inertial_N']
    # Rear Axle:
    #   - If accelerating (deltaLoad < 0), subtracting a negative *increases* the rear load.
    physics_df['loadRear_Inertial_N'] = physics_df['loadRear_static_N'] - physics_df['deltaLoad_Inertial_N']

    # --- MODEL III: AERODYNAMIC EFFECTS ADDED ---
    rho = 1.225         # kg/m^3 (Standard air density at sea level)
    CdA = 1.7           # Drag Coefficient * Frontal Area. Measures total air resistance.
    efficiency = 3.7      # Aerodynamic efficiency (Lift/Drag ratio). For F1, this is Downforce/Drag.
    ClA = efficiency*CdA  # Downforce Coefficient * Frontal Area. Measures total downforce.
    dragHeight = 0.5    # m (Height of drag force application)

    # Calculate Drag and Downforce (which depend on speed)
    # Calculate Drag Force (in Newtons)
    # Formula: Fd = 0.5 * rho * v^2 * CdA
    # Force increases with the *square* of velocity (v).
    physics_df['dragForce_N'] = 0.5 * CdA * rho * np.square(physics_df['v_m/s'])
    # Calculate Downforce (in Newtons)
    # Formula: Fl = 0.5 * rho * v^2 * ClA
    # This force also increases with the *square* of velocity.
    physics_df['downForce_N'] = 0.5 * ClA * rho * np.square(physics_df['v_m/s'])

    # Distribute the aerodynamic forces
    fractionDownforceFront = 0.4
    physics_df['downForceFront_N'] = physics_df['downForce_N'] * fractionDownforceFront
    physics_df['downForceRear_N'] = physics_df['downForce_N'] * (1 - fractionDownforceFront)

    # Calculate the load transfer caused *by drag*.
    # Drag pulls the car backward from a point *above* the ground (dragHeight),
    # which creates a moment that also shifts load to the rear axle.
    physics_df['deltaLoad_Drag_N'] = -dragHeight / wheelbase * physics_df['dragForce_N']

    # FINAL FRONT LOAD:
    # Model II load + downforce on the front + load transfer from drag
    physics_df['loadFront_AeroModel_N'] = (
        physics_df['loadFront_Inertial_N'] + 
        physics_df['downForceFront_N'] + 
        physics_df['deltaLoad_Drag_N']
    )
    # FINAL REAR LOAD:
    # Model II load + downforce on the rear - load transfer from drag
    physics_df['loadRear_AeroModel_N'] = (
        physics_df['loadRear_Inertial_N'] + 
        physics_df['downForceRear_N'] - 
        physics_df['deltaLoad_Drag_N']
    )

    return physics_df

# Longitudinal and Latitudinal acceleration

In [8]:
def convert_to_g(accelerations_ms2):
    g = 9.81
    return accelerations_ms2 / g


In [9]:
import numpy as np
import pandas as pd # Needed for type checking/conversion

def smooth_derivative(t_in, v_in, method="smooth"):
    """
    Computes a smooth estimation of a derivative (dv/dt) using a multi-point
    finite difference stencil to reduce noise amplification.
    """
    t = t_in.copy()
    v = v_in.copy()
    
    # (0) Prepare inputs: Ensure Time is transformed to seconds (a float array)
    try:
        # Check if t is a Pandas Series of timedelta objects
        if isinstance(t, pd.Series) and t.dtype == 'timedelta64[ns]':
            t = np.array([td.total_seconds() for td in t])
        else:
             # Assume it's a list/array of timedeltas needing conversion
             t = np.array([td.total_seconds() for td in t])
    except:
        # If it throws an error, it's likely already a float/int array (e.g., Distance)
        t = np.array(t)
        
    v = np.array(v)
    
    # Assert they have the same size and initialize output array
    assert t.size == v.size
    n = t.size
    dvdt = np.zeros(n)
    
    if n < 6:
        # Fallback to simple gradient if not enough points for the stencil
        return np.gradient(v, t)

    # (1) Manually compute points out of the stencil (boundary conditions)
    # The first few and last few points can't use the full stencil, so we use
    # a simpler, but stable, forward/backward difference approximation.
    # Helper function to safely calculate the slope (dv/dt)
    def safe_slope(v_num, t_den):
        # Check if the time difference is effectively zero
        if t_den == 0:
            return 0.0  # Return 0 derivative if time difference is zero
        return v_num / t_den

    # Boundary conditions: Apply the safe_slope helper to all boundary calculations
    dvdt[0] = safe_slope(v[1] - v[0], t[1] - t[0])
    dvdt[1] = safe_slope(v[2] - v[0], t[2] - t[0])
    dvdt[2] = safe_slope(v[3] - v[1], t[3] - t[1])
    
    dvdt[n-1] = safe_slope(v[n-1] - v[n-2], t[n-1] - t[n-2])
    dvdt[n-2] = safe_slope(v[n-1] - v[n-3], t[n-1] - t[n-3])
    dvdt[n-3] = safe_slope(v[n-2] - v[n-4], t[n-2] - t[n-4])

    # (2) Compute the rest of the points using the stencil
    if (method=='smooth'):
        # This uses a weighted average of multiple centered differences
        # (up to 3 points away) to achieve maximum noise suppression.
        c = [5./32., 4./32., 1./32.]
        for i in range(3, n-3):
            for j in range(1, 4):
                v_num = v[i+j] - v[i-j]
                t_den = t[i+j] - t[i-j]
                # Only add to the derivative if the time difference is non-zero
                if t_den != 0:
                    dvdt[i] += 2 * j * c[j-1] * (v_num / t_den)

    elif (method == 'centered'):
        # Fallback for a standard (but still 2-point) centered difference
        for i in range(3, n-3):
            v_num = v[i+1] - v[i-1]
            t_den = t[i+1] - t[i-1]
            
            # Only assign the derivative if the time difference is non-zero
            if t_den != 0:
                dvdt[i] = v_num / t_den
            else:
                dvdt[i] = 0.0 # Assign zero derivative if time difference is zero
            
    return dvdt

In [10]:
import math

def truncated_remainder(dividend, divisor):
    """
    Computes a truncated remainder, ensuring consistent mathematical behavior
    especially for negative numbers, unlike standard Python's modulo operator.
    """
    
    # Calculate the integer part of the division (the quotient)
    # Using specific logic to ensure 'truncation' (cutting off the decimal part 
    # toward zero), which differs from standard floor/ceil for negative numbers.
    divided_number = dividend / divisor
    divided_number = -int(-divided_number) if divided_number < 0 else int(divided_number)

    # Calculate the remainder: Remainder = Dividend - Divisor * Quotient
    remainder = dividend - divisor * divided_number

    return remainder

In [11]:
import math
import numpy as np

def transform_to_pipi(input_angle):
    """
    Unwinds an angle (in radians) to ensure it changes continuously
    and does not jump from +pi to -pi (or vice versa), which is vital for
    calculating a smooth derivative (rate of change of the angle).
    """
    pi = math.pi
    
    # Calculate how many full 2*pi revolutions have occurred
    revolutions = np.floor((input_angle + np.sign(input_angle) * pi) / (2 * pi))

    # Calculate the remainder of the angle after removing full revolutions
    p1 = truncated_remainder(input_angle + np.sign(input_angle) * pi, 2 * pi)
    
    # Apply a correction term (p2) based on the sign to correctly map the angle 
    # back into the range of [-pi, pi] (this handles the unwrapping logic)
    p2 = (np.sign(np.sign(input_angle)
                  + 2 * (np.sign(math.fabs((truncated_remainder(input_angle + pi, 2 * pi))
                                          / (2 * pi))) - 1))) * pi

    # The final unwrapped angle
    output_angle = p1 - p2

    return output_angle, revolutions

In [12]:
import math
import numpy as np

def remove_acceleration_outliers(acc):
    """
    Filters out extreme acceleration spikes by replacing values above a G-force
    threshold (default 7.0G) with the previous time step's value.
    """
    
    acc_threshold_g = 7.0  # F1 cars rarely sustain forces above 5-6G
    acc = np.array(acc)
    
    # Handle the very first point
    if (math.fabs(acc[0]) > acc_threshold_g):
        acc[0] = 0.0
        
    # Iterate through the main body of the data
    for i in range(1, acc.size-1):
        if ( math.fabs(acc[i]) > acc_threshold_g ):
            # Replace the outlier with the previous, clean value
            acc[i] = acc[i-1]
            
    # Handle the very last point
    if (acc.size > 1 and math.fabs(acc[-1]) > acc_threshold_g ):
        acc[-1] = acc[-2]
            
    return acc

In [13]:
import numpy as np
import math

def compute_accelerations(physics_df):
    """
    Calculates the Longitudinal (ax) and Lateral (ay) acceleration 
    (in G's) for every point in a telemetry lap.
    """
    
    # --- 1. Longitudinal Acceleration (ax) ---
    # Convert Speed from km/h to m/s for physics calculations
    v = np.array(physics_df['Speed']) / 3.6
    
    # Calculate the rate of change of speed over time (dv/dt) using the smooth derivative
    # Then divide by 9.81 m/s^2 to convert the units from m/s^2 to G-force.
    lon_acc = smooth_derivative(physics_df['Time'], v) / 9.81 

    # --- 2. Lateral Acceleration (ay) - Requires Heading & Curvature ---
    # Calculate the derivative of X and Y positions with respect to DISTANCE (dx/ds, dy/ds)
    # These derivatives define the tangent direction (the car's path)
    dx = smooth_derivative(physics_df['Distance'], physics_df['X'])
    dy = smooth_derivative(physics_df['Distance'], physics_df['Y'])

    # Initialize the array to store the car's heading angle (theta)
    theta = np.zeros(dx.size)
    
    # Determine the initial heading angle using atan2
    if dx.size > 0 and dy.size > 0:
        theta[0] = math.atan2(dy[0], dx[0])
    else:
        # Return zeros if telemetry is empty or too short
        physics_df['Longitudinal_G'] = np.zeros_like(v)
        physics_df['Lateral_G'] = np.zeros_like(v)
        return physics_df

    # Loop to calculate the continuous heading angle (theta)
    for i in range(1, dx.size):
        # Calculate the raw change in angle from the previous point
        delta_angle = math.atan2(dy[i], dx[i]) - theta[i-1]
        
        # Use transform_to_pipi to "unwind" this change, preventing jumps from +180 to -180
        unwound_delta, _ = transform_to_pipi(delta_angle)
        
        # Update the continuous heading angle
        theta[i] = theta[i-1] + unwound_delta
        
    # Curvature (kappa) is the rate of change of the continuous heading (theta) 
    # with respect to the Distance traveled (d(theta)/ds).
    kappa = smooth_derivative(physics_df['Distance'], theta)
    
    # Lateral Acceleration (ay) is calculated using the centripetal formula:
    # ay = v^2 * kappa. This is then converted to G-force.
    lat_acc = (v * v * kappa) / 9.81
    
    # --- 3. Outlier Removal ---
    # Apply the final filtering step to remove any extreme, non-physical spikes
    physics_df['Longitudinal_G'] = remove_acceleration_outliers(lon_acc)
    physics_df['Lateral_G'] = remove_acceleration_outliers(lat_acc)
    
    return physics_df

In [14]:
def compute_speeds(physics_df, fastest_lap):
    # Find max straight speed
    max_speed = physics_df['v_m/s'].max()
    physics_df['Max_Straight_Speed'] = max_speed

    # Find min corner speed
    # Define a "high-G corner" as the top 5% of all lateral G-forces
    HIGH_G_PERCENTILE = 95

    g_threshold = np.percentile(abs(physics_df['Lateral_G']), HIGH_G_PERCENTILE)
    high_g_corners = physics_df[abs(physics_df['Lateral_G']) >= g_threshold]
    min_corner_speed = high_g_corners['v_m/s'].min()
    physics_df['Min_Corner_Speed'] = min_corner_speed

    # Average lap speed
    total_distance_meters = physics_df['Distance'].max()
    total_time_seconds = fastest_lap['LapTime'].total_seconds()
    avg_speed_lap = total_distance_meters / total_time_seconds
    physics_df['Avg_Speed_Lap'] = avg_speed_lap

    return physics_df


In [15]:
def compute_forces(physics_df):
    # --- G-Force Features (Peak Performance) ---
    # maximum lateral G, car's maximum cornering grip
    #physics_df['LatG_max'] = max(abs(physics_df['Lateral_G']))
    physics_df['LatG_Max'] = physics_df['Lateral_G'].abs().max()

    # minimum longitudinal G, car's maximum braking force
    physics_df['LonG_Min'] = physics_df['Longitudinal_G'].min()
    # minimum longitudinal G, car's maximum acceleration
    physics_df['LonG_Max'] = physics_df['Longitudinal_G'].max()

    # --- Aero/Load Features (Setup & Car Characteristics) ---
    physics_df['DownForce_Max'] = physics_df['downForce_N'].max()
    physics_df['DownForce_Avg'] = physics_df['downForce_N'].mean()
    physics_df['DragForce_Max'] = physics_df['dragForce_N'].max()
    physics_df['LoadFront_Max'] = physics_df['loadFront_AeroModel_N'].max()
    physics_df['LoadRear_Max'] = physics_df['loadRear_AeroModel_N'].max()

    return physics_df

In [16]:
def compute_base_data_df(fastest_lap, session, year, location):  
    # --- 1. Base Data & Weather Merge (From fastest_lap) ---
    lap_features = pd.DataFrame([fastest_lap.to_dict()])

    # Time conversions and setup
    lap_features['LapTime'] = lap_features['LapTime'].dt.total_seconds()
    lap_features['Sector1Time'] = lap_features['Sector1Time'].dt.total_seconds()
    lap_features['Sector2Time'] = lap_features['Sector2Time'].dt.total_seconds()
    lap_features['Sector3Time'] = lap_features['Sector3Time'].dt.total_seconds()
    lap_features['Location'] = location
    lap_features['Year'] = year

    lap_features['LapStartTime'] = lap_features['Time']
    weather = session.weather_data
    lap_features = pd.merge_asof(
        lap_features,                # Use the new DataFrame
        weather,
        left_on='LapStartTime', # Column from fl_df
        right_on='Time',        # Column from weather
        direction='backward'  # Finds weather row *just before* or at LapStartTime
    )
    lap_features = lap_features.rename(columns={'Time_x': 'Time'})
    lap_features = lap_features.drop(columns={'Time_y'})

    lap_features = pd.merge(lap_features,
                            circuit_info,
                            on='Location',
                            how='left')
    
    lap_features['EventDate'] = session.event['EventDate']

    # --- 2. Target Variable (Y) - OFFICIAL RANK ---
    results_df = session.results
    drv = lap_features['Driver'].iloc[0]
    rank_series = results_df[results_df['Abbreviation'] == drv]['Position']
    lap_features['Quali_Rank'] = rank_series.iloc[0] if not rank_series.empty else np.nan
    
    KEEP_COLS = ['Time', 'Driver', 'DriverNumber', 'Team', 'LapTime', 'LapNumber', 'Stint',
                  'Sector1Time', 'Sector2Time', 'Sector3Time', 'Compound', 'TyreLife', 'FreshTyre',
                  'AirTemp', 'TrackTemp', 'Rainfall', 'Humidity', 'WindSpeed', 'WindDirection', 
                  'TrackLenght', 'NumTurns', 'Traction', 'AsphaltGrip', 'AsphaltAbrasion', 
                  'TrackEvolution', 'TyreStress', 'Braking', 'LateralLoad', 'Downforce', 
                  'Soft', 'Medium', 'Hard', 'Location', 'Country', 'Year', 'EventDate', 'Quali_Rank'
                  ]
    
    lap_features = lap_features[KEEP_COLS]
    
    # Define ALL columns that will eventually be in the final DataFrame.
    ALL_COLS = [
        'Time', 'Driver', 'DriverNumber', 'Team', 'LapTime', 'LapNumber', 'Stint',
        'Sector1Time', 'Sector2Time', 'Sector3Time', 'Compound', 'TyreLife', 'FreshTyre',
        'AirTemp', 'TrackTemp', 'Rainfall', 'Humidity', 'WindSpeed', 'WindDirection', 
        'TrackLenght', 'NumTurns', 'Traction', 'AsphaltGrip', 'AsphaltAbrasion', 
        'TrackEvolution', 'TyreStress', 'Braking', 'LateralLoad', 'Downforce', 
        'Soft', 'Medium', 'Hard', 'Location', 'Country', 'Year', 'EventDate', 'Quali_Rank',
        # Physics Features (ALL physics columns are initialized to NaN)
        'Max_Straight_Speed', 'Avg_Lap_Speed', 'Min_Corner_Speed', 'Peak_Cornering_G', 
        'Peak_Braking_G', 'Peak_Acceleration_G', 'Peak_Downforce_N', 'Avg_Downforce_N', 
        'Peak_Drag_N', 'Peak_Front_Load_N', 'Peak_Rear_Load_N',
    ]
    
    # Get the list of columns currently in the DataFrame
    current_cols = lap_features.columns.tolist()
    
    # Add all missing physics and info columns and set them to NaN
    for col in ALL_COLS:
        if col not in current_cols:
            lap_features[col] = np.nan
            
    # Filter to ensure order and only necessary columns remain
    final_base_cols = [col for col in ALL_COLS if col in lap_features.columns]
    
    return lap_features[final_base_cols].copy()


In [17]:
def add_physics_features(lap_features_df, physics_df, fastest_lap):
    """
    Calculates the physics aggregations and updates the existing DataFrame.
    """
    # --- Speed Features ---
    lap_features_df['Max_Straight_Speed'] = physics_df['v_m/s'].max()
    total_distance_meters = physics_df['Distance'].max()
    total_time_seconds = fastest_lap['LapTime'].total_seconds()
    lap_features_df['Avg_Lap_Speed'] = total_distance_meters / total_time_seconds

    HIGH_G_PERCENTILE = 95
    g_threshold = np.percentile(abs(physics_df['Lateral_G']), HIGH_G_PERCENTILE)
    high_g_corners = physics_df[abs(physics_df['Lateral_G']) >= g_threshold]
    lap_features_df['Min_Corner_Speed'] = high_g_corners['v_m/s'].min()

    # --- G-Force Features (Peak Performance) ---
    lap_features_df['Peak_Cornering_G'] = abs(physics_df['Lateral_G']).max()
    lap_features_df['Peak_Braking_G'] = physics_df['Longitudinal_G'].min()
    lap_features_df['Peak_Acceleration_G'] = physics_df['Longitudinal_G'].max()

    # --- Aero/Load Features (Your selected columns) ---
    lap_features_df['Peak_Downforce_N'] = physics_df['downForce_N'].max()
    lap_features_df['Avg_Downforce_N'] = physics_df['downForce_N'].mean()
    lap_features_df['Peak_Drag_N'] = physics_df['dragForce_N'].max()
    lap_features_df['Peak_Front_Load_N'] = physics_df['loadFront_AeroModel_N'].max()
    lap_features_df['Peak_Rear_Load_N'] = physics_df['loadRear_AeroModel_N'].max()

    return lap_features_df

# Building dataframe

In [23]:
YEAR = 2022
LOCATION = 'Baku'

session = ff1.get_session(YEAR, LOCATION, 'Q')
session.load()
drivers = session.drivers
event_laps = []

for driver_number in drivers:
    fastest_lap = session.laps.pick_drivers(driver_number).pick_fastest()
    try:
        lap_features_df = compute_base_data_df(fastest_lap, session, YEAR, LOCATION)

        try:
            telemetry_driver = fastest_lap.get_telemetry().add_distance()

            physics_df = telemetry_driver.copy()
            physics_df = calculate_physics(physics_df)
            physics_df = compute_accelerations(physics_df)
            lap_features_df = add_physics_features(lap_features_df, physics_df, fastest_lap)

        except Exception as e:
            print(f"Skipped physiscs {LOCATION} {YEAR} {driver_number}: {e}")

        event_laps.append(lap_features_df)
    except Exception as e:
            print(f"Skipped laps {LOCATION} {YEAR} {driver_number}: {e}")



core           INFO 	Loading data for Azerbaijan Grand Prix - Qualifying [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	No cached data found for position_data. Loading data...
_api           INFO 	Fetching position data...
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '11', '1', '55', '63', '10', '44', '22', '5', '14', '4', '3', '31', '24', '77', '20', '23', '6', '18', '47']


Skipped physiscs Baku 2022 16: '16'
Skipped physiscs Baku 2022 11: '11'
Skipped physiscs Baku 2022 1: '1'
Skipped physiscs Baku 2022 55: '55'
Skipped physiscs Baku 2022 63: '63'
Skipped physiscs Baku 2022 10: '10'
Skipped physiscs Baku 2022 44: '44'
Skipped physiscs Baku 2022 22: '22'
Skipped physiscs Baku 2022 5: '5'
Skipped physiscs Baku 2022 14: '14'
Skipped physiscs Baku 2022 4: '4'
Skipped physiscs Baku 2022 3: '3'
Skipped physiscs Baku 2022 31: '31'
Skipped physiscs Baku 2022 24: '24'
Skipped physiscs Baku 2022 77: '77'
Skipped physiscs Baku 2022 20: '20'
Skipped physiscs Baku 2022 23: '23'
Skipped physiscs Baku 2022 6: '6'
Skipped physiscs Baku 2022 18: '18'
Skipped physiscs Baku 2022 47: '47'


In [24]:
event_laps[1]

Unnamed: 0,Time,Driver,DriverNumber,Team,LapTime,LapNumber,Stint,Sector1Time,Sector2Time,Sector3Time,...,Avg_Lap_Speed,Min_Corner_Speed,Peak_Cornering_G,Peak_Braking_G,Peak_Acceleration_G,Peak_Downforce_N,Avg_Downforce_N,Peak_Drag_N,Peak_Front_Load_N,Peak_Rear_Load_N
0,0 days 01:40:43.524000,PER,11,Red Bull Racing,101.641,17.0,4.0,35.642,40.997,25.002,...,,,,,,,,,,


In [25]:
run_config = [
    (2022, 2, None),  # Per [2:]
    (2023, 1, None),  # Per [1:]
    (2024, 1, None),  # Per [1:]
    (2025, 1, 20)    # Per [1:20]
]

all_qualis = []

skipped_races = []
skipped_laps = []
skipped_physics = []

for YEAR, START, END in run_config:
    locations = ff1.get_event_schedule(YEAR)['Location'][START:END]

    for LOCATION in locations:
        try:
            if LOCATION == 'Miami Gardens':
                LOCATION = 'Miami'

            session = ff1.get_session(YEAR, LOCATION, 'Q')
            session.load()
            drivers = session.drivers
            event_laps = []

            for driver_number in drivers:
                try:
                    fastest_lap = session.laps.pick_drivers(driver_number).pick_fastest()
                    lap_features_df = compute_base_data_df(fastest_lap, session, YEAR, LOCATION)

                    try:
                        telemetry_driver = fastest_lap.get_telemetry().add_distance()

                        physics_df = telemetry_driver.copy()
                        physics_df = calculate_physics(physics_df)
                        physics_df = compute_accelerations(physics_df)
                        lap_features_df = add_physics_features(lap_features_df, physics_df, fastest_lap)

                    except Exception as e:
                        error_msg = f"{LOCATION} {YEAR} {driver_number}: {e}"
                        print(f"Skipped physics: {error_msg}")
                        skipped_physics.append(error_msg)

                    event_laps.append(lap_features_df)
                    
                except Exception as e:
                        error_msg = f"{LOCATION} {YEAR} {driver_number}: {e}"
                        print(f"Skipped lap: {error_msg}")
                        skipped_laps.append(error_msg)

            el_df = pd.concat(event_laps, ignore_index=True)
            all_qualis.append(el_df)

        except Exception as e:
            error_msg = f"{LOCATION} {YEAR}: {e}"
            print(f"Skipped race: {error_msg}")
            skipped_races.append(error_msg)


all_qualis_df = pd.concat(all_qualis, ignore_index=True)

core           INFO 	Loading data for Bahrain Grand Prix - Qualifying [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '1', '55', '11', '44', '77', '20', '14', '63', '10', '31', '47', '4', '23', '24', '22', '27', '3', '18', '6']
core           INFO 	Loading data for Saudi Arabian Grand Prix - Qualifying [v3.6.1]
req            INFO 	Usi

Skipped lap: Jeddah 2022 22: 'NoneType' object has no attribute 'to_dict'


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '1', '11', '4', '44', '63', '3', '31', '55', '14', '10', '77', '22', '24', '47', '23', '20', '5', '6', '18']
core           INFO 	Loading data for Emilia Romagna Grand Prix - Qualifying [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Skipped lap: Melbourne 2022 18: 'NoneType' object has no attribute 'to_dict'


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '16', '4', '20', '14', '3', '11', '77', '5', '55', '63', '47', '44', '24', '18', '22', '10', '6', '31', '23']
core           INFO 	Loading data for Miami Grand Prix - Qualifying [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Skipped lap: Imola 2022 23: 'NoneType' object has no attribute 'to_dict'


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '55', '1', '11', '77', '44', '10', '4', '22', '18', '14', '63', '5', '3', '47', '20', '24', '23', '6', '31']
core           INFO 	Loading data for Spanish Grand Prix - Qualifying [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Skipped lap: Miami 2022 31: 'NoneType' object has no attribute 'to_dict'


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '1', '55', '63', '11', '44', '77', '20', '3', '47', '4', '31', '22', '10', '24', '5', '14', '18', '23', '6']
core           INFO 	Loading data for Monaco Grand Prix - Qualifying [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
re

Skipped physics: Baku 2022 16: '16'
Skipped physics: Baku 2022 11: '11'
Skipped physics: Baku 2022 1: '1'
Skipped physics: Baku 2022 55: '55'
Skipped physics: Baku 2022 63: '63'
Skipped physics: Baku 2022 10: '10'
Skipped physics: Baku 2022 44: '44'
Skipped physics: Baku 2022 22: '22'
Skipped physics: Baku 2022 5: '5'
Skipped physics: Baku 2022 14: '14'
Skipped physics: Baku 2022 4: '4'
Skipped physics: Baku 2022 3: '3'
Skipped physics: Baku 2022 31: '31'
Skipped physics: Baku 2022 24: '24'
Skipped physics: Baku 2022 77: '77'
Skipped physics: Baku 2022 20: '20'
Skipped physics: Baku 2022 23: '23'
Skipped physics: Baku 2022 6: '6'
Skipped physics: Baku 2022 18: '18'
Skipped physics: Baku 2022 47: '47'


req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '14', '55', '44', '20', '47', '31', '63', '3', '24', '77', '23', '11', '4', '16', '10', '5', '18', '6', '22']
core           INFO 	Loading data for British Grand Prix - Qualifying [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info

Skipped race: Le Castellet 2022: No objects to concatenate


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['63', '55', '16', '4', '31', '14', '44', '77', '3', '1', '11', '24', '20', '18', '47', '22', '23', '5', '10', '6']
core           INFO 	Loading data for Belgian Grand Prix - Qualifying [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
r

Skipped physics: Zandvoort 2022 23: attempt to get argmin of an empty sequence


core           INFO 	Loading data for Italian Grand Prix - Qualifying [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '1', '55', '11', '44', '63', '4', '3', '10', '14', '31', '77', '45', '24', '22', '6', '5', '18', '20', '47']
core           INFO 	Loading data for Singapore Grand Prix - Qualifying [v3.6.1]
req            INFO 	Using ca

Skipped lap: Melbourne 2023 11: 'NoneType' object has no attribute 'to_dict'


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '1', '11', '55', '44', '14', '4', '22', '18', '81', '63', '31', '23', '77', '2', '24', '27', '20', '10', '21']
core           INFO 	Loading data for Miami Grand Prix - Qualifying [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
r

Skipped lap: Suzuka 2023 2: 'NoneType' object has no attribute 'to_dict'


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '63', '44', '14', '16', '81', '10', '31', '77', '4', '22', '55', '11', '23', '27', '2', '18', '40', '20', '24']
core           INFO 	Loading data for United States Grand Prix - Qualifying [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for positio

Skipped lap: Mexico City 2023 2: 'NoneType' object has no attribute 'to_dict'


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '16', '18', '14', '44', '63', '4', '55', '11', '81', '27', '31', '10', '20', '23', '22', '3', '77', '2', '24']
core           INFO 	Loading data for Las Vegas Grand Prix - Qualifying [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_dat

Skipped lap: Yas Island 2023 2: 'NoneType' object has no attribute 'to_dict'


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '16', '63', '55', '11', '14', '4', '81', '44', '27', '22', '18', '23', '3', '20', '77', '24', '2', '31', '10']
core           INFO 	Loading data for Saudi Arabian Grand Prix - Qualifying [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position

Skipped lap: Jeddah 2024 24: 'NoneType' object has no attribute 'to_dict'


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 19 drivers: ['1', '55', '11', '4', '16', '81', '63', '22', '18', '14', '44', '23', '77', '20', '31', '27', '10', '3', '24']
core           INFO 	Loading data for Japanese Grand Prix - Qualifying [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req 

Skipped lap: Imola 2024 2: 'NoneType' object has no attribute 'to_dict'


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '81', '55', '4', '63', '1', '44', '22', '23', '10', '31', '3', '18', '27', '14', '2', '20', '11', '77', '24']


Skipped physics: Monaco 2024 23: attempt to get argmin of an empty sequence


core           INFO 	Loading data for Canadian Grand Prix - Qualifying [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['63', '1', '4', '81', '3', '14', '44', '22', '18', '23', '16', '55', '2', '20', '10', '11', '77', '31', '27', '24']
core           INFO 	Loading data for Spanish Grand Prix - Qualifying [v3.6.1]
req            INFO 	Using ca

Skipped physics: Barcelona 2024 18: attempt to get argmin of an empty sequence


core           INFO 	Loading data for Austrian Grand Prix - Qualifying [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '63', '55', '44', '16', '81', '11', '27', '31', '3', '20', '10', '22', '14', '23', '18', '77', '2', '24']
core           INFO 	Loading data for British Grand Prix - Qualifying [v3.6.1]
req            INFO 	Using ca

Skipped lap: Zandvoort 2024 2: 'NoneType' object has no attribute 'to_dict'


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '81', '63', '16', '55', '44', '1', '11', '23', '27', '14', '3', '20', '10', '31', '22', '18', '43', '77', '24']
core           INFO 	Loading data for Azerbaijan Grand Prix - Qualifying [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_d

Skipped lap: Melbourne 2025 87: 'NoneType' object has no attribute 'to_dict'


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '63', '4', '1', '44', '16', '6', '12', '22', '23', '31', '27', '14', '18', '55', '10', '87', '7', '5', '30']
core           INFO 	Loading data for Japanese Grand Prix - Qualifying [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data


Skipped lap: Imola 2025 22: 'NoneType' object has no attribute 'to_dict'


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '16', '81', '44', '1', '6', '14', '31', '30', '23', '55', '22', '27', '63', '12', '5', '87', '10', '18', '43']
core           INFO 	Loading data for Spanish Grand Prix - Qualifying [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data


Skipped lap: Zandvoort 2025 18: 'NoneType' object has no attribute 'to_dict'


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '81', '16', '44', '63', '12', '5', '14', '22', '87', '27', '55', '23', '31', '6', '18', '43', '10', '30']
core           INFO 	Loading data for Azerbaijan Grand Prix - Qualifying [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_da

Skipped lap: Austin 2025 6: 'NoneType' object has no attribute 'to_dict'


In [26]:
skipped_laps

["Jeddah 2022 22: 'NoneType' object has no attribute 'to_dict'",
 "Melbourne 2022 18: 'NoneType' object has no attribute 'to_dict'",
 "Imola 2022 23: 'NoneType' object has no attribute 'to_dict'",
 "Miami 2022 31: 'NoneType' object has no attribute 'to_dict'",
 "Melbourne 2023 11: 'NoneType' object has no attribute 'to_dict'",
 "Suzuka 2023 2: 'NoneType' object has no attribute 'to_dict'",
 "Mexico City 2023 2: 'NoneType' object has no attribute 'to_dict'",
 "Yas Island 2023 2: 'NoneType' object has no attribute 'to_dict'",
 "Jeddah 2024 24: 'NoneType' object has no attribute 'to_dict'",
 "Imola 2024 2: 'NoneType' object has no attribute 'to_dict'",
 "Zandvoort 2024 2: 'NoneType' object has no attribute 'to_dict'",
 "Melbourne 2025 87: 'NoneType' object has no attribute 'to_dict'",
 "Imola 2025 22: 'NoneType' object has no attribute 'to_dict'",
 "Zandvoort 2025 18: 'NoneType' object has no attribute 'to_dict'",
 "Austin 2025 6: 'NoneType' object has no attribute 'to_dict'"]

In [27]:
skipped_physics

["Baku 2022 16: '16'",
 "Baku 2022 11: '11'",
 "Baku 2022 1: '1'",
 "Baku 2022 55: '55'",
 "Baku 2022 63: '63'",
 "Baku 2022 10: '10'",
 "Baku 2022 44: '44'",
 "Baku 2022 22: '22'",
 "Baku 2022 5: '5'",
 "Baku 2022 14: '14'",
 "Baku 2022 4: '4'",
 "Baku 2022 3: '3'",
 "Baku 2022 31: '31'",
 "Baku 2022 24: '24'",
 "Baku 2022 77: '77'",
 "Baku 2022 20: '20'",
 "Baku 2022 23: '23'",
 "Baku 2022 6: '6'",
 "Baku 2022 18: '18'",
 "Baku 2022 47: '47'",
 'Zandvoort 2022 23: attempt to get argmin of an empty sequence',
 'Monaco 2024 23: attempt to get argmin of an empty sequence',
 'Barcelona 2024 18: attempt to get argmin of an empty sequence']

In [28]:
skipped_races

['Le Castellet 2022: No objects to concatenate']

In [35]:
all_qualis_df.to_csv('features_quali_telemetry.csv', index=False)

In [36]:
all_qualis_df['EventDate']

0      2022-03-20
1      2022-03-20
2      2022-03-20
3      2022-03-20
4      2022-03-20
          ...    
1699   2025-10-19
1700   2025-10-19
1701   2025-10-19
1702   2025-10-19
1703   2025-10-19
Name: EventDate, Length: 1704, dtype: datetime64[ns]

In [32]:
all_qualis_df['Team'] = all_qualis_df['Team'].replace(['AlphaTauri', 'Racing Bulls'], 'RB')
all_qualis_df['Team'] = all_qualis_df['Team'].replace(['Alfa Romeo'], 'Kick Sauber')

In [33]:
all_qualis_df.value_counts('Team')

Team
McLaren            172
Ferrari            172
Mercedes           172
Alpine             171
Kick Sauber        171
Haas F1 Team       171
Aston Martin       170
RB                 170
Red Bull Racing    170
Williams           165
Name: count, dtype: int64

In [49]:
import logging
logging.getLogger('fastf1').setLevel(logging.WARNING)

In [52]:
all_qualis_df

Unnamed: 0,Time,Driver,DriverNumber,Team,LapTime,LapNumber,Stint,Sector1Time,Sector2Time,Sector3Time,...,Min_Corner_Speed,Peak_Cornering_G,Peak_Braking_G,Peak_Acceleration_G,Peak_Downforce_N,Avg_Downforce_N,Peak_Drag_N,Peak_Front_Load_N,Peak_Rear_Load_N,Rank
0,0 days 01:14:47.968000,LEC,16,Ferrari,90.558,14.0,5.0,29.115,38.702,22.741,...,35.033563,6.315997,-5.639210,1.655335,30061.176736,15110.574496,8124.642361,15967.697325,23405.277478,1
1,0 days 01:15:15.614000,VER,1,Red Bull Racing,90.681,13.0,4.0,28.970,38.832,22.879,...,24.943577,6.049043,-4.409588,2.149994,30822.189082,14709.982510,8330.321373,15830.695983,23927.015267,2
2,0 days 01:07:34.650000,SAI,55,Ferrari,90.687,11.0,4.0,29.036,38.842,22.809,...,34.722222,6.617073,-5.031731,2.238076,29872.410002,14717.819360,8073.624325,15139.076301,23294.986842,3
3,0 days 01:15:21.593000,PER,11,Red Bull Racing,90.921,17.0,6.0,29.180,38.894,22.847,...,36.388889,6.820775,-4.622779,1.671452,31013.928520,14899.578713,8382.142843,16030.054736,24011.176938,4
4,0 days 00:55:33.904000,HAM,44,Mercedes,91.048,10.0,3.0,29.100,39.038,22.910,...,26.944444,6.203374,-4.674010,1.668267,29872.410002,14982.631982,8073.624325,14955.892800,23263.427286,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
255,0 days 00:29:21.710000,MAG,20,Haas F1 Team,85.834,8.0,2.0,17.637,36.876,31.321,...,28.297619,6.285979,-4.379501,1.769746,31786.831684,15536.006809,8591.035590,15927.352317,24504.781940,16
256,0 days 00:21:22.638000,GAS,10,AlphaTauri,85.859,5.0,2.0,17.559,36.854,31.446,...,45.833333,6.894909,-4.486195,1.458548,31399.191020,15715.337850,8486.267843,15664.977358,24288.403388,17
257,0 days 00:28:54.123000,BOT,77,Alfa Romeo,85.892,5.0,2.0,17.808,36.913,31.171,...,27.111111,6.884178,-4.922296,1.582875,31206.262500,15511.837413,8434.125000,16011.351288,24135.136841,18
258,0 days 00:28:48.926000,ALB,23,Williams,86.028,8.0,2.0,17.561,36.708,31.759,...,26.833333,6.745130,-4.358507,1.669836,32964.022656,15745.752053,8909.195312,16786.648806,25273.686130,19


In [41]:
all_qualis_df['EventDate']

0      2022-03-20
1      2022-03-20
2      2022-03-20
3      2022-03-20
4      2022-03-20
          ...    
1699   2025-10-19
1700   2025-10-19
1701   2025-10-19
1702   2025-10-19
1703   2025-10-19
Name: EventDate, Length: 1704, dtype: datetime64[ns]

In [51]:
all_qualis_df.to_csv('quali_telemetry.csv', index=False)

In [37]:
all_qualis_df.isna().sum()

Time                    0
Driver                  0
DriverNumber            0
Team                    0
LapTime                 0
LapNumber               0
Stint                   0
Sector1Time             0
Sector2Time             0
Sector3Time             0
Compound                0
TyreLife                3
FreshTyre               0
AirTemp                 0
TrackTemp               0
Rainfall                0
Humidity                0
WindSpeed               0
WindDirection           0
TrackLenght             0
NumTurns                0
Traction                0
AsphaltGrip             0
AsphaltAbrasion         0
TrackEvolution          0
TyreStress              0
Braking                 0
LateralLoad             0
Downforce               0
Soft                    0
Medium                  0
Hard                    0
Location                0
Country                 0
Year                    0
EventDate               0
Quali_Rank              0
Max_Straight_Speed     23
Avg_Lap_Spee

# MERGE FEATURES QUALI-RACE

In [38]:
features_race = pd.read_csv('features_race_p3_PER_EVENT_scores.csv', index_col=False)

In [43]:
all_qualis_df.columns

Index(['Time', 'Driver', 'DriverNumber', 'Team', 'LapTime', 'LapNumber',
       'Stint', 'Sector1Time', 'Sector2Time', 'Sector3Time', 'Compound',
       'TyreLife', 'FreshTyre', 'AirTemp', 'TrackTemp', 'Rainfall', 'Humidity',
       'WindSpeed', 'WindDirection', 'TrackLenght', 'NumTurns', 'Traction',
       'AsphaltGrip', 'AsphaltAbrasion', 'TrackEvolution', 'TyreStress',
       'Braking', 'LateralLoad', 'Downforce', 'Soft', 'Medium', 'Hard',
       'Location', 'Country', 'Year', 'EventDate', 'Quali_Rank',
       'Max_Straight_Speed', 'Avg_Lap_Speed', 'Min_Corner_Speed',
       'Peak_Cornering_G', 'Peak_Braking_G', 'Peak_Acceleration_G',
       'Peak_Downforce_N', 'Avg_Downforce_N', 'Peak_Drag_N',
       'Peak_Front_Load_N', 'Peak_Rear_Load_N'],
      dtype='object')

In [44]:
all_qualis_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1704 entries, 0 to 1703
Data columns (total 48 columns):
 #   Column               Non-Null Count  Dtype          
---  ------               --------------  -----          
 0   Time                 1704 non-null   timedelta64[ns]
 1   Driver               1704 non-null   object         
 2   DriverNumber         1704 non-null   object         
 3   Team                 1704 non-null   object         
 4   LapTime              1704 non-null   float64        
 5   LapNumber            1704 non-null   float64        
 6   Stint                1704 non-null   float64        
 7   Sector1Time          1704 non-null   float64        
 8   Sector2Time          1704 non-null   float64        
 9   Sector3Time          1704 non-null   float64        
 10  Compound             1704 non-null   object         
 11  TyreLife             1701 non-null   float64        
 12  FreshTyre            1704 non-null   bool           
 13  AirTemp           

In [42]:
features_race.columns

Index(['Driver', 'Location', 'Year', 'Team', 'EventDate', 'Team_Pace_EMA',
       'Driver_Teammate_Gap_EMA', 'Driver_Consistency_EMA', 'Degradation_EMA'],
      dtype='object')

In [45]:
features_race.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1640 entries, 0 to 1639
Data columns (total 9 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Driver                   1640 non-null   object 
 1   Location                 1640 non-null   object 
 2   Year                     1640 non-null   int64  
 3   Team                     1640 non-null   object 
 4   EventDate                1640 non-null   object 
 5   Team_Pace_EMA            1640 non-null   float64
 6   Driver_Teammate_Gap_EMA  1640 non-null   float64
 7   Driver_Consistency_EMA   1640 non-null   float64
 8   Degradation_EMA          1640 non-null   float64
dtypes: float64(4), int64(1), object(4)
memory usage: 115.4+ KB


In [94]:
merge_keys = ['Driver', 'Location', 'Year']

final_df = pd.merge(
    all_qualis_df,
    features_race,
    on=merge_keys,
    how='inner'
)

In [95]:
final_df = final_df.rename(columns={'EventDate_x': 'EventDate'})
final_df = final_df.drop(columns={'EventDate_y'})

final_df = final_df.rename(columns={'Team_x': 'Team'})
final_df = final_df.drop(columns={'Team_y'})


In [84]:
final_df = final_df.sort_values(by=['Year', 'EventDate', 'Quali_Rank'])


In [101]:
final_df.to_csv('features_quali_complete_P3.csv', index=False)

In [98]:
final_df = final_df.dropna(subset=['Quali_Rank'])
final_df = final_df.dropna(subset=['Max_Straight_Speed'])
final_df = final_df.dropna(subset=['Team_Pace_EMA'])

In [99]:
final_df

Unnamed: 0,Time,Driver,DriverNumber,Team,LapTime,LapNumber,Stint,Sector1Time,Sector2Time,Sector3Time,...,Peak_Acceleration_G,Peak_Downforce_N,Avg_Downforce_N,Peak_Drag_N,Peak_Front_Load_N,Peak_Rear_Load_N,Team_Pace_EMA,Driver_Teammate_Gap_EMA,Driver_Consistency_EMA,Degradation_EMA
0,0 days 01:14:47.968000,LEC,16,Ferrari,90.558,14.0,5.0,29.115,38.702,22.741,...,1.655335,30061.176736,15110.574496,8124.642361,15967.697325,23405.277478,-1.606421,-0.418709,1.294945,0.138100
1,0 days 01:15:15.614000,VER,1,Red Bull Racing,90.681,13.0,4.0,28.970,38.832,22.879,...,2.149994,30822.189082,14709.982510,8330.321373,15830.695983,23927.015267,-1.454666,-0.214258,1.176590,0.131253
2,0 days 01:07:34.650000,SAI,55,Ferrari,90.687,11.0,4.0,29.036,38.842,22.809,...,2.238076,29872.410002,14717.819360,8073.624325,15139.076301,23294.986842,-1.606421,0.418709,1.285464,0.088300
3,0 days 01:15:21.593000,PER,11,Red Bull Racing,90.921,17.0,6.0,29.180,38.894,22.847,...,1.671452,31013.928520,14899.578713,8382.142843,16030.054736,24011.176938,-1.454666,0.214258,1.278144,0.114015
4,0 days 00:55:33.904000,HAM,44,Mercedes,91.048,10.0,3.0,29.100,39.038,22.910,...,1.668267,29872.410002,14982.631982,8073.624325,14955.892800,23263.427286,-0.951462,-0.340234,1.172743,0.100051
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1621,0 days 00:42:17.817000,COL,43,Alpine,94.039,7.0,3.0,25.323,37.874,30.842,...,1.728940,30822.189082,14341.431708,8330.321373,16022.029189,23903.996256,0.809274,0.023433,1.185255,-0.037586
1622,0 days 00:41:21.876000,BOR,5,Kick Sauber,94.125,7.0,3.0,25.326,38.036,30.763,...,1.767536,31013.928520,14398.443736,8382.142843,15790.023815,24004.686968,0.304365,0.300834,1.148753,-0.009591
1623,0 days 00:41:40.433000,OCO,31,Haas F1 Team,94.136,7.0,3.0,25.431,37.931,30.774,...,1.745797,31206.262500,14320.249434,8434.125000,15701.891150,24134.756086,0.429606,0.458798,1.294687,-0.027581
1624,0 days 00:32:21.571000,STR,18,Aston Martin,94.540,3.0,2.0,25.614,38.008,30.918,...,1.974140,31013.928520,14305.130511,8382.142843,15360.052484,24036.061922,0.294133,0.250399,1.204013,-0.036862


In [100]:
final_df.isna().sum()

Time                       0
Driver                     0
DriverNumber               0
Team                       0
LapTime                    0
LapNumber                  0
Stint                      0
Sector1Time                0
Sector2Time                0
Sector3Time                0
Compound                   0
TyreLife                   2
FreshTyre                  0
AirTemp                    0
TrackTemp                  0
Rainfall                   0
Humidity                   0
WindSpeed                  0
WindDirection              0
TrackLenght                0
NumTurns                   0
Traction                   0
AsphaltGrip                0
AsphaltAbrasion            0
TrackEvolution             0
TyreStress                 0
Braking                    0
LateralLoad                0
Downforce                  0
Soft                       0
Medium                     0
Hard                       0
Location                   0
Country                    0
Year          