In [16]:
import fastf1 as ff1
import pandas as pd
from datetime import datetime
import numpy as np
from fastf1.ergast import Ergast
import requests
from functools import lru_cache
import logging

In [17]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

# Suppress FastF1 INFO and DEBUG messages
logging.getLogger("fastf1").setLevel(logging.WARNING)

In [18]:
ff1.Cache.enable_cache('data')

# Session utilities

In [19]:
def load_session(year, location, session_name):
    """
    Load and return a FastF1 session with telemetry and lap data.

    Parameters:
    - year (int): F1 season year (e.g., 2023)
    - location (str): Track location (e.g., 'Monza')
    - session_name (str): Session type (e.g., 'FP1', 'FP2', 'FP3')

    Returns:
    - session (FastF1.Session): Loaded session object
    - Returns None if session type does not exist (e.g., no FP3 on 2-day weekends).
    """
    try:
        session = ff1.get_session(year, location, session_name)
        session.load(telemetry=True, laps=True)
        return session
    except ValueError as ve:
        if "does not exist for this event" in str(ve):
            print(f"‚ÑπÔ∏è {year} {location} has no {session_name} (skipped)")
        else:
            print(f"‚ö†Ô∏è Could not load session {year} {location} {session_name}: {ve}")
        return None
    except Exception as e:
        print(f"‚ö†Ô∏è Could not load session {year} {location} {session_name}: {e}")
        return None

In [20]:
@lru_cache(maxsize=None)
def get_elevation(latitude, longitude):
    """ 
    Get the elevation of a single point based on coordinates.
    
    Parameters:
    - latitude
    - longitude

    Returns single value - altitude above sea level in meters.
    """
    url = f"https://api.open-elevation.com/api/v1/lookup?locations={latitude},{longitude}"
    
    response = requests.get(url)
    data = response.json()
    elevation = data["results"][0]["elevation"]

    return elevation

In [21]:
def get_circuits(season):
    """
    Get main geolocation info of the tracks:
    - latitude
    - longitude
    - official circuit name
    - altitude above sea level

    Parameters:
    - season: int indicating the year/season of Formula 1

    Returns:
    - DataFrame with circuitName, location, latitude, longitude, and altitude
    """
    ergast = Ergast()
    racetracks = ergast.get_circuits(season)

    results = []

    for racetrack in racetracks.circuitName:
        try:
            row = racetracks[racetracks.circuitName == racetrack].iloc[0]
            circuit_name = row['circuitName']
            latitude = row['lat']
            longitude = row['long']
            locality = row['locality']
            country = row['country']

            altitude = get_elevation(latitude, longitude)

            results.append({
                'circuitName': racetrack,
                'location': locality,
                'country': country,
                'lat': latitude,
                'lon': longitude,
                'altitude': altitude
            })

        except Exception as e:
            print(f"‚ö†Ô∏è Failed to get altitude for {racetrack}: {e}")
            continue

    return pd.DataFrame(results)


In [22]:
def get_all_circuits(start_year=2020, end_year=2025):
    all_rows = []
    for year in range(start_year, end_year + 1):
        df = get_circuits(year)
        all_rows.append(df)
    
    full_df = pd.concat(all_rows, ignore_index=True)
    
    # Keep only the first unique occurrence per circuit
    deduped = full_df.drop_duplicates(subset=['circuitName'], keep='first').reset_index(drop=True)
    
    return deduped


# Feature Extractors

In [23]:
def extract_track_metrics(session):
    """
    Extract average speed, top speed, and braking profile from a loaded session.

    Parameters:
    - session (FastF1.Session): Loaded FastF1 session

    Returns:
    - dict: {
        avg_speed, top_speed, braking_events,
        low_pct, med_pct, high_pct
      } or None if extraction failed
    """
    try:
        if session.laps.empty:
            return None

        lap = session.laps.pick_fastest()
        telemetry = lap.get_car_data().add_distance()
        telemetry['delta_speed'] = telemetry['Speed'].diff()
        heavy_brakes = telemetry['delta_speed'] < -30
        braking_events = heavy_brakes.sum()

        return {
            'avg_speed': telemetry['Speed'].mean(),
            'top_speed': telemetry['Speed'].max(),
            'braking_events': braking_events,
            'low_pct': (telemetry.Speed < 120).mean(),
            'med_pct': ((telemetry.Speed >= 120) & (telemetry.Speed < 200)).mean(),
            'high_pct': (telemetry.Speed >= 200).mean()
        }

    except Exception as e:
        print(f"‚ö†Ô∏è Failed to extract metrics: {e}")
        return None


In [24]:
def get_circuit_corner_profile(session, low_thresh=100, med_thresh=170):
    """
    Detect corners and categorize them by entry speed using local speed minima.

    Parameters:
    - session (FastF1.Session): Loaded session
    - low_thresh (int): max speed for slow corners (km/h)
    - med_thresh (int): max speed for medium corners (km/h)

    Returns:
    - dict: {
        slow_corners, medium_corners, fast_corners, chicanes
      } or None if failed
    """
    try:
        lap = session.laps.pick_fastest()
        tel = lap.get_car_data().add_distance()
        tel['prev_speed'] = tel['Speed'].shift(1)
        tel['next_speed'] = tel['Speed'].shift(-1)
        tel['is_corner'] = (tel['Speed'] < tel['prev_speed']) & (tel['Speed'] < tel['next_speed'])
        corners = tel[tel['is_corner']].copy()

        corners['corner_type'] = pd.cut(
            corners['Speed'],
            bins=[0, low_thresh, med_thresh, 400],
            labels=['slow', 'medium', 'fast']
        )
        counts = corners['corner_type'].value_counts().to_dict()

        corners['DistanceFromPrev'] = corners['Distance'].diff().fillna(9999)
        chicanes = (corners['DistanceFromPrev'] < 200).sum()

        return {
            'slow_corners': counts.get('slow', 0),
            'medium_corners': counts.get('medium', 0),
            'fast_corners': counts.get('fast', 0),
            'chicanes': chicanes
        }

    except Exception as e:
        print(f"‚ö†Ô∏è Corner profile failed: {e}")
        return None


In [25]:
def get_drs_info(session, track_length):
    """
    Estimate DRS info from telemetry (fastest lap).

    Parameters:
    - session (FastF1.Session): Loaded session
    - track_length (float): Estimated lap length (m)

    Returns:
    - dict: num_drs_zones, drs_total_len_m, drs_pct_of_lap
    """
    try:
        lap = session.laps.pick_fastest()
        tel = lap.get_car_data().add_distance()

        if 'DRS' not in tel.columns:
            raise ValueError("DRS channel not available in telemetry")

        # Identify rows where DRS is active (1- activated, 8 available)
        drs_active = tel[tel['DRS'].isin([1, 8])].copy()
        if drs_active.empty:
            raise ValueError("No DRS usage detected in lap")

        # Tag separate DRS zones (gaps > 100m in distance)
        drs_active['gap'] = drs_active['Distance'].diff().fillna(0)
        drs_active['zone_id'] = (drs_active['gap'] > 100).cumsum()

        # Compute total length and count zones
        zone_lengths = drs_active.groupby('zone_id')['Distance'].agg(['min', 'max'])
        zone_lengths['length'] = zone_lengths['max'] - zone_lengths['min']

        num_drs_zones = len(zone_lengths)
        drs_total_len = zone_lengths['length'].sum()
        drs_pct = drs_total_len / track_length if track_length else np.nan

        return {
            'num_drs_zones': num_drs_zones,
            'drs_total_len_m': drs_total_len,
            'drs_pct_of_lap': drs_pct
        }

    except Exception as e:
        print(f"‚ö†Ô∏è Failed to infer DRS zones: {e}")
        return {
            'num_drs_zones': 0,
            'drs_total_len_m': 0,
            'drs_pct_of_lap': np.nan
        }


# Driver telemetry

In [26]:
def get_driver_max_throttle_ratio(session, driver, max_throttle_threshold=98, season=None, session_name=None):
    """
    Get the max throttle ratio for the fastest lap in the session for a selected driver.

    Parameters:
    - session: FastF1 loaded session object (must be .load()-ed)
    - driver: abbreviated driver name (e.g., 'VER')
    - max_throttle_threshold: threshold for detecting 'max throttle' zones (default: 98%)
    - season: (optional) int year
    - session_name: (optional) string like 'FP1', used for session_uid

    Returns:
    - full_throttle: DataFrame with driver metrics and session_uid
    - missing_info: DataFrame with fallback in case of telemetry issues
    """

    gp_name = session.event['EventName']
    location = session.event['Location']

    try:
        fastest_driver = session.laps.pick_drivers([driver]).pick_fastest()
        telemetry = fastest_driver.get_telemetry().add_distance()

        # Merge weather info
        telemetry = pd.merge_asof(
            telemetry,
            session.weather_data[['Time', 'Rainfall', 'TrackTemp', 'AirTemp']],
            left_on='SessionTime',
            right_on='Time'
        )

        # Calculate heavy braking zones
        telemetry['delta_speed'] = telemetry['Speed'].diff()
        heavy_brakes = telemetry['delta_speed'] < -30  # threshold in km/h
        braking_events = heavy_brakes.sum()

        # Identify throttle segments
        telemetry['nextThrottle'] = telemetry.Throttle.shift(-1)
        telemetry['previousThrottle'] = telemetry.Throttle.shift(1)

        throttle_points = telemetry.loc[
            (telemetry.Throttle >= max_throttle_threshold) &
            (
                (telemetry.nextThrottle < max_throttle_threshold) |
                (telemetry.previousThrottle < max_throttle_threshold) |
                (telemetry.index.isin([telemetry.index[0], telemetry.index[-1]]))
            )
        ].copy()

        throttle_points['FTRelative'] = throttle_points.RelativeDistance.diff().fillna(0)

        # Calculate full throttle ratio
        ratio = throttle_points.loc[
            (throttle_points.nextThrottle < max_throttle_threshold) |
            (throttle_points.nextThrottle.isna())
        ]['FTRelative'].sum()

        # Prepare result row
        result = pd.DataFrame([{
            'grand_prix': gp_name,
            'location': location,
            'driver': driver,
            'ratio': ratio,
            'compound': fastest_driver['Compound'],
            'tyre_age': fastest_driver['TyreLife'],
            'is_fresh_tyre': fastest_driver['FreshTyre'],
            'avg_rainfall': telemetry['Rainfall'].mean(),
            'avg_track_temp': telemetry['TrackTemp'].mean(),
            'avg_air_temp': telemetry['AirTemp'].mean(),
            'braking_events': braking_events,
            'session_uid': f"{season}_{location}_{session_name}" if season and session_name else None
        }])

        return result, None

    except Exception:
        # Telemetry missing or failed to load
        missing = pd.DataFrame([{
            'grand_prix': gp_name,
            'location': location,
            'driver': driver,
            'session_uid': f"{season}_{location}_{session_name}" if season and session_name else None
        }])
        return None, missing


In [27]:
def get_all_drivers_throttle_input(session, season=None, session_name=None):
    """
    Get the max throttle ratio and braking event data for all drivers in a session.

    Parameters:
    - session: loaded FastF1 session
    - season: optional, used to generate session_uid
    - session_name: optional, like 'FP1', used to generate session_uid

    Returns:
    - correct_readings: DataFrame with all valid telemetry-derived data
    """
    if session.laps.empty:
        print(f"‚ö†Ô∏è No laps found in session: {session.event['EventName']}")
        return pd.DataFrame(), pd.DataFrame()

    drivers = pd.unique(session.laps['Driver'])
    gp_name = session.event['EventName']
    location = session.event['Location']

    throttle_data = []
    missing_data = []

    for driver in drivers:
        result, missing = get_driver_max_throttle_ratio(
            session, driver,
            season=season,
            session_name=session_name
        )
        if result is not None:
            throttle_data.append(result)
        if missing is not None:
            missing_data.append(missing)

    full_throttle = pd.concat(throttle_data, ignore_index=True) if throttle_data else pd.DataFrame()
    missing_info = pd.concat(missing_data, ignore_index=True) if missing_data else pd.DataFrame()

    # Remove throttle ratio outliers
    invalid_mask = (full_throttle.ratio > 0.85) | (full_throttle.ratio < 0.4)
    full_throttle.loc[invalid_mask, 'ratio'] = np.NaN

    correct_readings = full_throttle.dropna(subset=['ratio'])
    incorrect_readings = full_throttle[full_throttle.ratio.isna()]

    # Logged but not used as of now
    if not incorrect_readings.empty:
        missing_info = pd.concat(
            [missing_info, incorrect_readings[['grand_prix', 'location', 'driver', 'session_uid']]],
            ignore_index=True
        )

    return correct_readings


# High-Level Builders

In [28]:
def build_circuit_profile_df(start_year=2020, end_year=2025):
    """
    Builds a DataFrame with circuit-level metrics for all practice sessions across seasons.

    Parameters:
    - start_year: int ‚Äî First season to include
    - end_year: int ‚Äî Last season to include

    Returns:
    - df_profiles: DataFrame with circuit performance and layout characteristics
    - df_skipped: DataFrame logging sessions or years that failed (with reasons)
    """

    # Load all circuits and their metadata (altitude, location, lat/lon)
    # This is pulled from the Ergast API via your get_all_circuits()
    circuit_metadata = get_all_circuits(start_year, end_year)

    # Lists to store successful and failed processing records
    records = []
    skipped_sessions = []

    # Loop over each season
    for year in range(start_year, end_year + 1):
        try:
            # Load the event schedule from Ergast backend (more reliable fallback)
            schedule = ff1.get_event_schedule(year, backend='ergast')

            # Filter out events that haven't happened yet
            races = schedule[schedule.Session1DateUtc < datetime.utcnow()]
        except Exception as e:
            # If loading the full schedule fails for a year, log it and continue
            print(f"‚ö†Ô∏è Skipped year {year} ‚Äî failed to load schedule: {e}")
            skipped_sessions.append({
                "year": year,
                "location": None,
                "session": None,
                "event": None,
                "reason": f"Schedule load failed: {e}"
            })
            continue

        # Loop through each race in the schedule
        for _, row in races.iterrows():
            location = row['Location']               # e.g., 'Sakhir'
            event_name = row['EventName']           # e.g., 'Bahrain Grand Prix'
            format_type = row['EventFormat']        # 'conventional' or 'sprint'

            # Define which practice sessions to include
            sessions = ['FP1', 'FP2', 'FP3', 'Q', 'R'] if format_type == 'conventional' else ['FP1','S', 'SS', 'SQ','Q','R']

            for session_name in sessions:
                try:
                    print(f"üîç Processing {year} {location} {session_name}")

                    # Load telemetry and metadata for the session
                    session = load_session(year, location, session_name)

                    # Sanity checks for required data
                    if session is None:
                        raise ValueError("Session loading returned None")
                    if session.laps.empty:
                        raise ValueError("Session laps not available or empty")
                    if not hasattr(session, "weather_data") or session.weather_data.empty:
                        raise ValueError("Weather data missing")

                    # Estimate track length using fastest lap distance
                    try:
                        lap = session.laps.pick_fastest()
                        track_length = lap.get_car_data().add_distance()['Distance'].max()
                    except Exception as e:
                        print(f"‚ö†Ô∏è Could not estimate track length: {e}")
                        track_length = np.nan

                    # Warn but don't skip if track length is missing
                    if pd.isna(track_length):
                        print(f"‚ö†Ô∏è {year} {location} {session_name} missing track length ‚Äî DRS % will be NaN")

                    # Extract feature sets
                    drs_data = get_drs_info(session, track_length)                  # DRS zones, total length, %
                    track_metrics = extract_track_metrics(session)                 # avg/top speed, braking, throttle %
                    corner_data = get_circuit_corner_profile(session)              # slow/medium/fast corners + chicanes

                    # Match altitude using 'location' (e.g. 'Sakhir') against circuit_metadata
                    alt_lookup = circuit_metadata[circuit_metadata['location'] == location]
                    altitude = alt_lookup['altitude'].values[0] if not alt_lookup.empty else np.nan

                    # Ensure core metrics were extracted
                    if not track_metrics:
                        raise ValueError("Missing telemetry metrics")

                    # Compile the record for this session
                    record = {
                        'year': year,
                        'location': location,
                        'event': event_name,
                        'session': session_name,
                        'real_altitude': altitude,
                        'lap_length':track_length,
                        **drs_data,
                        **track_metrics,
                        **corner_data
                    }

                    records.append(record)

                except Exception as e:
                    # Log any failure that happens during processing of a session
                    print(f"‚ö†Ô∏è Skipped {year} {location} {session_name}: {e}")
                    skipped_sessions.append({
                        "year": year,
                        "location": location,
                        "session": session_name,
                        "event": event_name,
                        "reason": str(e)
                    })

    # Convert final lists to DataFrames
    df_profiles = pd.DataFrame(records)
    df_skipped = pd.DataFrame(skipped_sessions)

    # Print final summary for console
    print(f"\n‚úÖ Done: {len(df_profiles)} sessions parsed, {len(df_skipped)} skipped.")

    return df_profiles, df_skipped


# Testing

In [29]:
profiles, skipped = build_circuit_profile_df(2024, 2024)

print(skipped)


üîç Processing 2024 Sakhir FP1
üîç Processing 2024 Sakhir FP2
üîç Processing 2024 Sakhir FP3
üîç Processing 2024 Sakhir Q
üîç Processing 2024 Sakhir R
‚ö†Ô∏è Failed to infer DRS zones: No DRS usage detected in lap
üîç Processing 2024 Jeddah FP1
üîç Processing 2024 Jeddah FP2
üîç Processing 2024 Jeddah FP3
üîç Processing 2024 Jeddah Q
üîç Processing 2024 Jeddah R
üîç Processing 2024 Melbourne FP1




üîç Processing 2024 Melbourne FP2




üîç Processing 2024 Melbourne FP3
üîç Processing 2024 Melbourne Q
üîç Processing 2024 Melbourne R
üîç Processing 2024 Suzuka FP1




üîç Processing 2024 Suzuka FP2
üîç Processing 2024 Suzuka FP3
üîç Processing 2024 Suzuka Q
üîç Processing 2024 Suzuka R
üîç Processing 2024 Shanghai FP1
üîç Processing 2024 Shanghai S




‚ö†Ô∏è Failed to infer DRS zones: No DRS usage detected in lap
üîç Processing 2024 Shanghai SS
‚ÑπÔ∏è 2024 Shanghai has no SS (skipped)
‚ö†Ô∏è Skipped 2024 Shanghai SS: Session loading returned None
üîç Processing 2024 Shanghai SQ
üîç Processing 2024 Shanghai Q
üîç Processing 2024 Shanghai R




üîç Processing 2024 Miami FP1
üîç Processing 2024 Miami S




üîç Processing 2024 Miami SS
‚ÑπÔ∏è 2024 Miami has no SS (skipped)
‚ö†Ô∏è Skipped 2024 Miami SS: Session loading returned None
üîç Processing 2024 Miami SQ
üîç Processing 2024 Miami Q
üîç Processing 2024 Miami R
‚ö†Ô∏è Failed to infer DRS zones: No DRS usage detected in lap
üîç Processing 2024 Imola FP1
üîç Processing 2024 Imola FP2
üîç Processing 2024 Imola FP3
üîç Processing 2024 Imola Q




üîç Processing 2024 Imola R




‚ö†Ô∏è Failed to infer DRS zones: No DRS usage detected in lap
üîç Processing 2024 Monte-Carlo FP1




üîç Processing 2024 Monte-Carlo FP2




üîç Processing 2024 Monte-Carlo FP3




üîç Processing 2024 Monte-Carlo Q




üîç Processing 2024 Monte-Carlo R




üîç Processing 2024 Montreal FP1




üîç Processing 2024 Montreal FP2




üîç Processing 2024 Montreal FP3




üîç Processing 2024 Montreal Q




üîç Processing 2024 Montreal R




üîç Processing 2024 Montmel√≥ FP1




üîç Processing 2024 Montmel√≥ FP2




üîç Processing 2024 Montmel√≥ FP3




üîç Processing 2024 Montmel√≥ Q




üîç Processing 2024 Montmel√≥ R
üîç Processing 2024 Spielberg FP1
üîç Processing 2024 Spielberg S




üîç Processing 2024 Spielberg SS
‚ÑπÔ∏è 2024 Spielberg has no SS (skipped)
‚ö†Ô∏è Skipped 2024 Spielberg SS: Session loading returned None
üîç Processing 2024 Spielberg SQ
üîç Processing 2024 Spielberg Q
üîç Processing 2024 Spielberg R
‚ö†Ô∏è Failed to infer DRS zones: No DRS usage detected in lap
üîç Processing 2024 Silverstone FP1
üîç Processing 2024 Silverstone FP2
üîç Processing 2024 Silverstone FP3
‚ö†Ô∏è Failed to infer DRS zones: No DRS usage detected in lap
üîç Processing 2024 Silverstone Q
üîç Processing 2024 Silverstone R
‚ö†Ô∏è Failed to infer DRS zones: No DRS usage detected in lap
üîç Processing 2024 Budapest FP1
üîç Processing 2024 Budapest FP2
üîç Processing 2024 Budapest FP3
üîç Processing 2024 Budapest Q
üîç Processing 2024 Budapest R




‚ö†Ô∏è Failed to infer DRS zones: No DRS usage detected in lap
üîç Processing 2024 Spa FP1




üîç Processing 2024 Spa FP2




üîç Processing 2024 Spa FP3




üîç Processing 2024 Spa Q




üîç Processing 2024 Spa R




‚ö†Ô∏è Failed to infer DRS zones: No DRS usage detected in lap
üîç Processing 2024 Zandvoort FP1
üîç Processing 2024 Zandvoort FP2
üîç Processing 2024 Zandvoort FP3
‚ö†Ô∏è Failed to infer DRS zones: No DRS usage detected in lap
üîç Processing 2024 Zandvoort Q




üîç Processing 2024 Zandvoort R
‚ö†Ô∏è Failed to infer DRS zones: No DRS usage detected in lap
üîç Processing 2024 Monza FP1
üîç Processing 2024 Monza FP2
üîç Processing 2024 Monza FP3
üîç Processing 2024 Monza Q
üîç Processing 2024 Monza R
‚ö†Ô∏è Failed to infer DRS zones: No DRS usage detected in lap
üîç Processing 2024 Baku FP1
üîç Processing 2024 Baku FP2
üîç Processing 2024 Baku FP3


['0 days 00:44:25.841000']
Length: 1, dtype: timedelta64[ns]' has dtype incompatible with datetime64[ns], please explicitly cast to a compatible dtype first.
  result.loc[mask, 'LapStartTime'] = result.loc[mask, 'PitOutTime']


üîç Processing 2024 Baku Q
üîç Processing 2024 Baku R
‚ö†Ô∏è Failed to infer DRS zones: No DRS usage detected in lap
üîç Processing 2024 Marina Bay FP1
üîç Processing 2024 Marina Bay FP2
üîç Processing 2024 Marina Bay FP3
üîç Processing 2024 Marina Bay Q
üîç Processing 2024 Marina Bay R
‚ö†Ô∏è Failed to infer DRS zones: No DRS usage detected in lap
üîç Processing 2024 Austin FP1
üîç Processing 2024 Austin S




‚ö†Ô∏è Failed to infer DRS zones: No DRS usage detected in lap
üîç Processing 2024 Austin SS
‚ÑπÔ∏è 2024 Austin has no SS (skipped)
‚ö†Ô∏è Skipped 2024 Austin SS: Session loading returned None
üîç Processing 2024 Austin SQ
üîç Processing 2024 Austin Q
üîç Processing 2024 Austin R
‚ö†Ô∏è Failed to infer DRS zones: No DRS usage detected in lap
üîç Processing 2024 Mexico City FP1




üîç Processing 2024 Mexico City FP2
üîç Processing 2024 Mexico City FP3
üîç Processing 2024 Mexico City Q
üîç Processing 2024 Mexico City R
‚ö†Ô∏è Failed to infer DRS zones: No DRS usage detected in lap
üîç Processing 2024 S√£o Paulo FP1
üîç Processing 2024 S√£o Paulo S




üîç Processing 2024 S√£o Paulo SS
‚ÑπÔ∏è 2024 S√£o Paulo has no SS (skipped)
‚ö†Ô∏è Skipped 2024 S√£o Paulo SS: Session loading returned None
üîç Processing 2024 S√£o Paulo SQ
üîç Processing 2024 S√£o Paulo Q
‚ö†Ô∏è Failed to infer DRS zones: No DRS usage detected in lap
üîç Processing 2024 S√£o Paulo R




üîç Processing 2024 Las Vegas FP1
üîç Processing 2024 Las Vegas FP2
üîç Processing 2024 Las Vegas FP3
üîç Processing 2024 Las Vegas Q
üîç Processing 2024 Las Vegas R




‚ö†Ô∏è Failed to infer DRS zones: No DRS usage detected in lap
üîç Processing 2024 Al Daayen FP1




üîç Processing 2024 Al Daayen S
‚ÑπÔ∏è 2024 Al Daayen has no S (skipped)
‚ö†Ô∏è Skipped 2024 Al Daayen S: Session loading returned None
üîç Processing 2024 Al Daayen SS
‚ÑπÔ∏è 2024 Al Daayen has no SS (skipped)
‚ö†Ô∏è Skipped 2024 Al Daayen SS: Session loading returned None
üîç Processing 2024 Al Daayen SQ
‚ÑπÔ∏è 2024 Al Daayen has no SQ (skipped)
‚ö†Ô∏è Skipped 2024 Al Daayen SQ: Session loading returned None
üîç Processing 2024 Al Daayen Q




üîç Processing 2024 Al Daayen R
‚ö†Ô∏è Failed to infer DRS zones: No DRS usage detected in lap
üîç Processing 2024 Abu Dhabi FP1
üîç Processing 2024 Abu Dhabi FP2
üîç Processing 2024 Abu Dhabi FP3
üîç Processing 2024 Abu Dhabi Q
üîç Processing 2024 Abu Dhabi R
‚ö†Ô∏è Failed to infer DRS zones: No DRS usage detected in lap

‚úÖ Done: 118 sessions parsed, 8 skipped.
   year   location session                     event  \
0  2024   Shanghai      SS        Chinese Grand Prix   
1  2024      Miami      SS          Miami Grand Prix   
2  2024  Spielberg      SS       Austrian Grand Prix   
3  2024     Austin      SS  United States Grand Prix   
4  2024  S√£o Paulo      SS      S√£o Paulo Grand Prix   
5  2024  Al Daayen       S          Qatar Grand Prix   
6  2024  Al Daayen      SS          Qatar Grand Prix   
7  2024  Al Daayen      SQ          Qatar Grand Prix   

                          reason  
0  Session loading returned None  
1  Session loading returned None  
2  Session load

In [15]:
profiles

Unnamed: 0,year,location,event,session,real_altitude,lap_length,num_drs_zones,drs_total_len_m,drs_pct_of_lap,avg_speed,top_speed,braking_events,low_pct,med_pct,high_pct,slow_corners,medium_corners,fast_corners,chicanes
0,2024,Sakhir,Bahrain Grand Prix,FP1,10.0,5357.197222,3,3638.044722,0.679095,206.997167,310.0,11,0.124646,0.286119,0.589235,3,4,2,0
1,2024,Sakhir,Bahrain Grand Prix,FP2,10.0,5342.252778,3,3627.975278,0.679110,213.737952,316.0,10,0.114458,0.268072,0.617470,2,0,0,0
2,2024,Sakhir,Bahrain Grand Prix,FP3,10.0,5390.449722,3,3651.490278,0.677400,212.495652,316.0,10,0.124638,0.255072,0.620290,2,4,1,1
3,2024,Sakhir,Bahrain Grand Prix,Q,10.0,5369.465278,3,3650.405278,0.679845,215.886567,319.0,15,0.119403,0.259701,0.620896,2,3,3,1
4,2024,Sakhir,Bahrain Grand Prix,R,10.0,5356.580278,0,0.000000,,208.853868,301.0,7,0.120344,0.269341,0.610315,3,4,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113,2024,Abu Dhabi,Abu Dhabi Grand Prix,FP1,1.0,5220.736667,3,3760.578611,0.720316,221.600601,328.0,7,0.099099,0.270270,0.630631,1,3,2,0
114,2024,Abu Dhabi,Abu Dhabi Grand Prix,FP2,1.0,5225.690556,3,3691.926944,0.706496,223.424837,324.0,12,0.094771,0.277778,0.627451,0,6,1,1
115,2024,Abu Dhabi,Abu Dhabi Grand Prix,FP3,1.0,5226.500000,3,3762.391111,0.719868,223.435737,328.0,10,0.094044,0.278997,0.626959,2,3,3,1
116,2024,Abu Dhabi,Abu Dhabi Grand Prix,Q,1.0,5211.091667,3,3689.376389,0.707985,228.103125,324.0,10,0.087500,0.246875,0.665625,1,3,4,2


# Work in progress

1. quali pace
2. race pace
3. Pirelli tire info
4. when to query the data?
    - the time of the race is not consistent
    - on sprint race weekends the quali takes place on Friday
    - US/Asia/Australia races happen at different times

In [18]:
def create_pirelli_tyre_info(location, traction, evolution, lateral_load, abrasion, braking, grip, tyre_stress, downforce):
    return pd.DataFrame([{
        'Location': location,
        'traction': traction,
        'evolution': evolution,
        'lateral_load': lateral_load,
        'abrasion': abrasion,
        'braking': braking,
        'grip': grip,
        'tyre_stress': tyre_stress,
        'downforce':downforce
        }])


In [None]:
bahrain_pirelli = create_pirelli_tyre_info('Sakhir', 4, 4, 3, 5, 4, 3, 3, 3)
jeddah_pirelli = create_pirelli_tyre_info('Jeddah', 2, 3, 4, 2, 2, 3, 3, 2)
melbourne_pirelli = create_pirelli_tyre_info('Melbourne', 2, 4, 3, 2, 2, 3, 3, 3)
baku_pirelli = create_pirelli_tyre_info('Baku', 5, 5, 1, 1, 4, 1, 3, 1)
miami_pirelli = create_pirelli_tyre_info('Miami',3,5,3,2,3,3,3,2)
monaco_pirelli = create_pirelli_tyre_info('Monaco', 5,5,1,1,2,1,1,5)
barcelona_pirelli = create_pirelli_tyre_info('Barcelona', 3,3,5,4,3,3,5,4) #2023
barcelona_pirelli = create_pirelli_tyre_info('Barcelona', 3,3,4,4,3,3,4,4) #2022
montreal_pirelli = create_pirelli_tyre_info('Montreal', 5,5,1,2,5,1,3,1)

spielberg_pirelli = create_pirelli_tyre_info('Spielberg', 2,3,5)
silverstone_pirelli = create_pirelli_tyre_info('Silverstone', 4,4,10)
budapest_pirelli = create_pirelli_tyre_info('Budapest', 3,7,4)
spa_pirelli = create_pirelli_tyre_info('Spa', 3,8,8)
imola_pirelli = create_pirelli_tyre_info('Imola', 5,3,11)
abu_dhabi_pirelli = create_pirelli_tyre_info('Abu Dhabi',3,6,7)
las_vegas_pirelli = create_pirelli_tyre_info('Las Vegas', 7,4,6)
mexico_pirelli = create_pirelli_tyre_info('Mexico City', 7,6,4)
sao_paulo_pirelli = create_pirelli_tyre_info('Sao Paulo', 1,6,8)
suzuka_pirelli = create_pirelli_tyre_info('Suzuka', 3,4,11)
qatar_pirelli = create_pirelli_tyre_info('Qatar', 1,4,11)
austin_pirelli = create_pirelli_tyre_info('Austin', 7,5,8)
zandvoort_pirelli = create_pirelli_tyre_info('Zandvoort', 2,6,6)
monza_pirelli = create_pirelli_tyre_info('Monza', 2,5,4)
singapore_pirelli = create_pirelli_tyre_info('Singapore', 10,8,5) #2022
singapore_pirelli = create_pirelli_tyre_info('Singapore', 7,7,5) #2023


In [None]:
pirelli = pd.concat([
    bahrain_pirelli,
    jeddah_pirelli,
    melbourne_pirelli,
    baku_pirelli,
    miami_pirelli ,
    monaco_pirelli ,
    barcelona_pirelli,
    montreal_pirelli ,
    ], 
    ignore_index=True
    )