# 07 Car Characteristics

**Assumptions:**
1. Sprint weekends are detected from the `EventFormat` column of the event schedule.
2. Top speed is taken from the per-lap speed trap columns, so no telemetry needs to be loaded.

**Speed trap columns (per lap, km/h):**
- SpeedI1, SpeedI2 = speeds at the two intermediate speed traps
- SpeedFL = speed at the finish line
- SpeedST = speed at the main speed trap (used if available)

In [1]:
import fastf1 as ff1
import pandas as pd
import numpy as np
from pathlib import Path
import json
from datetime import datetime

import logging
# Only surface FastF1 log records at ERROR level or above.
logging.getLogger("fastf1").setLevel(logging.ERROR)

import warnings
# Suppress all Python warnings for the rest of the notebook.
warnings.filterwarnings('ignore')

# FastF1 refuses to enable a cache in a directory that does not exist, so
# create it first; this keeps a fresh checkout from failing on the first cell.
cache_dir = Path('../data/raw/.fastf1_cache')
cache_dir.mkdir(parents=True, exist_ok=True)
ff1.Cache.enable_cache(str(cache_dir))

# Season whose events are extracted by the cells below.
season = 2025

In [2]:
def extract_team_characteristics(session, team_name):
    """
    Summarise one team's pace in a loaded session.

    Sector times come from the team's single fastest fully-timed lap,
    consistency statistics from all fully-timed laps, and top speed from the
    speed trap columns (SpeedI1, SpeedI2, SpeedFL, SpeedST) of every team
    lap, so no telemetry is required.

    Args:
        session: Object exposing ``laps`` (a DataFrame with Team, Driver,
            LapTime, Sector1Time..Sector3Time and optional speed trap
            columns), ``event['EventName']`` and ``name``.
        team_name: Value matched against the ``Team`` column.

    Returns:
        dict with keys team, driver, event, session, sector_times,
        consistency and (only when a positive trap speed exists)
        speed_profile; None when the team has no laps or fewer than five
        fully-timed laps.
    """
    all_laps = session.laps
    laps_for_team = all_laps[all_laps['Team'] == team_name]
    if laps_for_team.shape[0] == 0:
        return None

    # A lap is fully timed only when the lap time and all three sectors are set.
    timing_columns = ['LapTime', 'Sector1Time', 'Sector2Time', 'Sector3Time']
    fully_timed = laps_for_team[laps_for_team[timing_columns].notna().all(axis=1)]
    if fully_timed.shape[0] < 5:
        return None

    quickest = fully_timed.loc[fully_timed['LapTime'].idxmin()]

    # Most frequent driver across all of the team's laps; 'UNK' if none recorded.
    driver_modes = laps_for_team['Driver'].mode()
    main_driver = driver_modes[0] if len(driver_modes) > 0 else 'UNK'

    lap_seconds = fully_timed['LapTime'].dt.total_seconds()

    summary = {
        'team': team_name,
        'driver': main_driver,
        'event': session.event['EventName'],
        'session': session.name,
        'sector_times': {
            key: float(quickest[column].total_seconds())
            for key, column in (
                ('s1', 'Sector1Time'),
                ('s2', 'Sector2Time'),
                ('s3', 'Sector3Time'),
            )
        },
        'consistency': {
            'mean_lap_time': float(lap_seconds.mean()),
            'std_lap_time': float(lap_seconds.std()),
            'best_lap_time': float(quickest['LapTime'].total_seconds()),
            'num_laps': len(fully_timed),
        },
    }

    # Per-column maxima over ALL team laps (not just the fully-timed ones).
    trap_maxima = [
        laps_for_team[column].max()
        for column in ('SpeedI1', 'SpeedI2', 'SpeedFL', 'SpeedST')
        if column in laps_for_team.columns
    ]
    positive_speeds = [
        float(value) for value in trap_maxima if pd.notna(value) and value > 0
    ]

    # speed_profile is omitted entirely when no usable trap speed was found.
    if positive_speeds:
        summary['speed_profile'] = {'top_speed': max(positive_speeds)}

    return summary


def extract_session(year, event_name, session_name):
    """
    Load one session and collect characteristics for every team in it.

    The session is loaded with laps only (telemetry and weather disabled),
    because top speed is read from the lap speed trap columns.

    Returns:
        dict mapping team name to its characteristics dict, or None when no
        team produced data or when loading/extraction raised. Failures are
        reported with a printed "Failed: ..." line rather than propagated.
    """
    try:
        session = ff1.get_session(year, event_name, session_name)
        session.load(telemetry=False, laps=True, weather=False)

        per_team = {}
        for team in session.laps['Team'].unique():
            team_chars = extract_team_characteristics(session, team)
            # Teams without enough usable laps yield None and are left out.
            if not team_chars:
                continue
            per_team[team] = team_chars
    except Exception as e:
        print(f"Failed: {e}")
        return None

    # An empty mapping is reported as None so callers can test truthiness.
    return per_team or None

In [3]:
# Walk the season calendar and collect per-team characteristics for each
# selected session, stored as all_car_data[team]["<event>_<session>"].
calendar = ff1.get_event_schedule(season)

print(f"Extracting {season} season")
print(f"Total events: {len(calendar)}")
print("=" * 70)

all_car_data = {}
events_processed = []

for idx, event in calendar.iterrows():
    event_name = event['EventName']

    # Skip testing events and rows without an event name.
    if 'Testing' in str(event_name) or pd.isna(event_name):
        continue

    print(f"\n{event_name}")
    print("-" * 70)

    # Sprint detection using EventFormat
    event_format = str(event.get('EventFormat', '')).lower()
    is_sprint = 'sprint' in event_format

    if is_sprint:
        sessions_to_extract = ['FP1', 'Sprint Qualifying']
        print(f"Sprint weekend - extracting: {', '.join(sessions_to_extract)}")
    else:
        sessions_to_extract = ['FP1', 'FP2', 'FP3']
        print(f"Normal weekend - extracting: {', '.join(sessions_to_extract)}")

    event_key = event_name.lower().replace(' ', '_')

    # Tracks whether at least one session of this event produced data.
    event_has_data = False

    for session_name in sessions_to_extract:
        print(f"  {session_name}...", end=' ')

        session_data = extract_session(season, event_name, session_name)

        if not session_data:
            print("游댮 (no data)")
            continue

        print(f"游릭 ({len(session_data)} teams)")
        event_has_data = True

        session_key = session_name.lower().replace(' ', '_')
        full_key = f"{event_key}_{session_key}"

        for team, chars in session_data.items():
            all_car_data.setdefault(team, {})[full_key] = chars

    # Record the event only if something was extracted; otherwise the saved
    # metadata would list events (e.g. ones whose sessions all failed to
    # load) for which the output contains no data at all.
    if event_has_data:
        events_processed.append(event_name)

print("\n" + "=" * 70)
print("Extraction complete!")
print(f"Events: {len(events_processed)}")
print(f"Teams: {len(all_car_data)}")

Extracting 2025 season
Total events: 25

Australian Grand Prix
----------------------------------------------------------------------
Normal weekend - extracting: FP1, FP2, FP3
  FP1... 游릭 (10 teams)
  FP2... 游릭 (10 teams)
  FP3... 游릭 (10 teams)

Chinese Grand Prix
----------------------------------------------------------------------
Sprint weekend - extracting: FP1, Sprint Qualifying
  FP1... 游릭 (10 teams)
  Sprint Qualifying... 游릭 (10 teams)

Japanese Grand Prix
----------------------------------------------------------------------
Normal weekend - extracting: FP1, FP2, FP3
  FP1... 游릭 (10 teams)
  FP2... 游릭 (10 teams)
  FP3... 游릭 (10 teams)

Bahrain Grand Prix
----------------------------------------------------------------------
Normal weekend - extracting: FP1, FP2, FP3
  FP1... 游릭 (10 teams)
  FP2... 游릭 (10 teams)
  FP3... 游릭 (10 teams)

Saudi Arabian Grand Prix
----------------------------------------------------------------------
Normal weekend - extracting: FP1, FP2, FP3
  FP

## Validate Speed Data

Quick check that speed is actually extracted.

In [4]:
print("Speed data validation:")
print("=" * 70)

# Spot-check a single entry: the first session stored for the first team.
if not all_car_data:
    print("No data extracted!")
else:
    first_team = next(iter(all_car_data))
    first_session_key = next(iter(all_car_data[first_team]))
    first_session = all_car_data[first_team][first_session_key]

    print(f"Example: {first_team} - {first_session_key}")
    print(f"  Sector times: {first_session.get('sector_times', 'MISSING')}")
    print(f"  Speed profile: {first_session.get('speed_profile', 'MISSING')}")

    if 'speed_profile' not in first_session:
        # The extractor omits the key when no positive trap speed was found.
        print(f"\n游댮 No speed_profile! Speed extraction failed.")
    else:
        top_speed = first_session['speed_profile'].get('top_speed', 0)
        if top_speed > 0:
            print(f"\n游릭 Top speed: {top_speed:.1f} km/h")
        else:
            print(f"\n游댮 Speed is 0! Check speed trap columns.")

Speed data validation:
Example: Red Bull Racing - australian_grand_prix_fp1
  Sector times: {'s1': 26.863, 's2': 17.533, 's3': 33.3}
  Speed profile: {'top_speed': 326.0}

游릭 Top speed: 326.0 km/h


In [5]:
# Bundle the extracted data with run metadata and persist it as JSON.
output = {
    'metadata': {
        'season': season,
        'last_updated': datetime.now().isoformat(),
        'events_processed': events_processed,
        # Names every speed trap column the extractor reads, SpeedST included.
        'speed_extraction': 'Speed trap data (SpeedI1, SpeedI2, SpeedFL, SpeedST)'
    },
    'teams': all_car_data
}

output_path = Path(f'../data/processed/testing_files/car_characteristics/{season}_car_characteristics.json')
output_path.parent.mkdir(parents=True, exist_ok=True)

# Explicit encoding so the file does not depend on the platform default.
with open(output_path, 'w', encoding='utf-8') as f:
    json.dump(output, f, indent=2)

print(f"游릭 Saved to {output_path}")
print(f"  File size: {output_path.stat().st_size / 1024:.1f} KB")

游릭 Saved to ../data/processed/testing_files/car_characteristics/2025_car_characteristics.json
  File size: 346.7 KB


## Final Test

Test with ranking code to make sure speed data is used.

In [6]:
import sys

# Put the parent of the working directory on sys.path so `src` is importable.
PROJECT_ROOT = Path.cwd().parents[0]
sys.path.append(str(PROJECT_ROOT))

from src.extractors.performance_extractor import extract_all_teams_performance

print("Testing with ranking code...")
print("=" * 70)

perf = extract_all_teams_performance(all_car_data, 'fp1')

if not perf:
    print("游댮 Extraction failed!")
else:
    print(f"游릭 Extracted {len(perf)} teams\n")

    # Show the five teams with the highest top_speed value.
    print("Speed check (sorted by top speed):")
    teams_by_speed = sorted(
        perf.items(),
        key=lambda entry: entry[1].get('top_speed', 0),
        reverse=True,
    )

    for team, team_perf in teams_by_speed[:5]:
        corners = team_perf.get('slow_corner_performance', 0)
        speed = team_perf.get('top_speed', 0)
        print(f"{team:<25} Corners: {corners:.3f}  Speed: {speed:.3f}")

    # A maximum of 0 means no team received a usable speed value.
    speeds = [team_perf.get('top_speed', 0) for team_perf in perf.values()]
    min_speed, max_speed = min(speeds), max(speeds)

    print(f"\nSpeed range: {min_speed:.3f} to {max_speed:.3f}")

    if max_speed > 0:
        print("游릭 All good!")
    else:
        print("游댮 All speeds are 0 - extraction still broken!")

Testing with ranking code...
游릭 Extracted 10 teams

Speed check (sorted by top speed):
Red Bull Racing           Corners: 0.640  Speed: 0.787
McLaren                   Corners: 0.501  Speed: 0.722
Racing Bulls              Corners: 0.594  Speed: 0.722
Aston Martin              Corners: 0.513  Speed: 0.646
Alpine                    Corners: 0.230  Speed: 0.562

Speed range: 0.133 to 0.787
游릭 All good!
