# 18: Driver Characteristics Extraction - extended

Extract driver characteristics from 2024 season:
- Racecraft (from teammate comparisons)
- DNF Risk (from actual race results)
- Pace metrics (from quali/race)

## Setup

In [1]:
import json
import numpy as np
import pandas as pd
import fastf1 as ff1
from pathlib import Path
from collections import defaultdict
import warnings
import logging

logging.getLogger("fastf1").setLevel(logging.ERROR)
warnings.filterwarnings('ignore')

ff1.Cache.enable_cache('../data/raw/.fastf1_cache')

print("游릭 Setup complete")

游릭 Setup complete


## Step 1: Extract REAL DNF Data from 2024

In [2]:
print("Extracting DNF data from 2024 season...")
print("="*70)

# Get 2024 schedule
schedule = ff1.get_event_schedule(2024)

# Track DNFs per driver
driver_dnfs = defaultdict(lambda: {'total_races': 0, 'dnfs': 0, 'dnf_races': []})

for _, event in schedule.iterrows():
    race_name = event['EventName']
    
    # Skip testing
    if 'Testing' in str(race_name):
        continue
    
    try:
        # Load race session
        race = ff1.get_session(2024, race_name, 'R')
        race.load(laps=False, telemetry=False, weather=False)
        
        # Extract DNF status for each driver
        for _, result in race.results.iterrows():
            driver = result['Abbreviation']
            
            if not driver:
                continue
            
            # Count race
            driver_dnfs[driver]['total_races'] += 1
            
            # Check DNF (use actual fastf1 property)
            dnf = result.dnf if hasattr(result, 'dnf') else False
            status = str(result['Status']) if 'Status' in result else ''
            
            # Also check status string
            if not dnf and status:
                dnf = 'Finished' not in status and '+' not in status
            
            if dnf:
                driver_dnfs[driver]['dnfs'] += 1
                driver_dnfs[driver]['dnf_races'].append({
                    'race': race_name,
                    'status': status
                })
        
        print(f"  游릭 {race_name}")
        
    except Exception as e:
        print(f"  游댮  {race_name}: {e}")
        continue

print(f"\n游릭 Extracted DNF data for {len(driver_dnfs)} drivers")

Extracting DNF data from 2024 season...
  游릭 Bahrain Grand Prix
  游릭 Saudi Arabian Grand Prix
  游릭 Australian Grand Prix
  游릭 Japanese Grand Prix
  游릭 Chinese Grand Prix
  游릭 Miami Grand Prix
  游릭 Emilia Romagna Grand Prix
  游릭 Monaco Grand Prix
  游릭 Canadian Grand Prix
  游릭 Spanish Grand Prix
  游릭 Austrian Grand Prix
  游릭 British Grand Prix
  游릭 Hungarian Grand Prix
  游릭 Belgian Grand Prix
  游릭 Dutch Grand Prix
  游릭 Italian Grand Prix
  游릭 Azerbaijan Grand Prix
  游릭 Singapore Grand Prix
  游릭 United States Grand Prix
  游릭 Mexico City Grand Prix
  游릭 S칚o Paulo Grand Prix
  游릭 Las Vegas Grand Prix
  游릭 Qatar Grand Prix
  游릭 Abu Dhabi Grand Prix

游릭 Extracted DNF data for 24 drivers


## Step 2: Calculate DNF Risk Metrics

In [3]:
dnf_data = {
    'season': 2024,
    'extraction_type': 'actual_race_results',
    'drivers': {}
}

for driver, data in driver_dnfs.items():
    total_races = data['total_races']
    total_dnfs = data['dnfs']
    
    if total_races == 0:
        continue
    
    dnf_rate = total_dnfs / total_races
    
    # Classify DNF types
    dnf_types = defaultdict(int)
    for dnf_race in data['dnf_races']:
        status = dnf_race['status'].lower()
        
        if any(word in status for word in ['accident', 'collision', 'crash', 'damage']):
            dnf_types['incident'] += 1
        elif any(word in status for word in ['engine', 'gearbox', 'mechanical', 'electrical']):
            dnf_types['mechanical'] += 1
        elif 'disqualified' in status:
            dnf_types['disqualified'] += 1
        else:
            dnf_types['other'] += 1
    
    # Risk level
    if dnf_rate < 0.05:
        risk_level = 'very_low'
    elif dnf_rate < 0.10:
        risk_level = 'low'
    elif dnf_rate < 0.15:
        risk_level = 'moderate'
    elif dnf_rate < 0.25:
        risk_level = 'high'
    else:
        risk_level = 'very_high'
    
    dnf_data['drivers'][driver] = {
        'dnf_rate': float(dnf_rate),
        'risk_level': risk_level,
        'total_races': total_races,
        'total_dnfs': total_dnfs,
        'dnf_types': dict(dnf_types)
    }

print(f"游릭 Calculated DNF risk for {len(dnf_data['drivers'])} drivers")

游릭 Calculated DNF risk for 24 drivers


## Step 3: Load Teammate Ratio Data & Calculate Racecraft

In [4]:
# Load ratio files
base_path = Path('../data/processed/testing_files/driver_characteristics')

with open(base_path / 'driver_race_characteristics.json') as f:
    race_ratios = json.load(f)

print(f"游릭 Loaded {len(race_ratios)} race comparisons")

# Aggregate ratios per driver
race_by_driver = defaultdict(list)
for comp in race_ratios:
    race_by_driver[comp['driver']].append(comp['ratio'])

# Calculate racecraft
racecraft_data = {
    'season': 2024,
    'extraction_type': 'from_teammate_ratios',
    'drivers': {}
}

for driver, ratios in race_by_driver.items():
    avg_ratio = np.mean(ratios)
    
    # Convert ratio to skill (lower ratio = faster = better)
    skill = 0.5 + (1.0 - avg_ratio) * 5
    skill = max(0.2, min(0.9, skill))
    
    # Assign type
    if skill > 0.7:
        driver_type = 'front_runner'
    elif skill > 0.5:
        driver_type = 'midfield'
    else:
        driver_type = 'back_marker'
    
    racecraft_data['drivers'][driver] = {
        'skill_score': float(skill),
        'overtaking_skill': float(skill),
        'driver_type': driver_type,
        'avg_ratio': float(avg_ratio),
        'n_comparisons': len(ratios)
    }

print(f"游릭 Calculated racecraft for {len(racecraft_data['drivers'])} drivers")

游릭 Loaded 1164 race comparisons
游릭 Calculated racecraft for 27 drivers


## Step 4: Save Files

In [5]:
# Save DNF risk
dnf_path = base_path / 'dnf_risk.json'
with open(dnf_path, 'w') as f:
    json.dump(dnf_data, f, indent=2)

print(f"游릭 Saved {dnf_path}")

# Save racecraft
racecraft_path = base_path / 'racecraft.json'
with open(racecraft_path, 'w') as f:
    json.dump(racecraft_data, f, indent=2)

print(f"游릭 Saved {racecraft_path}")

游릭 Saved ../data/processed/testing_files/driver_characteristics/dnf_risk.json
游릭 Saved ../data/processed/testing_files/driver_characteristics/racecraft.json


## Analysis: DNF Risk

In [6]:
print("\nDNF RISK ANALYSIS")
print("="*70)

# Sort by DNF rate
sorted_dnf = sorted(
    dnf_data['drivers'].items(),
    key=lambda x: x[1]['dnf_rate']
)

print("\nLowest DNF Risk (Top 10):")
print(f"{'Driver':<8} {'DNF Rate':<12} {'DNFs':<10} {'Risk Level':<15}")
print("-"*50)

for driver, data in sorted_dnf[:10]:
    print(f"{driver:<8} {data['dnf_rate']:>8.1%}    "
          f"{data['total_dnfs']:>2}/{data['total_races']:<2}  "
          f"{data['risk_level']:<15}")

print("\nHighest DNF Risk (Bottom 10):")
print(f"{'Driver':<8} {'DNF Rate':<12} {'DNFs':<10} {'Risk Level':<15}")
print("-"*50)

for driver, data in sorted_dnf[-10:]:
    print(f"{driver:<8} {data['dnf_rate']:>8.1%}    "
          f"{data['total_dnfs']:>2}/{data['total_races']:<2}  "
          f"{data['risk_level']:<15}")


DNF RISK ANALYSIS

Lowest DNF Risk (Top 10):
Driver   DNF Rate     DNFs       Risk Level     
--------------------------------------------------
PIA          0.0%     0/24  very_low       
BEA          0.0%     0/3   very_low       
VER          4.2%     1/24  very_low       
NOR          4.2%     1/24  very_low       
LEC          8.3%     2/24  low            
HAM          8.3%     2/24  low            
RUS         12.5%     3/24  moderate       
SAI         13.0%     3/23  moderate       
PER         33.3%     8/24  very_high      
ALO         33.3%     8/24  very_high      

Highest DNF Risk (Bottom 10):
Driver   DNF Rate     DNFs       Risk Level     
--------------------------------------------------
COL         55.6%     5/9   very_high      
STR         58.3%    14/24  very_high      
TSU         62.5%    15/24  very_high      
RIC         66.7%    12/18  very_high      
OCO         69.6%    16/23  very_high      
ALB         70.8%    17/24  very_high      
ZHO         75.0%  

## Analysis: Racecraft

In [7]:
print("\nRACECRAFT ANALYSIS")
print("="*70)

# Sort by skill
sorted_skill = sorted(
    racecraft_data['drivers'].items(),
    key=lambda x: x[1]['skill_score'],
    reverse=True
)

print("\nTop 10 Racecraft:")
print(f"{'Driver':<8} {'Skill':<10} {'Type':<15} {'Avg Ratio':<12}")
print("-"*50)

for driver, data in sorted_skill[:10]:
    print(f"{driver:<8} {data['skill_score']:>6.3f}    "
          f"{data['driver_type']:<15} {data['avg_ratio']:<10.4f}")

print("\nBottom 10 Racecraft:")
print(f"{'Driver':<8} {'Skill':<10} {'Type':<15} {'Avg Ratio':<12}")
print("-"*50)

for driver, data in sorted_skill[-10:]:
    print(f"{driver:<8} {data['skill_score']:>6.3f}    "
          f"{data['driver_type']:<15} {data['avg_ratio']:<10.4f}")


RACECRAFT ANALYSIS

Top 10 Racecraft:
Driver   Skill      Type            Avg Ratio   
--------------------------------------------------
VER       0.559    midfield        0.9883    
ALO       0.540    midfield        0.9920    
HAD       0.529    midfield        0.9942    
HUL       0.527    midfield        0.9946    
BOT       0.523    midfield        0.9953    
COL       0.523    midfield        0.9954    
RUS       0.521    midfield        0.9959    
HAM       0.516    midfield        0.9969    
RIC       0.514    midfield        0.9972    
NOR       0.511    midfield        0.9978    

Bottom 10 Racecraft:
Driver   Skill      Type            Avg Ratio   
--------------------------------------------------
DOO       0.483    back_marker     1.0033    
SAR       0.478    back_marker     1.0044    
ZHO       0.476    back_marker     1.0049    
LEC       0.472    back_marker     1.0056    
LAW       0.470    back_marker     1.0061    
TSU       0.459    back_marker     1.0082    
ANT

## Combined Analysis

In [8]:
print("\nCOMBINED DRIVER PROFILES")
print("="*70)
print(f"{'Driver':<8} {'Racecraft':<12} {'DNF Rate':<12} {'Profile':<30}")
print("-"*70)

# Combine data
for driver in sorted(racecraft_data['drivers'].keys()):
    if driver not in dnf_data['drivers']:
        continue
    
    skill = racecraft_data['drivers'][driver]['skill_score']
    dnf_rate = dnf_data['drivers'][driver]['dnf_rate']
    
    # Create profile
    if skill > 0.7 and dnf_rate < 0.10:
        profile = "Elite (high skill, low risk)"
    elif skill > 0.6 and dnf_rate < 0.15:
        profile = "Solid (good skill, moderate risk)"
    elif skill > 0.5:
        profile = "Competitive (average skill)"
    else:
        profile = "Developing"
    
    print(f"{driver:<8} {skill:>8.3f}     {dnf_rate:>8.1%}     {profile:<30}")


COMBINED DRIVER PROFILES
Driver   Racecraft    DNF Rate     Profile                       
----------------------------------------------------------------------
ALB         0.502        70.8%     Competitive (average skill)   
ALO         0.540        33.3%     Competitive (average skill)   
BEA         0.499         0.0%     Developing                    
BOT         0.523        79.2%     Competitive (average skill)   
COL         0.523        55.6%     Competitive (average skill)   
DOO         0.483       100.0%     Developing                    
GAS         0.494        54.2%     Developing                    
HAM         0.516         8.3%     Competitive (average skill)   
HUL         0.527        45.8%     Competitive (average skill)   
LAW         0.470        33.3%     Developing                    
LEC         0.472         8.3%     Developing                    
MAG         0.491        45.5%     Developing                    
NOR         0.511         4.2%     Competitiv