# Driver Position Validation

Validates driver position predictions (1-20) against actual F1 results.

**Tests:**
- Overall MAE and bias
- Position accuracy (¬±1, ¬±2, ¬±3)
- Confidence interval coverage
- Performance by experience tier

**Fixes:** -11 position bias by predicting driver positions instead of team ranks.

## Setup

In [1]:
import fastf1 as ff1
import pandas as pd
import numpy as np
import json
import sys
from pathlib import Path
from collections import defaultdict


sys.path.append('../')
from src.predictors.driver_predictor import DriverRanker

import logging
logging.getLogger("fastf1").setLevel(logging.ERROR)

import warnings
warnings.filterwarnings('ignore')

ff1.Cache.enable_cache('../data/raw/.fastf1_cache')

In [2]:
# CREATE driver_characteristics.json

print("Creating driver_characteristics.json...")
print("="*70)

base_path = Path('../data/processed/testing_files/driver_characteristics')

# Load ratio files
with open(base_path / 'driver_quali_characteristics.json') as f:
    quali_ratios = json.load(f)

with open(base_path / 'driver_race_characteristics.json') as f:
    race_ratios = json.load(f)

# Get all unique drivers
all_drivers = set()
for comp in quali_ratios:
    all_drivers.add(comp['driver'])
for comp in race_ratios:
    all_drivers.add(comp['driver'])

print(f"Found {len(all_drivers)} drivers")

# Aggregate ratios per driver
quali_by_driver = defaultdict(list)
race_by_driver = defaultdict(list)

for comp in quali_ratios:
    quali_by_driver[comp['driver']].append(comp['ratio'])

for comp in race_ratios:
    race_by_driver[comp['driver']].append(comp['ratio'])

# Simple tier assignment based on ratio
def assign_tier(driver, avg_ratio):
    """Simple tier based on pace."""
    # ratio < 0.995 = very fast (veteran/established)
    # ratio 0.995-1.005 = average (developing)
    # ratio > 1.005 = slower (rookie)
    
    if avg_ratio < 0.995:
        return 'veteran'
    elif avg_ratio < 1.005:
        return 'established'
    else:
        return 'developing'

# Create structure
unified = {
    'year': 2024,
    'total_drivers': len(all_drivers),
    'drivers': {}
}

for driver in sorted(all_drivers):
    # Average ratios
    quali_ratio = np.mean(quali_by_driver[driver]) if driver in quali_by_driver else 1.0
    race_ratio = np.mean(race_by_driver[driver]) if driver in race_by_driver else 1.0
    
    # Assign tier
    tier = assign_tier(driver, quali_ratio)
    
    # Convert ratio to pace (ratio < 1.0 = faster)
    quali_pace = 0.5 + (1.0 - quali_ratio) * 5
    quali_pace = max(0.3, min(0.7, quali_pace))
    
    unified['drivers'][driver] = {
        'experience': {
            'tier': tier,
            'total_seasons': 5  # Default
        },
        'pace': {
            'quali_pace': float(quali_pace)
        }
    }

# Save
output_path = base_path / 'driver_characteristics.json'
with open(output_path, 'w') as f:
    json.dump(unified, f, indent=2)

print(f"‚úÖ Created {output_path}")
print(f"‚úÖ {len(unified['drivers'])} drivers ready")

Creating driver_characteristics.json...
Found 27 drivers
‚úÖ Created ../data/processed/testing_files/driver_characteristics/driver_characteristics.json
‚úÖ 27 drivers ready


In [3]:
try:
    # Load driver ranker
    driver_ranker = DriverRanker(
        '../data/processed/testing_files/driver_characteristics/driver_characteristics.json'
    )

    # Load enriched data for tier analysis
    with open('../data/processed/testing_files/driver_characteristics/driver_characteristics.json') as f:
        enriched_data = json.load(f)

    print(f"üü¢ Loaded {enriched_data['total_drivers']} driver profiles")

except Exception as e:
    print(f"üî¥ Failed to load driver ranker: {e}")
    sys.exit(1)

Loaded characteristics for 27 drivers
üü¢ Loaded 27 driver profiles


## Validation Function

In [4]:
def validate_session(year, event, session_type_char):
    """
    Validate driver position predictions for one session.
    
    Uses actual team average positions to create team predictions,
    then converts to driver positions and compares to actual results.
    """
    session_name = 'qualifying' if session_type_char == 'Q' else 'race'
    
    print(f"\n{'='*70}")
    print(f"[{year}] {event} - {session_name.upper()}")
    print('='*70)
    
    # Load session
    try:
        session = ff1.get_session(year, event, session_type_char)
        session.load(laps=False, telemetry=False, weather=False)
    except Exception as e:
        print(f"üî¥ Failed: {e}")
        return None
    
    # Extract actual positions
    actual_positions = {}
    for _, driver in session.results.iterrows():
        abbr = driver['Abbreviation']
        pos = driver['Position']
        if pd.notna(pos) and pd.notna(abbr):
            actual_positions[abbr] = int(pos)
    
    if not actual_positions:
        return None
    
    # Extract team lineups
    lineups = {}
    for team_name in session.results['TeamName'].unique():
        drivers = session.results[session.results['TeamName'] == team_name]['Abbreviation'].tolist()
        if len(drivers) >= 2:
            lineups[team_name] = drivers[:2]
    
    # Create team predictions based on actual average positions
    # NOTE: In production, replace with Bayesian model predictions
    team_avg_pos = {
        team: np.mean([actual_positions.get(d, 20) for d in drvs])
        for team, drvs in lineups.items()
    }
    
    team_predictions = {
        team: rank + 1
        for rank, (team, _) in enumerate(sorted(team_avg_pos.items(), key=lambda x: x[1]))
    }
    
    # Predict driver positions
    try:
        results = driver_ranker.predict_positions(
            team_predictions=team_predictions,
            team_lineups=lineups,
            session_type=session_name
        )
    except Exception as e:
        print(f"üî¥ Prediction failed: {e}")
        return None
    
    # Compare predictions vs actuals
    comparisons = []
    for pred in results['predictions']:
        if pred.driver not in actual_positions:
            continue
        
        actual = actual_positions[pred.driver]
        error = pred.position - actual
        tier = enriched_data['drivers'].get(pred.driver, {}).get('experience', {}).get('tier', 'unknown')
        
        comparisons.append({
            'driver': pred.driver,
            'predicted': pred.position,
            'actual': actual,
            'error': error,
            'abs_error': abs(error),
            'tier': tier,
            'in_ci': pred.confidence_lower <= actual <= pred.confidence_upper
        })
    
    if not comparisons:
        return None
    
    df = pd.DataFrame(comparisons)
    
    # Calculate metrics
    metrics = {
        'year': year,
        'event': event,
        'session': session_name,
        'n_drivers': len(comparisons),
        'mae': df['abs_error'].mean(),
        'bias': df['error'].mean(),
        'rmse': np.sqrt((df['error'] ** 2).mean()),
        'accuracy_1': (df['abs_error'] <= 1).mean(),
        'accuracy_2': (df['abs_error'] <= 2).mean(),
        'accuracy_3': (df['abs_error'] <= 3).mean(),
        'ci_coverage': df['in_ci'].mean(),
        'comparisons': comparisons
    }
    
    # Print summary
    print(f"MAE: {metrics['mae']:.2f}  Bias: {metrics['bias']:+.2f}  "
          f"¬±1: {metrics['accuracy_1']*100:.0f}%  CI: {metrics['ci_coverage']*100:.0f}%")
    
    return metrics

print("üü¢ Validation function ready")

üü¢ Validation function ready


## Test Sessions

Validates on 2024-2025 qualifying and race sessions.

In [5]:
# Define test sessions
test_sessions = [
    # 2024 Qualifying
    (2024, 'Bahrain Grand Prix', 'Q'),
    (2024, 'Saudi Arabian Grand Prix', 'Q'),
    (2024, 'Australian Grand Prix', 'Q'),
    (2024, 'Japanese Grand Prix', 'Q'),
    (2024, 'Chinese Grand Prix', 'Q'),
    (2024, 'Miami Grand Prix', 'Q'),
    (2024, 'Emilia Romagna Grand Prix', 'Q'),
    (2024, 'Monaco Grand Prix', 'Q'),
    (2024, 'Canadian Grand Prix', 'Q'),
    (2024, 'Spanish Grand Prix', 'Q'),
    
    # 2024 Races
    (2024, 'Bahrain Grand Prix', 'R'),
    (2024, 'Saudi Arabian Grand Prix', 'R'),
    (2024, 'Australian Grand Prix', 'R'),
    
    # 2025 Qualifying
    (2025, 'Bahrain Grand Prix', 'Q'),
    (2025, 'Saudi Arabian Grand Prix', 'Q'),
    (2025, 'Australian Grand Prix', 'Q'),
    (2025, 'Japanese Grand Prix', 'Q'),
    (2025, 'Chinese Grand Prix', 'Q'),
    (2025, 'Miami Grand Prix', 'Q'),
    (2025, 'Emilia Romagna Grand Prix', 'Q'),
    (2025, 'Monaco Grand Prix', 'Q'),
    (2025, 'Canadian Grand Prix', 'Q'),
    (2025, 'Spanish Grand Prix', 'Q'),
    
    # 2025 Races
    (2025, 'Bahrain Grand Prix', 'R'),
    (2025, 'Saudi Arabian Grand Prix', 'R'),
    (2025, 'Australian Grand Prix', 'R'),
]

## Run Validation

In [6]:
# Run validation on all test sessions
all_results = []

for year, event, session_type in test_sessions:
    result = validate_session(year, event, session_type)
    if result:
        all_results.append(result)

print(f"\nüü¢ Validated {len(all_results)} sessions")


[2024] Bahrain Grand Prix - QUALIFYING
MAE: 1.30  Bias: +0.00  ¬±1: 55%  CI: 100%

[2024] Saudi Arabian Grand Prix - QUALIFYING
MAE: 1.80  Bias: +0.00  ¬±1: 35%  CI: 95%

[2024] Australian Grand Prix - QUALIFYING
MAE: 1.50  Bias: -0.39  ¬±1: 61%  CI: 94%

[2024] Japanese Grand Prix - QUALIFYING
MAE: 1.80  Bias: +0.00  ¬±1: 60%  CI: 75%

[2024] Chinese Grand Prix - QUALIFYING
MAE: 2.30  Bias: +0.00  ¬±1: 50%  CI: 75%

[2024] Miami Grand Prix - QUALIFYING
MAE: 1.80  Bias: +0.00  ¬±1: 50%  CI: 85%

[2024] Emilia Romagna Grand Prix - QUALIFYING
MAE: 2.00  Bias: +0.00  ¬±1: 55%  CI: 80%

[2024] Monaco Grand Prix - QUALIFYING
MAE: 1.90  Bias: +0.50  ¬±1: 50%  CI: 80%

[2024] Canadian Grand Prix - QUALIFYING
MAE: 2.00  Bias: +0.00  ¬±1: 40%  CI: 90%

[2024] Spanish Grand Prix - QUALIFYING
MAE: 1.50  Bias: +0.00  ¬±1: 50%  CI: 85%

[2024] Bahrain Grand Prix - RACE
MAE: 1.20  Bias: +0.00  ¬±1: 55%  CI: 95%

[2024] Saudi Arabian Grand Prix - RACE
MAE: 1.80  Bias: +0.00  ¬±1: 50%  CI: 85%

[2024

## Overall Summary

In [7]:
if all_results:
    print(f"\n{'='*70}")
    print("OVERALL VALIDATION SUMMARY")
    print('='*70)
    
    # Calculate overall metrics
    mae = np.mean([r['mae'] for r in all_results])
    bias = np.mean([r['bias'] for r in all_results])
    acc1 = np.mean([r['accuracy_1'] for r in all_results])
    acc2 = np.mean([r['accuracy_2'] for r in all_results])
    acc3 = np.mean([r['accuracy_3'] for r in all_results])
    ci_cov = np.mean([r['ci_coverage'] for r in all_results])
    
    print(f"\nAcross {len(all_results)} sessions:")
    print(f"  MAE:          {mae:.2f} positions")
    print(f"  Bias:         {bias:+.2f} positions")
    print(f"  ¬±1 position:  {acc1*100:.1f}%")
    print(f"  ¬±2 positions: {acc2*100:.1f}%")
    print(f"  ¬±3 positions: {acc3*100:.1f}%")
    print(f"  CI coverage:  {ci_cov*100:.1f}%")
    
    # Interpretation
    print(f"\nüí° INTERPRETATION:")
    
    if abs(bias) < 0.5:
        print(f"  üü¢ Low bias ({bias:+.2f}) - predictions are well-calibrated")
    elif bias > 0:
        print(f"  ‚ö†Ô∏è  Positive bias ({bias:+.2f}) - over-predicting positions")
    else:
        print(f"  ‚ö†Ô∏è  Negative bias ({bias:+.2f}) - under-predicting positions")
    
    if mae < 2.5:
        print(f"  üü¢ Low MAE ({mae:.2f}) - good accuracy")
    elif mae < 3.5:
        print(f"  ‚ö†Ô∏è  Moderate MAE ({mae:.2f}) - room for improvement")
    else:
        print(f"  üî¥ High MAE ({mae:.2f}) - needs work")
    
    if ci_cov > 0.8:
        print(f"  üü¢ Good CI coverage ({ci_cov*100:.0f}%) - uncertainty well-calibrated")
    else:
        print(f"  ‚ö†Ô∏è  Low CI coverage ({ci_cov*100:.0f}%) - confidence intervals too narrow")


OVERALL VALIDATION SUMMARY

Across 26 sessions:
  MAE:          2.05 positions
  Bias:         -0.01 positions
  ¬±1 position:  46.3%
  ¬±2 positions: 68.5%
  ¬±3 positions: 81.1%
  CI coverage:  81.1%

üí° INTERPRETATION:
  üü¢ Low bias (-0.01) - predictions are well-calibrated
  üü¢ Low MAE (2.05) - good accuracy
  üü¢ Good CI coverage (81%) - uncertainty well-calibrated


## Performance by Experience Tier

In [8]:
# Combine all comparisons
all_comps = []
for r in all_results:
    all_comps.extend(r['comparisons'])

comp_df = pd.DataFrame(all_comps)

print(f"\n{'='*70}")
print("PERFORMANCE BY EXPERIENCE TIER")
print('='*70)

print(f"\n{'Tier':<15} {'N':>6} {'MAE':>8} {'Bias':>8} {'¬±1':>6} {'¬±2':>6} {'CI':>6}")
print('-'*70)

for tier in ['rookie', 'developing', 'established', 'veteran']:
    tier_data = comp_df[comp_df['tier'] == tier]
    if len(tier_data) > 0:
        n = len(tier_data)
        tier_mae = tier_data['abs_error'].mean()
        tier_bias = tier_data['error'].mean()
        tier_acc1 = (tier_data['abs_error'] <= 1).mean() * 100
        tier_acc2 = (tier_data['abs_error'] <= 2).mean() * 100
        tier_ci = tier_data['in_ci'].mean() * 100
        
        print(f"{tier.upper():<15} {n:>6} {tier_mae:>8.2f} {tier_bias:>+8.2f} "
              f"{tier_acc1:>5.1f}% {tier_acc2:>5.1f}% {tier_ci:>5.1f}%")

# Tier insights
print(f"\nüí° TIER INSIGHTS:")

rookie_mae = comp_df[comp_df['tier'] == 'rookie']['abs_error'].mean() if len(comp_df[comp_df['tier'] == 'rookie']) > 0 else 0
veteran_mae = comp_df[comp_df['tier'] == 'veteran']['abs_error'].mean() if len(comp_df[comp_df['tier'] == 'veteran']) > 0 else 0

if rookie_mae > veteran_mae * 1.5:
    print(f"  ‚ö†Ô∏è  Rookies have much higher error ({rookie_mae:.2f} vs {veteran_mae:.2f})")
    print(f"     ‚Üí Consider increasing rookie uncertainty in driver_ranker.py")
elif rookie_mae > veteran_mae:
    print(f"  üü¢ Rookies slightly less accurate ({rookie_mae:.2f} vs {veteran_mae:.2f}) - expected")
else:
    print(f"  üü¢ Similar accuracy across tiers - good!")


PERFORMANCE BY EXPERIENCE TIER

Tier                 N      MAE     Bias     ¬±1     ¬±2     CI
----------------------------------------------------------------------
DEVELOPING         115     2.25    -1.16  43.5%  66.1%  78.3%
ESTABLISHED        336     1.96    +0.08  47.3%  69.6%  82.4%
VETERAN             65     2.23    +1.55  44.6%  66.2%  78.5%

üí° TIER INSIGHTS:
  üü¢ Similar accuracy across tiers - good!


## Error Analysis

In [9]:
print(f"\n{'='*70}")
print("WORST PREDICTIONS (Top 10)")
print('='*70)

worst = comp_df.nlargest(10, 'abs_error')[['driver', 'predicted', 'actual', 'error', 'tier']]

print(f"\n{'Driver':<8} {'Pred':>6} {'Actual':>6} {'Error':>7} {'Tier':<12}")
print('-'*50)

for _, row in worst.iterrows():
    print(f"{row['driver']:<8} {row['predicted']:>6.1f} {row['actual']:>6} "
          f"{row['error']:>+7.1f} {row['tier']:<12}")

# Error patterns
print(f"\nüí° ERROR PATTERNS:")

# Check if errors cluster by position range
top10_errors = comp_df[comp_df['actual'] <= 10]['error'].mean()
bottom10_errors = comp_df[comp_df['actual'] > 10]['error'].mean()

if abs(top10_errors) > abs(bottom10_errors) + 0.5:
    print(f"  ‚ö†Ô∏è  Larger errors in top 10 (avg: {top10_errors:+.2f})")
elif abs(bottom10_errors) > abs(top10_errors) + 0.5:
    print(f"  ‚ö†Ô∏è  Larger errors in bottom 10 (avg: {bottom10_errors:+.2f})")
else:
    print(f"  üü¢ Errors distributed across grid")


WORST PREDICTIONS (Top 10)

Driver     Pred Actual   Error Tier        
--------------------------------------------------
LAW        10.0     20   -10.0 established 
TSU        10.0     20   -10.0 developing  
TSU        10.0     20   -10.0 developing  
TSU        10.0     19    -9.0 developing  
LAW         6.0     15    -9.0 established 
HAM        10.0     18    -8.0 developing  
VER        11.0      3    +8.0 veteran     
STR        10.0     18    -8.0 established 
SAI        10.0     18    -8.0 established 
VER         9.0      2    +7.0 veteran     

üí° ERROR PATTERNS:
  üü¢ Errors distributed across grid


## Save Results

In [10]:
def to_jsonable(x):
    """Convert numpy/pandas types to JSON-serializable Python types."""
    if isinstance(x, dict):
        return {str(k): to_jsonable(v) for k, v in x.items()}
    if isinstance(x, (list, tuple)):
        return [to_jsonable(v) for v in x]
    if isinstance(x, np.generic):
        return x.item()
    if isinstance(x, np.ndarray):
        return x.tolist()
    if isinstance(x, (pd.Timestamp, pd.Timedelta)):
        return x.isoformat()
    if x is pd.NA:
        return None
    return x


# Package results
output = {
    'summary': {
        'n_sessions': len(all_results),
        'n_predictions': len(comp_df),
        'mae': mae,
        'bias': bias,
        'accuracy_1': acc1,
        'accuracy_2': acc2,
        'accuracy_3': acc3,
        'ci_coverage': ci_cov
    },
    'by_tier': {
        tier: {
            'n': len(comp_df[comp_df['tier'] == tier]),
            'mae': comp_df[comp_df['tier'] == tier]['abs_error'].mean() 
                   if len(comp_df[comp_df['tier'] == tier]) > 0 else None,
            'bias': comp_df[comp_df['tier'] == tier]['error'].mean() 
                    if len(comp_df[comp_df['tier'] == tier]) > 0 else None
        }
        for tier in ['rookie', 'developing', 'established', 'veteran']
    },
    'session_results': all_results
}

output = to_jsonable(output)

# Save
output_path = Path('../data/processed/testing_files/validation/driver_position_validation.json')
output_path.parent.mkdir(parents=True, exist_ok=True)

with open(output_path, 'w') as f:
    json.dump(output, f, indent=2)

print(f"üü¢ Saved validation results to {output_path}")

üü¢ Saved validation results to ../data/processed/testing_files/validation/driver_position_validation.json
