# 11 Model Validation - Full 2025 Season

Compare the model's predicted team rankings against the actual 2025 qualifying results for all 24 races.

In [1]:
import json
import sys
from pathlib import Path

sys.path.append('../')

from src.predictors.team_predictor import rank_teams_for_track
from src.utils.validation import (
    analyze_by_stage,
    compare_rankings,
    confidence_calibration,
)

## Load Data

In [2]:
def load_json(path):
    """Read a JSON document from ``path``.

    The file is always decoded as UTF-8 rather than the platform default
    encoding, because the data contains non-ASCII race names.

    Returns the parsed document, or ``None`` when the file does not exist.
    Malformed JSON is deliberately not caught: it still raises
    ``json.JSONDecodeError`` so a corrupt input file is not silently ignored.
    """
    try:
        with open(path, encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        return None


# Human-readable summaries of what was loaded / what was missing.
loaded = []
errors = []

# Load tracks
track_path = Path('../data/processed/testing_files/track_characteristics/2025_track_characteristics.json')
track_data = load_json(track_path)
if track_data is None:
    errors.append("track characteristics")
    all_tracks = {}
else:
    all_tracks = track_data.get('tracks', {})
    loaded.append(f"tracks ({len(all_tracks)})")

# Load cars
car_path = Path('../data/processed/testing_files/car_characteristics/2025_car_characteristics.json')
car_data = load_json(car_path)
if car_data is None:
    errors.append("car characteristics")
    all_cars = {}
else:
    all_cars = car_data.get('teams', {})
    loaded.append(f"teams ({len(all_cars)})")

# Load actual results
results_path = Path('../data/processed/testing_files/validation/2025_qualifying_results.json')
actual_results = load_json(results_path)
if actual_results is None:
    errors.append("qualifying results")
    # Fall back to an empty dict so later cells can still use dict methods.
    actual_results = {}
else:
    loaded.append(f"results ({actual_results.get('total_races', 0)} races)")

# Print summary
if loaded:
    print(f"游릭 Loaded: {', '.join(loaded)}")
if errors:
    print(f"游댮  Missing: {', '.join(errors)}")


游릭 Loaded: tracks (24), teams (10), results (24 races)


## Generate Predictions

In [3]:
# (stage, weekend type) pairs at which a prediction is generated.
# Any weekend type other than 'sprint' uses the normal three-practice list.
SPRINT_STAGES = [('post_fp1', 'sprint'), ('post_sprint_quali', 'sprint')]
NORMAL_STAGES = [('post_fp1', 'normal'), ('post_fp2', 'normal'), ('post_fp3', 'normal')]

# race name -> {stage -> {'teams': [...], 'scores': [...], 'confidence': ...}}
all_predictions = {}

print("Generating predictions...")
print("=" * 70)

# .get() keeps this cell runnable when the results file was missing and the
# load cell fell back to an empty dict (previously a KeyError on 'races').
for race_name, race_data in actual_results.get('races', {}).items():

    if race_name not in all_tracks:
        print(f"游댮 {race_name}: No track data")
        continue

    track_chars = all_tracks[race_name]
    weekend_type = race_data['weekend_type']
    stages = SPRINT_STAGES if weekend_type == 'sprint' else NORMAL_STAGES

    race_predictions = {}

    for stage, wtype in stages:
        rankings = rank_teams_for_track(all_cars, track_chars, stage, wtype)

        # An empty ranking means no prediction is recorded for this stage.
        if not rankings:
            continue

        # Each ranking entry is unpacked as a 4-tuple whose first two fields
        # are the team and its score. The stage's 'confidence' is the third
        # field of the first entry (presumably the top-ranked team - confirm
        # against rank_teams_for_track).
        race_predictions[stage] = {
            'teams': [team for team, _, _, _ in rankings],
            'scores': [score for _, score, _, _ in rankings],
            'confidence': rankings[0][2]
        }

    all_predictions[race_name] = race_predictions
    print(f"  {race_name}: {len(race_predictions)} stages")

print(f"\n游릭 Generated predictions for {len(all_predictions)} races")

Generating predictions...
  Australian Grand Prix: 3 stages
  Chinese Grand Prix: 2 stages
  Japanese Grand Prix: 3 stages
  Bahrain Grand Prix: 3 stages
  Saudi Arabian Grand Prix: 3 stages
  Miami Grand Prix: 2 stages
  Emilia Romagna Grand Prix: 3 stages
  Monaco Grand Prix: 3 stages
  Spanish Grand Prix: 3 stages
  Canadian Grand Prix: 3 stages
  Austrian Grand Prix: 3 stages
  British Grand Prix: 3 stages
  Belgian Grand Prix: 2 stages
  Hungarian Grand Prix: 3 stages
  Dutch Grand Prix: 3 stages
  Italian Grand Prix: 3 stages
  Azerbaijan Grand Prix: 3 stages
  Singapore Grand Prix: 3 stages
  United States Grand Prix: 2 stages
  Mexico City Grand Prix: 3 stages
  São Paulo Grand Prix: 2 stages
  Las Vegas Grand Prix: 3 stages
  Qatar Grand Prix: 2 stages
  Abu Dhabi Grand Prix: 3 stages

游릭 Generated predictions for 24 races


## Compare vs Actual

In [4]:
# Score every stage prediction of every race against that race's actual
# team order. Result shape: race name -> {stage -> metrics}.
validation_results = {}

for race_name, stage_predictions in all_predictions.items():

    race_actual = actual_results['races'][race_name]
    # Team of each entry in 'positions', in the order stored in the results file.
    actual_teams = [entry['team'] for entry in race_actual['positions']]

    scored_stages = {}

    for stage, pred_data in stage_predictions.items():
        stage_metrics = compare_rankings(pred_data['teams'], actual_teams)
        # Carry the prediction's confidence alongside the accuracy metrics so
        # later cells can read both from one record.
        stage_metrics['confidence'] = pred_data['confidence']
        scored_stages[stage] = stage_metrics

    validation_results[race_name] = scored_stages

print(f"游릭 Validated {len(validation_results)} races")

游릭 Validated 24 races


## Overall Results

In [5]:
# Aggregate the per-race metrics into one summary record per stage.
by_stage = analyze_by_stage(validation_results)

print("Overall Results by Stage")
print("=" * 70)

# Stages are reported in this fixed order; stages absent from the summary are skipped.
REPORTED_STAGES = ('post_fp1', 'post_fp2', 'post_fp3', 'post_sprint_quali')

# (label, metric key, format spec) for each summary line, in print order.
# A metric missing from the summary is printed as 0.
SUMMARY_LINES = (
    ("Winner accuracy", 'winner_correct', '.1%'),
    ("Top 3 accuracy", 'top3_accuracy', '.1%'),
    ("Top 5 accuracy", 'top5_accuracy', '.1%'),
    ("Spearman correlation", 'spearman', '.3f'),
    ("Avg positions off", 'mae_positions', '.1f'),
)

for stage in (name for name in REPORTED_STAGES if name in by_stage):
    stage_summary = by_stage[stage]

    print(f"\n{stage.upper()}:")
    print(f"  Races: {stage_summary['count']}")
    for label, key, spec in SUMMARY_LINES:
        print(f"  {label}: {stage_summary.get(key, 0):{spec}}")

Overall Results by Stage

POST_FP1:
  Races: 24
  Winner accuracy: 33.3%
  Top 3 accuracy: 47.2%
  Top 5 accuracy: 50.0%
  Spearman correlation: 0.528
  Avg positions off: 3.8

POST_FP2:
  Races: 18
  Winner accuracy: 33.3%
  Top 3 accuracy: 40.7%
  Top 5 accuracy: 45.6%
  Spearman correlation: 0.424
  Avg positions off: 4.2

POST_FP3:
  Races: 18
  Winner accuracy: 11.1%
  Top 3 accuracy: 64.8%
  Top 5 accuracy: 66.7%
  Spearman correlation: 0.747
  Avg positions off: 3.3

POST_SPRINT_QUALI:
  Races: 6
  Winner accuracy: 33.3%
  Top 3 accuracy: 38.9%
  Top 5 accuracy: 60.0%
  Spearman correlation: 0.665
  Avg positions off: 3.5


## Best vs Worst

In [6]:
# Pick one "final" stage per race: post_fp3 when present, otherwise
# post_sprint_quali. Races that have neither stage are left out.
final_predictions = {}

for race_name, stage_metrics in validation_results.items():
    final_stage = next(
        (name for name in ('post_fp3', 'post_sprint_quali') if name in stage_metrics),
        None,
    )
    if final_stage is not None:
        final_predictions[race_name] = stage_metrics[final_stage]

# Highest top-5 accuracy first. The sort is stable, so races with equal
# accuracy keep the order in which they appear in final_predictions.
sorted_races = list(final_predictions.items())
sorted_races.sort(key=lambda pair: pair[1].get('top5_accuracy', 0), reverse=True)

# Both tables share one layout; only the heading and the slice differ.
report_sections = (
    ("Best Predictions (Top 5 accuracy):", sorted_races[:5]),
    ("\nWorst Predictions:", sorted_races[-5:]),
)

for heading, rows in report_sections:
    print(heading)
    print("=" * 70)
    for race_name, race_metrics in rows:
        # The marker shows whether the predicted winner matched the actual one.
        winner_hit = race_metrics.get('winner_correct', 0) == 1.0
        marker = '游릭' if winner_hit else '游댮'
        print(f"{marker} {race_name:<30} Top5: {race_metrics.get('top5_accuracy', 0):.1%}  Spearman: {race_metrics.get('spearman', 0):.2f}")

Best Predictions (Top 5 accuracy):
游댮 Chinese Grand Prix             Top5: 80.0%  Spearman: 0.96
游댮 Japanese Grand Prix            Top5: 80.0%  Spearman: 0.81
游댮 Spanish Grand Prix             Top5: 80.0%  Spearman: 0.87
游릭 Canadian Grand Prix            Top5: 80.0%  Spearman: 0.96
游댮 British Grand Prix             Top5: 80.0%  Spearman: 0.67

Worst Predictions:
游댮 Singapore Grand Prix           Top5: 60.0%  Spearman: 0.79
游릭 United States Grand Prix       Top5: 60.0%  Spearman: 0.52
游댮 Las Vegas Grand Prix           Top5: 60.0%  Spearman: 0.79
游댮 Qatar Grand Prix               Top5: 60.0%  Spearman: 0.66
游댮 São Paulo Grand Prix           Top5: 40.0%  Spearman: 0.56


## Confidence Calibration

In [7]:
# One (confidence, winner-was-correct) pair per race, taken from the final
# stage chosen for that race. A missing confidence defaults to 0.5.
conf_predictions = [
    (
        final_metrics.get('confidence', 0.5),
        final_metrics.get('winner_correct', 0) == 1.0,
    )
    for final_metrics in final_predictions.values()
]

calibration = confidence_calibration(conf_predictions)

print("Confidence Calibration:")
print("=" * 70)
print(f"Brier score: {calibration['brier_score']:.3f} (lower = better)\n")

# One line per confidence bin returned by the calibration helper.
for bin_label, bin_stats in calibration['bins'].items():
    print(f"{bin_label}: Accuracy {bin_stats['accuracy']:.1%} ({bin_stats['count']} races)")

Confidence Calibration:
Brier score: 0.557 (lower = better)

0.5-0.7: Accuracy 33.3% (6 races)
0.8-1.0: Accuracy 11.1% (18 races)


## Save Results

In [8]:
# Bundle the stage summary, the per-race metrics and the calibration report
# into a single document.
output = {
    'overall': by_stage,
    'by_race': validation_results,
    'calibration': calibration
}

output_path = Path('../data/processed/testing_files/validation/validation_results.json')

# Serialize BEFORE touching the file: if any value is not JSON-serializable,
# json.dumps raises here and an existing results file is left intact instead
# of being truncated to a partial document.
payload = json.dumps(output, indent=2)

# Create the target directory if needed, so saving works on a fresh checkout.
output_path.parent.mkdir(parents=True, exist_ok=True)
output_path.write_text(payload, encoding='utf-8')

print(f"游릭 Saved to {output_path}")

游릭 Saved to ../data/processed/testing_files/validation/validation_results.json
