# 12 Prediction Bias Analysis

Which teams are we systematically over/under-rating?

In [1]:
import json
import sys
from collections import defaultdict
from pathlib import Path

import numpy as np

sys.path.append('../')

from src.predictors.team_predictor import rank_teams_for_track
from src.utils.team_mapping import canonicalize_team

## Load Data

In [2]:
loaded = []
errors = []

try:
    track_path = Path('../data/processed/testing_files/track_characteristics/2025_track_characteristics.json')
    with open(track_path) as f:
        all_tracks = json.load(f)['tracks']
    loaded.append(f"tracks ({len(all_tracks)})")
except FileNotFoundError:
    errors.append("track characteristics")
    all_tracks = []

try:
    car_path = Path('../data/processed/testing_files/car_characteristics/2025_car_characteristics.json')
    with open(car_path) as f:
        all_cars = json.load(f)['teams']
    loaded.append(f"cars ({len(all_cars)})")
except FileNotFoundError:
    errors.append("car characteristics")
    all_cars = []

try:
    results_path = Path('../data/processed/testing_files/validation/2025_qualifying_results.json')
    with open(results_path) as f:
        actual_results = json.load(f)
    loaded.append(f"results ({actual_results.get('total_races', 0)} races)")
except FileNotFoundError:
    errors.append("qualifying results")
    actual_results = {}

# Print summary
if loaded:
    print(f"ðŸŸ¢ Loaded: {', '.join(loaded)}")
if errors:
    print(f"ðŸ”´  Missing: {', '.join(errors)}")

ðŸŸ¢ Loaded: tracks (24), cars (10), results (24 races)


## Calculate Position Bias

Bias = Predicted Position - Actual Position
- Negative bias = Underrating (predicted worse than actual)
- Positive bias = Overrating (predicted better than actual)

In [3]:
#Collect all predictions and actuals
all_biases = defaultdict(lambda: {'fp1': [], 'fp2': [], 'fp3': [], 'sprint_quali': []})

for race_name, race_data in actual_results['races'].items():
    if race_name not in all_tracks:
        continue
    
    track_chars = all_tracks[race_name]
    weekend_type = race_data['weekend_type']
    
    # Get actual positions
    actual_positions = {}
    for pos_data in race_data['positions']:
        team = canonicalize_team(pos_data['team'])
        actual_positions[team] = pos_data['position']
    
    # Get predictions for each stage
    if weekend_type == 'sprint':
        stages = [('post_fp1', 'sprint', 'fp1'), ('post_sprint_quali', 'sprint', 'sprint_quali')]
    else:
        stages = [('post_fp1', 'normal', 'fp1'), ('post_fp2', 'normal', 'fp2'), ('post_fp3', 'normal', 'fp3')]
    
    for stage_key, wtype, stage_name in stages:
        rankings = rank_teams_for_track(all_cars, track_chars, stage_key, wtype)
        
        if not rankings:
            continue
        
        # Calculate bias for each team
        for pred_pos, (team, score, conf, _) in enumerate(rankings, 1):
            team_canonical = canonicalize_team(team)
            
            if team_canonical in actual_positions:
                actual_pos = actual_positions[team_canonical]
                bias = pred_pos - actual_pos  # Positive = overrating
                all_biases[team_canonical][stage_name].append(bias)

print(f"ðŸŸ¢ Calculated biases for {len(all_biases)} teams")

ðŸŸ¢ Calculated biases for 10 teams


## Average Bias by Team

In [4]:
# Calculate average bias for each team
team_avg_bias = {}

for team, stages in all_biases.items():
    team_avg_bias[team] = {}
    
    for stage, biases in stages.items():
        if biases:
            team_avg_bias[team][stage] = np.mean(biases)
        else:
            team_avg_bias[team][stage] = None

# Show results
print("Average Position Bias by Team (FP3):")
print("=" * 70)
print("Negative = Underrating, Positive = Overrating")
print()

# Sort by FP3 bias
fp3_biases = [(team, data['fp3']) for team, data in team_avg_bias.items() if data.get('fp3') is not None]
fp3_biases.sort(key=lambda x: x[1])

for team, bias in fp3_biases:
    if bias < -1:
        status = "UNDERRATED"
    elif bias > 1:
        status = "OVERRATED"
    else:
        status = "OK"
    
    print(f"{team:<20} {bias:+.1f} positions  {status}")

Average Position Bias by Team (FP3):
Negative = Underrating, Positive = Overrating

RED BULL             -11.6 positions  UNDERRATED
WILLIAMS             -9.9 positions  UNDERRATED
ALPINE               -9.6 positions  UNDERRATED
AUDI                 -8.4 positions  UNDERRATED
ASTON MARTIN         -7.9 positions  UNDERRATED
MERCEDES             -7.9 positions  UNDERRATED
RB                   -7.6 positions  UNDERRATED
HAAS                 -6.8 positions  UNDERRATED
FERRARI              -4.3 positions  UNDERRATED
MCLAREN              -1.5 positions  UNDERRATED


## Bias Evolution (FP1 â†’ FP2 â†’ FP3)

In [5]:
print("Bias Evolution by Session:")
print("=" * 70)
print(f"{'Team':<20} {'FP1':>8} {'FP2':>8} {'FP3':>8} {'Trend':>10}")
print("-" * 70)

for team, data in sorted(team_avg_bias.items()):
    fp1 = data.get('fp1')
    fp2 = data.get('fp2')
    fp3 = data.get('fp3')
    
    if fp1 is None or fp3 is None:
        continue
    
    # Check trend
    if abs(fp3 - fp1) < 0.5:
        trend = "Stable"
    elif fp3 > fp1:
        trend = "Worse"  # More positive = more overrated
    else:
        trend = "Better"
    
    fp1_str = f"{fp1:+.1f}" if fp1 is not None else "N/A"
    fp2_str = f"{fp2:+.1f}" if fp2 is not None else "N/A"
    fp3_str = f"{fp3:+.1f}" if fp3 is not None else "N/A"
    
    print(f"{team:<20} {fp1_str:>8} {fp2_str:>8} {fp3_str:>8} {trend:>10}")

Bias Evolution by Session:
Team                      FP1      FP2      FP3      Trend
----------------------------------------------------------------------
ALPINE                   -9.4    -11.7     -9.6     Stable
ASTON MARTIN            -10.2    -10.0     -7.9      Worse
AUDI                     -8.1    -10.9     -8.4     Stable
FERRARI                  -4.0     -6.7     -4.3     Stable
HAAS                     -6.7     -6.8     -6.8     Stable
MCLAREN                  -2.1     -2.9     -1.5      Worse
MERCEDES                 -3.7     -1.2     -7.9     Better
RB                       -8.5    -10.6     -7.6      Worse
RED BULL                -10.3     -8.3    -11.6     Better
WILLIAMS                -12.9     -6.4     -9.9      Worse


## Which Teams Get Predicted Right?

In [6]:
# Count how often each team is within 2 positions
accuracy_by_team = defaultdict(lambda: {'fp1': 0, 'fp2': 0, 'fp3': 0, 'total': 0})

for team, stages in all_biases.items():
    for stage, biases in stages.items():
        if stage == 'sprint_quali':
            continue
        
        total = len(biases)
        accurate = sum(1 for b in biases if abs(b) <= 2)  # Within 2 positions
        
        if total > 0:
            accuracy_by_team[team][stage] = accurate / total
            accuracy_by_team[team]['total'] = total

print("Prediction Accuracy by Team (Â±2 positions):")
print("=" * 70)
print(f"{'Team':<20} {'FP1':>8} {'FP2':>8} {'FP3':>8}")
print("-" * 70)

for team, data in sorted(accuracy_by_team.items(), key=lambda x: x[1].get('fp3', 0), reverse=True):
    fp1 = data.get('fp1', 0)
    fp2 = data.get('fp2', 0)
    fp3 = data.get('fp3', 0)
    
    print(f"{team:<20} {fp1:>7.0%} {fp2:>7.0%} {fp3:>7.0%}")

Prediction Accuracy by Team (Â±2 positions):
Team                      FP1      FP2      FP3
----------------------------------------------------------------------
MCLAREN                  62%     67%     72%
FERRARI                  46%     17%     33%
ASTON MARTIN              8%     11%     11%
HAAS                      8%     11%     11%
WILLIAMS                  0%     22%      6%
RB                        4%      0%      6%
MERCEDES                 50%     28%      6%
RED BULL                  4%     11%      0%
ALPINE                    0%      0%      0%
AUDI                      0%      0%      0%


## Problem Teams Analysis

In [7]:
# Find teams where FP3 is much worse than FP1
print("Teams Where FP3 Ruins Prediction:")
print("=" * 70)

for team, data in sorted(team_avg_bias.items()):
    fp1 = data.get('fp1')
    fp3 = data.get('fp3')
    
    if fp1 is None or fp3 is None:
        continue
    
    degradation = abs(fp3) - abs(fp1)
    
    if degradation > 1.0:  # FP3 is more wrong than FP1
        print(f"{team:<20} FP1: {fp1:+.1f}  â†’  FP3: {fp3:+.1f}  (Î”: {degradation:+.1f})")

Teams Where FP3 Ruins Prediction:
MERCEDES             FP1: -3.7  â†’  FP3: -7.9  (Î”: +4.2)
RED BULL             FP1: -10.3  â†’  FP3: -11.6  (Î”: +1.3)
