# 19: 2025 Sequential Predictions (Bayesian Learning)

Test the 2026 prediction system using 2025 season as proxy.

## Workflow

**Week 0 (Pre-season Testing):**
- Load 2024 characteristics as priors
- High uncertainty (regulation changes)

**Week 1-24 (Each Race):**
- Make prediction with current priors
- Race happens
- Update beliefs (Bayesian)
- Posteriors → Next week's priors
- Uncertainty decreases

## Key Features

- 🟢 REAL 2025 data (not estimated)
- 🟢 Sequential learning (each race improves)
- 🟢 Uncertainty tracking (high → low)
- 🟢 Interactive Plotly visualizations
- 🟢 Compare predictions vs actuals

## Setup

In [None]:
import json
import numpy as np
import pandas as pd
import fastf1 as ff1
from pathlib import Path
from collections import defaultdict
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
import logging

logging.getLogger("fastf1").setLevel(logging.ERROR)
warnings.filterwarnings('ignore')

ff1.Cache.enable_cache('../data/raw/.fastf1_cache')

print("ðŸŸ¢ Setup complete")

## Week 0: Initialize Priors from 2024

Start with 2024 season data as baseline.
Add uncertainty to simulate regulation changes.

In [None]:
# Load 2024 data as starting point
base_path = Path('../data/processed/testing_files/driver_characteristics')

with open(base_path / 'driver_characteristics.json') as f:
    priors_2024 = json.load(f)

with open(base_path / 'dnf_risk.json') as f:
    dnf_2024 = json.load(f)

with open(base_path / 'racecraft.json') as f:
    racecraft_2024 = json.load(f)

print(f"ðŸŸ¢ Loaded 2024 data:")
print(f"   - {len(priors_2024['drivers'])} drivers")
print(f"   - DNF risk for {len(dnf_2024['drivers'])} drivers")
print(f"   - Racecraft for {len(racecraft_2024['drivers'])} drivers")

In [None]:
# Create Week 0 priors (2024 data + uncertainty)
priors_week0 = {
    'week': 0,
    'season': 2025,
    'description': 'Initial priors from 2024 season with added uncertainty',
    'races_seen': 0,
    'drivers': {}
}

# 2025 grid
grid_2025 = [
    'VER', 'LAW', 'NOR', 'PIA', 'LEC', 'HAM', 'RUS', 'ANT',
    'ALO', 'STR', 'GAS', 'DOO', 'OCO', 'BEA', 'TSU', 'HAD',
    'SAI', 'COL', 'HUL', 'BOR'
]

for driver in grid_2025:
    pace_2024 = priors_2024['drivers'].get(driver, {}).get('pace', {}).get('quali_pace', 0.5)
    dnf_2024_rate = dnf_2024['drivers'].get(driver, {}).get('dnf_rate', 0.1)
    racecraft_2024_score = racecraft_2024['drivers'].get(driver, {}).get('skill_score', 0.5)
    
    priors_week0['drivers'][driver] = {
        'pace': {
            'quali_pace': float(pace_2024),
            'uncertainty': 0.15,
            'confidence': 'low'
        },
        'racecraft': {
            'skill_score': float(racecraft_2024_score),
            'uncertainty': 0.10
        },
        'dnf_risk': {
            'rate': float(dnf_2024_rate),
            'total_races': 0,
            'total_dnfs': 0
        },
        'races_seen': 0
    }

print(f"\nðŸŸ¢ Created Week 0 priors for {len(priors_week0['drivers'])} drivers")
print(f"\nSample (VER):")
print(f"   Pace: {priors_week0['drivers']['VER']['pace']['quali_pace']:.3f} Â± {priors_week0['drivers']['VER']['pace']['uncertainty']:.3f}")
print(f"   Racecraft: {priors_week0['drivers']['VER']['racecraft']['skill_score']:.3f}")
print(f"   DNF risk: {priors_week0['drivers']['VER']['dnf_risk']['rate']:.1%}")

## Helper Functions

In [None]:
"""
TRUE BAYESIAN UNCERTAINTY CALCULATION - FIXED!
No hardcoded factors - calculate from actual data!
"""

import copy

def calculate_observation_variance(pace_history):
    """
    Calculate observation variance from driver's pace history.
    
    Args:
        pace_history: List of floats [0.85, 0.78, 0.92, ...]
    
    Returns:
        float: Observation variance (high = inconsistent, low = consistent)
    """
    if len(pace_history) < 2:
        # Not enough data, use high default variance
        return 0.05  # High uncertainty for new drivers
    
    # Get recent pace observations (last 5 races)
    # FIX: pace_history is already a list of floats!
    recent_paces = pace_history[-5:]
    
    # Calculate variance
    obs_var = np.var(recent_paces)
    
    # Add minimum floor (observations always have some noise)
    min_var = 0.001
    return max(obs_var, min_var)


def bayesian_uncertainty_update(prior_uncertainty, observation_variance, n_observations=1):
    """
    TRUE Bayesian uncertainty update.
    
    Uses variance addition formula:
    1/ÏƒÂ²_posterior = 1/ÏƒÂ²_prior + n/ÏƒÂ²_observation
    
    No hardcoded factors!
    
    Args:
        prior_uncertainty: Current uncertainty (standard deviation)
        observation_variance: Variance of new observations
        n_observations: Number of observations (default 1, 0.5 for DNF)
    
    Returns:
        float: Updated uncertainty (standard deviation)
    """
    # Convert uncertainty (std dev) to variance
    prior_var = prior_uncertainty ** 2
    
    # Bayesian update formula
    posterior_var = 1.0 / (1.0/prior_var + n_observations/observation_variance)
    
    # Convert back to uncertainty (std dev)
    posterior_uncertainty = np.sqrt(posterior_var)
    
    return float(posterior_uncertainty)


def bayesian_update(priors, race_results):
    """
    TRULY BAYESIAN update using actual data variance.
    
    Key features:
    - Uncertainty calculated from actual observation variance
    - Consistent drivers â†’ Low variance â†’ Fast uncertainty reduction
    - Erratic drivers â†’ High variance â†’ Slow uncertainty reduction
    - No hardcoded factors!
    """
    posteriors = copy.deepcopy(priors)
    
    posteriors['week'] = priors['week'] + 1
    posteriors['races_seen'] = priors['races_seen'] + 1
    
    grid_size = 20
    updates = []
    
    for driver, result in race_results.items():
        if driver not in posteriors['drivers']:
            continue
        
        driver_data = posteriors['drivers'][driver]
        races_seen = driver_data['races_seen']
        
        # Learning rate (exponential decay)
        alpha = max(0.05, 1.0 / (races_seen + 2))
        
        # UPDATE PACE
        observed_pace = 1.0 - (result['quali_pos'] - 1) / (grid_size - 1)
        prior_pace = driver_data['pace']['quali_pace']
        new_pace = (1 - alpha) * prior_pace + alpha * observed_pace
        
        driver_data['pace']['quali_pace'] = float(new_pace)
        
        # Store pace history (as floats!)
        if 'pace_history' not in driver_data:
            driver_data['pace_history'] = []
        driver_data['pace_history'].append(float(observed_pace))
        

        # BAYESIAN UNCERTAINTY UPDATE
        dnf = result.get('dnf', False)
        prior_uncertainty = driver_data['pace']['uncertainty']
        
        if dnf:
            # DNF: We got quali data but no race data
            # Observation variance is higher (less information)
            obs_var = 0.08  # High variance = low confidence
            n_obs = 0.5  # Partial observation
        
        else:
            # Clean finish: Calculate actual variance from history
            obs_var = calculate_observation_variance(driver_data['pace_history'])
            n_obs = 1.0  # Full observation
        
        # TRUE BAYESIAN UPDATE (no hardcoded factors!)
        new_uncertainty = bayesian_uncertainty_update(
            prior_uncertainty, 
            obs_var, 
            n_obs
        )
        
        driver_data['pace']['uncertainty'] = float(new_uncertainty)
        

        # UPDATE DNF RISK
        driver_data['dnf_risk']['total_races'] += 1
        if dnf:
            driver_data['dnf_risk']['total_dnfs'] += 1
        
        total_races = driver_data['dnf_risk']['total_races']
        total_dnfs = driver_data['dnf_risk']['total_dnfs']
        driver_data['dnf_risk']['rate'] = float(total_dnfs / total_races) if total_races > 0 else 0.0
        

        # UPDATE RACECRAFT
        if not dnf and 'race_pos' in result:
            gain = result['quali_pos'] - result['race_pos']
            skill_delta = gain * 0.02
            current_skill = driver_data['racecraft']['skill_score']
            new_skill = np.clip(current_skill + alpha * skill_delta, 0.2, 0.9)
            driver_data['racecraft']['skill_score'] = float(new_skill)
        
        driver_data['races_seen'] = races_seen + 1
        
        # Track changes
        updates.append({
            'driver': driver,
            'pace_change': new_pace - prior_pace,
            'uncertainty_old': prior_uncertainty,
            'uncertainty_new': new_uncertainty,
            'obs_variance': obs_var,
            'dnf': dnf,
            'alpha': alpha
        })
    
    return posteriors, updates


def extract_race_results(year, race_name):
    """Extract actual results from FastF1."""
    print(f"Extracting results for {race_name} {year}...")
    
    try:
        quali = ff1.get_session(year, race_name, 'Q')
        quali.load(laps=False, telemetry=False, weather=False)
        
        race = ff1.get_session(year, race_name, 'R')
        race.load(laps=False, telemetry=False, weather=False)
        
        results = {}
        
        for _, row in quali.results.iterrows():
            driver = row['Abbreviation']
            quali_pos = row['Position']
            
            if pd.notna(driver) and pd.notna(quali_pos):
                results[driver] = {'quali_pos': int(quali_pos)}
        
        for _, row in race.results.iterrows():
            driver = row['Abbreviation']
            race_pos = row['Position']
            
            dnf = row.dnf if hasattr(row, 'dnf') else False
            status = str(row['Status']) if 'Status' in row else ''
            if not dnf and status:
                dnf = 'Finished' not in status and '+' not in status
            
            if pd.notna(driver) and driver in results:
                if pd.notna(race_pos):
                    results[driver]['race_pos'] = int(race_pos)
                results[driver]['dnf'] = dnf
        
        print(f"ðŸŸ¢ Extracted {len(results)} drivers")
        return results
        
    except Exception as e:
        print(f"ðŸ”´ Error: {e}")
        return None

## Sequential Learning: Process Multiple Races

In [None]:
# Define race sequence
races_2025 = [
    'Australian Grand Prix',
    'Chinese Grand Prix',
    'Japanese Grand Prix',
    'Bahrain Grand Prix',
    'Saudi Arabian Grand Prix',
    'Miami Grand Prix'
]

# Track evolution for multiple drivers
evolution = defaultdict(list)
current_priors = priors_week0

# Drivers to track
tracked_drivers = ['VER', 'NOR', 'LEC', 'HAM', 'PIA']

print("\nSEQUENTIAL LEARNING ACROSS 2025 SEASON")
print("="*70)

for week, race_name in enumerate(races_2025, 1):
    print(f"\n{'='*70}")
    print(f"Week {week}: {race_name}")
    print('='*70)
    
    results = extract_race_results(2025, race_name)
    
    if not results:
        print(f"ðŸ”´  No data available")
        continue
    
    current_priors, updates = bayesian_update(current_priors, results)
    
    # Track all drivers
    for driver in tracked_drivers:
        if driver in current_priors['drivers']:
            d = current_priors['drivers'][driver]
            evolution[driver].append({
                'week': week,
                'race': race_name,
                'pace': d['pace']['quali_pace'],
                'uncertainty': d['pace']['uncertainty'],
                'dnf_rate': d['dnf_risk']['rate'],
                'racecraft': d['racecraft']['skill_score']
            })
    
    print(f"\nðŸ“Š Week {week} Summary:")
    print(f"   Races seen: {current_priors['races_seen']}")
    
    if 'VER' in current_priors['drivers']:
        ver = current_priors['drivers']['VER']
        print(f"   VER pace: {ver['pace']['quali_pace']:.3f} Â± {ver['pace']['uncertainty']:.3f}")
        print(f"   VER DNF rate: {ver['dnf_risk']['rate']:.1%}")

print(f"\n\nðŸŸ¢ Processed {len(races_2025)} races")

## Interactive Visualizations (Plotly)

In [None]:
# Create interactive plots for all tracked drivers

# Driver colors
colors = {
    'VER': '#0600EF',  # Red Bull
    'NOR': '#FF8700',  # McLaren
    'LEC': '#DC0000',  # Ferrari
    'HAM': '#DC0000',  # Ferrari
    'PIA': '#FF8700'   # McLaren
}

# Plot 1: Pace Evolution with Uncertainty Bands
fig1 = go.Figure()

for driver in tracked_drivers:
    if driver in evolution and evolution[driver]:
        df = pd.DataFrame(evolution[driver])
        
        # Main line
        fig1.add_trace(go.Scatter(
            x=df['week'],
            y=df['pace'],
            name=driver,
            line=dict(color=colors.get(driver, '#333'), width=3),
            mode='lines+markers',
            marker=dict(size=8)
        ))
        
        # Uncertainty band
        fig1.add_trace(go.Scatter(
            x=df['week'].tolist() + df['week'].tolist()[::-1],
            y=(df['pace'] + df['uncertainty']).tolist() + (df['pace'] - df['uncertainty']).tolist()[::-1],
            fill='toself',
            fillcolor=colors.get(driver, '#333'),
            opacity=0.2,
            line=dict(width=0),
            showlegend=False,
            hoverinfo='skip'
        ))

fig1.update_layout(
    title=dict(
        text='<b>Pace Evolution with Uncertainty (2025 Season)</b>',
        font=dict(size=20)
    ),
    xaxis=dict(
        title='Week (Race Number)',
        tickmode='linear',
        tick0=1,
        dtick=1
    ),
    yaxis=dict(
        title='Quali Pace',
        range=[0.3, 1]
    ),
    hovermode='x unified',
    height=500,
    template='plotly_white'
)

fig1.show()

print("ðŸŸ¢ Pace evolution plot rendered")

In [None]:
# Plot 2: Uncertainty Reduction
fig2 = go.Figure()

for driver in tracked_drivers:
    if driver in evolution and evolution[driver]:
        df = pd.DataFrame(evolution[driver])
        
        fig2.add_trace(go.Scatter(
            x=df['week'],
            y=df['uncertainty'],
            name=driver,
            line=dict(color=colors.get(driver, '#333'), width=3),
            mode='lines+markers',
            marker=dict(size=8)
        ))

fig2.update_layout(
    title=dict(
        text='<b>Uncertainty Reduction Over Time</b>',
        font=dict(size=20)
    ),
    xaxis=dict(
        title='Week (Race Number)',
        tickmode='linear',
        tick0=1,
        dtick=1
    ),
    yaxis=dict(
        title='Uncertainty (Â±)',
        range=[0, 0.16]
    ),
    hovermode='x unified',
    height=400,
    template='plotly_white'
)

fig2.show()

print("ðŸŸ¢ Uncertainty plot rendered")

In [None]:
# Plot 3: DNF Rate Evolution
fig3 = go.Figure()

for driver in tracked_drivers:
    if driver in evolution and evolution[driver]:
        df = pd.DataFrame(evolution[driver])
        
        fig3.add_trace(go.Scatter(
            x=df['week'],
            y=df['dnf_rate'] * 100,
            name=driver,
            line=dict(color=colors.get(driver, '#333'), width=3),
            mode='lines+markers',
            marker=dict(size=8)
        ))

fig3.update_layout(
    title=dict(
        text='<b>DNF Rate Evolution (Running Average)</b>',
        font=dict(size=20)
    ),
    xaxis=dict(
        title='Week (Race Number)',
        tickmode='linear',
        tick0=1,
        dtick=1
    ),
    yaxis=dict(
        title='DNF Rate (%)',
        range=[0, 30]
    ),
    hovermode='x unified',
    height=400,
    template='plotly_white'
)

fig3.show()

print("ðŸŸ¢ DNF rate plot rendered")

In [None]:
# Plot 4: Learning Rate Decay
weeks = list(range(1, 25))
alphas = [max(0.05, 1.0 / (w + 1)) for w in weeks]

fig4 = go.Figure()

fig4.add_trace(go.Scatter(
    x=weeks,
    y=alphas,
    mode='lines+markers',
    line=dict(color='#E10600', width=3),
    marker=dict(size=8),
    name='Learning Rate (Î±)'
))

fig4.update_layout(
    title=dict(
        text='<b>Learning Rate Decay Over Season</b>',
        font=dict(size=20)
    ),
    xaxis=dict(
        title='Week (Race Number)',
        tickmode='linear',
        tick0=1,
        dtick=2
    ),
    yaxis=dict(
        title='Learning Rate (Î±)',
        range=[0, 0.55]
    ),
    annotations=[
        dict(
            x=1, y=0.5,
            text="Fast learning<br>(high Î±)",
            showarrow=True,
            arrowhead=2
        ),
        dict(
            x=20, y=0.05,
            text="Stable estimates<br>(low Î±)",
            showarrow=True,
            arrowhead=2
        )
    ],
    height=400,
    template='plotly_white'
)

fig4.show()

print("ðŸŸ¢ Learning rate plot rendered")

## Analysis & Insights

In [None]:
print("\n" + "="*70)
print("LEARNING ANALYSIS")
print("="*70)

for driver in tracked_drivers:
    if driver in evolution and len(evolution[driver]) > 1:
        first = evolution[driver][0]
        last = evolution[driver][-1]
        
        pace_change = last['pace'] - first['pace']
        uncert_reduction = (1 - last['uncertainty']/first['uncertainty']) * 100
        
        print(f"\n{driver}:")
        print(f"   Pace: {first['pace']:.3f} â†’ {last['pace']:.3f} (Î” {pace_change:+.3f})")
        print(f"   Uncertainty: {first['uncertainty']:.3f} â†’ {last['uncertainty']:.3f} (-{uncert_reduction:.1f}%)")
        print(f"   DNF Rate: {first['dnf_rate']:.1%} â†’ {last['dnf_rate']:.1%}")
        print(f"   Racecraft: {first['racecraft']:.3f} â†’ {last['racecraft']:.3f}")

## Export: Save Current State

In [None]:
# Save final state
output_path = Path('../data/processed/testing_files/priors/2025_week6_posteriors.json')
output_path.parent.mkdir(parents=True, exist_ok=True)

with open(output_path, 'w') as f:
    json.dump(current_priors, f, indent=2)

print(f"ðŸŸ¢ Saved final state to: {output_path}")