# 🏎️ F1 2025 Season Analysis

This notebook performs an exploratory data analysis (EDA) on the F1 2025 Season race results.
It utilizes the `src` module for data loading and processing to ensure consistency with the dashboard application.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import os
from pathlib import Path

# Locate the project root by walking up from the working directory until a
# directory containing the `src` package is found. Assuming the kernel always
# starts in a direct child of the root breaks with
# "ModuleNotFoundError: No module named 'src'" whenever the notebook is launched
# from anywhere else (for example from the repository root itself).
_start_dir = Path(os.getcwd())
project_root = next(
    (candidate for candidate in (_start_dir, *_start_dir.parents)
     if (candidate / 'src').is_dir()),
    _start_dir.parent,  # fall back to the previous assumption when no `src` dir is found
)
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

from src.loader import load_data, clean_data
from src.analysis import calculate_driver_stats, calculate_team_stats

# Set plot style
sns.set_theme(style="darkgrid")
plt.rcParams['figure.figsize'] = (12, 6)

print(f"Project root: {project_root}")
print("Modules loaded successfully!")

ModuleNotFoundError: No module named 'src'

## 1. Data Loading and Cleaning
We load the raw CSV data and apply cleaning steps (handling missing values, converting types).

In [None]:
# Resolve the race-results CSV relative to the project root, then load and clean it
data_path = project_root.joinpath('data', 'Formula1_2025Season_RaceResults.csv')
df = clean_data(load_data(str(data_path)))

print(f"Total Records: {len(df)}")
df.head()

## 2. Driver Statistics
Analyzing performance metrics for each driver: Points, Wins, Podiums, and Consistency.

In [None]:
# Per-driver aggregates come from the shared analysis module
driver_stats = calculate_driver_stats(df)

# Display Top 10 Drivers (ranked by total points) with their headline metrics
driver_table_columns = ['Total_Points', 'Wins', 'Podium', 'Avg_Position']
display(driver_stats.nlargest(10, 'Total_Points').loc[:, driver_table_columns])

### Visualization: Top 10 Drivers

In [None]:
# Ten highest scorers, with the driver index turned into a regular 'Driver' column
top_drivers = driver_stats.nlargest(10, 'Total_Points').reset_index()

plt.figure(figsize=(12, 6))
# barplot returns the Axes it drew on, so labels can be set in a single call
ax = sns.barplot(data=top_drivers, x='Total_Points', y='Driver', palette='viridis')
ax.set(title='Top 10 Drivers by Total Points', xlabel='Points', ylabel='Driver')
plt.show()

## 3. Team Performance
Evaluating constructor efficiency based on points per driver and reliability.

In [None]:
# Per-team aggregates come from the shared analysis module
team_stats = calculate_team_stats(df)

# Constructors ranked by total points, highest first
team_table_columns = ['Total_Points', 'Points_Per_Driver', 'Finish_Rate']
display(team_stats.sort_values('Total_Points', ascending=False).loc[:, team_table_columns])

### Visualization: Constructor Standings

In [None]:
# Teams ordered by points, with the team index turned into a regular 'Team' column
team_standings = team_stats.sort_values('Total_Points', ascending=False).reset_index()

plt.figure(figsize=(12, 6))
# barplot returns the Axes it drew on, so labels can be set in a single call
ax = sns.barplot(data=team_standings, x='Total_Points', y='Team', palette='magma')
ax.set(title='Constructors Championship Standings', xlabel='Total Points', ylabel='Team')
plt.show()

## 4. 🏆 Head-to-Head Driver Comparison
Compare two drivers side by side across multiple metrics.

In [None]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

def head_to_head_comparison(df, driver1, driver2):
    """Build a 2x2 Plotly figure comparing two drivers.

    Only tracks that appear for both drivers are plotted, sorted by track name.
    The four panels are: finishing position per track, points per track,
    starting grid vs finishing position, and a Wins / Podium / Fastest_Laps
    bar comparison.

    Args:
        df: Race results with 'Driver', 'Track', 'Position', 'Points' and
            'Starting Grid' columns.
        driver1: Name of the first driver (drawn in red).
        driver2: Name of the second driver (drawn in blue).

    Returns:
        The assembled Plotly figure.

    Note:
        The last panel reads the notebook-level `driver_stats` table, so both
        drivers must be present in its index.
    """
    first_rows = df[df['Driver'] == driver1]
    second_rows = df[df['Driver'] == driver2]

    # Restrict both drivers to the tracks they have in common
    shared_tracks = set(first_rows['Track']) & set(second_rows['Track'])

    # (name, colour, filtered results) for each driver, in plotting order
    contenders = [
        (driver1, '#ff1e00',
         first_rows[first_rows['Track'].isin(shared_tracks)].sort_values('Track')),
        (driver2, '#00d4ff',
         second_rows[second_rows['Track'].isin(shared_tracks)].sort_values('Track')),
    ]

    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=('Finishing Position by Race', 'Points per Race',
                        'Grid vs Finish Position', 'Win/Podium Comparison'),
    )

    # Panel (1, 1): finishing position per track, best position at the top
    for name, colour, results in contenders:
        fig.add_trace(
            go.Scatter(x=results['Track'], y=results['Position'],
                       mode='lines+markers', name=name, line=dict(color=colour)),
            row=1, col=1,
        )
    fig.update_yaxes(autorange="reversed", row=1, col=1)

    # Panel (1, 2): points scored per track
    for name, colour, results in contenders:
        fig.add_trace(
            go.Bar(x=results['Track'], y=results['Points'],
                   name=f'{name} Points', marker_color=colour, opacity=0.7),
            row=1, col=2,
        )

    # Panel (2, 1): starting grid against finishing position
    for name, colour, results in contenders:
        fig.add_trace(
            go.Scatter(x=results['Starting Grid'], y=results['Position'],
                       mode='markers', name=name, marker=dict(size=10, color=colour)),
            row=2, col=1,
        )
    # Dashed diagonal marks "finished where started"
    fig.add_trace(
        go.Scatter(x=[1, 20], y=[1, 20], mode='lines', name='No Change',
                   line=dict(dash='dash', color='gray')),
        row=2, col=1,
    )
    fig.update_yaxes(autorange="reversed", row=2, col=1)

    # Panel (2, 2): season totals taken from the notebook-level driver_stats table
    categories = ['Wins', 'Podium', 'Fastest_Laps']
    season_totals = [(name, colour, driver_stats.loc[name]) for name, colour, _ in contenders]
    for name, colour, totals in season_totals:
        fig.add_trace(
            go.Bar(x=categories, y=[totals[category] for category in categories],
                   name=name, marker_color=colour),
            row=2, col=2,
        )

    fig.update_layout(height=800, title_text=f"üèéÔ∏è Head-to-Head: {driver1} vs {driver2}",
                      template='plotly_dark', showlegend=True)
    return fig

# Example comparison between two title contenders (names are fixed here)
fig = head_to_head_comparison(df, driver1='Oscar Piastri', driver2='Max Verstappen')
fig.show()

## 5. 📈 Championship Points Progression
Visualize how drivers accumulated points throughout the season.

In [None]:
# Define race order for 2025 season
race_order = ['Australia', 'China', 'Japan', 'Bahrain', 'Saudi Arabia', 'Miami',
              'Emilia-Romagna', 'Monaco', 'Spain', 'Canada', 'Austria', 'Great Britain',
              'Belgium', 'Hungary', 'Netherlands', 'Italy', 'Azerbaijan', 'Singapore',
              'United States', 'Mexico', 'Brazil', 'Las Vegas', 'Qatar']

# Map each track to its 1-based round number
race_number = {race: round_no for round_no, race in enumerate(race_order, start=1)}
df['Race_Number'] = df['Track'].map(race_number)

# Any track that is not listed in race_order ends up with a NaN round number and
# is then misplaced or dropped by the order-dependent charts. Report it instead of
# letting it disappear silently.
unmapped_tracks = sorted(
    df.loc[df['Race_Number'].isna(), 'Track'].dropna().astype(str).unique()
)
if unmapped_tracks:
    print(f"Warning: tracks missing from race_order: {unmapped_tracks}")

# Get top 8 drivers
top_8_drivers = driver_stats.nlargest(8, 'Total_Points').index.tolist()

# Calculate cumulative points for each driver: one record per (driver, race),
# carrying the running points total after that race
cumulative_data = []
for driver in top_8_drivers:
    driver_races = df[df['Driver'] == driver].sort_values('Race_Number')
    cumulative_points = driver_races['Points'].cumsum()
    # zip walks the aligned columns directly, avoiding the per-row Series
    # that iterrows() builds
    cumulative_data.extend(
        {
            'Driver': driver,
            'Race': track,
            'Race_Number': round_no,
            'Cumulative_Points': running_total,
        }
        for track, round_no, running_total in zip(
            driver_races['Track'], driver_races['Race_Number'], cumulative_points
        )
    )

cumulative_df = pd.DataFrame(cumulative_data)

# Plot Championship Progression
fig = px.line(cumulative_df, x='Race_Number', y='Cumulative_Points', color='Driver',
              markers=True, title='üèÜ Championship Points Progression - Top 8 Drivers',
              labels={'Race_Number': 'Race', 'Cumulative_Points': 'Total Points'},
              template='plotly_dark')

# Label the x-axis ticks with race names instead of round numbers
fig.update_layout(height=600,
                  xaxis=dict(tickmode='array', tickvals=list(range(1, len(race_order)+1)),
                            ticktext=race_order, tickangle=45))
fig.show()

## 6. 🎯 Grid Position vs Finish Position Analysis
Analyze how well drivers convert grid positions to race results.

In [None]:
# Keep only rows flagged as finished and add the places gained from grid to finish
# (positive = finished ahead of the starting slot)
finished_df = df.loc[df['Finished'] == True].assign(
    Positions_Gained=lambda frame: frame['Starting Grid'] - frame['Position']
)

# Per-driver averages, best average gainer first
positions_gained = (
    finished_df
    .groupby('Driver')[['Positions_Gained', 'Starting Grid', 'Position']]
    .mean()
    .round(2)
    .sort_values('Positions_Gained', ascending=False)
)

# Two panels: average gain per driver (left) and every result as a scatter (right)
fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=('Avg Positions Gained/Lost per Driver',
                    'Grid Position vs Finish (All Races)'),
)

# Left panel: green bars for net gainers, red bars for net losers
colors = ['#00ff00' if gain >= 0 else '#ff0000'
          for gain in positions_gained['Positions_Gained']]
gain_bars = go.Bar(
    x=positions_gained['Positions_Gained'],
    y=positions_gained.index,
    orientation='h',
    marker_color=colors,
    name='Positions Gained',
)
fig.add_trace(gain_bars, row=1, col=1)

# Right panel: one marker per finished result, coloured by points scored
result_markers = dict(
    size=8,
    color=finished_df['Points'],
    colorscale='Viridis',
    showscale=True,
    colorbar=dict(title='Points'),
)
fig.add_trace(
    go.Scatter(
        x=finished_df['Starting Grid'],
        y=finished_df['Position'],
        mode='markers',
        marker=result_markers,
        text=finished_df['Driver'],
        hovertemplate='%{text}<br>Grid: %{x}<br>Finish: %{y}<extra></extra>',
        name='Race Results',
    ),
    row=1, col=2,
)

# Dashed diagonal marks "finished where started"
fig.add_trace(
    go.Scatter(x=[1, 20], y=[1, 20], mode='lines',
               line=dict(dash='dash', color='white'),
               name='No Change'),
    row=1, col=2,
)

fig.update_yaxes(autorange="reversed", row=1, col=2)
fig.update_layout(height=600, template='plotly_dark',
                  title_text='üéØ Grid Position vs Race Finish Analysis',
                  showlegend=False)
fig.show()

print("\nüìä Top 5 Position Gainers (Average):")
display(positions_gained.head())

## 7. 🏁 Race-by-Race Heatmap
Visualize all driver finishing positions across all races in a heatmap.

In [None]:
# Driver x track grid of finishing positions (first value wins if duplicated)
heatmap_data = pd.pivot_table(df, index='Driver', columns='Track',
                              values='Position', aggfunc='first')

# Columns follow the season calendar; tracks missing from the data are skipped
calendar_columns = [track for track in race_order if track in heatmap_data.columns]
heatmap_data = heatmap_data[calendar_columns]

# Rows follow the championship order (most points first)
sorted_drivers = driver_stats.sort_values('Total_Points', ascending=False).index
heatmap_data = heatmap_data.reindex(sorted_drivers)

# Build the heatmap trace first, then wrap it in a figure
position_grid = go.Heatmap(
    z=heatmap_data.values,
    x=heatmap_data.columns,
    y=heatmap_data.index,
    colorscale='RdYlGn_r',  # reversed scale: 1st place is green, 20th is red
    zmin=1, zmax=20,
    text=heatmap_data.values,
    texttemplate='%{text:.0f}',
    textfont={"size": 10},
    hovertemplate='Driver: %{y}<br>Race: %{x}<br>Position: %{z}<extra></extra>',
    colorbar=dict(title='Position'),
)
fig = go.Figure(data=position_grid)

fig.update_layout(
    title='üèÅ Season Overview: Driver Positions by Race',
    xaxis_title='Race',
    yaxis_title='Driver',
    height=800,
    template='plotly_dark',
    xaxis=dict(tickangle=45),
)
fig.show()

## 8. 🏎️ Team Reliability Analysis
Compare team reliability and DNF rates throughout the season.

In [None]:
# Per-team finish counts and points, with DNF and finish rates as percentages.
# Sorted ascending so the most reliable team ends up at the top of the horizontal bar chart.
dnf_analysis = (
    df.groupby('Team')
    .agg(
        Finishes=('Finished', 'sum'),
        Total_Entries=('Finished', 'count'),
        Total_Points=('Points', 'sum'),
    )
    .assign(
        DNFs=lambda teams: teams['Total_Entries'] - teams['Finishes'],
        DNF_Rate=lambda teams: ((teams['DNFs'] / teams['Total_Entries']) * 100).round(1),
        Finish_Rate=lambda teams: ((teams['Finishes'] / teams['Total_Entries']) * 100).round(1),
    )
    .sort_values('Finish_Rate', ascending=True)
)

# Bar chart on the left, donut chart on the right (pie traces need their own subplot type)
fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=('Team Reliability (Finish Rate %)', 'DNFs by Team'),
    specs=[[{"type": "bar"}, {"type": "pie"}]],
)

# Left: finish rate per team, coloured by the rate itself
finish_rate_bars = go.Bar(
    x=dnf_analysis['Finish_Rate'],
    y=dnf_analysis.index,
    orientation='h',
    marker=dict(color=dnf_analysis['Finish_Rate'], colorscale='RdYlGn'),
    text=dnf_analysis['Finish_Rate'].astype(str) + '%',
    textposition='outside',
    name='Finish Rate',
)
fig.add_trace(finish_rate_bars, row=1, col=1)

# Right: share of all DNFs attributed to each team
dnf_share = go.Pie(
    labels=dnf_analysis.index,
    values=dnf_analysis['DNFs'],
    textinfo='label+value',
    hole=0.4,
    name='DNFs',
)
fig.add_trace(dnf_share, row=1, col=2)

fig.update_layout(
    title_text='üõ†Ô∏è Team Reliability Analysis - 2025 Season',
    height=500,
    template='plotly_dark',
    showlegend=False,
)
fig.show()

## 9. 🥇 Podium & Win Distribution
Visualize the distribution of wins and podiums across drivers.

In [None]:
# Rows for top-three finishes, each tagged with a display label
podium_labels = {1: 'ü•á Win', 2: 'ü•à 2nd', 3: 'ü•â 3rd'}
podium_df = df[df['Position'].isin(list(podium_labels))].copy()
podium_df['Position_Label'] = podium_df['Position'].map(podium_labels)

# Driver x label count table with a row total, smallest total first so the
# biggest podium collector is drawn at the top of the horizontal chart
podium_counts = (
    podium_df
    .groupby(['Driver', 'Position_Label'])
    .size()
    .unstack(fill_value=0)
    .assign(Total_Podiums=lambda counts: counts.sum(axis=1))
    .sort_values('Total_Podiums', ascending=True)
    .loc[lambda counts: counts['Total_Podiums'] > 0]
)

# Stacked horizontal bars: one trace per podium step, in gold / silver / bronze
fig = go.Figure()

colors = {'ü•á Win': '#FFD700', 'ü•à 2nd': '#C0C0C0', 'ü•â 3rd': '#CD7F32'}

for position, medal_colour in colors.items():
    if position not in podium_counts.columns:
        continue  # nobody finished in this slot, so there is nothing to draw
    fig.add_trace(go.Bar(
        y=podium_counts.index,
        x=podium_counts[position],
        name=position,
        orientation='h',
        marker_color=medal_colour,
    ))

fig.update_layout(
    barmode='stack',
    title='üèÜ Podium Distribution by Driver',
    xaxis_title='Number of Podiums',
    yaxis_title='Driver',
    height=600,
    template='plotly_dark',
    legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='center', x=0.5),
)
fig.show()

# Donut chart of race wins per driver, most wins first
wins_data = df[df['Position'] == 1].groupby('Driver').size().sort_values(ascending=False)

wins_donut = go.Pie(
    labels=wins_data.index,
    values=wins_data.values,
    textinfo='label+value',
    hole=0.5,
    marker=dict(colors=px.colors.qualitative.Set2),
)
fig2 = go.Figure(data=[wins_donut])

# The centre annotation shows the total number of wins in the data
fig2.update_layout(
    title='ü•á Race Wins Distribution - 2025 Season',
    height=500,
    template='plotly_dark',
    annotations=[dict(text=f'{wins_data.sum()}<br>Wins', x=0.5, y=0.5, font_size=20, showarrow=False)],
)
fig2.show()

## 10. 🔥 Teammate Battles
Compare teammates within the same team.

In [None]:
# Define teammate pairs (main drivers for 2025)
# Maps each full constructor name to a list of exactly two driver names; the
# first entry is drawn as "Driver 1" and the second as "Driver 2" in the chart below.
# NOTE(review): this is one fixed pairing per team for the whole season, so any
# mid-season seat changes are not represented here — confirm against the race data.
teammate_pairs = {
    'McLaren Mercedes': ['Lando Norris', 'Oscar Piastri'],
    'Red Bull Racing Honda RBPT': ['Max Verstappen', 'Liam Lawson'],
    'Ferrari': ['Charles Leclerc', 'Lewis Hamilton'],
    'Mercedes': ['George Russell', 'Kimi Antonelli'],
    'Aston Martin Aramco Mercedes': ['Fernando Alonso', 'Lance Stroll'],
    'Williams Mercedes': ['Alexander Albon', 'Carlos Sainz'],
    'Haas Ferrari': ['Esteban Ocon', 'Oliver Bearman'],
    'Kick Sauber Ferrari': ['Nico Hulkenberg', 'Gabriel Bortoleto'],
    'Alpine Renault': ['Pierre Gasly', 'Jack Doohan'],
    'Racing Bulls Honda RBPT': ['Yuki Tsunoda', 'Isack Hadjar']
}

def calculate_teammate_battle(df, driver1, driver2):
    """Count how often each of two teammates finished ahead of the other.

    A race counts towards the tally only when:
      * both drivers have a row for that track,
      * both have a numeric (non-missing) finishing position, and
      * if `df` has a 'Team' column, both drove for the same team in that race
        (a missing team value is not treated as a mismatch). Without this
        check, races where the two drivers were in different cars would be
        counted as a "teammate" battle.

    If a driver has several rows for the same track, only the first is used.

    Args:
        df: Race results with 'Driver', 'Track' and 'Position' columns and,
            optionally, a 'Team' column.
        driver1: Name of the first driver.
        driver2: Name of the second driver.

    Returns:
        Tuple `(d1_wins, d2_wins)` of plain ints: the number of counted races
        in which driver1 / driver2 finished with the lower position number.
    """
    has_team = 'Team' in df.columns
    wanted_columns = ['Track', 'Position'] + (['Team'] if has_team else [])

    def _first_result_per_track(driver):
        # One row per track for this driver, keeping the first occurrence
        rows = df.loc[df['Driver'] == driver, wanted_columns]
        return rows.drop_duplicates(subset='Track', keep='first')

    # An inner join on Track pairs the two drivers' results race by race, which
    # replaces re-filtering both frames once per race.
    paired = _first_result_per_track(driver1).merge(
        _first_result_per_track(driver2), on='Track', suffixes=('_d1', '_d2')
    )

    if has_team:
        team_1, team_2 = paired['Team_d1'], paired['Team_d2']
        # Unknown teams are kept; only a confirmed mismatch excludes the race
        paired = paired[team_1.eq(team_2) | team_1.isna() | team_2.isna()]

    # Non-numeric positions become NaN; every comparison against NaN is False,
    # so such races add to neither tally.
    pos_1 = pd.to_numeric(paired['Position_d1'], errors='coerce')
    pos_2 = pd.to_numeric(paired['Position_d2'], errors='coerce')

    d1_wins = int((pos_1 < pos_2).sum())
    d2_wins = int((pos_2 < pos_1).sum())
    return d1_wins, d2_wins

# One record per team: short labels plus each driver's head-to-head tally
battles_data = []
for team, drivers in teammate_pairs.items():
    if len(drivers) != 2:
        continue  # only well-formed pairs can be compared
    d1, d2 = drivers
    d1_wins, d2_wins = calculate_teammate_battle(df, d1, d2)
    battles_data.append(dict(
        Team=team.split()[0],     # first word of the constructor name
        Driver1=d1.split()[-1],   # last word of the driver's name
        Driver2=d2.split()[-1],
        D1_Wins=d1_wins,
        D2_Wins=d2_wins,
    ))

battles_df = pd.DataFrame(battles_data)

# Diverging bar chart: Driver 1 extends to the left (negative x), Driver 2 to the right
fig = go.Figure()

for slot, direction, bar_colour in (('1', -1, '#ff1e00'), ('2', 1, '#00d4ff')):
    tally = battles_df[f'D{slot}_Wins']
    fig.add_trace(go.Bar(
        y=battles_df['Team'],
        x=direction * tally,
        orientation='h',
        name=f'Driver {slot}',
        marker_color=bar_colour,
        text=battles_df[f'Driver{slot}'] + ' (' + tally.astype(str) + ')',
        textposition='inside',
    ))

fig.update_layout(
    barmode='relative',
    title='üî• Teammate Battles - Race Finish Comparison',
    xaxis_title='Races Won (Left: Driver 1, Right: Driver 2)',
    yaxis_title='Team',
    height=500,
    template='plotly_dark',
    legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='center', x=0.5),
)
fig.show()

## 11. 📊 Points Scoring Distribution
Analyze how points are distributed across the grid.

In [None]:
# Four-panel overview of how points are spread across drivers, teams and races
panel_titles = ('Points Distribution (Box Plot)',
                'Points Frequency by Driver',
                'Avg Points per Race by Team',
                'Points Scored per Race')
fig = make_subplots(rows=2, cols=2, subplot_titles=panel_titles)

# Panel (1, 1): per-race points of the ten highest scorers, labelled by last name
top_10 = driver_stats.nlargest(10, 'Total_Points').index.tolist()
box_data = df[df['Driver'].isin(top_10)]

for driver in top_10:
    driver_points = box_data.loc[box_data['Driver'] == driver, 'Points']
    fig.add_trace(
        go.Box(y=driver_points, name=driver.split()[-1], boxpoints='all'),
        row=1, col=1,
    )

# Panel (1, 2): histogram of every single-race points score
points_histogram = go.Histogram(x=df['Points'], nbinsx=26,
                                marker_color='#ff1e00', name='Frequency')
fig.add_trace(points_histogram, row=1, col=2)

# Panel (2, 1): mean points per entry for each team, labelled by the first word of its name
team_avg = df.groupby('Team')['Points'].mean().sort_values(ascending=True)
short_team_names = [team_name.split()[0] for team_name in team_avg.index]
fig.add_trace(
    go.Bar(x=team_avg.values, y=short_team_names,
           orientation='h', marker_color='#00d4ff', name='Avg Points'),
    row=2, col=1,
)

# Panel (2, 2): total points awarded at each race, in calendar order
race_points = df.groupby('Track')['Points'].sum()
race_points = race_points.reindex([race for race in race_order if race in race_points.index])
round_numbers = list(range(1, len(race_points) + 1))
fig.add_trace(
    go.Scatter(x=round_numbers, y=race_points.values,
               mode='lines+markers', marker_color='#FFD700', name='Total Points'),
    row=2, col=2,
)

fig.update_layout(height=800, template='plotly_dark',
                  title_text='üìä Points Distribution Analysis',
                  showlegend=False)
fig.show()

## 12. 🌍 Race Track Analysis
Analyze performance patterns across different circuits.

In [None]:
# Race track analysis
from src.analysis import calculate_race_stats


def _in_calendar_order(per_track):
    """Reindex a track-indexed Series/DataFrame to follow race_order, skipping absent tracks."""
    return per_track.reindex([race for race in race_order if race in per_track.index])


# Per-race aggregates from the shared analysis module, in calendar order
race_stats = _in_calendar_order(calculate_race_stats(df))

# Four-panel track overview
fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=('Finish Rate by Track',
                    'Total Points Awarded per Race',
                    'Race Winners',
                    'Average Grid Spread'),
)

# Panel (1, 1): finish rate per track, coloured by the rate itself
fig.add_trace(
    go.Bar(x=race_stats.index, y=race_stats['Finish_Rate'],
           marker=dict(color=race_stats['Finish_Rate'], colorscale='RdYlGn'),
           name='Finish Rate'),
    row=1, col=1,
)

# Panel (1, 2): points handed out at each race
fig.add_trace(
    go.Bar(x=race_stats.index, y=race_stats['Points'],
           marker_color='#FFD700', name='Total Points'),
    row=1, col=2,
)

# Panel (2, 1): number of races won by each driver
race_winners = _in_calendar_order(
    df[df['Position'] == 1].groupby('Track')['Driver'].first()
)
winner_counts = race_winners.value_counts()

fig.add_trace(
    go.Bar(x=winner_counts.index, y=winner_counts.values,
           marker_color='#ff1e00', name='Race Wins'),
    row=2, col=1,
)

# Panel (2, 2): mean absolute grid-to-finish change per track (finished entries only)
position_spread = _in_calendar_order(
    finished_df['Positions_Gained'].abs().groupby(finished_df['Track']).mean()
)

fig.add_trace(
    go.Bar(x=position_spread.index, y=position_spread.values,
           marker_color='#00d4ff', name='Avg Position Change'),
    row=2, col=2,
)

fig.update_layout(height=800, template='plotly_dark',
                  title_text='üåç Race Track Analysis',
                  showlegend=False)
fig.update_xaxes(tickangle=45)
fig.show()

## 13. 🎨 Driver Performance Radar Chart
Multi-dimensional comparison of driver performance metrics.

In [None]:
def create_radar_chart(drivers_to_compare,
                       title='üé® Driver Performance Radar - Top 5 Championship Contenders'):
    """Create a radar chart comparing several drivers on five season metrics.

    Each metric is rescaled to 0-100 relative to the best value of that metric
    across all drivers in the notebook-level `driver_stats` table, so every
    axis shares the same range.

    Args:
        drivers_to_compare: Iterable of driver names. Names that are not in
            the `driver_stats` index are skipped without error.
        title: Figure title. Defaults to the "Top 5" wording used by the
            notebook; pass a different title when comparing any other
            selection so the chart does not mislabel its contents.

    Returns:
        The Plotly figure with one closed polygon per plotted driver.
    """
    # Metric columns in driver_stats and the labels shown on the radar axes
    metrics = ['Total_Points', 'Wins', 'Podium', 'Finish_Rate', 'Fastest_Laps']
    metric_labels = ['Points', 'Wins', 'Podiums', 'Reliability', 'Fastest Laps']

    # Best value per metric across all drivers; used as the 100% reference
    max_vals = {m: driver_stats[m].max() for m in metrics}

    fig = go.Figure()

    colors = ['#ff1e00', '#00d4ff', '#00ff00', '#FFD700', '#ff6b6b']

    for i, driver in enumerate(drivers_to_compare):
        if driver not in driver_stats.index:
            continue

        stats = driver_stats.loc[driver]
        # A metric whose best value is not positive is plotted as 0 to avoid dividing by zero
        values = [(stats[m] / max_vals[m]) * 100 if max_vals[m] > 0 else 0 for m in metrics]
        values.append(values[0])  # repeat the first point to close the polygon

        fig.add_trace(go.Scatterpolar(
            r=values,
            theta=metric_labels + [metric_labels[0]],
            fill='toself',
            name=driver,
            line=dict(color=colors[i % len(colors)]),  # palette repeats after five drivers
            opacity=0.6
        ))

    fig.update_layout(
        polar=dict(
            radialaxis=dict(visible=True, range=[0, 100])
        ),
        showlegend=True,
        title=title,
        template='plotly_dark',
        height=600
    )
    return fig

# Radar comparison of the five highest scorers
top_5_drivers = list(driver_stats.nlargest(5, 'Total_Points').index)
fig = create_radar_chart(top_5_drivers)
fig.show()

## 14. 📈 Summary Statistics Dashboard
Final overview of the 2025 F1 Season.

In [None]:
# Text summary first, then a 2x2 grid of headline numbers
banner = "=" * 60
rule = "-" * 40

print(banner)
print("üèÅ F1 2025 SEASON SUMMARY üèÅ")
print(banner)

# Drivers' standings: ten highest scorers
print("\nüèÜ DRIVERS' CHAMPIONSHIP - TOP 10")
print(rule)
top_10_final = driver_stats.nlargest(10, 'Total_Points')[['Total_Points', 'Wins', 'Podium']]
display(top_10_final)

# Constructors' standings: all teams, most points first
print("\nüèéÔ∏è CONSTRUCTORS' CHAMPIONSHIP")
print(rule)
team_final = team_stats.sort_values('Total_Points', ascending=False)[['Total_Points', 'Finish_Rate']]
display(team_final)

# Season-wide facts and record holders
print("\nüìä KEY STATISTICS")
print(rule)
print(f"Total Races: {df['Track'].nunique()}")
print(f"Total Drivers: {df['Driver'].nunique()}")
print(f"Most Wins: {driver_stats['Wins'].idxmax()} ({int(driver_stats['Wins'].max())} wins)")
print(f"Most Podiums: {driver_stats['Podium'].idxmax()} ({int(driver_stats['Podium'].max())} podiums)")
print(f"Best Avg Position: {driver_stats['Avg_Position'].idxmin()} ({driver_stats['Avg_Position'].min():.2f})")
print(f"Most Reliable Team: {team_stats['Finish_Rate'].idxmax()} ({team_stats['Finish_Rate'].max():.1f}%)")

# Indicator traces need the "indicator" subplot type in every grid cell
indicator_cell = {"type": "indicator"}
fig = make_subplots(
    rows=2, cols=2,
    specs=[[indicator_cell, indicator_cell],
           [indicator_cell, indicator_cell]],
    subplot_titles=('World Champion', 'Constructor Champion',
                    'Most Race Wins', 'Total Races'),
)

champion = driver_stats['Total_Points'].idxmax()
champion_points = driver_stats['Total_Points'].max()
constructor_champ = team_stats['Total_Points'].idxmax()
constructor_points = team_stats['Total_Points'].max()
most_wins_driver = driver_stats['Wins'].idxmax()
most_wins = int(driver_stats['Wins'].max())

# (value, title text, number suffix, row, col) for each headline tile
summary_tiles = [
    (champion_points, f"üèÜ {champion}", " pts", 1, 1),
    (constructor_points, f"üèéÔ∏è {constructor_champ.split()[0]}", " pts", 1, 2),
    (most_wins, f"ü•á {most_wins_driver}", " wins", 2, 1),
    (df['Track'].nunique(), "üèÅ Season", " races", 2, 2),
]
for tile_value, tile_title, tile_suffix, tile_row, tile_col in summary_tiles:
    fig.add_trace(
        go.Indicator(
            mode="number",
            value=tile_value,
            title={"text": tile_title},
            number={"suffix": tile_suffix},
        ),
        row=tile_row, col=tile_col,
    )

fig.update_layout(height=500, template='plotly_dark',
                  title_text='üèÅ F1 2025 Season Final Summary')
fig.show()

print("\n" + banner)
print("üéâ END OF ANALYSIS üéâ")
print(banner)

---
# 🆕 NEW DATA: Qualifying, Sprint & Sprint Qualifying Analysis

We now have 3 additional datasets to analyze:
- **Qualifying Results** - Saturday qualifying sessions (Q1, Q2, Q3)
- **Sprint Results** - Sprint race results
- **Sprint Qualifying Results** - Sprint shootout results

In [None]:
# Load new datasets
# Paths are built from project_root, the same way the race-results CSV is located,
# instead of a hard-coded '../data' prefix that depends on the kernel's working directory.
data_dir = project_root / 'data'
quali_df = pd.read_csv(data_dir / 'Formula1_2025Season_QualifyingResults.csv')
sprint_df = pd.read_csv(data_dir / 'Formula1_2025Season_SprintResults.csv')
sprint_quali_df = pd.read_csv(data_dir / 'Formula1_2025Season_SprintQualifyingResults.csv')

# Quick sanity summary: row counts and number of distinct tracks per dataset
print("üìä New Datasets Loaded:")
print(f"  - Qualifying Results: {len(quali_df)} records, {quali_df['Track'].nunique()} races")
print(f"  - Sprint Results: {len(sprint_df)} records, {sprint_df['Track'].nunique()} sprint races")
print(f"  - Sprint Qualifying: {len(sprint_quali_df)} records, {sprint_quali_df['Track'].nunique()} events")

print("\nüèéÔ∏è Sprint Race Locations:", sprint_df['Track'].unique().tolist())

## 15. ⏱️ Qualifying Performance Analysis
Analyze driver performance across Q1, Q2, and Q3 sessions.

In [None]:
# Qualifying statistics
# Work on a copy whose Position is guaranteed numeric; the raw CSV is read
# without any cleaning, and non-numeric entries would break the mean/min below.
# NOTE(review): whether the file actually contains such entries is not visible here.
quali_numeric = quali_df.assign(
    Position=pd.to_numeric(quali_df['Position'], errors='coerce')
)

quali_stats = quali_numeric.groupby('Driver').agg({
    'Position': ['mean', 'min', 'count'],
    'Laps': 'sum'
}).round(2)
quali_stats.columns = ['Avg_Quali_Position', 'Best_Quali', 'Qualifying_Sessions', 'Total_Quali_Laps']
quali_stats = quali_stats.sort_values('Avg_Quali_Position')

# Count pole positions.
# `poles` only has entries for drivers with at least one pole, so it has to be
# reindexed onto every driver with fill_value=0. Calling fillna(0) on `poles`
# before the assignment does nothing, and the index alignment then leaves NaN
# for every other driver.
poles = quali_numeric[quali_numeric['Position'] == 1].groupby('Driver').size()
quali_stats['Poles'] = poles.reindex(quali_stats.index, fill_value=0).astype(int)

# Q3 appearances (a non-empty Q3 value), aligned the same way. Otherwise drivers
# who never reached Q3 get NaN, which sorts last and leaks into the tail(15) below.
q3_appearances = (
    quali_numeric[quali_numeric['Q3'].notna() & (quali_numeric['Q3'] != '')]
    .groupby('Driver')
    .size()
)
quali_stats['Q3_Appearances'] = q3_appearances.reindex(quali_stats.index, fill_value=0).astype(int)

# Create visualizations
fig = make_subplots(rows=2, cols=2,
                    subplot_titles=('üèÜ Pole Positions by Driver',
                                   'Average Qualifying Position (Top 15)',
                                   'Q3 Appearance Rate',
                                   'Best vs Average Qualifying Position'))

# 1. Pole Positions (only drivers with at least one pole)
pole_data = quali_stats[quali_stats['Poles'] > 0].sort_values('Poles', ascending=True)
fig.add_trace(go.Bar(x=pole_data['Poles'], y=pole_data.index,
                    orientation='h', marker_color='#FFD700',
                    text=pole_data['Poles'], textposition='outside'), row=1, col=1)

# 2. Average Qualifying Position (15 best averages)
top_quali = quali_stats.nsmallest(15, 'Avg_Quali_Position')
fig.add_trace(go.Bar(x=top_quali.index, y=top_quali['Avg_Quali_Position'],
                    marker=dict(color=top_quali['Avg_Quali_Position'], colorscale='RdYlGn_r'),
                    text=top_quali['Avg_Quali_Position'], textposition='outside'), row=1, col=2)

# 3. Q3 Appearance Rate (15 highest rates, as a percentage of sessions entered)
q3_rate = (quali_stats['Q3_Appearances'] / quali_stats['Qualifying_Sessions'] * 100).sort_values(ascending=True)
q3_rate = q3_rate.tail(15)
fig.add_trace(go.Bar(x=q3_rate.values, y=q3_rate.index,
                    orientation='h',
                    marker=dict(color=q3_rate.values, colorscale='Blues'),
                    text=[f'{v:.0f}%' for v in q3_rate.values], textposition='outside'), row=2, col=1)

# 4. Best vs Average Position, coloured by number of poles
fig.add_trace(go.Scatter(x=quali_stats['Best_Quali'], y=quali_stats['Avg_Quali_Position'],
                        mode='markers+text',
                        text=quali_stats.index.str.split().str[-1],  # Last name only
                        textposition='top center',
                        marker=dict(size=12, color=quali_stats['Poles'], colorscale='YlOrRd', showscale=True,
                                   colorbar=dict(title='Poles', x=1.02))), row=2, col=2)

fig.update_layout(height=900, template='plotly_dark',
                  title_text='‚è±Ô∏è Qualifying Performance Analysis',
                  showlegend=False)
fig.update_yaxes(autorange="reversed", row=1, col=2)
fig.show()

print("\nüèÜ Top 10 Qualifiers (by Average Position):")
display(quali_stats.nsmallest(10, 'Avg_Quali_Position')[['Avg_Quali_Position', 'Best_Quali', 'Poles', 'Q3_Appearances']])

## 16. 🚀 Qualifying to Race Conversion
Analyze how qualifying positions translate to race results.

In [None]:
# Merge qualifying with race results (one row per driver per track present in both)
# The qualifying position is coerced to numeric so the subtraction below cannot
# fail on non-numeric entries in the raw CSV.
# NOTE(review): whether the file actually contains such entries is not visible here.
quali_positions = quali_df[['Track', 'Driver']].assign(
    Quali_Position=pd.to_numeric(quali_df['Position'], errors='coerce')
)
quali_race = pd.merge(
    quali_positions,
    df[['Track', 'Driver', 'Position']].rename(columns={'Position': 'Race_Position'}),
    on=['Track', 'Driver'],
    how='inner'
)

# Calculate positions gained/lost from qualifying to race (positive = gained places)
quali_race['Positions_Change'] = quali_race['Quali_Position'] - quali_race['Race_Position']

# Average conversion per driver
conversion_stats = quali_race.groupby('Driver').agg({
    'Quali_Position': 'mean',
    'Race_Position': 'mean',
    'Positions_Change': 'mean'
}).round(2)
conversion_stats = conversion_stats.sort_values('Positions_Change', ascending=False)

# Create visualizations
# The last panel plots the podium rate of front-row starters, so its title says
# so (it was previously labelled as a win rate).
fig = make_subplots(rows=2, cols=2,
                    subplot_titles=('Quali Position vs Race Finish (All Races)',
                                   'Average Positions Gained from Quali to Race',
                                   'Pole Position Conversion Rate',
                                   'Front Row Start: Podium Rate'))

# 1. Scatter: Quali vs Race Position
fig.add_trace(go.Scatter(x=quali_race['Quali_Position'], y=quali_race['Race_Position'],
                        mode='markers',
                        marker=dict(size=8, color=quali_race['Positions_Change'],
                                   colorscale='RdYlGn', showscale=True,
                                   colorbar=dict(title='Positions<br>Gained')),
                        text=quali_race['Driver'],
                        hovertemplate='%{text}<br>Quali: P%{x}<br>Race: P%{y}<extra></extra>'), row=1, col=1)
# Add diagonal line (finished where qualified)
fig.add_trace(go.Scatter(x=[1,20], y=[1,20], mode='lines',
                        line=dict(dash='dash', color='white')), row=1, col=1)

# 2. Positions Gained bar chart (ten best average gainers)
top_gainers = conversion_stats.nlargest(10, 'Positions_Change')
colors = ['#00ff00' if x >= 0 else '#ff0000' for x in top_gainers['Positions_Change']]
fig.add_trace(go.Bar(x=top_gainers['Positions_Change'], y=top_gainers.index,
                    orientation='h', marker_color=colors,
                    text=[f'+{x:.1f}' if x >= 0 else f'{x:.1f}' for x in top_gainers['Positions_Change']],
                    textposition='outside'), row=1, col=2)

# 3. Pole Conversion Rate (Pole -> Win)
# Grouping the boolean "won from pole" Series gives the same counts as applying
# a lambda to each group, without the deprecated groupby.apply-on-groups
# behaviour and with a well-defined (empty) result when there are no pole rows.
pole_races = quali_race[quali_race['Quali_Position'] == 1]
pole_wins = pole_races['Race_Position'].eq(1).groupby(pole_races['Driver']).sum()
pole_total = pole_races.groupby('Driver').size()
pole_conversion = (pole_wins / pole_total * 100).dropna().sort_values(ascending=True)

fig.add_trace(go.Bar(x=pole_conversion.values, y=pole_conversion.index,
                    orientation='h', marker_color='#FFD700',
                    text=[f'{v:.0f}%' for v in pole_conversion.values],
                    textposition='outside'), row=2, col=1)

# 4. Front Row (P1-P2) to Podium Rate: share of front-row starts that ended in the top three
front_row = quali_race[quali_race['Quali_Position'] <= 2]
front_row_stats = (
    front_row['Race_Position'].le(3)
    .groupby(front_row['Driver'])
    .mean()
    .mul(100)
    .round(1)
    .to_frame('Podium_Rate')
    .sort_values('Podium_Rate', ascending=True)
)

fig.add_trace(go.Bar(x=front_row_stats['Podium_Rate'], y=front_row_stats.index,
                    orientation='h', marker=dict(color=front_row_stats['Podium_Rate'], colorscale='Viridis'),
                    text=[f'{v:.0f}%' for v in front_row_stats['Podium_Rate']],
                    textposition='outside'), row=2, col=2)

fig.update_yaxes(autorange="reversed", row=1, col=1)
fig.update_layout(height=900, template='plotly_dark',
                  title_text='üöÄ Qualifying to Race Conversion Analysis',
                  showlegend=False)
fig.show()

## 17. 🏃 Sprint Race Analysis
Analyze driver performance in Sprint races (shorter format races with 8 points for winner).

In [None]:
# Sprint Race Statistics
# Coerce to numeric: unparseable points count as 0, unparseable positions
# become NaN (presumably non-classified entries - confirm against the CSV).
sprint_df['Points'] = pd.to_numeric(sprint_df['Points'], errors='coerce').fillna(0)
sprint_df['Position'] = pd.to_numeric(sprint_df['Position'], errors='coerce')

# Per-driver aggregates over rows that have a numeric finishing position
sprint_stats = sprint_df[sprint_df['Position'].notna()].groupby('Driver').agg({
    'Points': 'sum',
    'Position': ['mean', 'min', 'count']
}).round(2)
sprint_stats.columns = ['Sprint_Points', 'Avg_Sprint_Position', 'Best_Sprint', 'Sprint_Races']
sprint_stats = sprint_stats.sort_values('Sprint_Points', ascending=False)

# Sprint wins
# The counts only contain drivers with at least one win. Reindex onto the full
# driver list BEFORE casting so everyone else gets 0: calling fillna(0) on the
# counts themselves is a no-op, and column assignment would then align the
# missing drivers to NaN and turn the column into floats.
sprint_wins = sprint_df[sprint_df['Position'] == 1].groupby('Driver').size()
sprint_stats['Sprint_Wins'] = sprint_wins.reindex(sprint_stats.index, fill_value=0).astype(int)

# Sprint podiums (same alignment rule as the wins above)
sprint_podiums = sprint_df[sprint_df['Position'] <= 3].groupby('Driver').size()
sprint_stats['Sprint_Podiums'] = sprint_podiums.reindex(sprint_stats.index, fill_value=0).astype(int)

# Sprint dashboard: 2x2 grid (points bar, wins donut, average-position bar,
# per-driver position box plots).
fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=(
        'Sprint Points by Driver',
        'Sprint Race Wins Distribution',
        'Average Sprint Position (Top 15)',
        'Sprint Consistency (Position Spread)',
    ),
    specs=[
        [{"type": "bar"}, {"type": "pie"}],
        [{"type": "bar"}, {"type": "box"}],
    ],
)

# Data slices feeding each panel
top_sprint = sprint_stats.nlargest(12, 'Sprint_Points')
wins_data = sprint_stats.loc[sprint_stats['Sprint_Wins'] > 0, 'Sprint_Wins']
avg_sprint = sprint_stats.nsmallest(15, 'Avg_Sprint_Position')
top_sprint_drivers = list(sprint_stats.nlargest(8, 'Sprint_Points').index)

# Collect every trace with its grid cell, then add them in one pass
sprint_panels = [
    # 1. Sprint points for the twelve highest scorers
    (go.Bar(x=top_sprint.index, y=top_sprint['Sprint_Points'],
            marker=dict(color=top_sprint['Sprint_Points'], colorscale='Reds'),
            text=top_sprint['Sprint_Points'], textposition='outside'), 1, 1),
    # 2. Share of sprint wins among drivers with at least one win
    (go.Pie(labels=wins_data.index, values=wins_data.values,
            textinfo='label+value', hole=0.4,
            marker=dict(colors=px.colors.qualitative.Set2)), 1, 2),
    # 3. Fifteen best average sprint finishing positions
    (go.Bar(x=avg_sprint.index, y=avg_sprint['Avg_Sprint_Position'],
            marker=dict(color=avg_sprint['Avg_Sprint_Position'], colorscale='RdYlGn_r'),
            text=avg_sprint['Avg_Sprint_Position'], textposition='outside'), 2, 1),
]

# 4. One box per top-8 points scorer, labelled with the last word of the name
for driver in top_sprint_drivers:
    driver_sprints = sprint_df.loc[sprint_df['Driver'] == driver, 'Position'].dropna()
    sprint_panels.append(
        (go.Box(y=driver_sprints, name=driver.split()[-1], boxpoints='all'), 2, 2)
    )

for trace, grid_row, grid_col in sprint_panels:
    fig.add_trace(trace, row=grid_row, col=grid_col)

# The y-axis of the average-position panel is reversed
fig.update_yaxes(autorange="reversed", row=2, col=1)
fig.update_layout(
    height=900,
    template='plotly_dark',
    title_text='üèÉ Sprint Race Performance Analysis',
    showlegend=False,
)
fig.show()

print("\nüèÉ Sprint Race Standings:")
standings_columns = ['Sprint_Points', 'Sprint_Wins', 'Sprint_Podiums', 'Avg_Sprint_Position']
display(sprint_stats[standings_columns].head(10))

## 18. 📊 Sprint Qualifying (Shootout) Analysis
Analyze performance in the Sprint Shootout format.

In [None]:
# Sprint Qualifying Statistics
# Unparseable positions become NaN and are excluded from the aggregates below.
sprint_quali_df['Position'] = pd.to_numeric(sprint_quali_df['Position'], errors='coerce')

# Per-driver aggregates, best average sprint-qualifying position first
sq_stats = sprint_quali_df[sprint_quali_df['Position'].notna()].groupby('Driver').agg({
    'Position': ['mean', 'min', 'count'],
    'Laps': 'sum'
}).round(2)
sq_stats.columns = ['Avg_SQ_Position', 'Best_SQ', 'SQ_Sessions', 'Total_SQ_Laps']
sq_stats = sq_stats.sort_values('Avg_SQ_Position')

# Sprint Poles
# Only drivers with at least one pole appear in the counts. Reindex onto every
# driver in sq_stats with 0 as the fill BEFORE casting: fillna(0) on the counts
# is a no-op, and column assignment would otherwise align the rest to NaN.
sq_poles = sprint_quali_df[sprint_quali_df['Position'] == 1].groupby('Driver').size()
sq_stats['Sprint_Poles'] = sq_poles.reindex(sq_stats.index, fill_value=0).astype(int)

# SQ3 appearances: rows whose 'Q3' entry is neither missing nor an empty string
# (same alignment rule as the poles above)
sq3_appearances = sprint_quali_df[sprint_quali_df['Q3'].notna() & (sprint_quali_df['Q3'] != '')].groupby('Driver').size()
sq_stats['SQ3_Appearances'] = sq3_appearances.reindex(sq_stats.index, fill_value=0).astype(int)

# Create visualizations: 2x2 grid for the Sprint Shootout
fig = make_subplots(rows=2, cols=2,
                    subplot_titles=('Sprint Poles Distribution', 
                                   'Average Sprint Quali Position',
                                   'Sprint Quali vs Sprint Race',
                                   'SQ3 Appearance Rate'))

# 1. Sprint Poles (only drivers with at least one pole)
poles_data = sq_stats[sq_stats['Sprint_Poles'] > 0].sort_values('Sprint_Poles', ascending=True)
fig.add_trace(go.Bar(x=poles_data['Sprint_Poles'], y=poles_data.index,
                    orientation='h', marker_color='#ff1e00',
                    text=poles_data['Sprint_Poles'], textposition='outside'), row=1, col=1)

# 2. Average SQ Position (fifteen best averages)
top_sq = sq_stats.nsmallest(15, 'Avg_SQ_Position')
fig.add_trace(go.Bar(x=top_sq.index, y=top_sq['Avg_SQ_Position'],
                    marker=dict(color=top_sq['Avg_SQ_Position'], colorscale='RdYlGn_r'),
                    text=top_sq['Avg_SQ_Position'], textposition='outside'), row=1, col=2)

# 3. Sprint Quali Position vs Sprint Race Position
# Inner merge keeps only (Track, Driver) pairs present in both tables; rows
# without a numeric value in every column are then dropped.
sq_sprint = pd.merge(
    sprint_quali_df[['Track', 'Driver', 'Position']].rename(columns={'Position': 'SQ_Position'}),
    sprint_df[['Track', 'Driver', 'Position']].rename(columns={'Position': 'Sprint_Position'}),
    on=['Track', 'Driver'],
    how='inner'
)
sq_sprint['Sprint_Position'] = pd.to_numeric(sq_sprint['Sprint_Position'], errors='coerce')
sq_sprint = sq_sprint.dropna()

fig.add_trace(go.Scatter(x=sq_sprint['SQ_Position'], y=sq_sprint['Sprint_Position'],
                        mode='markers', 
                        marker=dict(size=10, color=sq_sprint['SQ_Position'] - sq_sprint['Sprint_Position'],
                                   colorscale='RdYlGn', showscale=True),
                        text=sq_sprint['Driver'],
                        hovertemplate='%{text}<br>SQ: P%{x}<br>Sprint: P%{y}<extra></extra>'), row=2, col=1)
# Reference diagonal: sprint finish equal to sprint-qualifying position
fig.add_trace(go.Scatter(x=[1,20], y=[1,20], mode='lines', 
                        line=dict(dash='dash', color='white')), row=2, col=1)

# 4. SQ3 Rate
# A missing appearance count means the driver never reached SQ3, i.e. a 0% rate.
# Fill before ranking: sort_values() puts NaN last, so tail(15) would otherwise
# select exactly those drivers and label their bars "nan%".
sq3_rate = (
    (sq_stats['SQ3_Appearances'] / sq_stats['SQ_Sessions'] * 100)
    .fillna(0)
    .sort_values(ascending=True)
    .tail(15)
)
fig.add_trace(go.Bar(x=sq3_rate.values, y=sq3_rate.index,
                    orientation='h',
                    marker=dict(color=sq3_rate.values, colorscale='Purples'),
                    text=[f'{v:.0f}%' for v in sq3_rate.values], textposition='outside'), row=2, col=2)

# Reversed y-axes on the two position panels so P1 sits at the top
fig.update_yaxes(autorange="reversed", row=1, col=2)
fig.update_yaxes(autorange="reversed", row=2, col=1)
fig.update_layout(height=900, template='plotly_dark',
                  title_text='üìä Sprint Qualifying (Shootout) Analysis',
                  showlegend=False)
fig.show()

## 19. 🎯 Saturday vs Sunday Performance Comparison
Compare driver performance between Sprint/Qualifying (Saturday) and Race (Sunday).

In [None]:
# Create comprehensive comparison
# One row per driver in driver_stats, with qualifying and sprint summaries
# left-joined on the driver index. The join always creates all six columns,
# so a season without sprint data cannot raise a KeyError further down.
comparison_df = (
    driver_stats[['Total_Points', 'Avg_Position']]
    .rename(columns={'Total_Points': 'Race_Points', 'Avg_Position': 'Race_Avg_Position'})
    .join(quali_stats[['Avg_Quali_Position', 'Poles']]
          .rename(columns={'Avg_Quali_Position': 'Quali_Avg_Position'}))
    .join(sprint_stats[['Sprint_Points', 'Avg_Sprint_Position']]
          .rename(columns={'Avg_Sprint_Position': 'Sprint_Avg_Position'}))
    .rename_axis('Driver')
)

# Missing points/poles really are zero. Missing average positions are NOT:
# they stay NaN so a driver without data is never treated as "P0" (better
# than P1) in the scatter plot or in the position differences below.
count_columns = ['Race_Points', 'Poles', 'Sprint_Points']
comparison_df[count_columns] = comparison_df[count_columns].fillna(0)

# Calculate total points (Race + Sprint)
comparison_df['Total_All_Points'] = comparison_df['Race_Points'] + comparison_df['Sprint_Points']

# 2x2 comparison dashboard
fig = make_subplots(rows=2, cols=2,
                    subplot_titles=('Total Points Breakdown (Race + Sprint)',
                                   'Quali Position vs Race Position',
                                   'One-Lap Pace vs Race Pace Comparison',
                                   'Saturday Specialist vs Sunday Specialist'),
                    specs=[[{"type": "bar"}, {"type": "scatter"}],
                           [{"type": "bar"}, {"type": "bar"}]])

# 1. Stacked bar - Race + Sprint Points (stacking comes from barmode='stack' below)
top_total = comparison_df.nlargest(12, 'Total_All_Points')
fig.add_trace(go.Bar(x=top_total.index, y=top_total['Race_Points'],
                    name='Race Points', marker_color='#ff1e00'), row=1, col=1)
fig.add_trace(go.Bar(x=top_total.index, y=top_total['Sprint_Points'],
                    name='Sprint Points', marker_color='#FFD700'), row=1, col=1)

# 2. Quali vs Race Position (markers labelled with the last word of the name)
fig.add_trace(go.Scatter(x=comparison_df['Quali_Avg_Position'], 
                        y=comparison_df['Race_Avg_Position'],
                        mode='markers+text',
                        text=comparison_df.index.str.split().str[-1],
                        textposition='top center',
                        marker=dict(size=12, color=comparison_df['Total_All_Points'],
                                   colorscale='Viridis', showscale=True)), row=1, col=2)
fig.add_trace(go.Scatter(x=[1,20], y=[1,20], mode='lines',
                        line=dict(dash='dash', color='white')), row=1, col=2)

# 3. One-Lap vs Race Pace
# One-lap = Quali, Race = Race. A lower position number is better, so
# quali minus race is POSITIVE when the driver finishes ahead of where they
# qualify. "Better in Race" is therefore the largest positive differences.
comparison_df['Quali_Race_Diff'] = comparison_df['Quali_Avg_Position'] - comparison_df['Race_Avg_Position']
sorted_diff = (
    comparison_df
    .dropna(subset=['Quali_Race_Diff'])
    .sort_values('Quali_Race_Diff', ascending=False)
)

# Ten biggest average gainers; the '+' format flag prints an explicit sign
race_better = sorted_diff.head(10)
fig.add_trace(go.Bar(x=race_better.index, y=race_better['Quali_Race_Diff'],
                    marker_color='#00ff00', name='Better in Race',
                    text=[f'{v:+.1f}' for v in race_better['Quali_Race_Diff']],
                    textposition='outside'), row=2, col=1)

# 4. Saturday vs Sunday Specialist
# Saturday = avg of Quali + Sprint positions, Sunday = Race position.
# Drivers without sprint data fall back to their qualifying average alone.
comparison_df['Saturday_Avg'] = (
    (comparison_df['Quali_Avg_Position'] + comparison_df['Sprint_Avg_Position']) / 2
).fillna(comparison_df['Quali_Avg_Position'])
comparison_df['Sat_Sun_Diff'] = comparison_df['Saturday_Avg'] - comparison_df['Race_Avg_Position']

# Keep drivers with a real race average and a computable difference.
# Negative diff (gold) = better on Saturday; otherwise blue.
specialist_data = (
    comparison_df[comparison_df['Race_Avg_Position'] > 0]
    .dropna(subset=['Sat_Sun_Diff'])
    .sort_values('Sat_Sun_Diff')
)
colors = ['#FFD700' if x < 0 else '#00d4ff' for x in specialist_data['Sat_Sun_Diff']]
fig.add_trace(go.Bar(x=specialist_data.index, y=specialist_data['Sat_Sun_Diff'],
                    marker_color=colors,
                    text=[f'{v:.1f}' for v in specialist_data['Sat_Sun_Diff']],
                    textposition='outside'), row=2, col=2)

fig.update_layout(height=900, template='plotly_dark', barmode='stack',
                  title_text='üéØ Saturday vs Sunday Performance Comparison',
                  showlegend=True)
fig.update_xaxes(tickangle=45, row=2)
fig.show()

print("\nüèÜ Combined Championship (Race + Sprint Points):")
display(comparison_df.nlargest(10, 'Total_All_Points')[['Race_Points', 'Sprint_Points', 'Total_All_Points', 'Quali_Avg_Position', 'Race_Avg_Position']])

## 20. 📈 Complete Season Summary with All Data
Final comprehensive summary including all datasets.

In [None]:
# Text summary of the season: dataset sizes, combined standings, headline stats.
rule = "=" * 70


def _print_banner(title, lead=""):
    """Print `title` between two 70-character rules; `lead` is prepended to the first rule."""
    print(lead + rule, title, rule, sep="\n")


def _lookup_or_zero(stats, column, drivers):
    """Return `stats.loc[driver, column]` for each driver, or 0 when the driver is not in `stats`."""
    return [stats.loc[d, column] if d in stats.index else 0 for d in drivers]


_print_banner("üèÅ F1 2025 COMPLETE SEASON SUMMARY - ALL DATA üèÅ")

print(
    "\nüìä DATA OVERVIEW:",
    f"  ‚Ä¢ Race Results: {len(df)} entries across {df['Track'].nunique()} races",
    f"  ‚Ä¢ Qualifying Results: {len(quali_df)} entries",
    f"  ‚Ä¢ Sprint Races: {len(sprint_df)} entries across {sprint_df['Track'].nunique()} sprints",
    f"  ‚Ä¢ Sprint Qualifying: {len(sprint_quali_df)} entries",
    sep="\n",
)

_print_banner("üèÜ DRIVERS' CHAMPIONSHIP (Race + Sprint)", lead="\n")

# Top 20 by combined points, with pole and race-win counts attached
standings_columns = ['Race_Points', 'Sprint_Points', 'Total_All_Points']
final_standings = comparison_df.nlargest(20, 'Total_All_Points')[standings_columns].copy()
final_standings['Poles'] = _lookup_or_zero(quali_stats, 'Poles', final_standings.index)
final_standings['Race_Wins'] = _lookup_or_zero(driver_stats, 'Wins', final_standings.index)
display(final_standings)

_print_banner("üèéÔ∏è KEY STATISTICS", lead="\n")

# First row of the points-sorted standings
champion = final_standings.index[0]
champion_total = final_standings.loc[champion, 'Total_All_Points']
champion_race = final_standings.loc[champion, 'Race_Points']
champion_sprint = final_standings.loc[champion, 'Sprint_Points']
print(
    f"\nü•á World Champion: {champion}",
    f"   Total Points: {champion_total:.0f}",
    f"   (Race: {champion_race:.0f} + Sprint: {champion_sprint:.0f})",
    sep="\n",
)

# Season leaders in each category
poles_col = quali_stats['Poles']
wins_col = driver_stats['Wins']
sprint_wins_col = sprint_stats['Sprint_Wins']
avg_quali_col = quali_stats['Avg_Quali_Position']
print(
    f"\nüèÜ Most Pole Positions: {poles_col.idxmax()} ({int(poles_col.max())} poles)",
    f"ü•á Most Race Wins: {wins_col.idxmax()} ({int(wins_col.max())} wins)",
    f"üèÉ Most Sprint Wins: {sprint_wins_col.idxmax()} ({int(sprint_wins_col.max())} sprint wins)",
    f"‚ö° Best Qualifier: {avg_quali_col.idxmin()} (avg P{avg_quali_col.min():.2f})",
    sep="\n",
)

# Final indicator chart: a 2x3 grid of single-number cards.
indicator_cell = {"type": "indicator"}
fig = make_subplots(
    rows=2, cols=3,
    specs=[[indicator_cell] * 3, [indicator_cell] * 3],
    subplot_titles=('World Champion', 'Total Points', 'Pole Positions',
                    'Race Wins', 'Sprint Wins', 'Total Races'),
)

# Each card: (grid row, grid col, value, title text, number suffix).
# The champion card passes a placeholder value of 0 and carries the name in its title.
indicator_cards = [
    (1, 1, 0, f"üèÜ {champion}", ""),
    (1, 2, final_standings.loc[champion, 'Total_All_Points'], "Total Points", " pts"),
    (1, 3, quali_stats['Poles'].max(),
     f"Poles ({quali_stats['Poles'].idxmax().split()[-1]})", ""),
    (2, 1, driver_stats['Wins'].max(),
     f"Race Wins ({driver_stats['Wins'].idxmax().split()[-1]})", ""),
    (2, 2, sprint_stats['Sprint_Wins'].max(),
     f"Sprint Wins ({sprint_stats['Sprint_Wins'].idxmax().split()[-1]})", ""),
    (2, 3, df['Track'].nunique(), "Races Completed", ""),
]

for grid_row, grid_col, card_value, card_title, card_suffix in indicator_cards:
    fig.add_trace(
        go.Indicator(
            mode="number",
            value=card_value,
            title={"text": card_title},
            number={"suffix": card_suffix},
        ),
        row=grid_row, col=grid_col,
    )

fig.update_layout(
    height=500,
    template='plotly_dark',
    title_text='üèÅ F1 2025 Season - Final Statistics Dashboard',
)
fig.show()

# Closing banner
closing_rule = "=" * 70
print("\n" + closing_rule, "üéâ END OF COMPLETE ANALYSIS üéâ", closing_rule, sep="\n")