In [1]:
# Environment Setup and Library Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import sys
import os
from pathlib import Path
import importlib

# Configure project root for module imports
project_root = Path(os.getcwd()).parent
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

# Reload analysis module to ensure latest functions are available
import src.analysis
importlib.reload(src.analysis)

from src.loader import load_data, clean_data
from src.analysis import (calculate_driver_stats, calculate_team_stats, 
                          calculate_combined_standings, calculate_combined_constructor_standings, 
                          F1_2025_CALENDAR)

plt.style.use('seaborn-v0_8-darkgrid')
print('Environment configured successfully.')

Environment configured successfully.


In [None]:
# Environment Setup and Library Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import sys
import os
from pathlib import Path
import importlib
import warnings
warnings.filterwarnings('ignore')

# Configure project root for module imports
project_root = Path(os.getcwd()).parent
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

# Reload analysis module to ensure latest functions are available
import src.analysis
import src.config
importlib.reload(src.analysis)
importlib.reload(src.config)

from src.loader import load_data, clean_data
from src.analysis import (calculate_driver_stats, calculate_team_stats, 
                          calculate_combined_standings, calculate_combined_constructor_standings)
from src.config import TEAM_COLORS, F1_2025_CALENDAR

# Set visualization style
plt.style.use('seaborn-v0_8-darkgrid')
plt.rcParams['figure.figsize'] = (14, 8)
plt.rcParams['font.size'] = 12
plt.rcParams['axes.titlesize'] = 14
plt.rcParams['axes.labelsize'] = 12

print('Environment configured successfully.')
print(f'Team colors loaded: {len(TEAM_COLORS)} teams')

In [None]:
# Load Race and Sprint Datasets for Combined Championship Standings
try:
    df_race = load_data('../data/Formula1_2025Season_RaceResults.csv')
    df_sprint = load_data('../data/Formula1_2025Season_SprintResults.csv')
    df_race = clean_data(df_race)
    
    # Calculate driver and team statistics
    driver_stats = calculate_driver_stats(df_race)
    team_stats = calculate_team_stats(df_race)
    
    # Calculate official combined standings (Race + Sprint points as per FIA regulations)
    combined_driver_standings = calculate_combined_standings(df_race, df_sprint)
    combined_team_standings = calculate_combined_constructor_standings(df_race, df_sprint)
    
    print(f'Dataset loaded: {len(df_race)} race records from {df_race["Track"].nunique()} Grand Prix')
    print(f'Dataset loaded: {len(df_sprint)} sprint records from {df_sprint["Track"].nunique()} Sprint events')
    print(f'\nChampionship Leader: {combined_driver_standings.index[0]} with {combined_driver_standings["Total_Points"].iloc[0]} points')
except Exception as e:
    print(f'Error loading data: {e}')
    raise

---

## 1. Championship Standings

Official FIA World Championship standings combining Race and Sprint points.

---

In [None]:
# Drivers' World Championship Standings (Combined Race + Sprint Points)
top_drivers = combined_driver_standings.head(10).reset_index()

fig = px.bar(top_drivers, x='Total_Points', y='Driver', orientation='h',
             color='Total_Points', color_continuous_scale='Reds',
             title="2025 FIA Formula 1 Drivers' World Championship",
             text='Total_Points',
             hover_data=['Race_Points', 'Sprint_Points', 'Race_Wins'],
             height=500)

fig.update_layout(
    yaxis={'categoryorder':'total ascending'},
    template='plotly_dark',
    margin=dict(l=150, r=50, t=80, b=50),
    title_font_size=18,
    font=dict(size=12)
)
fig.update_traces(textposition='outside', textfont_size=11)
fig.show()

print("\n2025 Drivers' Championship Standings (Post-Qatar GP):")
display(combined_driver_standings.head(10).style.background_gradient(cmap='Reds', subset=['Total_Points']))

In [None]:
# Constructors' World Championship Standings (Combined Race + Sprint Points)
teams_sorted = combined_team_standings.reset_index()

# Create color list based on team colors
team_color_list = [TEAM_COLORS.get(team, '#888888') for team in teams_sorted['Team']]

fig = go.Figure(data=[
    go.Bar(
        x=teams_sorted['Team'],
        y=teams_sorted['Total_Points'],
        marker_color=team_color_list,
        text=teams_sorted['Total_Points'],
        textposition='outside',
        hovertemplate='<b>%{x}</b><br>Total: %{y} pts<extra></extra>'
    )
])

fig.update_layout(
    title="2025 FIA Formula 1 Constructors' World Championship",
    title_font_size=18,
    template='plotly_dark',
    height=500,
    margin=dict(l=50, r=50, t=80, b=100),
    xaxis_tickangle=-30,
    yaxis_title='Total Points',
    font=dict(size=12)
)
fig.show()

print("\n2025 Constructors' Championship Standings (Post-Qatar GP):")
display(combined_team_standings.style.background_gradient(cmap='Blues', subset=['Total_Points']))

---

## 2. Championship Progression

Cumulative points development throughout the 2025 season following the official F1 calendar order.

---

In [None]:
# Sort race data according to official calendar order
available_tracks = [t for t in F1_2025_CALENDAR if t in df_race['Track'].unique()]
track_order_map = {track: i for i, track in enumerate(available_tracks)}
df_race['Track_Order'] = df_race['Track'].map(track_order_map)
df_sorted = df_race.sort_values('Track_Order')

# Aggregate race points per driver per track
race_points = df_sorted.groupby(['Driver', 'Track']).agg({'Points': 'sum'}).reset_index()
race_points = race_points.pivot(index='Track', columns='Driver', values='Points').fillna(0)

# Reorder tracks according to calendar
race_points = race_points.reindex([t for t in available_tracks if t in race_points.index])

# Add sprint points for sprint weekend tracks
sprint_tracks = df_sprint['Track'].unique()
for track in race_points.index:
    if track in sprint_tracks:
        sprint_track = df_sprint[df_sprint['Track'] == track]
        for driver in race_points.columns:
            sprint_pts = sprint_track[sprint_track['Driver'] == driver]['Points'].sum()
            race_points.loc[track, driver] += sprint_pts

# Calculate cumulative championship points
cumulative_points = race_points.cumsum()

# Filter to top 10 championship contenders
top_10_names = combined_driver_standings.head(10).index.tolist()
cumulative_top10 = cumulative_points[[d for d in top_10_names if d in cumulative_points.columns]]

fig = px.line(cumulative_top10, 
              title='2025 Championship Progression - Top 10 Drivers',
              labels={'value': 'Cumulative Points', 'variable': 'Driver', 'Track': 'Grand Prix'},
              height=550)

fig.update_layout(
    template='plotly_dark',
    legend_title='Driver',
    title_font_size=18,
    margin=dict(l=50, r=50, t=80, b=120),
    xaxis_tickangle=-45,
    font=dict(size=12),
    legend=dict(font=dict(size=11))
)
fig.update_traces(line=dict(width=2.5))
fig.show()

---

## 3. Head-to-Head Teammate Comparison

Intra-team performance analysis comparing teammates' race finishing positions.

---

In [None]:
# Calculate Head-to-Head Teammate Battle Statistics
team_h2h = {}
for team in df_race['Team'].unique():
    drivers = df_race[df_race['Team'] == team]['Driver'].unique()
    if len(drivers) >= 2:
        d1, d2 = drivers[0], drivers[1]
        team_df = df_race[df_race['Team'] == team]
        
        h2h_data = team_df.pivot(index='Track', columns='Driver', values='Position')
        if d1 in h2h_data.columns and d2 in h2h_data.columns:
            d1_wins = (h2h_data[d1] < h2h_data[d2]).sum()
            d2_wins = (h2h_data[d2] < h2h_data[d1]).sum()
            team_h2h[team] = {'Driver1': d1, 'Driver2': d2, 'D1_Wins': d1_wins, 'D2_Wins': d2_wins}

h2h_df = pd.DataFrame(team_h2h).T

print('Head-to-Head Teammate Battles (Race Finishes):')
print('=' * 70)
for team, row in h2h_df.iterrows():
    print(f"{team:20} | {row['Driver1']:18} {int(row['D1_Wins']):2} - {int(row['D2_Wins']):2} {row['Driver2']:18}")
print('=' * 70)

In [None]:
# Visualize Head-to-Head Teammate Battles
fig = make_subplots(rows=2, cols=5, subplot_titles=list(h2h_df.index),
                    specs=[[{'type': 'pie'}]*5, [{'type': 'pie'}]*5])

for idx, (team, row) in enumerate(h2h_df.iterrows()):
    r = idx // 5 + 1
    c = idx % 5 + 1
    
    team_color = TEAM_COLORS.get(team, '#888888')
    # Create lighter version for second driver
    d2_color = team_color + '80'  # Add transparency
    
    fig.add_trace(
        go.Pie(
            labels=[row['Driver1'].split()[-1], row['Driver2'].split()[-1]],
            values=[row['D1_Wins'], row['D2_Wins']],
            marker_colors=[team_color, '#555555'],
            textinfo='value',
            textfont_size=12,
            hole=0.4
        ),
        row=r, col=c
    )

fig.update_layout(
    title_text='Teammate Head-to-Head Battle Results (Race Wins vs Teammate)',
    title_font_size=18,
    template='plotly_dark',
    height=600,
    showlegend=False,
    margin=dict(l=30, r=30, t=100, b=30)
)
fig.show()

---

## 4. Race Results Heatmap

Visual matrix of finishing positions across all drivers and Grand Prix events.

---

In [None]:
# Create Race Finishing Position Heatmap (Driver x Track)
# Sort drivers by championship position
driver_order = combined_driver_standings.index.tolist()

position_pivot = df_race.pivot(index='Driver', columns='Track', values='Position')
position_pivot = position_pivot.reindex(driver_order)

# Reorder columns according to F1 calendar
position_pivot = position_pivot[[t for t in available_tracks if t in position_pivot.columns]]

# Use matplotlib for better control over heatmap readability
fig, ax = plt.subplots(figsize=(18, 12))

# Create heatmap with better spacing
sns.heatmap(position_pivot, 
            cmap='RdYlGn_r',
            annot=True, 
            fmt='.0f',
            linewidths=1.5,
            linecolor='#333333',
            cbar_kws={'label': 'Finishing Position', 'shrink': 0.8},
            annot_kws={'size': 9, 'weight': 'bold'},
            ax=ax,
            vmin=1, vmax=20,
            mask=position_pivot.isna())

ax.set_title('Race Finishing Positions Matrix - 2025 Season\n', fontsize=16, fontweight='bold')
ax.set_xlabel('\nGrand Prix', fontsize=12)
ax.set_ylabel('Driver (Championship Order)\n', fontsize=12)

# Rotate x labels for better readability
plt.xticks(rotation=45, ha='right', fontsize=10)
plt.yticks(fontsize=10)

plt.tight_layout()
plt.show()

print('\nNote: Darker red = worse position, Darker green = better position')
print('Empty cells indicate the driver did not participate or was DNS/DSQ')

---

## 5. Grid to Finish Position Analysis

Evaluating race craft performance through position changes from grid to checkered flag.

---

In [None]:
# Calculate positions gained/lost during race
df_race['Positions_Gained'] = df_race['Starting Grid'] - df_race['Position']

# Filter to classified finishes only and calculate average positions gained
finished = df_race[df_race['Finished'] == True]
overtakers = finished.groupby('Driver')['Positions_Gained'].mean().sort_values(ascending=False)

# Create color based on positive/negative
colors = ['#2ecc71' if x > 0 else '#e74c3c' for x in overtakers.values]

fig = go.Figure(data=[
    go.Bar(
        x=overtakers.values,
        y=overtakers.index,
        orientation='h',
        marker_color=colors,
        text=[f'+{x:.1f}' if x > 0 else f'{x:.1f}' for x in overtakers.values],
        textposition='outside',
        textfont_size=10
    )
])

fig.add_vline(x=0, line_dash='dash', line_color='white', line_width=2)

fig.update_layout(
    title='Average Positions Gained/Lost per Race - 2025 Season',
    title_font_size=18,
    xaxis_title='Positions Gained (Positive = Better)',
    yaxis={'categoryorder':'total ascending'},
    template='plotly_dark',
    height=700,
    margin=dict(l=150, r=80, t=80, b=50),
    font=dict(size=12)
)
fig.show()

print('\nTop 5 Overtakers (Best Race Craft):')
for driver, val in overtakers.head().items():
    print(f'  {driver}: +{val:.2f} positions per race')

In [None]:
# Scatter Plot: Starting Grid vs Finishing Position Correlation
finished = df_race[df_race['Finished'] == True]

# Create color mapping for teams
finished['Team_Color'] = finished['Team'].map(TEAM_COLORS)

fig = px.scatter(finished, x='Starting Grid', y='Position', color='Team',
                 color_discrete_map=TEAM_COLORS,
                 hover_data=['Driver', 'Track', 'Points'],
                 title='Starting Grid vs Finishing Position Correlation - 2025 Season',
                 height=550)

# Add diagonal reference line
fig.add_shape(type='line', x0=1, y0=1, x1=20, y1=20, 
              line=dict(color='white', dash='dash', width=2))

fig.add_annotation(x=18, y=18, text='No change line',
                   showarrow=False, font=dict(color='white', size=10))

fig.update_layout(
    template='plotly_dark',
    margin=dict(l=50, r=50, t=80, b=50),
    title_font_size=18,
    font=dict(size=12),
    legend=dict(font=dict(size=10))
)
fig.show()

print('\nNote: Points below the diagonal = gained positions, above = lost positions')

---

## 6. Retirement and Disqualification Analysis

Statistical breakdown of DNF (Did Not Finish) and DSQ (Disqualification) incidents.

---

In [None]:
# DNF/DSQ Count by Driver
dnf_df = df_race[df_race['Finished'] == False]

if len(dnf_df) > 0:
    dnf_count = dnf_df.groupby('Driver').size().sort_values(ascending=False)
    
    fig = go.Figure(data=[
        go.Bar(
            x=dnf_count.values,
            y=dnf_count.index,
            orientation='h',
            marker_color='#e74c3c',
            text=dnf_count.values,
            textposition='outside'
        )
    ])
    
    fig.update_layout(
        title='Retirements and Disqualifications per Driver - 2025 Season',
        title_font_size=18,
        xaxis_title='Number of DNFs/DSQs',
        yaxis={'categoryorder':'total ascending'},
        template='plotly_dark',
        height=600,
        margin=dict(l=150, r=80, t=80, b=50),
        font=dict(size=12)
    )
    fig.show()
    
    print(f'\nTotal DNF/DSQ incidents: {len(dnf_df)}')
    print(f'Drivers affected: {dnf_df["Driver"].nunique()}')
else:
    print('No DNF/DSQ incidents recorded in the dataset.')

In [None]:
# DNF/DSQ Causes Distribution Analysis
if len(dnf_df) > 0:
    dnf_reasons = dnf_df['Time/Retired'].value_counts()
    
    fig = px.pie(values=dnf_reasons.values, names=dnf_reasons.index,
                 title='Retirement and Disqualification Causes Distribution - 2025 Season',
                 color_discrete_sequence=px.colors.sequential.Reds_r,
                 height=500)
    
    fig.update_layout(
        template='plotly_dark',
        title_font_size=18,
        font=dict(size=12),
        margin=dict(l=50, r=50, t=80, b=50)
    )
    fig.update_traces(textinfo='percent+label', textfont_size=11)
    fig.show()
    
    print('\nDNF/DSQ Causes Breakdown:')
    for reason, count in dnf_reasons.items():
        print(f'  {reason}: {count}')
else:
    print('No DNF/DSQ incidents to analyze.')

---

## 7. Points Scoring Consistency

Analysis of points distribution patterns and scoring consistency across teams and drivers.

---

In [None]:
# Points Distribution by Constructor (Box Plot)
fig = px.box(df_race, x='Team', y='Points', color='Team',
             color_discrete_map=TEAM_COLORS,
             title='Points Scoring Distribution by Constructor - 2025 Season',
             height=550)

fig.update_layout(
    template='plotly_dark',
    showlegend=False,
    xaxis_tickangle=-35,
    title_font_size=18,
    margin=dict(l=50, r=50, t=80, b=120),
    font=dict(size=12),
    xaxis_title='Constructor',
    yaxis_title='Points per Race'
)
fig.show()

# Team points summary
team_summary = df_race.groupby('Team')['Points'].agg(['mean', 'std', 'max', 'sum']).round(2)
team_summary.columns = ['Avg Points', 'Std Dev', 'Best Result', 'Total Points']
team_summary = team_summary.sort_values('Total Points', ascending=False)
print('\nTeam Points Summary:')
display(team_summary)

In [None]:
# Points Consistency Analysis - Top 5 Championship Contenders (Violin Plot)
top_5_drivers = driver_stats.nlargest(5, 'Total_Points').index.tolist()
df_top5 = df_race[df_race['Driver'].isin(top_5_drivers)]

fig = px.violin(df_top5, x='Driver', y='Points', color='Driver', box=True,
                title='Points Scoring Consistency - Top 5 Drivers',
                height=500)

fig.update_layout(
    template='plotly_dark',
    showlegend=False,
    title_font_size=18,
    margin=dict(l=50, r=50, t=80, b=80),
    font=dict(size=12),
    xaxis_title='Driver',
    yaxis_title='Points per Race'
)
fig.show()

# Top 5 driver consistency
driver_consistency = df_top5.groupby('Driver')['Points'].agg(['mean', 'std']).round(2)
driver_consistency['CV'] = (driver_consistency['std'] / driver_consistency['mean'] * 100).round(1)
driver_consistency.columns = ['Avg Points', 'Std Dev', 'CV (%)']
driver_consistency = driver_consistency.sort_values('Avg Points', ascending=False)
print('\nTop 5 Drivers Consistency Analysis (Lower CV = More Consistent):')
display(driver_consistency)

---

## 8. Grand Prix Winners

Complete list of race winners throughout the 2025 season.

---

In [None]:
# Extract Grand Prix Winners (Sorted by Official Calendar Order)
winners = df_race[df_race['Position'] == 1][['Track', 'Driver', 'Team']].copy()
winners['Track_Order'] = winners['Track'].map(track_order_map)
winners = winners.sort_values('Track_Order').drop('Track_Order', axis=1).reset_index(drop=True)
winners.index = winners.index + 1  # Start index from 1

print('2025 Grand Prix Winners (Calendar Order):')
print('=' * 60)
display(winners.style.set_properties(**{'text-align': 'left'}))

In [None]:
# Race Win Distribution Analysis
win_count = winners['Driver'].value_counts()

# Create colors based on driver's team
driver_team_map = df_race.groupby('Driver')['Team'].first().to_dict()
win_colors = [TEAM_COLORS.get(driver_team_map.get(driver, ''), '#888888') for driver in win_count.index]

fig = go.Figure(data=[
    go.Pie(
        values=win_count.values,
        labels=win_count.index,
        marker_colors=win_colors,
        textinfo='label+value',
        textfont_size=12,
        hole=0.3
    )
])

fig.update_layout(
    title='Grand Prix Win Distribution - 2025 Season',
    title_font_size=18,
    template='plotly_dark',
    height=500,
    margin=dict(l=50, r=50, t=80, b=50),
    font=dict(size=12)
)
fig.show()

print('\nWin Count by Driver:')
for driver, wins in win_count.items():
    team = driver_team_map.get(driver, 'Unknown')
    print(f'  {driver} ({team}): {wins} wins')

---

## 9. Podium Statistics

Analysis of podium finishes (P1, P2, P3) distribution across all drivers.

---

In [None]:
# Calculate Podium Statistics by Driver
podiums = df_race[df_race['Position'] <= 3]
podium_count = podiums.groupby(['Driver', 'Position']).size().unstack(fill_value=0)
podium_count.columns = ['P1', 'P2', 'P3']
podium_count['Total'] = podium_count.sum(axis=1)
podium_count = podium_count.sort_values('Total', ascending=False)

# Filter to drivers with podiums
podium_count = podium_count[podium_count['Total'] > 0]

# Stacked Bar Chart: Podium Distribution
fig = go.Figure()
fig.add_trace(go.Bar(name='P1 (Win)', x=podium_count.index, y=podium_count['P1'], 
                     marker_color='gold', text=podium_count['P1'], textposition='inside'))
fig.add_trace(go.Bar(name='P2', x=podium_count.index, y=podium_count['P2'], 
                     marker_color='silver', text=podium_count['P2'], textposition='inside'))
fig.add_trace(go.Bar(name='P3', x=podium_count.index, y=podium_count['P3'], 
                     marker_color='#cd7f32', text=podium_count['P3'], textposition='inside'))

fig.update_layout(
    barmode='stack',
    title='Podium Finishes by Driver - 2025 Season',
    title_font_size=18,
    template='plotly_dark',
    xaxis_tickangle=-35,
    height=550,
    margin=dict(l=50, r=50, t=80, b=120),
    font=dict(size=12),
    xaxis_title='Driver',
    yaxis_title='Number of Podiums',
    legend=dict(font=dict(size=11))
)
fig.show()

print('\nPodium Statistics:')
display(podium_count.style.background_gradient(cmap='YlOrRd', subset=['Total']))

---

## 10. Track Performance Analysis

Team and driver performance breakdown by circuit type.

---

In [None]:
# Average points per track for top teams
top_teams = combined_team_standings.head(5).index.tolist()
df_top_teams = df_race[df_race['Team'].isin(top_teams)]

track_performance = df_top_teams.pivot_table(
    index='Track', 
    columns='Team', 
    values='Points', 
    aggfunc='sum'
).fillna(0)

# Reorder by calendar
track_performance = track_performance.reindex([t for t in available_tracks if t in track_performance.index])

# Create heatmap with matplotlib for better readability
fig, ax = plt.subplots(figsize=(12, 14))

sns.heatmap(track_performance,
            cmap='YlOrRd',
            annot=True,
            fmt='.0f',
            linewidths=1.5,
            linecolor='#333333',
            cbar_kws={'label': 'Combined Team Points', 'shrink': 0.8},
            annot_kws={'size': 10, 'weight': 'bold'},
            ax=ax)

ax.set_title('Team Points by Grand Prix - 2025 Season (Top 5 Teams)\n', fontsize=16, fontweight='bold')
ax.set_xlabel('\nConstructor', fontsize=12)
ax.set_ylabel('Grand Prix\n', fontsize=12)

plt.xticks(rotation=30, ha='right', fontsize=11)
plt.yticks(fontsize=10)

plt.tight_layout()
plt.show()

---

## Summary

This notebook provides comprehensive analysis of the 2025 FIA Formula 1 World Championship including:

1. **Championship Standings** - Official combined Race + Sprint points standings
2. **Championship Progression** - Cumulative points development following F1 calendar order
3. **Teammate Comparison** - Head-to-head intra-team performance analysis
4. **Results Heatmap** - Visual matrix of finishing positions across all events
5. **Grid Analysis** - Starting position to finish correlation and overtaking metrics
6. **DNF Statistics** - Retirement and disqualification breakdown
7. **Points Consistency** - Scoring pattern analysis by team and driver
8. **Race Winners** - Complete Grand Prix victories list
9. **Podium Statistics** - P1, P2, P3 finish distribution
10. **Track Performance** - Team performance analysis by circuit

---

In [None]:
# Final Championship Summary
print('=' * 70)
print('2025 FIA Formula 1 World Championship Summary (Post-Qatar GP)')
print('=' * 70)
print(f"\nDrivers' Championship Leader: {combined_driver_standings.index[0]}")
print(f"  Total Points: {combined_driver_standings['Total_Points'].iloc[0]}")
print(f"  Race Wins: {combined_driver_standings['Race_Wins'].iloc[0]}")
print(f"\nConstructors' Championship Leader: {combined_team_standings.index[0]}")
print(f"  Total Points: {combined_team_standings['Total_Points'].iloc[0]}")
print(f"\nChampionship Gap to P2:")
driver_gap = combined_driver_standings['Total_Points'].iloc[0] - combined_driver_standings['Total_Points'].iloc[1]
team_gap = combined_team_standings['Total_Points'].iloc[0] - combined_team_standings['Total_Points'].iloc[1]
print(f"  Drivers: {driver_gap} points")
print(f"  Constructors: {team_gap} points")
print('=' * 70)

In [2]:
# Load Race and Sprint Datasets for Combined Championship Standings
df_race = load_data('../data/Formula1_2025Season_RaceResults.csv')
df_sprint = load_data('../data/Formula1_2025Season_SprintResults.csv')

df_race = clean_data(df_race)

# Calculate driver and team statistics
driver_stats = calculate_driver_stats(df_race)
team_stats = calculate_team_stats(df_race)

# Calculate official combined standings (Race + Sprint points as per FIA regulations)
combined_driver_standings = calculate_combined_standings(df_race, df_sprint)
combined_team_standings = calculate_combined_constructor_standings(df_race, df_sprint)

print(f'Dataset loaded: {len(df_race)} race records from {df_race["Track"].nunique()} Grand Prix')
print(f'Dataset loaded: {len(df_sprint)} sprint records from {df_sprint["Track"].nunique()} Sprint events')
print(f'\nChampionship Leader: {combined_driver_standings.index[0]} with {combined_driver_standings["Total_Points"].iloc[0]} points')

Dataset loaded: 459 race records from 23 Grand Prix
Dataset loaded: 120 sprint records from 6 Sprint events

Championship Leader: Lando Norris with 408.0 points


## 1. Championship Standings
Official FIA World Championship standings combining Race and Sprint points.

In [3]:
# Drivers' World Championship Standings (Combined Race + Sprint Points)
top_drivers = combined_driver_standings.head(10).reset_index()
fig = px.bar(top_drivers, x='Total_Points', y='Driver', orientation='h',
             color='Total_Points', color_continuous_scale='Reds',
             title="2025 FIA Formula 1 Drivers' World Championship",
             text='Total_Points',
             hover_data=['Race_Points', 'Sprint_Points', 'Race_Wins'])
fig.update_layout(yaxis={'categoryorder':'total ascending'}, template='plotly_dark')
fig.show()

print("\n2025 Drivers' Championship Standings (Post-Qatar GP):")
combined_driver_standings.head(10)


2025 Drivers' Championship Standings (Post-Qatar GP):


Unnamed: 0_level_0,Position,Team,Race_Points,Sprint_Points,Total_Points,Race_Wins,Sprint_Wins,Total_Wins
Driver,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Lando Norris,1,McLaren Mercedes,379,29.0,408.0,7.0,0.0,7.0
Max Verstappen,2,Red Bull Racing Honda RBPT,364,32.0,396.0,7.0,0.0,7.0
Oscar Piastri,3,McLaren Mercedes,363,29.0,392.0,7.0,0.0,7.0
George Russell,4,Mercedes,279,30.0,309.0,2.0,0.0,2.0
Charles Leclerc,5,Ferrari,213,17.0,230.0,0.0,0.0,0.0
Lewis Hamilton,6,Ferrari,131,21.0,152.0,0.0,0.0,0.0
Kimi Antonelli,7,Mercedes,135,15.0,150.0,0.0,0.0,0.0
Alexander Albon,8,Williams Mercedes,70,3.0,73.0,0.0,0.0,0.0
Carlos Sainz,9,Williams Mercedes,54,10.0,64.0,0.0,0.0,0.0
Isack Hadjar,10,Racing Bulls Honda RBPT,50,1.0,51.0,0.0,0.0,0.0


In [4]:
# Constructors' World Championship Standings (Combined Race + Sprint Points)
teams_sorted = combined_team_standings.reset_index()
fig = px.bar(teams_sorted, x='Team', y='Total_Points',
             color='Total_Points', color_continuous_scale='Blues',
             title="2025 FIA Formula 1 Constructors' World Championship",
             text='Total_Points',
             hover_data=['Race_Points', 'Sprint_Points'])
fig.update_layout(template='plotly_dark')
fig.show()

print("\n2025 Constructors' Championship Standings (Post-Qatar GP):")
combined_team_standings


2025 Constructors' Championship Standings (Post-Qatar GP):


Unnamed: 0_level_0,Position,Race_Points,Sprint_Points,Total_Points,Race_Wins,Sprint_Wins
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
McLaren Mercedes,1,732,58.0,790.0,14.0,0.0
Mercedes,2,414,45.0,459.0,2.0,0.0
Red Bull Racing Honda RBPT,3,385,41.0,426.0,7.0,0.0
Ferrari,4,344,38.0,382.0,0.0,0.0
Williams Mercedes,5,124,13.0,137.0,0.0,0.0
Racing Bulls Honda RBPT,6,88,4.0,92.0,0.0,0.0
Aston Martin Aramco Mercedes,7,71,9.0,80.0,0.0,0.0
Haas Ferrari,8,67,6.0,73.0,0.0,0.0
Kick Sauber Ferrari,9,68,0.0,68.0,0.0,0.0
Alpine Renault,10,20,2.0,22.0,0.0,0.0


## 2. Championship Progression
Cumulative points development throughout the 2025 season following the official F1 calendar order.

In [5]:
# Define official F1 2025 Calendar order
F1_2025_CALENDAR = [
    'Australia', 'China', 'Japan', 'Bahrain', 'Saudi Arabia', 'Miami',
    'Emilia-Romagna', 'Monaco', 'Spain', 'Canada', 'Austria', 'Great Britain',
    'Belgium', 'Hungary', 'Netherlands', 'Italy', 'Azerbaijan', 'Singapore',
    'United States', 'Mexico', 'Brazil', 'Las Vegas', 'Qatar', 'Abu Dhabi'
]

# Sort race data according to official calendar order
available_tracks = [t for t in F1_2025_CALENDAR if t in df_race['Track'].unique()]
track_order_map = {track: i for i, track in enumerate(available_tracks)}
df_race['Track_Order'] = df_race['Track'].map(track_order_map)
df_sorted = df_race.sort_values('Track_Order')

# Aggregate race points per driver per track
race_points = df_sorted.groupby(['Driver', 'Track']).agg({'Points': 'sum'}).reset_index()
race_points = race_points.pivot(index='Track', columns='Driver', values='Points').fillna(0)

# Reorder tracks according to calendar
race_points = race_points.reindex([t for t in available_tracks if t in race_points.index])

# Add sprint points for sprint weekend tracks
sprint_tracks = df_sprint['Track'].unique()
for track in race_points.index:
    if track in sprint_tracks:
        sprint_track = df_sprint[df_sprint['Track'] == track]
        for driver in race_points.columns:
            sprint_pts = sprint_track[sprint_track['Driver'] == driver]['Points'].sum()
            race_points.loc[track, driver] += sprint_pts

# Calculate cumulative championship points
cumulative_points = race_points.cumsum()

# Filter to top 10 championship contenders
top_10_names = combined_driver_standings.head(10).index.tolist()
cumulative_top10 = cumulative_points[[d for d in top_10_names if d in cumulative_points.columns]]

fig = px.line(cumulative_top10, 
              title='2025 Championship Progression - Top 10 Drivers (Race + Sprint Points)',
              labels={'value': 'Cumulative Points', 'variable': 'Driver'})
fig.update_layout(template='plotly_dark', legend_title='Driver')
fig.show()

## 3. Head-to-Head Teammate Comparison
Intra-team performance analysis comparing teammates' race finishing positions.

In [6]:
# Calculate Head-to-Head Teammate Battle Statistics
team_h2h = {}
for team in df_race['Team'].unique():
    drivers = df_race[df_race['Team'] == team]['Driver'].unique()
    if len(drivers) >= 2:
        d1, d2 = drivers[0], drivers[1]
        team_df = df_race[df_race['Team'] == team]
        
        h2h_data = team_df.pivot(index='Track', columns='Driver', values='Position')
        if d1 in h2h_data.columns and d2 in h2h_data.columns:
            d1_wins = (h2h_data[d1] < h2h_data[d2]).sum()
            d2_wins = (h2h_data[d2] < h2h_data[d1]).sum()
            team_h2h[team] = {'Driver1': d1, 'Driver2': d2, 'D1_Wins': d1_wins, 'D2_Wins': d2_wins}

h2h_df = pd.DataFrame(team_h2h).T
print('Head-to-Head Teammate Battles (Race Finishes):')
h2h_df

Head-to-Head Teammate Battles (Race Finishes):


Unnamed: 0,Driver1,Driver2,D1_Wins,D2_Wins
McLaren Mercedes,Lando Norris,Oscar Piastri,11,9
Red Bull Racing Honda RBPT,Max Verstappen,Liam Lawson,1,0
Mercedes,George Russell,Kimi Antonelli,16,3
Williams Mercedes,Alexander Albon,Carlos Sainz,11,5
Aston Martin Aramco Mercedes,Lance Stroll,Fernando Alonso,3,13
Kick Sauber Ferrari,Nico Hulkenberg,Gabriel Bortoleto,7,7
Ferrari,Charles Leclerc,Lewis Hamilton,17,3
Alpine Renault,Pierre Gasly,Jack Doohan,2,0
Racing Bulls Honda RBPT,Yuki Tsunoda,Isack Hadjar,0,1
Haas Ferrari,Esteban Ocon,Oliver Bearman,8,11


In [7]:
# Visualize Head-to-Head Teammate Battles
fig = go.Figure()
for idx, row in h2h_df.iterrows():
    fig.add_trace(go.Bar(name=row['Driver1'], x=[idx], y=[row['D1_Wins']], 
                         marker_color='crimson', text=[row['D1_Wins']], textposition='inside'))
    fig.add_trace(go.Bar(name=row['Driver2'], x=[idx], y=[row['D2_Wins']], 
                         marker_color='navy', text=[row['D2_Wins']], textposition='inside'))

fig.update_layout(barmode='group', title='Teammate Head-to-Head Battle Results',
                  xaxis_title='Constructor', yaxis_title='Race Wins vs Teammate',
                  template='plotly_dark', showlegend=False)
fig.show()

## 4. Race Results Heatmap
Visual matrix of finishing positions across all drivers and Grand Prix events.

In [8]:
# Create Race Finishing Position Heatmap (Driver x Track)
position_pivot = df_race.pivot(index='Driver', columns='Track', values='Position')

# Reorder columns according to F1 calendar
position_pivot = position_pivot[[t for t in available_tracks if t in position_pivot.columns]]

fig = px.imshow(position_pivot, 
                color_continuous_scale='RdYlGn_r',
                title='Race Finishing Positions Matrix - 2025 Season',
                labels={'color': 'Position'})
fig.update_layout(template='plotly_dark', height=700)
fig.show()

## 5. Grid to Finish Position Analysis
Evaluating race craft performance through position changes from grid to checkered flag.

In [9]:
# Calculate positions gained/lost during race
df_race['Positions_Gained'] = df_race['Starting Grid'] - df_race['Position']

# Filter to classified finishes only and calculate average positions gained
finished = df_race[df_race['Finished'] == True]
overtakers = finished.groupby('Driver')['Positions_Gained'].mean().sort_values(ascending=False)

fig = px.bar(overtakers.reset_index(), x='Positions_Gained', y='Driver', orientation='h',
             color='Positions_Gained', color_continuous_scale='RdYlGn',
             title='Average Positions Gained/Lost per Race - 2025 Season')
fig.update_layout(yaxis={'categoryorder':'total ascending'}, template='plotly_dark')
fig.add_vline(x=0, line_dash='dash', line_color='white')
fig.show()

In [10]:
# Scatter Plot: Starting Grid vs Finishing Position Correlation
finished = df_race[df_race['Finished'] == True]
fig = px.scatter(finished, x='Starting Grid', y='Position', color='Team',
                 hover_data=['Driver', 'Track', 'Points'],
                 title='Starting Grid vs Finishing Position Correlation - 2025 Season')
fig.add_shape(type='line', x0=1, y0=1, x1=20, y1=20, 
              line=dict(color='white', dash='dash'))
fig.update_layout(template='plotly_dark')
fig.show()





## 6. Retirement and Disqualification Analysis
Statistical breakdown of DNF (Did Not Finish) and DSQ (Disqualification) incidents.

In [11]:
# DNF/DSQ Count by Driver
dnf_df = df_race[df_race['Finished'] == False]
dnf_count = dnf_df.groupby('Driver').size().sort_values(ascending=False)

fig = px.bar(dnf_count.reset_index(name='DNFs'), x='DNFs', y='Driver', orientation='h',
             color='DNFs', color_continuous_scale='Reds',
             title='Retirements and Disqualifications per Driver - 2025 Season')
fig.update_layout(yaxis={'categoryorder':'total ascending'}, template='plotly_dark')
fig.show()

In [12]:
# DNF/DSQ Causes Distribution Analysis
dnf_reasons = dnf_df['Time/Retired'].value_counts()
fig = px.pie(values=dnf_reasons.values, names=dnf_reasons.index,
             title='Retirement and Disqualification Causes Distribution - 2025 Season')
fig.update_layout(template='plotly_dark')
fig.show()

## 7. Points Scoring Consistency
Analysis of points distribution patterns and scoring consistency across teams and drivers.

In [13]:
# Points Distribution by Constructor (Box Plot)
fig = px.box(df_race, x='Team', y='Points', color='Team',
             title='Points Scoring Distribution by Constructor - 2025 Season')
fig.update_layout(template='plotly_dark', showlegend=False,
                  xaxis_tickangle=-45)
fig.show()





In [14]:
# Points Consistency Analysis - Top 5 Championship Contenders (Violin Plot)
top_5_drivers = driver_stats.nlargest(5, 'Total_Points').index.tolist()
df_top5 = df_race[df_race['Driver'].isin(top_5_drivers)]

fig = px.violin(df_top5, x='Driver', y='Points', color='Driver', box=True,
                title='Points Scoring Consistency - Top 5 Drivers')
fig.update_layout(template='plotly_dark', showlegend=False)
fig.show()





## 8. Grand Prix Winners
Complete list of race winners throughout the 2025 season.

In [15]:
# Extract Grand Prix Winners (Sorted by Official Calendar Order)
winners = df_race[df_race['Position'] == 1][['Track', 'Driver', 'Team']].copy()
winners['Track_Order'] = winners['Track'].map(track_order_map)
winners = winners.sort_values('Track_Order').drop('Track_Order', axis=1)
print('2025 Grand Prix Winners (Calendar Order):')
winners

2025 Grand Prix Winners (Calendar Order):


Unnamed: 0,Track,Driver,Team
0,Australia,Lando Norris,McLaren Mercedes
20,China,Oscar Piastri,McLaren Mercedes
40,Japan,Max Verstappen,Red Bull Racing Honda RBPT
60,Bahrain,Oscar Piastri,McLaren Mercedes
80,Saudi Arabia,Oscar Piastri,McLaren Mercedes
100,Miami,Oscar Piastri,McLaren Mercedes
120,Emilia-Romagna,Max Verstappen,Red Bull Racing Honda RBPT
140,Monaco,Lando Norris,McLaren Mercedes
160,Spain,Oscar Piastri,McLaren Mercedes
179,Canada,George Russell,Mercedes


In [16]:
# Race Win Distribution Analysis
win_count = winners['Driver'].value_counts()
fig = px.pie(values=win_count.values, names=win_count.index,
             title='Grand Prix Win Distribution - 2025 Season',
             color_discrete_sequence=px.colors.sequential.Reds_r)
fig.update_layout(template='plotly_dark')
fig.show()

## 9. Podium Statistics
Analysis of podium finishes (P1, P2, P3) distribution across all drivers.

In [17]:
# Calculate Podium Statistics by Driver
podiums = df_race[df_race['Position'] <= 3]
podium_count = podiums.groupby(['Driver', 'Position']).size().unstack(fill_value=0)
podium_count.columns = ['P1', 'P2', 'P3']
podium_count['Total'] = podium_count.sum(axis=1)
podium_count = podium_count.sort_values('Total', ascending=False)

# Stacked Bar Chart: Podium Distribution
fig = go.Figure()
fig.add_trace(go.Bar(name='P1 (Win)', x=podium_count.index, y=podium_count['P1'], marker_color='gold'))
fig.add_trace(go.Bar(name='P2', x=podium_count.index, y=podium_count['P2'], marker_color='silver'))
fig.add_trace(go.Bar(name='P3', x=podium_count.index, y=podium_count['P3'], marker_color='#cd7f32'))

fig.update_layout(barmode='stack', title='Podium Finishes by Driver - 2025 Season',
                  template='plotly_dark', xaxis_tickangle=-45)
fig.show()

---
## Summary

This notebook provides comprehensive analysis of the 2025 FIA Formula 1 World Championship including:

1. **Championship Standings** - Official combined Race + Sprint points standings
2. **Championship Progression** - Cumulative points development following F1 calendar order
3. **Teammate Comparison** - Head-to-head intra-team performance analysis
4. **Results Heatmap** - Visual matrix of finishing positions across all events
5. **Grid Analysis** - Starting position to finish correlation and overtaking metrics
6. **DNF Statistics** - Retirement and disqualification breakdown
7. **Points Consistency** - Scoring pattern analysis by team and driver
8. **Race Winners** - Complete Grand Prix victories list
9. **Podium Statistics** - P1, P2, P3 finish distribution

**2025 Championship Standings (Post-Qatar GP):**
- **Drivers' Champion Leader:** Lando Norris - 408 points (Race: 379 + Sprint: 29)
- **Constructors' Champion Leader:** McLaren Mercedes - 790 points