In [13]:
# Import libraries and setup path
import sys
import os
from pathlib import Path

# Setup project root for imports
project_root = Path(os.getcwd()).parent
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

plt.style.use('seaborn-v0_8-darkgrid')
print('Libraries loaded!')

Libraries loaded!


In [None]:
# =============================================================================
# ENVIRONMENT SETUP
# =============================================================================
import sys
import os
from pathlib import Path

# Setup project root for imports
project_root = Path(os.getcwd()).parent
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Import team colors
try:
    from src.config import TEAM_COLORS, get_team_color
except ImportError:
    TEAM_COLORS = {}
    def get_team_color(team): return '#808080'

plt.style.use('seaborn-v0_8-darkgrid')
print('Libraries loaded successfully!')

## 1. Data Loading and Preprocessing

In [None]:
# =============================================================================
# LOAD QUALIFYING DATA
# =============================================================================
data_path = project_root / 'data' / 'Formula1_2025Season_QualifyingResults.csv'

try:
    df_quali = pd.read_csv(data_path)
    print(f'Loaded {len(df_quali)} qualifying results from {df_quali["Track"].nunique()} races')
    print(f'Drivers: {df_quali["Driver"].nunique()}')
    print(f'\nTracks: {list(df_quali["Track"].unique())}')
    df_quali.head()
except FileNotFoundError:
    print(f'ERROR: File not found at {data_path}')
    print('Please ensure the qualifying data file exists.')
    df_quali = pd.DataFrame()

In [None]:
# =============================================================================
# TIME CONVERSION FUNCTION
# =============================================================================
def time_to_seconds(time_str):
    """
    Convert qualifying time string to seconds.
    
    Args:
        time_str: Time in format 'M:SS.mmm' or 'SS.mmm'
        
    Returns:
        float: Time in seconds, or NaN if invalid
    """
    if pd.isna(time_str) or time_str == '' or time_str == 'No Time':
        return np.nan
    try:
        time_str = str(time_str).strip()
        if ':' in time_str:
            parts = time_str.split(':')
            return float(parts[0]) * 60 + float(parts[1])
        return float(time_str)
    except (ValueError, IndexError):
        return np.nan

# Apply time conversion
if not df_quali.empty:
    df_quali['Q1_sec'] = df_quali['Q1'].apply(time_to_seconds)
    df_quali['Q2_sec'] = df_quali['Q2'].apply(time_to_seconds)
    df_quali['Q3_sec'] = df_quali['Q3'].apply(time_to_seconds)
    
    # Ensure Position is numeric
    df_quali['Position'] = pd.to_numeric(df_quali['Position'], errors='coerce')
    
    print('Time conversion complete!')
    print(f'Q1 valid times: {df_quali["Q1_sec"].notna().sum()}')
    print(f'Q2 valid times: {df_quali["Q2_sec"].notna().sum()}')
    print(f'Q3 valid times: {df_quali["Q3_sec"].notna().sum()}')

## 2. Pole Position Statistics

In [None]:
# =============================================================================
# POLE POSITIONS
# =============================================================================
if not df_quali.empty:
    poles = df_quali[df_quali['Position'] == 1][['Track', 'Driver', 'Team', 'Q3']].copy()
    
    print('=' * 60)
    print('           POLE POSITIONS - 2025 SEASON')
    print('=' * 60)
    display(poles)
else:
    print('No data available')

In [None]:
# =============================================================================
# POLE POSITION DISTRIBUTION
# =============================================================================
if not df_quali.empty and len(poles) > 0:
    pole_count = poles['Driver'].value_counts()
    
    # Get team colors for each driver
    driver_teams = poles.groupby('Driver')['Team'].first()
    colors = [get_team_color(driver_teams.get(d, '')) for d in pole_count.index]
    
    fig = go.Figure(data=[go.Pie(
        labels=pole_count.index,
        values=pole_count.values,
        marker=dict(colors=colors),
        textinfo='label+value',
        textfont=dict(size=14),
        hole=0.4
    )])
    
    fig.update_layout(
        title=dict(text='Pole Position Distribution - 2025 Season', font=dict(size=18)),
        template='plotly_dark',
        height=500,
        showlegend=True,
        legend=dict(font=dict(size=12))
    )
    fig.show()
else:
    print('No pole position data available')

## 3. Q3 Performance Analysis

In [None]:
# =============================================================================
# Q3 APPEARANCES
# =============================================================================
if not df_quali.empty:
    q3_appearances = df_quali[df_quali['Q3_sec'].notna()].groupby('Driver').size().sort_values(ascending=True)
    
    if len(q3_appearances) > 0:
        # Get team colors
        driver_teams = df_quali.groupby('Driver')['Team'].first()
        colors = [get_team_color(driver_teams.get(d, '')) for d in q3_appearances.index]
        
        fig = go.Figure(go.Bar(
            x=q3_appearances.values,
            y=q3_appearances.index,
            orientation='h',
            marker_color=colors,
            text=q3_appearances.values,
            textposition='outside',
            textfont=dict(size=11)
        ))
        
        fig.update_layout(
            title=dict(text='Q3 Appearances per Driver', font=dict(size=18)),
            xaxis_title='Number of Q3 Appearances',
            yaxis_title='',
            template='plotly_dark',
            height=600,
            margin=dict(l=150, r=50, t=80, b=50)
        )
        fig.show()
    else:
        print('No Q3 data available')
else:
    print('No data available')

In [None]:
# =============================================================================
# AVERAGE QUALIFYING POSITION
# =============================================================================
if not df_quali.empty:
    avg_quali_pos = df_quali.groupby('Driver')['Position'].mean().sort_values()
    
    # Get team colors
    driver_teams = df_quali.groupby('Driver')['Team'].first()
    colors = [get_team_color(driver_teams.get(d, '')) for d in avg_quali_pos.index]
    
    fig = go.Figure(go.Bar(
        x=avg_quali_pos.values,
        y=avg_quali_pos.index,
        orientation='h',
        marker_color=colors,
        text=[f'{v:.1f}' for v in avg_quali_pos.values],
        textposition='outside',
        textfont=dict(size=11)
    ))
    
    fig.update_layout(
        title=dict(text='Average Qualifying Position per Driver', font=dict(size=18)),
        xaxis_title='Average Position (lower is better)',
        yaxis_title='',
        template='plotly_dark',
        height=600,
        margin=dict(l=150, r=50, t=80, b=50)
    )
    fig.show()

## 4. Qualifying Position Heatmap

Visual matrix showing qualifying positions for each driver at each race.

In [None]:
# =============================================================================
# QUALIFYING HEATMAP WITH IMPROVED READABILITY
# =============================================================================
if not df_quali.empty:
    # Create pivot table
    quali_pivot = df_quali.pivot(index='Driver', columns='Track', values='Position')
    
    # Sort drivers by average position
    avg_pos = quali_pivot.mean(axis=1).sort_values()
    quali_pivot = quali_pivot.loc[avg_pos.index]
    
    # Create heatmap with better spacing
    fig = go.Figure(data=go.Heatmap(
        z=quali_pivot.values,
        x=quali_pivot.columns,
        y=quali_pivot.index,
        colorscale='RdYlGn_r',
        zmin=1,
        zmax=20,
        text=quali_pivot.values.astype(str),
        texttemplate='%{text}',
        textfont=dict(size=10, color='white'),
        hovertemplate='Driver: %{y}<br>Track: %{x}<br>Position: %{z}<extra></extra>',
        colorbar=dict(
            title='Position',
            titleside='right',
            tickmode='array',
            tickvals=[1, 5, 10, 15, 20],
            ticktext=['P1', 'P5', 'P10', 'P15', 'P20']
        ),
        xgap=3,  # Add horizontal gap between cells
        ygap=3   # Add vertical gap between cells
    ))
    
    fig.update_layout(
        title=dict(
            text='Qualifying Positions - Driver vs Track Matrix',
            font=dict(size=20),
            x=0.5
        ),
        xaxis=dict(
            title='Grand Prix',
            tickangle=-45,
            tickfont=dict(size=10),
            side='bottom'
        ),
        yaxis=dict(
            title='Driver',
            tickfont=dict(size=10),
            autorange='reversed'
        ),
        template='plotly_dark',
        height=800,
        width=1200,
        margin=dict(l=150, r=100, t=100, b=150)
    )
    
    fig.show()
else:
    print('No data available for heatmap')

## 5. Q1 to Q3 Time Improvement

In [None]:
# =============================================================================
# TIME IMPROVEMENT Q1 TO Q3
# =============================================================================
if not df_quali.empty:
    # Filter to drivers who made Q3
    q_times = df_quali[df_quali['Q3_sec'].notna()].copy()
    
    if len(q_times) > 0:
        # Calculate improvement from Q1 to Q3
        q_times['Q1_to_Q3_improvement'] = q_times['Q1_sec'] - q_times['Q3_sec']
        
        # Average improvement per driver
        avg_improvement = q_times.groupby('Driver')['Q1_to_Q3_improvement'].mean().sort_values(ascending=False)
        
        # Get team colors
        driver_teams = df_quali.groupby('Driver')['Team'].first()
        colors = [get_team_color(driver_teams.get(d, '')) for d in avg_improvement.index]
        
        fig = go.Figure(go.Bar(
            x=avg_improvement.values,
            y=avg_improvement.index,
            orientation='h',
            marker_color=colors,
            text=[f'{v:.3f}s' for v in avg_improvement.values],
            textposition='outside',
            textfont=dict(size=10)
        ))
        
        fig.update_layout(
            title=dict(text='Average Time Improvement Q1 to Q3 (seconds)', font=dict(size=18)),
            xaxis_title='Time Improvement (seconds)',
            yaxis_title='',
            template='plotly_dark',
            height=600,
            margin=dict(l=150, r=80, t=80, b=50)
        )
        fig.show()
    else:
        print('No Q3 times available for analysis')
else:
    print('No data available')

## 6. Team Qualifying Performance

In [None]:
# =============================================================================
# TEAM AVERAGE QUALIFYING POSITION
# =============================================================================
if not df_quali.empty:
    team_quali = df_quali.groupby('Team')['Position'].mean().sort_values()
    
    # Get team colors
    colors = [get_team_color(t) for t in team_quali.index]
    
    fig = go.Figure(go.Bar(
        x=team_quali.index,
        y=team_quali.values,
        marker_color=colors,
        text=[f'{v:.1f}' for v in team_quali.values],
        textposition='outside',
        textfont=dict(size=12)
    ))
    
    fig.update_layout(
        title=dict(text='Average Qualifying Position per Team', font=dict(size=18)),
        xaxis_title='Team',
        yaxis_title='Average Position',
        template='plotly_dark',
        height=500,
        xaxis_tickangle=-45,
        yaxis=dict(autorange='reversed'),
        margin=dict(l=50, r=50, t=80, b=120)
    )
    fig.show()

In [None]:
# =============================================================================
# TEAM Q3 RATE
# =============================================================================
if not df_quali.empty:
    total_entries = df_quali.groupby('Team').size()
    q3_entries = df_quali[df_quali['Q3_sec'].notna()].groupby('Team').size()
    q3_rate = (q3_entries / total_entries * 100).fillna(0).sort_values(ascending=False)
    
    # Get team colors
    colors = [get_team_color(t) for t in q3_rate.index]
    
    fig = go.Figure(go.Bar(
        x=q3_rate.index,
        y=q3_rate.values,
        marker_color=colors,
        text=[f'{v:.0f}%' for v in q3_rate.values],
        textposition='outside',
        textfont=dict(size=12)
    ))
    
    fig.update_layout(
        title=dict(text='Q3 Qualification Rate per Team (%)', font=dict(size=18)),
        xaxis_title='Team',
        yaxis_title='Q3 Rate (%)',
        template='plotly_dark',
        height=500,
        xaxis_tickangle=-45,
        yaxis=dict(range=[0, 105]),
        margin=dict(l=50, r=50, t=80, b=120)
    )
    fig.show()

## 7. Gap to Pole Analysis

In [None]:
# =============================================================================
# CALCULATE GAP TO POLE
# =============================================================================
if not df_quali.empty:
    def calculate_gap_to_pole(group):
        """
        Calculate time gap to pole position for each driver at a track.
        """
        pole_time = group[group['Position'] == 1]['Q3_sec'].values
        if len(pole_time) > 0 and not np.isnan(pole_time[0]):
            group['Gap_to_Pole'] = group['Q3_sec'] - pole_time[0]
        else:
            group['Gap_to_Pole'] = np.nan
        return group
    
    df_quali = df_quali.groupby('Track', group_keys=False).apply(calculate_gap_to_pole)
    
    # Average gap to pole per driver (Q3 drivers only)
    q3_drivers = df_quali[df_quali['Q3_sec'].notna()]
    
    if len(q3_drivers) > 0:
        avg_gap = q3_drivers.groupby('Driver')['Gap_to_Pole'].mean().sort_values()
        
        # Get team colors
        driver_teams = df_quali.groupby('Driver')['Team'].first()
        colors = [get_team_color(driver_teams.get(d, '')) for d in avg_gap.index]
        
        fig = go.Figure(go.Bar(
            x=avg_gap.values,
            y=avg_gap.index,
            orientation='h',
            marker_color=colors,
            text=[f'{v:.3f}s' for v in avg_gap.values],
            textposition='outside',
            textfont=dict(size=10)
        ))
        
        fig.update_layout(
            title=dict(
                text='Average Gap to Pole Position (Q3 drivers only)',
                font=dict(size=18)
            ),
            xaxis_title='Gap to Pole (seconds)',
            yaxis_title='',
            template='plotly_dark',
            height=600,
            margin=dict(l=150, r=80, t=80, b=50)
        )
        fig.show()
        
        print('\nAverage Gap to Pole (Top 10):')
        print(avg_gap.head(10).to_string())
    else:
        print('No Q3 data available for gap analysis')
else:
    print('No data available')

In [None]:
# =============================================================================
# GAP TO POLE - TRACK BY TRACK COMPARISON
# =============================================================================
if not df_quali.empty and 'Gap_to_Pole' in df_quali.columns:
    # Get top 5 drivers by average gap
    top_drivers = avg_gap.head(5).index.tolist()
    df_top = df_quali[df_quali['Driver'].isin(top_drivers) & df_quali['Gap_to_Pole'].notna()]
    
    if len(df_top) > 0:
        fig = go.Figure()
        
        for driver in top_drivers:
            driver_data = df_top[df_top['Driver'] == driver].sort_values('Track')
            team = driver_data['Team'].iloc[0] if len(driver_data) > 0 else ''
            color = get_team_color(team)
            
            fig.add_trace(go.Scatter(
                x=driver_data['Track'],
                y=driver_data['Gap_to_Pole'],
                mode='lines+markers',
                name=driver,
                line=dict(color=color, width=2),
                marker=dict(size=8)
            ))
        
        fig.update_layout(
            title=dict(text='Gap to Pole by Track - Top 5 Qualifiers', font=dict(size=18)),
            xaxis_title='Grand Prix',
            yaxis_title='Gap to Pole (seconds)',
            template='plotly_dark',
            height=500,
            xaxis_tickangle=-45,
            legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='center', x=0.5),
            margin=dict(l=50, r=50, t=100, b=120)
        )
        fig.show()
    else:
        print('No data available for track comparison')

## 8. Qualifying Consistency

In [None]:
# =============================================================================
# QUALIFYING CONSISTENCY (STANDARD DEVIATION)
# =============================================================================
if not df_quali.empty:
    # Calculate position standard deviation (lower = more consistent)
    quali_consistency = df_quali.groupby('Driver')['Position'].std().sort_values()
    
    # Get team colors
    driver_teams = df_quali.groupby('Driver')['Team'].first()
    colors = [get_team_color(driver_teams.get(d, '')) for d in quali_consistency.index]
    
    fig = go.Figure(go.Bar(
        x=quali_consistency.values,
        y=quali_consistency.index,
        orientation='h',
        marker_color=colors,
        text=[f'{v:.2f}' for v in quali_consistency.values],
        textposition='outside',
        textfont=dict(size=10)
    ))
    
    fig.update_layout(
        title=dict(
            text='Qualifying Consistency (Position Std Dev - Lower = More Consistent)',
            font=dict(size=16)
        ),
        xaxis_title='Standard Deviation',
        yaxis_title='',
        template='plotly_dark',
        height=600,
        margin=dict(l=150, r=80, t=80, b=50)
    )
    fig.show()

In [None]:
# =============================================================================
# QUALIFYING POSITION DISTRIBUTION - BOX PLOT
# =============================================================================
if not df_quali.empty:
    # Sort drivers by median position
    driver_order = df_quali.groupby('Driver')['Position'].median().sort_values().index.tolist()
    
    fig = go.Figure()
    
    for driver in driver_order:
        driver_data = df_quali[df_quali['Driver'] == driver]
        team = driver_data['Team'].iloc[0] if len(driver_data) > 0 else ''
        color = get_team_color(team)
        
        fig.add_trace(go.Box(
            y=driver_data['Position'],
            name=driver,
            marker_color=color,
            boxmean=True
        ))
    
    fig.update_layout(
        title=dict(text='Qualifying Position Distribution by Driver', font=dict(size=18)),
        xaxis_title='Driver',
        yaxis_title='Qualifying Position',
        yaxis=dict(autorange='reversed'),
        template='plotly_dark',
        height=600,
        xaxis_tickangle=-45,
        showlegend=False,
        margin=dict(l=50, r=50, t=80, b=150)
    )
    fig.show()

In [None]:
# =============================================================================
# FRONT ROW LOCKOUTS
# =============================================================================
if not df_quali.empty:
    front_row = df_quali[df_quali['Position'] <= 2][['Track', 'Driver', 'Team', 'Position']]
    front_row_teams = front_row.groupby(['Track', 'Team']).size().reset_index(name='Drivers')
    lockouts = front_row_teams[front_row_teams['Drivers'] == 2]
    
    if len(lockouts) > 0:
        print('=' * 60)
        print('    FRONT ROW LOCKOUTS (1-2 by same team)')
        print('=' * 60)
        display(lockouts)
    else:
        print('No front row lockouts this season')

---

## Summary

This notebook analyzed qualifying performance for the 2025 F1 season:

- **Pole Position Statistics** - Distribution of poles across drivers
- **Q3 Performance** - Appearances and average positions
- **Qualifying Heatmap** - Visual matrix of positions per race
- **Time Improvement** - Q1 to Q3 progression analysis
- **Team Performance** - Constructor qualifying comparison
- **Gap to Pole** - Time deficit analysis
- **Consistency Metrics** - Position variance analysis

---
*Created with F1 2025 Season Data Analysis Pipeline*

In [14]:
# Load Qualifying Data
data_path = project_root / 'data' / 'Formula1_2025Season_QualifyingResults.csv'
df_quali = pd.read_csv(data_path)
print(f'Loaded {len(df_quali)} qualifying results from {df_quali["Track"].nunique()} races')
df_quali.head()

Loaded 459 qualifying results from 23 races


Unnamed: 0,Track,Position,No,Driver,Team,Q1,Q2,Q3,Laps
0,Australia,1,4,Lando Norris,McLaren Mercedes,1:15.912,1:15.415,1:15.096,20
1,Australia,2,81,Oscar Piastri,McLaren Mercedes,1:16.062,1:15.468,1:15.180,18
2,Australia,3,1,Max Verstappen,Red Bull Racing Honda RBPT,1:16.018,1:15.565,1:15.481,17
3,Australia,4,63,George Russell,Mercedes,1:15.971,1:15.798,1:15.546,21
4,Australia,5,22,Yuki Tsunoda,Racing Bulls Honda RBPT,1:16.225,1:16.009,1:15.670,18


## 1. Data Preprocessing

In [15]:
# Function to convert time string to seconds
def time_to_seconds(time_str):
    if pd.isna(time_str) or time_str == '' or time_str == 'No Time':
        return np.nan
    try:
        if ':' in str(time_str):
            parts = str(time_str).split(':')
            return float(parts[0]) * 60 + float(parts[1])
        return float(time_str)
    except:
        return np.nan

# Convert qualifying times to seconds
df_quali['Q1_sec'] = df_quali['Q1'].apply(time_to_seconds)
df_quali['Q2_sec'] = df_quali['Q2'].apply(time_to_seconds)
df_quali['Q3_sec'] = df_quali['Q3'].apply(time_to_seconds)

print('Time conversion complete!')
df_quali[['Driver', 'Q1', 'Q1_sec', 'Q2', 'Q2_sec', 'Q3', 'Q3_sec']].head(10)

Time conversion complete!


Unnamed: 0,Driver,Q1,Q1_sec,Q2,Q2_sec,Q3,Q3_sec
0,Lando Norris,1:15.912,75.912,1:15.415,75.415,1:15.096,75.096
1,Oscar Piastri,1:16.062,76.062,1:15.468,75.468,1:15.180,75.18
2,Max Verstappen,1:16.018,76.018,1:15.565,75.565,1:15.481,75.481
3,George Russell,1:15.971,75.971,1:15.798,75.798,1:15.546,75.546
4,Yuki Tsunoda,1:16.225,76.225,1:16.009,76.009,1:15.670,75.67
5,Alexander Albon,1:16.245,76.245,1:16.017,76.017,1:15.737,75.737
6,Charles Leclerc,1:16.029,76.029,1:15.827,75.827,1:15.755,75.755
7,Lewis Hamilton,1:16.213,76.213,1:15.919,75.919,1:15.973,75.973
8,Pierre Gasly,1:16.328,76.328,1:16.112,76.112,1:15.980,75.98
9,Carlos Sainz,1:16.360,76.36,1:15.931,75.931,1:16.062,76.062


## 2. Pole Positions

In [16]:
# Pole position for each race
poles = df_quali[df_quali['Position'] == 1][['Track', 'Driver', 'Team', 'Q3']]
print('Pole Positions 2025 Season:')
poles

Pole Positions 2025 Season:


Unnamed: 0,Track,Driver,Team,Q3


In [17]:
# Pole count per driver
pole_count = poles['Driver'].value_counts()

fig = px.pie(values=pole_count.values, names=pole_count.index,
             title='Pole Position Distribution - 2025 Season',
             color_discrete_sequence=px.colors.sequential.Plasma_r)
fig.update_layout(template='plotly_dark')
fig.show()

## 3. Q3 Performance Analysis

In [18]:
# Drivers who reached Q3 most often
q3_appearances = df_quali[df_quali['Q3_sec'].notna()].groupby('Driver').size().sort_values(ascending=False)

fig = px.bar(q3_appearances.reset_index(name='Q3 Appearances'), 
             x='Q3 Appearances', y='Driver', orientation='h',
             color='Q3 Appearances', color_continuous_scale='Viridis',
             title='Q3 Appearances per Driver')
fig.update_layout(yaxis={'categoryorder':'total ascending'}, template='plotly_dark')
fig.show()

In [19]:
# Average qualifying position
avg_quali_pos = df_quali.groupby('Driver')['Position'].mean().sort_values()

fig = px.bar(avg_quali_pos.reset_index(name='Avg Quali Position'),
             x='Avg Quali Position', y='Driver', orientation='h',
             color='Avg Quali Position', color_continuous_scale='RdYlGn_r',
             title='Average Qualifying Position per Driver')
fig.update_layout(yaxis={'categoryorder':'total ascending'}, template='plotly_dark')
fig.show()

TypeError: agg function failed [how->mean,dtype->object]

## 4. Qualifying Heatmap

In [20]:
# Driver vs Track Qualifying Position Heatmap
quali_pivot = df_quali.pivot(index='Driver', columns='Track', values='Position')

fig = px.imshow(quali_pivot, 
                color_continuous_scale='RdYlGn_r',
                title='Qualifying Positions per Driver per Race',
                labels={'color': 'Position'})
fig.update_layout(template='plotly_dark', height=700)
fig.show()

## 5. Q1 vs Q2 vs Q3 Comparison

In [21]:
# Time improvement through qualifying sessions
q_times = df_quali[df_quali['Q3_sec'].notna()].copy()

# Calculate improvement Q1 to Q3
q_times['Q1_to_Q3_improvement'] = q_times['Q1_sec'] - q_times['Q3_sec']

# Average improvement per driver
avg_improvement = q_times.groupby('Driver')['Q1_to_Q3_improvement'].mean().sort_values(ascending=False)

fig = px.bar(avg_improvement.reset_index(name='Avg Improvement (sec)'),
             x='Avg Improvement (sec)', y='Driver', orientation='h',
             color='Avg Improvement (sec)', color_continuous_scale='Greens',
             title='Average Time Improvement Q1 to Q3 (seconds)')
fig.update_layout(yaxis={'categoryorder':'total ascending'}, template='plotly_dark')
fig.show()

## 6. Team Qualifying Performance

In [22]:
# Average quali position per team
team_quali = df_quali.groupby('Team')['Position'].mean().sort_values()

fig = px.bar(team_quali.reset_index(name='Avg Quali Position'),
             x='Team', y='Avg Quali Position',
             color='Avg Quali Position', color_continuous_scale='RdYlGn_r',
             title='Average Qualifying Position per Team')
fig.update_layout(template='plotly_dark', xaxis_tickangle=-45)
fig.show()

TypeError: agg function failed [how->mean,dtype->object]

In [23]:
# Team Q3 rate
total_entries = df_quali.groupby('Team').size()
q3_entries = df_quali[df_quali['Q3_sec'].notna()].groupby('Team').size()
q3_rate = (q3_entries / total_entries * 100).fillna(0).sort_values(ascending=False)

fig = px.bar(q3_rate.reset_index(name='Q3 Rate (%)'),
             x='Team', y='Q3 Rate (%)',
             color='Q3 Rate (%)', color_continuous_scale='Blues',
             title='Q3 Qualification Rate per Team (%)')
fig.update_layout(template='plotly_dark', xaxis_tickangle=-45)
fig.show()

## 7. Qualifying Gap Analysis

In [24]:
# Gap to pole per track
def calculate_gap_to_pole(group):
    pole_time = group[group['Position'] == 1]['Q3_sec'].values
    if len(pole_time) > 0:
        group['Gap_to_Pole'] = group['Q3_sec'] - pole_time[0]
    else:
        group['Gap_to_Pole'] = np.nan
    return group

df_quali = df_quali.groupby('Track', group_keys=False).apply(calculate_gap_to_pole)

# Average gap to pole per driver
avg_gap = df_quali[df_quali['Q3_sec'].notna()].groupby('Driver')['Gap_to_Pole'].mean().sort_values()

fig = px.bar(avg_gap.reset_index(name='Avg Gap to Pole (sec)'),
             x='Avg Gap to Pole (sec)', y='Driver', orientation='h',
             color='Avg Gap to Pole (sec)', color_continuous_scale='RdYlGn_r',
             title='Average Gap to Pole Position (Q3 drivers only)')
fig.update_layout(yaxis={'categoryorder':'total ascending'}, template='plotly_dark')
fig.show()





## 8. Qualifying Position Distribution

In [25]:
# Position distribution for top teams
top_teams = ['Red Bull Racing', 'McLaren', 'Ferrari', 'Mercedes']
df_top_teams = df_quali[df_quali['Team'].isin(top_teams)]

fig = px.box(df_top_teams, x='Team', y='Position', color='Team',
             title='Qualifying Position Distribution - Top Teams')
fig.update_layout(template='plotly_dark')
fig.show()





In [26]:
# Front row lockouts
front_row = df_quali[df_quali['Position'] <= 2][['Track', 'Driver', 'Team', 'Position']]
front_row_teams = front_row.groupby(['Track', 'Team']).size().reset_index(name='Drivers')
lockouts = front_row_teams[front_row_teams['Drivers'] == 2]

print('Front Row Lockouts (1-2 finish by same team):')
lockouts

TypeError: '<=' not supported between instances of 'str' and 'int'

## 9. Qualifying Consistency

In [27]:
# Standard deviation of qualifying position (lower = more consistent)
quali_consistency = df_quali.groupby('Driver')['Position'].std().sort_values()

fig = px.bar(quali_consistency.reset_index(name='Position StdDev'),
             x='Position StdDev', y='Driver', orientation='h',
             color='Position StdDev', color_continuous_scale='RdYlGn_r',
             title='Qualifying Consistency (lower = more consistent)')
fig.update_layout(yaxis={'categoryorder':'total ascending'}, template='plotly_dark')
fig.show()

ValueError: could not convert string to float: 'NC'

---
## Summary

Notebook ini menganalisis performa qualifying 2025 dengan:
- Pole position statistics
- Q1/Q2/Q3 performance analysis
- Team qualifying comparison
- Gap to pole analysis
- Qualifying consistency metrics