In [2]:
"""
F1 2025 Season Deep Analysis (Rounds 1-14)
==========================================
A comprehensive analysis of the first 14 races of the 2025 Formula 1 season.
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

# Set style for better visualizations
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# Read the data
df = pd.read_csv('F1_2025_R1_R14_updated.csv')

# Data preprocessing
def clean_position(pos):
    """Convert position strings to numeric values"""
    if pd.isna(pos) or pos == 'DNF' or pos == 'DNS' or pos == 'DSQ' or pos == 'NC':
        return np.nan
    try:
        return int(pos)
    except:
        return np.nan

def clean_time(time_str):
    """Convert time strings to seconds"""
    if pd.isna(time_str) or time_str == 'DNF' or time_str == 'DNS':
        return np.nan
    try:
        if ':' in str(time_str):
            parts = str(time_str).split(':')
            minutes = float(parts[0])
            seconds = float(parts[1].replace('s', ''))
            return minutes * 60 + seconds
        elif '+' in str(time_str):
            return float(str(time_str).replace('+', '').replace('s', ''))
        else:
            return float(str(time_str).replace('s', ''))
    except:
        return np.nan

# Clean position columns
position_cols = ['P1_Pos', 'P2_Pos', 'P3_Pos', 'Qualifying_Pos', 'Starting_Grid_Pos',
                'Race_Result_Pos', 'Sprint_Pos', 'Sprint_Grid_Pos', 'Sprint_Qualifying_Pos']
for col in position_cols:
    df[col] = df[col].apply(clean_position)

# ===========================================
# 1. CHAMPIONSHIP STANDINGS ANALYSIS
# ===========================================

print("=" * 60)
print("F1 2025 SEASON ANALYSIS (Rounds 1-14)")
print("=" * 60)

# Calculate total points per driver
driver_points = df.groupby('DRIVER').agg({
    'Race_Result_Pts': 'sum',
    'Sprint_Pts': lambda x: x.sum() if x.notna().any() else 0
}).reset_index()
driver_points['Total_Points'] = driver_points['Race_Result_Pts'] + driver_points['Sprint_Pts']
driver_points = driver_points.sort_values('Total_Points', ascending=False)

print("\n📊 DRIVERS' CHAMPIONSHIP STANDINGS")
print("-" * 40)
for i, row in driver_points.head(10).iterrows():
    print(f"{i+1:2}. {row['DRIVER']:20} {row['Total_Points']:3.0f} pts "
          f"(Race: {row['Race_Result_Pts']:.0f}, Sprint: {row['Sprint_Pts']:.0f})")

# Team standings
df['Team_Points'] = df['Race_Result_Pts'].fillna(0) + df['Sprint_Pts'].fillna(0)
team_points = df.groupby('TEAM')['Team_Points'].sum().sort_values(ascending=False)

print("\n🏁 CONSTRUCTORS' CHAMPIONSHIP STANDINGS")
print("-" * 40)
for i, (team, points) in enumerate(team_points.head(10).items()):
    print(f"{i+1:2}. {team:20} {points:3.0f} pts")

# ===========================================
# 2. PERFORMANCE CONSISTENCY ANALYSIS
# ===========================================

print("\n📈 DRIVER CONSISTENCY ANALYSIS")
print("-" * 40)

# Calculate consistency metrics for top drivers
consistency_metrics = []
for driver in driver_points.head(10)['DRIVER']:
    driver_data = df[df['DRIVER'] == driver]
    race_positions = driver_data['Race_Result_Pos'].dropna()

    if len(race_positions) > 0:
        consistency_metrics.append({
            'Driver': driver,
            'Avg_Position': race_positions.mean(),
            'Std_Dev': race_positions.std(),
            'Best': race_positions.min(),
            'Worst': race_positions.max(),
            'Podiums': (race_positions <= 3).sum(),
            'Wins': (race_positions == 1).sum(),
            'DNFs': driver_data['Race_Result_Pos'].isna().sum()
        })

consistency_df = pd.DataFrame(consistency_metrics)
consistency_df = consistency_df.sort_values('Avg_Position')

print("\nMost Consistent Drivers (by average position):")
for _, row in consistency_df.head(5).iterrows():
    print(f"{row['Driver']:20} Avg: {row['Avg_Position']:.2f} "
          f"(σ={row['Std_Dev']:.2f}) Wins: {row['Wins']:.0f} Podiums: {row['Podiums']:.0f}")

# ===========================================
# 3. QUALIFYING VS RACE PERFORMANCE
# ===========================================

print("\n🏎️ QUALIFYING VS RACE PERFORMANCE")
print("-" * 40)

# Analyze position changes from qualifying to race
df['Position_Change'] = df['Qualifying_Pos'] - df['Race_Result_Pos']
avg_position_change = df.groupby('DRIVER')['Position_Change'].mean().sort_values(ascending=False)

print("\nBest Race Day Performers (avg positions gained):")
for driver, change in avg_position_change.head(5).items():
    if not pd.isna(change):
        print(f"{driver:20} +{change:.2f} positions on average")

print("\nPoor Race Day Performers (avg positions lost):")
for driver, change in avg_position_change.tail(5).items():
    if not pd.isna(change):
        print(f"{driver:20} {change:.2f} positions on average")

# ===========================================
# 4. TEAM PERFORMANCE ANALYSIS
# ===========================================

print("\n🏆 TEAM PERFORMANCE METRICS")
print("-" * 40)

team_stats = df.groupby('TEAM').agg({
    'Race_Result_Pos': lambda x: x.dropna().mean(),
    'Qualifying_Pos': lambda x: x.dropna().mean(),
    'Race_Result_Pts': 'sum',
    'Sprint_Pts': lambda x: x.sum() if x.notna().any() else 0
}).round(2)

team_stats['Total_Points'] = team_stats['Race_Result_Pts'] + team_stats['Sprint_Pts']
team_stats = team_stats.sort_values('Total_Points', ascending=False)

print("\nTeam Performance Summary:")
print(f"{'Team':<20} {'Avg Quali':<10} {'Avg Race':<10} {'Points':<8}")
print("-" * 48)
for team, row in team_stats.iterrows():
    print(f"{team:<20} {row['Qualifying_Pos']:<10.2f} {row['Race_Result_Pos']:<10.2f} "
          f"{row['Total_Points']:<8.0f}")

# ===========================================
# 5. SPRINT RACE ANALYSIS
# ===========================================

print("\n⚡ SPRINT RACE ANALYSIS")
print("-" * 40)

sprint_data = df[df['Sprint_Pos'].notna()]
sprint_gps = sprint_data['GP'].unique()
print(f"Sprint races held at: {', '.join(sprint_gps)}")

sprint_points = sprint_data.groupby('DRIVER')['Sprint_Pts'].sum().sort_values(ascending=False)
print("\nTop Sprint Race Scorers:")
for driver, points in sprint_points.head(5).items():
    if points > 0:
        print(f"{driver:20} {points:.0f} sprint points")

# ===========================================
# 6. PERFORMANCE TRENDS
# ===========================================

print("\n📉 SEASON PROGRESSION ANALYSIS")
print("-" * 40)

# Extract race number from GP column
df['Race_Number'] = df['GP'].str.extract(r'R(\d+)').astype(float)

# Analyze top 3 drivers' progression
top_drivers = driver_points.head(3)['DRIVER'].tolist()

print("\nPoints progression for championship leaders:")
for driver in top_drivers:
    driver_races = df[df['DRIVER'] == driver].sort_values('Race_Number')
    cumulative_points = (driver_races['Race_Result_Pts'].fillna(0) +
                        driver_races['Sprint_Pts'].fillna(0)).cumsum()
    if len(cumulative_points) > 0:
        latest_points = cumulative_points.iloc[-1]
        print(f"{driver:20} {latest_points:.0f} pts after {len(driver_races)} races")

# ===========================================
# 7. INTERESTING STATISTICS
# ===========================================

print("\n🎯 INTERESTING STATISTICS")
print("-" * 40)

# Fastest lap analysis
fastest_laps = df[df['Fastest_Lap'] == df.groupby('GP')['Fastest_Lap'].transform('min')]
fastest_lap_counts = fastest_laps['DRIVER'].value_counts()

print("\nMost Fastest Laps:")
for driver, count in fastest_lap_counts.head(5).items():
    print(f"{driver:20} {count} fastest laps")

# DNF analysis
dnf_counts = df[df['Race_Result_Pos'].isna()].groupby('DRIVER').size().sort_values(ascending=False)
print("\nMost DNFs:")
for driver, count in dnf_counts.head(5).items():
    if count > 0:
        print(f"{driver:20} {count} DNFs")

# Perfect weekends (Pole + Win + Fastest Lap)
perfect_weekends = df[(df['Qualifying_Pos'] == 1) &
                      (df['Race_Result_Pos'] == 1) &
                      (df['Fastest_Lap_Pos'] == 1)]
if len(perfect_weekends) > 0:
    print("\nPerfect Weekends (Pole + Win + Fastest Lap):")
    for _, row in perfect_weekends.iterrows():
        print(f"{row['DRIVER']:20} at {row['GP']}")

# ===========================================
# 8. ROOKIE ANALYSIS
# ===========================================

print("\n🌟 ROOKIE PERFORMANCE")
print("-" * 40)

# Identify potential rookies (drivers with limited appearances or known rookies)
rookies = ['Kimi Antonelli', 'Oliver Bearman', 'Gabriel Bortoleto',
           'Jack Doohan', 'Isack Hadjar']

rookie_data = []
for rookie in rookies:
    rookie_name = df[df['DRIVER'].str.contains(rookie.split()[-1], na=False)]['DRIVER'].unique()
    if len(rookie_name) > 0:
        rookie_df = df[df['DRIVER'] == rookie_name[0]]
        if len(rookie_df) > 0:
            rookie_data.append({
                'Driver': rookie_name[0],
                'Races': len(rookie_df),
                'Points': rookie_df['Race_Result_Pts'].sum() + rookie_df['Sprint_Pts'].fillna(0).sum(),
                'Best_Result': rookie_df['Race_Result_Pos'].min(),
                'Avg_Position': rookie_df['Race_Result_Pos'].mean()
            })

if rookie_data:
    rookie_df = pd.DataFrame(rookie_data).sort_values('Points', ascending=False)
    print("\nRookie Standings:")
    for _, row in rookie_df.iterrows():
        print(f"{row['Driver']:20} {row['Points']:.0f} pts | "
              f"Best: P{row['Best_Result']:.0f} | Avg: {row['Avg_Position']:.1f}")

# ===========================================
# 9. HEAD-TO-HEAD TEAMMATE BATTLES
# ===========================================

print("\n👥 TEAMMATE HEAD-TO-HEAD")
print("-" * 40)

# Group drivers by team and compare
team_battles = {}
for team in df['TEAM'].unique():
    team_drivers = df[df['TEAM'] == team]['DRIVER'].unique()
    if len(team_drivers) >= 2:
        # Take the two most frequent drivers for each team
        driver_counts = df[df['TEAM'] == team]['DRIVER'].value_counts()
        main_drivers = driver_counts.head(2).index.tolist()

        if len(main_drivers) == 2:
            d1_data = df[(df['DRIVER'] == main_drivers[0]) & (df['TEAM'] == team)]
            d2_data = df[(df['DRIVER'] == main_drivers[1]) & (df['TEAM'] == team)]

            # Find common races
            common_gps = set(d1_data['GP'].unique()) & set(d2_data['GP'].unique())

            quali_wins = 0
            race_wins = 0
            for gp in common_gps:
                d1_quali = d1_data[d1_data['GP'] == gp]['Qualifying_Pos'].values[0]
                d2_quali = d2_data[d2_data['GP'] == gp]['Qualifying_Pos'].values[0]
                d1_race = d1_data[d1_data['GP'] == gp]['Race_Result_Pos'].values[0]
                d2_race = d2_data[d2_data['GP'] == gp]['Race_Result_Pos'].values[0]

                if not pd.isna(d1_quali) and not pd.isna(d2_quali):
                    if d1_quali < d2_quali:
                        quali_wins += 1
                if not pd.isna(d1_race) and not pd.isna(d2_race):
                    if d1_race < d2_race:
                        race_wins += 1

            if len(common_gps) > 0:
                team_battles[team] = {
                    'Driver1': main_drivers[0],
                    'Driver2': main_drivers[1],
                    'Quali': f"{quali_wins}-{len(common_gps)-quali_wins}",
                    'Race': f"{race_wins}-{len(common_gps)-race_wins}"
                }

print("\nTeammate Qualifying & Race Battles:")
for team, battle in team_battles.items():
    print(f"\n{team}:")
    print(f"  {battle['Driver1']} vs {battle['Driver2']}")
    print(f"  Qualifying: {battle['Quali']} | Race: {battle['Race']}")

# ===========================================
# 10. CHAMPIONSHIP MOMENTUM
# ===========================================

print("\n💨 CHAMPIONSHIP MOMENTUM (Last 5 Races)")
print("-" * 40)

# Analyze last 5 races
last_races = df['Race_Number'].dropna().unique()
last_races = sorted(last_races)[-5:]

recent_form = []
for driver in driver_points.head(10)['DRIVER']:
    recent_data = df[(df['DRIVER'] == driver) & (df['Race_Number'].isin(last_races))]
    recent_points = (recent_data['Race_Result_Pts'].fillna(0) +
                    recent_data['Sprint_Pts'].fillna(0)).sum()
    avg_position = recent_data['Race_Result_Pos'].mean()

    recent_form.append({
        'Driver': driver,
        'Recent_Points': recent_points,
        'Avg_Position': avg_position
    })

recent_form_df = pd.DataFrame(recent_form).sort_values('Recent_Points', ascending=False)

print("\nDrivers in Best Form (last 5 races):")
for _, row in recent_form_df.head(5).iterrows():
    print(f"{row['Driver']:20} {row['Recent_Points']:.0f} pts "
          f"(Avg P{row['Avg_Position']:.1f})")

print("\n" + "=" * 60)
print("END OF ANALYSIS")
print("=" * 60)

F1 2025 SEASON ANALYSIS (Rounds 1-14)

📊 DRIVERS' CHAMPIONSHIP STANDINGS
----------------------------------------
26. Oscar Piastri        284 pts (Race: 263, Sprint: 21)
19. Lando Norris         275 pts (Race: 260, Sprint: 15)
23. Max Verstappen       187 pts (Race: 173, Sprint: 14)
14. George Russell       172 pts (Race: 162, Sprint: 10)
 6. Charles Leclerc      151 pts (Race: 142, Sprint: 9)
20. Lewis Hamilton       109 pts (Race: 95, Sprint: 14)
17. Kimi Antonelli        64 pts (Race: 60, Sprint: 4)
 1. Alexander Albon       54 pts (Race: 54, Sprint: 0)
24. Nico Hulkenberg       37 pts (Race: 37, Sprint: 0)
 8. Esteban Ocon          27 pts (Race: 23, Sprint: 4)

🏁 CONSTRUCTORS' CHAMPIONSHIP STANDINGS
----------------------------------------
 1. McLaren              559 pts
 2. Ferrari              260 pts
 3. Mercedes             236 pts
 4. Red Bull Racing      194 pts
 5. Williams              70 pts
 6. Aston Martin          52 pts
 7. Kick Sauber           51 pts
 8. Racing Bul