<a href="https://colab.research.google.com/github/perchedinthedark/formula1_predictions/blob/main/kokakolitza.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the FastF1 package used below to fetch Formula 1 session data
!pip install fastf1

In [None]:
import fastf1
import os
import pandas as pd

# Create the local cache folder if it is missing, then point FastF1's
# cache at it.
cache_dir = "./cache"
os.makedirs(name=cache_dir, exist_ok=True)  # exist_ok: no error when the folder is already there
fastf1.Cache.enable_cache(cache_dir)

In [None]:
# Season to collect, and that season's event schedule with testing
# events excluded.
season = 2024
gp_list = fastf1.get_event_schedule(year=season, include_testing=False)

In [None]:
# Build `data`: one dict per (session, driver) for the three fastest Q3
# drivers of every round in `gp_list`. Each round contributes their
# Qualifying rows and, when a Sprint Qualifying session can be loaded, their
# Sprint Qualifying rows as well. Rounds whose event or qualifying session
# cannot be loaded are reported with print() and skipped.
data = []
# Track processed rounds to avoid duplicates
processed_rounds = set()

# Loop through each Grand Prix in the schedule
for _, gp in gp_list.iterrows():
    round_num = gp['RoundNumber']

    # Skip if we've already processed this round
    if round_num in processed_rounds:
        continue

    # Mark round as processed immediately to prevent retries
    processed_rounds.add(round_num)

    # 1. Load Event Data
    try:
        event = fastf1.get_event(season, round_num)
    except ValueError:
        # The schedule row exposes the event name as 'EventName'; the former
        # 'GP' key does not exist and turned this skip into a KeyError.
        print(f"Skipping Round {round_num}: {gp['EventName']} not available.")
        continue

    # 2. Load Qualifying Session Data
    try:
        session_q = event.get_session('Q')
        session_q.load()

        # Check if we have valid lap data
        if session_q.laps is None or session_q.laps.empty:
            print(f"No lap data available for Qualifying in {event.EventName}. Skipping.")
            continue
    except Exception as e:
        print(f"Failed to load qualifying session for {event.EventName} (Round {round_num}): {e}")
        continue

    # 3. Process Qualifying Data
    try:
        # Three fastest Q3 times (sort_values puts missing Q3 times last)
        q3_results = session_q.results.sort_values("Q3").head(3)
        q3_drivers = q3_results["Abbreviation"].tolist()  # Reused to filter sprint qualifying
        # Reference time for the gap column; guarded so an empty result table
        # still just produces no rows instead of raising.
        pole_time = q3_results.iloc[0].Q3 if not q3_results.empty else None

        # Find fastest practice lap across all practice sessions
        fastest_fp_lap = None
        for fp in ['FP1', 'FP2', 'FP3']:
            try:
                session_fp = event.get_session(fp)
                session_fp.load()
                fp_laps = session_fp.laps
                if fp_laps is None or fp_laps.empty:
                    continue
                session_best = fp_laps.pick_fastest()
                # Ignore sessions without a usable fastest lap: a None/NaT
                # value must never become the running minimum, because every
                # later "<" comparison against NaT is False.
                if session_best is None or pd.isna(session_best.LapTime):
                    continue
                if fastest_fp_lap is None or session_best.LapTime < fastest_fp_lap:
                    fastest_fp_lap = session_best.LapTime
            except Exception:
                print(f"Session {fp} not available for {event.EventName}. Skipping...")

        # Explicit None check: do not rely on Timedelta truthiness.
        fp_seconds = fastest_fp_lap.total_seconds() if fastest_fp_lap is not None else None

        # Tyre compound of each driver's quickest timed qualifying lap.
        # Laps without a lap time are dropped first so a driver who never set
        # a time cannot make the lookup fail for the whole round. The stable
        # sort keeps the earliest lap when two laps tie.
        laps = session_q.laps
        timed_laps = laps.dropna(subset=["LapTime"])
        fastest_laps = (
            timed_laps.sort_values("LapTime", kind="stable")
            .drop_duplicates(subset="Driver", keep="first")
        )
        compound_by_driver = dict(zip(fastest_laps["Driver"], fastest_laps["Compound"]))

        # Weather is a property of the session, not of a driver: average it
        # once instead of once per driver.
        try:
            air_temp = session_q.weather_data.AirTemp.mean()
            track_temp = session_q.weather_data.TrackTemp.mean()
            humidity = session_q.weather_data.Humidity.mean()
            wind_speed = session_q.weather_data.WindSpeed.mean()
        except Exception as e:
            print(f"Weather data not available for {event.EventName}: {e}")
            air_temp = track_temp = humidity = wind_speed = None

        # Process each of the top 3 drivers
        for _, row in q3_results.iterrows():
            driver = row.Abbreviation

            # None when the driver has no timed lap in this session
            tire_used = compound_by_driver.get(driver)

            # Mean of the Speed channel over the driver's fastest lap
            try:
                fastest_lap = laps.pick_driver(driver).pick_fastest()
                avg_speed = fastest_lap.get_car_data()['Speed'].mean()
            except Exception as e:
                print(f"Could not load car data for {driver} in {event.EventName}: {e}")
                avg_speed = None

            # Compile all data for this driver
            data.append({
                "GP": event.EventName,
                "Round": event.RoundNumber,
                "Session": "Qualifying",
                "Driver": driver,
                "Team": row.TeamName,
                "Position": row.Position,
                "Time (s)": row.Q3.total_seconds(),
                "Gap to P1 (s)": (row.Q3 - pole_time).total_seconds(),
                "Fastest Practice Lap (s)": fp_seconds,
                "Average Speed (km/h)": avg_speed,
                "Air Temperature (°C)": air_temp,
                "Track Temperature (°C)": track_temp,
                "Humidity (%)": humidity,
                "Wind Speed (m/s)": wind_speed,
                "Tire Used": tire_used
            })
    except Exception as e:
        print(f"Unexpected failure during processing of Round {round_num} ({event.EventName}): {e}")
        continue

    # 4. Sprint Qualifying - only for the three drivers selected above
    try:
        session_sq = event.get_session('SQ')
        session_sq.load()

        if session_sq.laps is None or session_sq.laps.empty:
            print(f"No lap data available for Sprint Qualifying in {event.EventName}. Skipping.")
        else:
            sq_results = session_sq.results

            # Timing column: prefer Q3, fall back to Time, else no time at all.
            # Every row shares the table's columns, so decide once.
            if "Q3" in sq_results.columns:
                time_col = "Q3"
            elif "Time" in sq_results.columns:
                time_col = "Time"
            else:
                time_col = None

            # Gap reference: the first row of the result table
            if time_col is not None and not sq_results.empty:
                sq_pole_time = sq_results.iloc[0][time_col]
            else:
                sq_pole_time = None

            # Session-level weather averages, computed once for all drivers
            try:
                weather_data = {
                    "Air Temperature (°C)": session_sq.weather_data.AirTemp.mean(),
                    "Track Temperature (°C)": session_sq.weather_data.TrackTemp.mean(),
                    "Humidity (%)": session_sq.weather_data.Humidity.mean(),
                    "Wind Speed (m/s)": session_sq.weather_data.WindSpeed.mean()
                }
            except Exception as e:
                print(f"Weather data not available for Sprint Qualifying in {event.EventName}: {e}")
                weather_data = {
                    "Air Temperature (°C)": None,
                    "Track Temperature (°C)": None,
                    "Humidity (%)": None,
                    "Wind Speed (m/s)": None
                }

            for _, row in sq_results.iterrows():
                driver = row.Abbreviation
                if driver not in q3_drivers:
                    continue

                try:
                    # Fastest sprint-qualifying lap: mean speed and compound
                    fastest_lap_sq = session_sq.laps.pick_driver(driver).pick_fastest()
                    avg_speed_sq = fastest_lap_sq.get_car_data()['Speed'].mean()
                    tire_used_sq = fastest_lap_sq["Compound"]
                except Exception as e:
                    print(f"Could not load lap data for {driver} in Sprint Qualifying: {e}")
                    avg_speed_sq = None
                    tire_used_sq = None

                # A missing time (no column, or NaT) yields None for both the
                # time and the gap instead of going through NaT arithmetic.
                q_time = row[time_col] if time_col is not None else None
                if q_time is not None and pd.notna(q_time):
                    time_sec = q_time.total_seconds()
                    gap = (
                        (q_time - sq_pole_time).total_seconds()
                        if sq_pole_time is not None
                        else None
                    )
                else:
                    time_sec = None
                    gap = None

                # Build data dictionary with all fields
                sq_data = {
                    "GP": event.EventName,
                    "Round": event.RoundNumber,
                    "Session": "Sprint Qualifying",
                    "Driver": driver,
                    "Team": row.TeamName,
                    "Position": row.Position,
                    "Time (s)": time_sec,
                    "Gap to P1 (s)": gap,
                    "Fastest Practice Lap (s)": fp_seconds,
                    "Average Speed (km/h)": avg_speed_sq,
                    "Tire Used": tire_used_sq
                }
                sq_data.update(weather_data)  # Add weather fields

                data.append(sq_data)

    except Exception as e:
        print(f"Sprint Qualifying not available for {event.EventName} (Round {round_num}): {e}")

In [None]:
# Turn the collected rows into a DataFrame and write it to CSV without the
# index column.
output_csv = "f1_2024_top3_qualifying_and_sprint_qualifying_with_speed_jej.csv"
df = pd.DataFrame(data)
df.to_csv(output_csv, index=False)
print("Data collection complete! CSV saved.")

In [None]:
# Download the generated CSV through the google.colab `files` helper.
# The file name must match the one written by the CSV-saving cell.
from google.colab import files
files.download('f1_2024_top3_qualifying_and_sprint_qualifying_with_speed_jej.csv')