In [18]:
import requests
import pandas as pd
import os
import time
import concurrent.futures
from functools import lru_cache

BASE_URL = "https://api.jolpi.ca/ergast/f1"
OUTPUT_DIR = "f1_csv_data"
os.makedirs(OUTPUT_DIR, exist_ok=True)

session = requests.Session()

def convert_duration_to_seconds(duration_str):
    if not duration_str or duration_str.strip() == "":
        return None
    try:
        if ':' in duration_str:
            minutes, seconds = duration_str.split(':')
            return int(minutes) * 60 + float(seconds)
        return float(duration_str)
    except Exception:
        return None

@lru_cache(maxsize=None)
def get_available_seasons():
    data = fetch_api_data("seasons.json?limit=1000")
    if not data:
        return []
    seasons = data["MRData"]["SeasonTable"]["Seasons"]
    return [int(s["season"]) for s in seasons if int(s["season"]) >= 2010]

@lru_cache(maxsize=None)
def get_rounds_for_season(season):
    data = fetch_api_data(f"{season}.json")
    if not data:
        return []
    races = data["MRData"]["RaceTable"]["Races"]
    return [int(r["round"]) for r in races]

@lru_cache(maxsize=512)
def fetch_api_data(endpoint, retries=5, backoff=2.0):
    url = f"{BASE_URL}/{endpoint}"
    for attempt in range(retries):
        try:
            response = session.get(url)
            if response.status_code == 429:
                wait = backoff * (2 ** attempt)
                print(f"⏳ Rate limit hit. Sleeping for {wait:.1f} seconds...")
                time.sleep(wait)
                continue
            response.raise_for_status()
            time.sleep(0.2)  # Global pacing to reduce throttle risk
            return response.json()
        except Exception as e:
            if attempt == retries - 1:
                print(f"❌ Failed after {retries} attempts: {url} - {e}")
                return None
            time.sleep(backoff * (2 ** attempt))
    return None

def process_season_round(args):
    season, round_num = args
    results = {}

    # 1. Qualifying
    qual_data = fetch_api_data(f"{season}/{round_num}/qualifying.json")
    if qual_data and qual_data["MRData"]["RaceTable"]["Races"]:
        results["qualifying"] = []
        for race in qual_data["MRData"]["RaceTable"]["Races"]:
            for result in race.get("QualifyingResults", []):
                results["qualifying"].append({
                    "season": season,
                    "round": round_num,
                    "driver": result["Driver"]["familyName"],
                    "constructor": result["Constructor"]["name"],
                    "q1": result.get("Q1"),
                    "q2": result.get("Q2"),
                    "q3": result.get("Q3"),
                })

    # 2, 3, 6. Race Results + Season Performance + DNF
    race_data = fetch_api_data(f"{season}/{round_num}/results.json")
    if race_data and race_data["MRData"]["RaceTable"]["Races"]:
        results["race_results"] = []
        results["season_performance"] = []
        results["dnf"] = []

        for race in race_data["MRData"]["RaceTable"]["Races"]:
            for result in race.get("Results", []):
                status = result.get("status", "").lower()
                dnf = int(any(x in status for x in ["retired", "accident", "engine", "brakes", "hydraulic"]))

                results["race_results"].append({
                    "season": season,
                    "round": round_num,
                    "driver": result["Driver"]["familyName"],
                    "constructor": result["Constructor"]["name"],
                    "position": result.get("position"),
                    "grid": result.get("grid"),
                    "status": result.get("status"),
                    "points": result.get("points")
                })

                results["season_performance"].append({
                    "season": season,
                    "round": round_num,
                    "driver": result["Driver"]["familyName"],
                    "constructor": result["Constructor"]["name"],
                    "position": result.get("position"),
                    "points": result.get("points")
                })

                results["dnf"].append({
                    "season": season,
                    "round": round_num,
                    "driver": result["Driver"]["familyName"],
                    "constructor": result["Constructor"]["name"],
                    "status": status,
                    "dnf": dnf
                })

    # 4. Pit Stop Efficiency
    pit_data = fetch_api_data(f"{season}/{round_num}/pitstops.json?limit=100")
    if pit_data and pit_data["MRData"]["RaceTable"]["Races"]:
        results["pit_stops"] = []
        for race in pit_data["MRData"]["RaceTable"]["Races"]:
            for stop in race.get("PitStops", []):
                duration = convert_duration_to_seconds(stop.get("duration"))
                results["pit_stops"].append({
                    "season": season,
                    "round": round_num,
                    "driverId": stop["driverId"],
                    "lap": int(stop["lap"]),
                    "stop": int(stop["stop"]),
                    "time": stop["time"],
                    "duration_seconds": duration
                })

    return results

def process_constructors(season):
    data = fetch_api_data(f"{season}/constructors.json?limit=100")
    if data and data["MRData"]["ConstructorTable"]["Constructors"]:
        results = []
        for con in data["MRData"]["ConstructorTable"]["Constructors"]:
            results.append({
                "season": season,
                "constructorId": con["constructorId"],
                "name": con["name"],
                "nationality": con.get("nationality")
            })
        return results
    return []

def main():
    start_time = time.time()
    print("🔧 Fetching seasons and rounds...")
    all_combinations = [
        (season, round_num)
        for season in get_available_seasons()
        for round_num in get_rounds_for_season(season)
    ]

    qualifying_data = []
    race_results_data = []
    season_performance_data = []
    pit_stop_data = []
    dnf_data = []

    constructor_data = []
    print("📦 Fetching constructor data...")
    for season in get_available_seasons():
        constructor_data.extend(process_constructors(season))

    print(f"🚀 Processing {len(all_combinations)} season-round combinations...")
    with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
        futures = {
            executor.submit(process_season_round, combo): combo for combo in all_combinations
        }

        for future in concurrent.futures.as_completed(futures):
            season, round_num = futures[future]
            try:
                data = future.result()
                print(f"✅ Season {season}, Round {round_num} done")
                qualifying_data.extend(data.get("qualifying", []))
                race_results_data.extend(data.get("race_results", []))
                season_performance_data.extend(data.get("season_performance", []))
                pit_stop_data.extend(data.get("pit_stops", []))
                dnf_data.extend(data.get("dnf", []))
            except Exception as e:
                print(f"❌ Error processing Season {season}, Round {round_num}: {e}")

    print("💾 Saving to CSV...")
    pd.DataFrame(qualifying_data).to_csv(f"{OUTPUT_DIR}/qualifying_times.csv", index=False)
    pd.DataFrame(race_results_data).to_csv(f"{OUTPUT_DIR}/race_results.csv", index=False)
    pd.DataFrame(season_performance_data).to_csv(f"{OUTPUT_DIR}/season_performance.csv", index=False)
    pd.DataFrame(pit_stop_data).to_csv(f"{OUTPUT_DIR}/pit_stop_efficiency.csv", index=False)
    pd.DataFrame(constructor_data).to_csv(f"{OUTPUT_DIR}/constructors.csv", index=False)
    pd.DataFrame(dnf_data).to_csv(f"{OUTPUT_DIR}/dnf_probability.csv", index=False)

    end_time = time.time()
    print(f"✅ Done! Total time: {end_time - start_time:.2f} seconds")

if __name__ == "__main__":
    main()

🔧 Fetching seasons and rounds...
⏳ Rate limit hit. Sleeping for 2.0 seconds...
⏳ Rate limit hit. Sleeping for 4.0 seconds...
⏳ Rate limit hit. Sleeping for 8.0 seconds...
⏳ Rate limit hit. Sleeping for 16.0 seconds...
⏳ Rate limit hit. Sleeping for 32.0 seconds...
📦 Fetching constructor data...
🚀 Processing 305 season-round combinations...
⏳ Rate limit hit. Sleeping for 2.0 seconds...
✅ Season 2010, Round 1 done
✅ Season 2010, Round 2 done
✅ Season 2010, Round 4 done
⏳ Rate limit hit. Sleeping for 4.0 seconds...
✅ Season 2010, Round 5 done
⏳ Rate limit hit. Sleeping for 2.0 seconds...
✅ Season 2010, Round 7 done
⏳ Rate limit hit. Sleeping for 4.0 seconds...
✅ Season 2010, Round 8 done
⏳ Rate limit hit. Sleeping for 8.0 seconds...
✅ Season 2010, Round 9 done
⏳ Rate limit hit. Sleeping for 8.0 seconds...
✅ Season 2010, Round 10 done
✅ Season 2010, Round 11 done
⏳ Rate limit hit. Sleeping for 2.0 seconds...
⏳ Rate limit hit. Sleeping for 16.0 seconds...
⏳ Rate limit hit. Sleeping for 4.0 