In [32]:
import fastf1 as ff1
import pandas as pd

ff1.Cache.enable_cache('Data/.cache')

In [33]:
YEARS_TO_COLLECT = range(2019, 2024)
all_race_data = []
for year in YEARS_TO_COLLECT:
    print(f"Fetching data for year: {year}")
    # Get the event schedule for the year
    try:
        schedule = ff1.get_event_schedule(year, include_testing=False)
    except Exception as e:
        print(f"Could not get schedule for {year}: {e}")
        continue
    # Loop through each event in the schedule
    for index, event in schedule.iterrows():
        if event['EventFormat'] not in ['conventional', 'sprint_qualifying', 'sprint_shootout']: # Adjust as needed based on F1 formats
            continue

        event_name = event['EventName']
        print(f"  Processing Event: {event_name}")

        try:
            # --- Load Qualifying Data ---
            qualify_session = ff1.get_session(year, event_name, 'Q')
            qualify_session.load(laps=False, telemetry=False, weather=False, messages=False) # Only need results for grid pos
            if qualify_session.results is None:
                print(f"    No Qualifying results found for {event_name}, {year}. Skipping.")
                continue
            # Get qualifying positions into a dictionary for easy lookup
            qualifying_positions = qualify_session.results.set_index('Abbreviation')['GridPosition'].to_dict()

            # --- Load Race Data ---
            race_session = ff1.get_session(year, event_name, 'R') # 'R' for Race
            # Load necessary data - start with results, maybe add laps later if needed for pace analysis
            # Be mindful of memory: telemetry=True can be very heavy
            race_session.load(laps=True, telemetry=False, weather=True, messages=False)
            if race_session.results is None:
                print(f"    No Race results found for {event_name}, {year}. Skipping.")
                continue

            for drv_abbr, result in race_session.results.set_index('Abbreviation').iterrows():
                grid_pos = qualifying_positions.get(drv_abbr, None) # Get grid pos from qualify data

                # Check if grid position is valid (e.g., 0 means Pit Lane start, handle appropriately)
                if grid_pos is None or pd.isna(grid_pos) or grid_pos == 0:
                    grid_pos = 20 # Or max grid size, or handle as missing data - needs careful thought!

                # Basic features for this driver *for this race*
                driver_data = {
                    'Year': year,
                    'EventName': event_name,
                    'DriverAbbreviation': drv_abbr,
                    'TeamName': result['TeamName'],
                    'GridPosition': grid_pos,
                    'FinishPosition': result['Position'],
                    'Status': result['Status'],
                    'Points': result['Points'],
                    # TARGET VARIABLE: Did the driver win? (Position == 1)
                    'WonRace': 1 if result['Position'] == 1 else 0
                    # Add more features here! E.g., from race_session.laps, race_session.weather_data
                    # You'll also need to engineer features based on *previous* races (lagged features)
                }
                all_race_data.append(driver_data)

        except ff1.RateLimitExceededError as e:
            print(f"    Rate limit exceeded for {event_name}, {year}: {e}. Consider adding delays.")
            # Potentially add a time.sleep(60) here and retry or break
        except Exception as e:
            # Catch other potential errors during session loading or processing
            print(f"    An error occurred processing {event_name}, {year}: {e}. Skipping.")
# --- Create DataFrame ---
final_df = pd.DataFrame(all_race_data)

# --- Save Data ---
final_df.to_csv('f1_race_data_raw.csv', index=False)
print("Raw data saved to f1_race_data_raw.csv")

print(final_df.head())
print(f"Total records fetched: {len(final_df)}")

req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...


Fetching data for year: 2019




Could not get schedule for 2019: Failed to load any schedule data.
Fetching data for year: 2020


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...


Could not get schedule for 2020: Failed to load any schedule data.
Fetching data for year: 2021


req            INFO 	Using cached data for season_schedule


  Processing Event: Bahrain Grand Prix


req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Bahrain Grand Prix - Qualifying [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
core           INFO 	Finished loading data for 20 drivers: ['33', '44', '77', '16', '10', '3', '4', '55', '14', '18', '11', '99', '22', '7', '63', '31', '6', '5', '47', '9']
req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing t

  Processing Event: Emilia Romagna Grand Prix


req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Emilia Romagna Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	No cached data found for weather_data. Loading data...
_api           INFO 	Fetching weather data...
core           INFO 	Finished loading data for 20 drivers: ['33', '44', '4', '16', '55', '3', '10', '18', '31', '14', '11', '22', '7', '99', '5', '47', '9', '77', '63', '6']


  Processing Event: Portuguese Grand Prix


req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Portuguese Grand Prix - Qualifying [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
core           INFO 	Finished loading data for 0 drivers: []
req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Portuguese Grand Prix - Race [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	No cached da

  Processing Event: Spanish Grand Prix


req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Spanish Grand Prix - Qualifying [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
core           INFO 	Finished loading data for 0 drivers: []
req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Spanish Grand Prix - Race [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	No cached data fou

  Processing Event: Monaco Grand Prix


req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Monaco Grand Prix - Qualifying [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
core           INFO 	Finished loading data for 0 drivers: []
req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Monaco Grand Prix - Race [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	No cached data found

  Processing Event: Azerbaijan Grand Prix


req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Azerbaijan Grand Prix - Qualifying [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
core           INFO 	Finished loading data for 0 drivers: []
req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Azerbaijan Grand Prix - Race [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	No cached da

  Processing Event: French Grand Prix


req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for French Grand Prix - Qualifying [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
Request for URL https://api.jolpi.ca/ergast/f1/2021/7/qualifying.json failed; using cached response
Traceback (most recent call last):
  File "/opt/miniconda3/envs/F1_Prediction/lib/python3.12/site-packages/requests_cache/session.py", line 286, in _resend
    response = self._send_and_cache(request, actions, cached_response, **kwargs)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/F1_Prediction/lib/python3.12/site-packages/requests_cache/session.py", line 254, in _send_and_cache
    response = super().send(request, **kwargs)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/F1_Prediction/lib/python3.12/site-packages/fastf1/req.py", line 134, in send
    l

  Processing Event: Styrian Grand Prix


req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Styrian Grand Prix - Qualifying [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
core           INFO 	Finished loading data for 0 drivers: []
req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Styrian Grand Prix - Race [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	No cached data fou

  Processing Event: Austrian Grand Prix


req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Austrian Grand Prix - Qualifying [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
core           INFO 	Finished loading data for 0 drivers: []
req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Austrian Grand Prix - Race [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	No cached data f

  Processing Event: Hungarian Grand Prix


req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Hungarian Grand Prix - Qualifying [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
core           INFO 	Finished loading data for 0 drivers: []
req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Hungarian Grand Prix - Race [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	No cached data

  Processing Event: Belgian Grand Prix


req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Belgian Grand Prix - Qualifying [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
core           INFO 	Finished loading data for 0 drivers: []
req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Belgian Grand Prix - Race [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	No cached data fou

  Processing Event: Dutch Grand Prix


req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Dutch Grand Prix - Qualifying [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
core           INFO 	Finished loading data for 0 drivers: []
req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Dutch Grand Prix - Race [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	No cached data found f

  Processing Event: Russian Grand Prix


req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Russian Grand Prix - Qualifying [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
core           INFO 	Finished loading data for 0 drivers: []
req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Russian Grand Prix - Race [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	No cached data fou

  Processing Event: Turkish Grand Prix


req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Turkish Grand Prix - Qualifying [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
core           INFO 	Finished loading data for 0 drivers: []
req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Turkish Grand Prix - Race [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	No cached data fou

  Processing Event: United States Grand Prix


req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for United States Grand Prix - Qualifying [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
core           INFO 	Finished loading data for 0 drivers: []
req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for United States Grand Prix - Race [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	No cac

  Processing Event: Mexico City Grand Prix


req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Mexico City Grand Prix - Qualifying [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
core           INFO 	Finished loading data for 0 drivers: []
req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Mexico City Grand Prix - Race [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	No cached 

  Processing Event: Qatar Grand Prix


req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Qatar Grand Prix - Qualifying [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
core           INFO 	Finished loading data for 0 drivers: []
req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Qatar Grand Prix - Race [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	No cached data found f

  Processing Event: Saudi Arabian Grand Prix


req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Saudi Arabian Grand Prix - Qualifying [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
core           INFO 	Finished loading data for 0 drivers: []
req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Saudi Arabian Grand Prix - Race [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	No cac

  Processing Event: Abu Dhabi Grand Prix


req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Abu Dhabi Grand Prix - Qualifying [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
core           INFO 	Finished loading data for 0 drivers: []
req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Abu Dhabi Grand Prix - Race [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	No cached data

Fetching data for year: 2022


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...


Could not get schedule for 2022: Failed to load any schedule data.
Fetching data for year: 2023


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...


Could not get schedule for 2023: Failed to load any schedule data.
Raw data saved to f1_race_data_raw.csv
   Year           EventName DriverAbbreviation         TeamName  GridPosition  \
0  2021  Bahrain Grand Prix                HAM         Mercedes            20   
1  2021  Bahrain Grand Prix                VER  Red Bull Racing            20   
2  2021  Bahrain Grand Prix                BOT         Mercedes            20   
3  2021  Bahrain Grand Prix                NOR          McLaren            20   
4  2021  Bahrain Grand Prix                PER  Red Bull Racing            20   

   FinishPosition    Status  Points  WonRace  
0             1.0  Finished    25.0        1  
1             2.0  Finished    18.0        0  
2             3.0  Finished    16.0        0  
3             4.0  Finished    12.0        0  
4             5.0  Finished    10.0        0  
Total records fetched: 40
