In [1]:
!pip install -q pandasql
!pip install -q gspread
!pip install -q gspread-dataframe

In [2]:
import time
from datetime import datetime, timedelta, timezone

import pandas as pd
import requests
from pandasql import sqldf

In [3]:
# Authenticate the Colab user first; presumably this must run before
# default() below can resolve credentials, so keep the statement order.
from google.colab import auth
auth.authenticate_user()

# Build a gspread client from the default Google credentials.
import gspread
from google.auth import default
creds, _ = default()  # default() yields a pair; only the first item is used here
gc = gspread.authorize(creds)  # client referenced by the commented-out Sheets export code

In [4]:
# Fetching F1 Data
def fetch_data_to_dataframe(
    url: str,
    df_name: str = "data",
    timeout: float = 30.0,
    max_retries: int = 5,
    backoff_seconds: float = 1.0,
) -> pd.DataFrame:
    """Fetch a JSON payload from ``url`` and return it as a DataFrame.

    Progress and failures are reported with ``print``. The function is
    best-effort: any request failure yields an empty DataFrame rather than
    an exception.

    Args:
        url: Endpoint to request.
        df_name: Human-readable label used in the printed messages.
        timeout: Seconds to wait for the server before giving up on a request.
        max_retries: How many extra attempts to make when the server answers
            HTTP 429 (Too Many Requests). ``0`` disables retrying.
        backoff_seconds: Base delay between 429 retries; the delay doubles
            after every attempt.

    Returns:
        A DataFrame built from the decoded JSON, or an empty DataFrame when
        the payload is empty, the request fails, or the body is not JSON.
    """
    print(f"Attempting to fetch {df_name} from: {url}")
    attempts = max(1, max_retries + 1)
    try:
        for attempt in range(attempts):
            # A timeout keeps a stalled connection from hanging the notebook.
            response = requests.get(url, timeout=timeout)
            if response.status_code != 429 or attempt == attempts - 1:
                break
            # Rate limited: wait with exponential back-off, then try again.
            time.sleep(max(0.0, backoff_seconds * (2 ** attempt)))
        response.raise_for_status()  # Raises an HTTPError for bad responses (4xx or 5xx)
        fetched_data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"Error fetching {df_name} from {url}: {e}")
        return pd.DataFrame()  # Return an empty DataFrame on any request error

    if fetched_data:
        df = pd.DataFrame(fetched_data)
        print(f"Successfully fetched {len(df)} rows for {df_name}.")
        return df

    print(f"No data found for {df_name} from the provided URL.")
    return pd.DataFrame()  # Return an empty DataFrame

In [5]:
# Fetching F1 Data Without Print
def fetch_data_to_dataframe_wp(
    url: str,
    df_name: str = "data",
    timeout: float = 30.0,
    max_retries: int = 5,
    backoff_seconds: float = 1.0,
) -> pd.DataFrame:
    """Quiet fetch: return the JSON payload at ``url`` as a DataFrame.

    Unlike the verbose fetch helper, nothing is printed on success; only an
    empty payload or a failure produces a message. The function is
    best-effort: any request failure yields an empty DataFrame rather than
    an exception.

    Args:
        url: Endpoint to request.
        df_name: Human-readable label used in the printed messages.
        timeout: Seconds to wait for the server before giving up on a request.
        max_retries: How many extra attempts to make when the server answers
            HTTP 429 (Too Many Requests). ``0`` disables retrying.
        backoff_seconds: Base delay between 429 retries; the delay doubles
            after every attempt.

    Returns:
        A DataFrame built from the decoded JSON, or an empty DataFrame when
        the payload is empty, the request fails, or the body is not JSON.
    """
    attempts = max(1, max_retries + 1)
    try:
        for attempt in range(attempts):
            # A timeout keeps a stalled connection from hanging the notebook.
            response = requests.get(url, timeout=timeout)
            if response.status_code != 429 or attempt == attempts - 1:
                break
            # Rate limited: wait with exponential back-off, then try again.
            time.sleep(max(0.0, backoff_seconds * (2 ** attempt)))
        response.raise_for_status()  # Raises an HTTPError for bad responses (4xx or 5xx)
        fetched_data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"Error fetching {df_name} from {url}: {e}")
        return pd.DataFrame()  # Return an empty DataFrame on any request error

    if fetched_data:
        return pd.DataFrame(fetched_data)

    print(f"No data found for {df_name} from the provided URL.")
    return pd.DataFrame()  # Return an empty DataFrame

In [6]:
## OpenF1 endpoints used by the notebook
# Season whose sessions and meetings are requested
year_to_fetch = 2024

# The drivers URL carries no query parameters.
drivers_url = "https://api.openf1.org/v1/drivers"

# Season-scoped endpoints.
meetings_url = f"https://api.openf1.org/v1/meetings?year={year_to_fetch}"
sessions_url = f"https://api.openf1.org/v1/sessions?year={year_to_fetch}"

In [7]:
# Fetching Data

# Pull the season-level tables. Each label must name the table it belongs to,
# because the fetch helper echoes it in its progress and error messages.
sessions_df = fetch_data_to_dataframe(sessions_url, df_name=f"sessions for {year_to_fetch}")
meetings_df = fetch_data_to_dataframe(meetings_url, df_name=f"meetings for {year_to_fetch}")
drivers_df = fetch_data_to_dataframe(drivers_url, df_name="all drivers")

# Keep only the rows whose session_type is 'Race'; the per-session loops iterate over these.
race_sessions = sessions_df[sessions_df['session_type'] == 'Race']

Attempting to fetch sessions for 2024 from: https://api.openf1.org/v1/sessions?year=2024
Successfully fetched 123 rows for sessions for 2024.
Attempting to fetch sessions for 2024 from: https://api.openf1.org/v1/meetings?year=2024
Successfully fetched 25 rows for sessions for 2024.
Attempting to fetch all drivers from: https://api.openf1.org/v1/drivers
Successfully fetched 5918 rows for all drivers.


## Fragmented Datasets

In [8]:
# Laps Data

all_laps = []

# Only the session key is needed from each race session row.
for session_key in race_sessions['session_key']:
    laps = fetch_data_to_dataframe_wp(
        f"https://api.openf1.org/v1/laps?session_key={session_key}",
        df_name=f"laps in session {session_key}",
    )

    # A session that returned nothing contributes no rows.
    if laps is None or laps.empty:
        continue

    # Stamp every lap with the session it was requested for.
    all_laps.append(laps.assign(session_key=session_key))

# Stack the per-session frames into one table, or fall back to an empty one.
if not all_laps:
    laps_df = pd.DataFrame()
    print("No laps data found for the selected year.")
else:
    laps_df = pd.concat(all_laps, ignore_index=True)
    print(f"Total laps collected: {len(laps_df)}")


# SQL-friendly copy: leave out the list-valued per-sector segment columns.
segment_columns = [f'segments_sector_{n}' for n in (1, 2, 3)]
laps_df_sql = laps_df.drop(columns=segment_columns, errors='ignore')

Total laps collected: 29031


In [9]:
# Session Results
all_results = []

# Only the session key is needed from each race session row.
for session_key in race_sessions['session_key']:
    result = fetch_data_to_dataframe_wp(
        f"https://api.openf1.org/v1/session_result?session_key={session_key}",
        df_name=f"result in session {session_key}",
    )

    # A session that returned nothing contributes no rows.
    if result is None or result.empty:
        continue

    # Stamp every result row with the session it was requested for.
    all_results.append(result.assign(session_key=session_key))

# Stack the per-session frames into one table, or fall back to an empty one.
if not all_results:
    session_results_df = pd.DataFrame()
    print("No session result data found for the selected year.")
else:
    session_results_df = pd.concat(all_results, ignore_index=True)
    print(f"Total session result rows collected: {len(session_results_df)}")

Error fetching result in session 9549 from https://api.openf1.org/v1/session_result?session_key=9549: 429 Client Error: Too Many Requests for url: https://api.openf1.org/v1/session_result?session_key=9549
Error fetching result in session 9582 from https://api.openf1.org/v1/session_result?session_key=9582: 429 Client Error: Too Many Requests for url: https://api.openf1.org/v1/session_result?session_key=9582
Error fetching result in session 9598 from https://api.openf1.org/v1/session_result?session_key=9598: 429 Client Error: Too Many Requests for url: https://api.openf1.org/v1/session_result?session_key=9598
Error fetching result in session 9644 from https://api.openf1.org/v1/session_result?session_key=9644: 429 Client Error: Too Many Requests for url: https://api.openf1.org/v1/session_result?session_key=9644
Error fetching result in session 9655 from https://api.openf1.org/v1/session_result?session_key=9655: 429 Client Error: Too Many Requests for url: https://api.openf1.org/v1/session

In [10]:
# Starting Grid Data
all_grids = []

# Only the session key is needed from each race session row.
for session_key in race_sessions['session_key']:
    grid = fetch_data_to_dataframe_wp(
        f"https://api.openf1.org/v1/starting_grid?session_key={session_key}",
        df_name=f"starting grid in session {session_key}",
    )

    # A session that returned nothing contributes no rows.
    if grid is None or grid.empty:
        continue

    # Stamp every grid row with the session it was requested for.
    all_grids.append(grid.assign(session_key=session_key))

# Stack the per-session frames into one table, or fall back to an empty one.
if not all_grids:
    starting_grid_df = pd.DataFrame()
    print("No starting grid data found for the selected year.")
else:
    starting_grid_df = pd.concat(all_grids, ignore_index=True)
    print(f"Total starting grid rows collected: {len(starting_grid_df)}")

Error fetching starting grid in session 9472 from https://api.openf1.org/v1/starting_grid?session_key=9472: 429 Client Error: Too Many Requests for url: https://api.openf1.org/v1/starting_grid?session_key=9472
Error fetching starting grid in session 9480 from https://api.openf1.org/v1/starting_grid?session_key=9480: 429 Client Error: Too Many Requests for url: https://api.openf1.org/v1/starting_grid?session_key=9480
Error fetching starting grid in session 9488 from https://api.openf1.org/v1/starting_grid?session_key=9488: 429 Client Error: Too Many Requests for url: https://api.openf1.org/v1/starting_grid?session_key=9488
Error fetching starting grid in session 9673 from https://api.openf1.org/v1/starting_grid?session_key=9673: 429 Client Error: Too Many Requests for url: https://api.openf1.org/v1/starting_grid?session_key=9673
Error fetching starting grid in session 9506 from https://api.openf1.org/v1/starting_grid?session_key=9506: 429 Client Error: Too Many Requests for url: https:/

In [11]:
# Loop through each race session, fetch stints data, and store into stints_df
all_stints = []

# Only the session key is needed from each race session row.
for session_key in race_sessions['session_key']:
    stints = fetch_data_to_dataframe_wp(
        f"https://api.openf1.org/v1/stints?session_key={session_key}",
        df_name=f"stints in session {session_key}",
    )

    # A session that returned nothing contributes no rows.
    if stints is None or stints.empty:
        continue

    # Stamp every stint with the session it was requested for.
    all_stints.append(stints.assign(session_key=session_key))

# Stack the per-session frames into one table, or fall back to an empty one.
if not all_stints:
    stints_df = pd.DataFrame()
    print("No stints data found for the selected year.")
else:
    stints_df = pd.concat(all_stints, ignore_index=True)
    print(f"Total stints collected: {len(stints_df)}")

Error fetching stints in session 9480 from https://api.openf1.org/v1/stints?session_key=9480: 429 Client Error: Too Many Requests for url: https://api.openf1.org/v1/stints?session_key=9480
Error fetching stints in session 9488 from https://api.openf1.org/v1/stints?session_key=9488: 429 Client Error: Too Many Requests for url: https://api.openf1.org/v1/stints?session_key=9488
Error fetching stints in session 9496 from https://api.openf1.org/v1/stints?session_key=9496: 429 Client Error: Too Many Requests for url: https://api.openf1.org/v1/stints?session_key=9496
Error fetching stints in session 9672 from https://api.openf1.org/v1/stints?session_key=9672: 429 Client Error: Too Many Requests for url: https://api.openf1.org/v1/stints?session_key=9672
Error fetching stints in session 9507 from https://api.openf1.org/v1/stints?session_key=9507: 429 Client Error: Too Many Requests for url: https://api.openf1.org/v1/stints?session_key=9507
Error fetching stints in session 9558 from https://api.

## Dataset Joins

In [12]:
# race_sessions[race_sessions.session_key == 9472]

In [13]:
# meetings_df[meetings_df.meeting_key == 1229]

In [14]:
# laps_df[(laps_df['session_key']==9472)&(laps_df['driver_number']==1)].head(2)

In [15]:
# Spot check: result row(s) for driver 44 in session 9472.
session_results_df[
    (session_results_df['session_key'] == 9472)
    & (session_results_df['driver_number'] == 44)
]

Unnamed: 0,position,driver_number,number_of_laps,time_gap,points,meeting_key,session_key
15,7,44,57.0,50.324,6.0,1229,9472


In [16]:
import math

def format_lap_duration(seconds):
    """Format a duration in seconds as ``M:SS.mmm``.

    Args:
        seconds: Duration in seconds (int or float), or ``None``/NaN for a
            missing value.

    Returns:
        The formatted string, or ``None`` when ``seconds`` is ``None`` or NaN.
    """
    if seconds is None or (isinstance(seconds, float) and math.isnan(seconds)):
        return None  # or return 'N/A' if you prefer a string

    # Round to whole milliseconds *before* splitting into fields. Rounding the
    # fractional part on its own can produce 1000 ms (59.9996 -> "0:59.1000")
    # instead of carrying into the seconds and minutes.
    total_ms = int(round(seconds * 1000))
    sign = "-" if total_ms < 0 else ""

    minutes, remainder_ms = divmod(abs(total_ms), 60_000)
    sec_int, milliseconds = divmod(remainder_ms, 1000)

    return f"{sign}{minutes}:{sec_int:02d}.{milliseconds:03d}"

# Add a display-friendly M:SS.mmm column next to the raw lap duration.
laps_df_sql['lap_duration_formatted'] = laps_df_sql['lap_duration'].map(format_lap_duration)

In [17]:
# Function to run SQL queries
# One row per lap, labelled with the meeting name and the driver's full name.
query1 = """
SELECT
    md.meeting_name,
    ld.driver_number,
    dr.full_name,
    ld.lap_number,
    ld.lap_duration,
    ld.lap_duration_formatted

FROM race_sessions AS rs
JOIN meetings_df AS md
  ON md.meeting_key = rs.meeting_key
JOIN laps_df_sql AS ld
  ON ld.session_key = rs.session_key
JOIN drivers_df AS dr
  ON dr.driver_number = ld.driver_number
  AND dr.session_key = ld.session_key
"""


def pysqldf(q):
    """Run SQL text ``q`` with the notebook's global DataFrames as tables."""
    return sqldf(q, globals())


laps_with_sessions = pysqldf(query1)

In [18]:
# Preview the first five joined rows.
laps_with_sessions.head(n=5)

  cast_date_col = pd.to_datetime(column, errors="coerce")


Unnamed: 0,meeting_name,driver_number,full_name,lap_number,lap_duration,lap_duration_formatted
0,Bahrain Grand Prix,1,Max VERSTAPPEN,1,,
1,Bahrain Grand Prix,1,Max VERSTAPPEN,2,96.296,1:36.296
2,Bahrain Grand Prix,1,Max VERSTAPPEN,3,96.753,1:36.753
3,Bahrain Grand Prix,1,Max VERSTAPPEN,4,96.647,1:36.647
4,Bahrain Grand Prix,1,Max VERSTAPPEN,5,97.173,1:37.173


In [19]:
# # Data for Dashboard
# spreadsheet_name = 'laps_with_sessions' # Replace with your sheet's name
# worksheet_name = 'Sheet1' # Replace with your desired worksheet name

# try:
#     sh = gc.open(spreadsheet_name)
#     worksheet = sh.worksheet(worksheet_name)
# except gspread.exceptions.SpreadsheetNotFound:
#     print(f"Spreadsheet '{spreadsheet_name}' not found. Creating a new spreadsheet.")
#     sh = gc.create(spreadsheet_name)
#     # When you create a new spreadsheet, it automatically has a 'Sheet1'.
#     # We'll try to get it, or add if for some reason it's not named 'Sheet1' or we want a different name.
#     try:
#         worksheet = sh.worksheet(worksheet_name)
#     except gspread.exceptions.WorksheetNotFound:
#         print(f"Worksheet '{worksheet_name}' not found in new spreadsheet. Creating it.")
#         worksheet = sh.add_worksheet(worksheet_name, rows="100", cols="20") # Adjust rows/cols as needed
# except gspread.exceptions.WorksheetNotFound:
#     print(f"Worksheet '{worksheet_name}' not found in '{spreadsheet_name}'. Creating new worksheet.")
#     worksheet = sh.add_worksheet(worksheet_name, rows="100", cols="20") # Adjust rows/cols as needed

# # --- ADD THIS LINE TO CLEAR THE SHEET ---
# worksheet.clear()
# print(f"Worksheet '{worksheet_name}' in '{spreadsheet_name}' cleared.")
# # --- END OF ADDITION ---

# from gspread_dataframe import set_with_dataframe
# set_with_dataframe(worksheet, laps_with_sessions, include_index=True, include_column_header=True) # Set include_index=True if you want to export the DataFrame index

# print(f"Data exported to: https://docs.google.com/spreadsheets/d/{sh.id}/edit#gid={worksheet.id}")