In [83]:
# Season (year) to download; passed as the `year` argument to every get_*_df call below.
year_data = 2025

In [84]:
import warnings

import pandas as pd
import requests

In [85]:
# Base URL of the jolpica-f1 API (Ergast-style endpoints).
# The fetch helpers build each endpoint URL by appending path segments to this value.
ergast_base_url = "https://api.jolpi.ca/ergast/f1/"
# API documentation:
# https://github.com/jolpica/jolpica-f1/blob/main/docs/README.md

In [86]:
def get_drivers_df(year):
    """Fetch the drivers of one season as a DataFrame.

    Parameters
    ----------
    year
        Season to query (e.g. ``2025``); interpolated into the
        ``/{year}/drivers/`` endpoint.

    Returns
    -------
    pandas.DataFrame
        The driver records returned by the API, flattened with
        ``pd.json_normalize`` and with the id/name/birth-date columns renamed
        to snake_case. ``birth_day`` is ``datetime64[ns]``.
        ``permanent_number`` is plain ``int`` when every driver has one and
        nullable ``Int64`` when some are missing. Either column is left out
        when the API response does not contain it.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    requests.RequestException
        On connection problems or when the request times out.
    """
    # Strip a trailing slash from the base URL so the joined URL has no "//".
    url = f"{ergast_base_url.rstrip('/')}/{year}/drivers/"
    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(url, timeout=30)
    # Fail with a clear HTTP error instead of a confusing KeyError further down.
    response.raise_for_status()

    drivers_list = response.json()['MRData']['DriverTable']['Drivers']
    drivers_df = pd.json_normalize(drivers_list)

    # Rename columns
    drivers_df = drivers_df.rename(columns={
        'driverId': 'driver_id',
        'permanentNumber': 'permanent_number',
        'givenName': 'given_name',
        'familyName': 'family_name',
        'dateOfBirth': 'birth_day',
    })

    # Assign types (only to columns that are actually present in the response)
    if 'birth_day' in drivers_df.columns:
        drivers_df['birth_day'] = drivers_df['birth_day'].astype('datetime64[ns]')

    if 'permanent_number' in drivers_df.columns:
        numbers = drivers_df['permanent_number']
        if numbers.isna().any():
            # Plain int cannot hold missing values; go through float so the
            # numeric strings parse, then use the nullable integer dtype.
            drivers_df['permanent_number'] = numbers.astype(float).astype('Int64')
        else:
            drivers_df['permanent_number'] = numbers.astype(int)

    return drivers_df

In [87]:
def get_constructors_df(year):
    """Fetch the constructors of one season as a DataFrame.

    Parameters
    ----------
    year
        Season to query (e.g. ``2025``); interpolated into the
        ``/{year}/constructors/`` endpoint.

    Returns
    -------
    pandas.DataFrame
        The constructor records returned by the API, flattened with
        ``pd.json_normalize``, with ``constructorId`` renamed to
        ``constructor_id``.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    requests.RequestException
        On connection problems or when the request times out.
    """
    # Strip a trailing slash from the base URL so the joined URL has no "//".
    url = f"{ergast_base_url.rstrip('/')}/{year}/constructors/"
    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(url, timeout=30)
    # Fail with a clear HTTP error instead of a confusing KeyError further down.
    response.raise_for_status()

    constructors_list = response.json()['MRData']['ConstructorTable']['Constructors']
    constructors_df = pd.json_normalize(constructors_list)

    # Rename columns
    constructors_df = constructors_df.rename(columns={'constructorId': 'constructor_id'})

    return constructors_df

In [88]:
def get_races_calendar_df(year):
    """Fetch the race calendar of one season as a DataFrame.

    Parameters
    ----------
    year
        Season to query (e.g. ``2025``); interpolated into the ``/{year}``
        endpoint.

    Returns
    -------
    pandas.DataFrame
        One row per race in the API response, flattened with
        ``pd.json_normalize``. The practice start-time columns are dropped,
        circuit/session columns are renamed to snake_case, ``season`` and
        ``round`` are ``int``, the circuit coordinates are ``float`` and all
        ``*_date`` columns are ``datetime64[ns]``. Session columns that the
        response does not contain are simply left out of the result.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    requests.RequestException
        On connection problems or when the request times out.
    """
    # Strip a trailing slash from the base URL so the joined URL has no "//".
    url = f"{ergast_base_url.rstrip('/')}/{year}"
    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(url, timeout=30)
    # Fail with a clear HTTP error instead of a confusing KeyError further down.
    response.raise_for_status()

    races_list = response.json()['MRData']['RaceTable']['Races']
    races_df = pd.json_normalize(races_list)

    # Drop unnecessary columns. errors='ignore' keeps this working when the
    # response carries no practice start times at all.
    races_df = races_df.drop(
        columns=['FirstPractice.time', 'SecondPractice.time', 'ThirdPractice.time'],
        errors='ignore',
    )

    # Rename columns
    races_df = races_df.rename(columns={
        'raceName': 'race_name',
        'url': 'url_grand_prix',
        'Circuit.circuitId': 'circuit_id',
        'Circuit.url': 'url_circuit',
        'Circuit.circuitName': 'name',
        'Circuit.Location.lat': 'location_lat',
        'Circuit.Location.long': 'location_long',
        'Circuit.Location.locality': 'location_locality',
        'Circuit.Location.country': 'country',
        'FirstPractice.date': 'practice1_date',
        'SecondPractice.date': 'practice2_date',
        'ThirdPractice.date': 'practice3_date',
        'Qualifying.date': 'qualifying_date',
        'Qualifying.time': 'qualifying_time',
        'Sprint.date': 'sprint_date',
        'Sprint.time': 'sprint_time',
        'SprintQualifying.date': 'sprint_qualy_date',
        'SprintQualifying.time': 'sprint_qualy_time',
    })

    # Assign types. Every date column uses the same nanosecond resolution
    # (practice3_date used to be the lone 'datetime64[ms]' column).
    column_types = {
        'season': int,
        'round': int,
        'date': 'datetime64[ns]',
        'location_lat': float,
        'location_long': float,
        'practice1_date': 'datetime64[ns]',
        'practice2_date': 'datetime64[ns]',
        'practice3_date': 'datetime64[ns]',
        'qualifying_date': 'datetime64[ns]',
        'sprint_date': 'datetime64[ns]',
        'sprint_qualy_date': 'datetime64[ns]',
    }
    # astype raises KeyError for unknown columns, so only cast the ones that
    # this season's response actually produced.
    present_types = {
        column: dtype
        for column, dtype in column_types.items()
        if column in races_df.columns
    }
    races_df = races_df.astype(present_types)

    return races_df

In [None]:
def _to_int_series(values):
    """Cast a column to int, switching to nullable ``Int64`` only if values are missing."""
    if values.isna().any():
        # Plain int cannot hold NaN; go through float so numeric strings parse.
        return values.astype(float).astype('Int64')
    return values.astype(int)


def _to_points_series(values):
    """Cast points to int, keeping float when fractional or missing values occur."""
    numeric = values.astype(float)
    if numeric.isna().any() or (numeric % 1 != 0).any():
        return numeric
    return numeric.astype(int)


def get_races_results_df(year, year_races_calendar_df):
    """Fetch the race results of every completed round of a season.

    Rounds are requested in order (1, 2, ...) up to the number of rows in the
    calendar. Fetching stops at the first round that has no results yet, that
    is not in the calendar, or whose request fails; a failed request emits a
    ``RuntimeWarning`` so that a truncated season is not silent.

    Parameters
    ----------
    year
        Season to query (e.g. ``2025``); interpolated into the
        ``/{year}/{round}/results`` endpoint.
    year_races_calendar_df : pandas.DataFrame
        Season calendar with at least the columns ``round``, ``season`` and
        ``circuit_id`` (the frame built by ``get_races_calendar_df``).

    Returns
    -------
    pandas.DataFrame
        One row per driver per fetched race, with the columns
        ``driver_number, position, position_text, points, grid, laps, status,
        driver_permanent_number, driver_code, constructor_id, time_millis,
        time_interval, fastest_lap_rank, fastest_lap_lap, fastest_lap_time,
        race_round_id, season, race_circuit_id``. The frame is empty (same
        columns) when no round could be fetched. Integer columns are plain
        ``int`` when complete and nullable ``Int64`` when values are missing;
        ``points`` is ``int`` unless fractional points occur, then ``float``;
        ``time_millis`` is ``float``. Missing fastest-lap rank/lap are 0.
    """
    source_columns = [
        'number', 'position', 'positionText', 'points', 'grid', 'laps', 'status',
        'Driver.permanentNumber', 'Driver.code', 'Constructor.constructorId',
        'Time.millis', 'Time.time',
        'FastestLap.rank', 'FastestLap.lap', 'FastestLap.Time.time',
        'raceRoundId', 'seasonYear', 'circuitId',
    ]

    # Get season length
    season_length = len(year_races_calendar_df.index)
    # Loop-invariant: convert the calendar's round numbers once.
    calendar_rounds = year_races_calendar_df['round'].astype(int)

    # Collect one frame per race and concatenate once at the end; growing a
    # DataFrame with concat inside the loop copies all previous rows each time.
    race_frames = []
    for round_index in range(1, season_length + 1):
        race_row = year_races_calendar_df.loc[calendar_rounds == round_index]
        if race_row.empty:
            # Calendar has no entry for this round number: nothing to label with.
            break

        # Strip a trailing slash from the base URL so the joined URL has no "//".
        url = f"{ergast_base_url.rstrip('/')}/{year}/{round_index}/results"
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            races = response.json()['MRData']['RaceTable']['Races']
            results = races[0]['Results'] if races else []
        except (requests.RequestException, KeyError, ValueError) as error:
            # Keep the best-effort behaviour (return what was fetched so far)
            # but make the truncation visible instead of swallowing it.
            warnings.warn(
                f"Stopped at round {round_index} of {year}: "
                f"results could not be fetched ({error!r})",
                RuntimeWarning,
                stacklevel=2,
            )
            break

        if not results:
            # If no more races (round not run yet)
            break

        # Get race result into a pd df and add circuit id, season year, round id
        race_frames.append(
            pd.json_normalize(results).assign(
                raceRoundId=race_row['round'].values[0],
                seasonYear=race_row['season'].values[0],
                circuitId=race_row['circuit_id'].values[0],
            )
        )

    races_df = pd.concat(race_frames, ignore_index=True) if race_frames else pd.DataFrame()

    # Keep these columns. reindex (unlike [] selection) also works when nothing
    # was fetched or when the API omitted an optional column; those become NaN.
    races_df = races_df.reindex(columns=source_columns)

    # Rename columns
    races_df = races_df.rename(columns={
        'number': 'driver_number',
        'positionText': 'position_text',
        'Driver.permanentNumber': 'driver_permanent_number',
        'Driver.code': 'driver_code',
        'Constructor.constructorId': 'constructor_id',
        'Time.millis': 'time_millis',
        'Time.time': 'time_interval',
        'FastestLap.rank': 'fastest_lap_rank',
        'FastestLap.lap': 'fastest_lap_lap',
        'FastestLap.Time.time': 'fastest_lap_time',
        'raceRoundId': 'race_round_id',
        'seasonYear': 'season',
        'circuitId': 'race_circuit_id',
    })

    # Fill NaN
    races_df = races_df.fillna(
        {'fastest_lap_rank': 0, 'fastest_lap_lap': 0, 'season': 0, 'race_round_id': 0}
    )

    # Assign types
    int_columns = (
        'driver_number', 'position', 'grid', 'laps', 'driver_permanent_number',
        'fastest_lap_rank', 'fastest_lap_lap', 'race_round_id', 'season',
    )
    for column in int_columns:
        races_df[column] = _to_int_series(races_df[column])
    races_df['points'] = _to_points_series(races_df['points'])
    races_df['time_millis'] = races_df['time_millis'].astype(float)

    return races_df

In [90]:
if __name__ == "__main__":
    # Download every table for the configured season (year_data).
    year_drivers_df = get_drivers_df(year_data)
    year_constructors_df = get_constructors_df(year_data)
    # The calendar has to be loaded before the results: get_races_results_df
    # takes it as its second argument.
    year_races_calendar_df = get_races_calendar_df(year_data)
    year_data_df = get_races_results_df(year_data, year_races_calendar_df)

   number position positionText points grid laps    status Driver.driverId  \
0       4        1            1     25    1   57  Finished          norris   
1       1        2            2     18    3   57  Finished  max_verstappen   
2      63        3            3     15    4   57  Finished         russell   
3      12        4            4     12   16   57  Finished       antonelli   
4      23        5            5     10    6   57  Finished           albon   
5      18        6            6      8   13   57  Finished          stroll   
6      27        7            7      6   17   57  Finished      hulkenberg   
7      16        8            8      4    7   57  Finished         leclerc   
8      81        9            9      2    2   57  Finished         piastri   
9      44       10           10      1    8   57  Finished        hamilton   
10     10       11           11      0    9   57  Finished           gasly   
11     22       12           12      0    5   57  Finished      

In [None]:
# Preview the first rows of the season results table.
# NOTE(review): the saved output of this cell shows race_round_id/season as 0 and an
# empty race_circuit_id for every row after the first, and the cell is marked as not
# executed — the output looks stale; re-run the notebook to refresh it.
year_data_df.head()

Unnamed: 0,driver_number,position,position_text,points,grid,laps,status,driver_permanent_number,driver_code,constructor_id,time_millis,time_interval,fastest_lap_rank,fastest_lap_lap,fastest_lap_time,race_round_id,season,race_circuit_id
0,4,1,1,25,1,57,Finished,4,NOR,mclaren,6126304.0,1:42:06.304,1,43,1:22.167,1,2025,albert_park
1,1,2,2,18,3,57,Finished,33,VER,red_bull,6127199.0,+0.895,3,43,1:23.081,0,0,
2,63,3,3,15,4,57,Finished,63,RUS,mercedes,6134785.0,+8.481,11,43,1:25.065,0,0,
3,12,4,4,12,16,57,Finished,12,ANT,mercedes,6136439.0,+10.135,9,43,1:24.901,0,0,
4,23,5,5,10,6,57,Finished,23,ALB,williams,6139077.0,+12.773,8,43,1:24.597,0,0,


In [None]:
#print(year_drivers_df.to_markdown())
#print(year_constructors_df.to_markdown())
#print(year_races_calendar_df.to_markdown())

In [None]:
#print(year_data_df.to_markdown())