In [1]:
import pandas as pd
import requests
from tqdm import tqdm
from dateutil.relativedelta import *

In [13]:
# Build the race calendar for every season from 2003 through 2023.
#
# Produces:
#   races             - DataFrame with one row per race, written to races.csv
#   rounds_per_season - dict mapping season -> list of round numbers; the
#                       later download cells iterate over it
race_columns = ['season', 'round', 'circuit_id', 'country', 'url']
race_rows = []

rounds_per_season = {}

# The URL template is loop-invariant, so define it once.
url = 'https://ergast.com/api/f1/{}.json'

# gets races each season
for year in tqdm(range(2003, 2024)):
    # A timeout keeps a stalled connection from hanging the cell forever.
    resp = requests.get(url.format(year), timeout=30)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    resp.raise_for_status()
    payload = resp.json()

    # Register the season even when no races come back, so later lookups
    # by season never raise KeyError.
    season_rounds = rounds_per_season.setdefault(year, [])

    for item in payload['MRData']['RaceTable']['Races']:
        round_no = int(item['round'])
        race_rows.append({
            'season': int(item['season']),
            'round': round_no,
            'circuit_id': item['Circuit']['circuitId'],
            'country': item['Circuit']['Location']['country'],
            'url': item['url'],
        })
        season_rounds.append(round_no)

# Passing `columns` keeps the column order fixed even when no rows were collected.
races = pd.DataFrame(race_rows, columns=race_columns)
races.to_csv('races.csv', index = False)

100%|██████████| 21/21 [00:08<00:00,  2.38it/s]


In [11]:
# Download the classification of every race listed in `rounds_per_season`
# (built by the race-calendar cell, which must be run first) and store one
# row per driver per race in results.csv.
#
# 'age' is the driver's age in whole years on race day; 'podium' holds the
# value of the API's `position` field for that driver.
result_columns = ['season', 'round', 'circuit_id', 'driver', 'age',
                  'nationality', 'constructor', 'grid', 'podium', 'url']
result_rows = []

# https, matching the other download cells in this notebook.
url = 'https://ergast.com/api/f1/{}/{}/results.json'

for season in tqdm(range(2003, 2024)):
    # `round_no` rather than `round`, so the builtin round() is not shadowed
    # for the rest of the kernel session.
    for round_no in rounds_per_season.get(season, []):
        resp = requests.get(url.format(season, round_no), timeout=30)
        # Fail loudly on an HTTP error instead of trying to parse an error page.
        resp.raise_for_status()
        payload = resp.json()

        race_list = payload['MRData']['RaceTable']['Races']
        if not race_list:
            # No results returned for this round; skip it rather than
            # crash with IndexError on race_list[0].
            continue
        race = race_list[0]

        # Race-level values are identical for every driver: compute them once.
        race_season = int(race['season'])
        race_round = int(race['round'])
        circuit_id = race['Circuit']['circuitId']
        race_url = race['url']
        race_date = pd.to_datetime(race['date'])

        for item in race['Results']:
            driver = item['Driver']
            birth_date = pd.to_datetime(driver['dateOfBirth'])
            result_rows.append({
                'season': race_season,
                'round': race_round,
                'circuit_id': circuit_id,
                'driver': driver['driverId'],
                'age': relativedelta(race_date, birth_date).years,
                'nationality': driver['nationality'],
                'constructor': item['Constructor']['constructorId'],
                'grid': int(item['grid']),
                'podium': int(item['position']),
                'url': race_url,
            })

# Passing `columns` keeps the column order fixed even when no rows were collected.
results = pd.DataFrame(result_rows, columns=result_columns)
results.to_csv('results.csv', index = False)

100%|██████████| 21/21 [09:09<00:00, 26.19s/it]


In [8]:
# Download the drivers' championship table as it stood after every round
# listed in `rounds_per_season` (built by the race-calendar cell, which must
# be run first) and store one row per driver per round in
# driver_standings.csv.
driver_standings_columns = ['season', 'round', 'driver', 'driver_points',
                            'driver_wins', 'driver_standings_pos']
driver_standings_rows = []

url = 'https://ergast.com/api/f1/{}/{}/driverStandings.json'

# get driver standings
for season in tqdm(range(2003, 2024)):
    # `round_no` rather than `round`, so the builtin round() is not shadowed
    # for the rest of the kernel session.
    for round_no in rounds_per_season.get(season, []):
        resp = requests.get(url.format(season, round_no), timeout=30)
        # Fail loudly on an HTTP error instead of trying to parse an error page.
        resp.raise_for_status()
        payload = resp.json()

        standings_lists = payload['MRData']['StandingsTable']['StandingsLists']
        if not standings_lists:
            # No standings returned for this round; skip it rather than
            # crash with IndexError on standings_lists[0].
            continue
        standings = standings_lists[0]
        standings_season = int(standings['season'])
        standings_round = int(standings['round'])

        for item in standings['DriverStandings']:
            # Points totals can be fractional (e.g. after a half-points
            # race). int() raises on a value like "12.5", and the previous
            # bare `except` then recorded 0 points, so parse as float.
            try:
                points = float(item['points'])
            except (KeyError, TypeError, ValueError):
                # Keep the original best-effort fallback for a missing or
                # unparsable value, without swallowing unrelated errors.
                points = 0.0

            driver_standings_rows.append({
                'season': standings_season,
                'round': standings_round,
                'driver': item['Driver']['driverId'],
                'driver_points': points,
                'driver_wins': int(item['wins']),
                'driver_standings_pos': int(item['position']),
            })

# Passing `columns` keeps the column order fixed even when no rows were collected.
driver_standings = pd.DataFrame(driver_standings_rows,
                                columns=driver_standings_columns)
driver_standings.to_csv('driver_standings.csv', index = False)

100%|██████████| 21/21 [29:10<00:00, 83.36s/it]


In [9]:
# Download the constructors' championship table as it stood after every
# round listed in `rounds_per_season` (built by the race-calendar cell,
# which must be run first) and store one row per constructor per round in
# constructor_standings.csv.
constructor_standings_columns = ['season', 'round', 'constructor',
                                 'constructor_points', 'constructor_wins',
                                 'constructor_standings_pos']
constructor_standings_rows = []

url = 'https://ergast.com/api/f1/{}/{}/constructorStandings.json'

for season in tqdm(range(2003, 2024)):
    # `round_no` rather than `round`, so the builtin round() is not shadowed
    # for the rest of the kernel session.
    for round_no in rounds_per_season.get(season, []):
        resp = requests.get(url.format(season, round_no), timeout=30)
        # Fail loudly on an HTTP error instead of trying to parse an error page.
        resp.raise_for_status()
        payload = resp.json()

        standings_lists = payload['MRData']['StandingsTable']['StandingsLists']
        if not standings_lists:
            # No standings returned for this round; skip it rather than
            # crash with IndexError on standings_lists[0].
            continue
        standings = standings_lists[0]
        standings_season = int(standings['season'])
        standings_round = int(standings['round'])

        for item in standings['ConstructorStandings']:
            # Points totals can be fractional (e.g. after a half-points
            # race). int() raises on a value like "12.5", and the previous
            # bare `except` then recorded 0 points, so parse as float.
            try:
                points = float(item['points'])
            except (KeyError, TypeError, ValueError):
                # Keep the original best-effort fallback for a missing or
                # unparsable value, without swallowing unrelated errors.
                points = 0.0

            constructor_standings_rows.append({
                'season': standings_season,
                'round': standings_round,
                'constructor': item['Constructor']['constructorId'],
                'constructor_points': points,
                'constructor_wins': int(item['wins']),
                'constructor_standings_pos': int(item['position']),
            })

# Passing `columns` keeps the column order fixed even when no rows were collected.
constructor_standings = pd.DataFrame(constructor_standings_rows,
                                     columns=constructor_standings_columns)
constructor_standings.to_csv('constructor_standings.csv', index = False)

100%|██████████| 21/21 [04:50<00:00, 13.82s/it]


In [10]:
# Download the qualifying classification of every round listed in
# `rounds_per_season` (built by the race-calendar cell, which must be run
# first) and store one row per driver per round in qualifying_results.csv.
#
# 'qualifying_time' is the time from the latest session the driver has a
# value for (Q3, then Q2, then Q1), or the placeholder "00.000" when none
# of the three is present.
qualifying_columns = ['driver', 'car', 'qualifying_time', 'season', 'round']
qualifying_rows = []

# https, matching the other download cells in this notebook.
url = 'https://ergast.com/api/f1/{}/{}/qualifying.json'

for season in tqdm(range(2003, 2024)):
    # `round_no` rather than `round`, so the builtin round() is not shadowed
    # for the rest of the kernel session.
    for round_no in rounds_per_season.get(season, []):
        resp = requests.get(url.format(season, round_no), timeout=30)
        # Fail loudly on an HTTP error instead of trying to parse an error page.
        resp.raise_for_status()
        payload = resp.json()

        race_list = payload['MRData']['RaceTable']['Races']
        if not race_list:
            # No qualifying data returned for this round; skip it rather
            # than crash with IndexError on race_list[0].
            continue
        race = race_list[0]
        race_season = int(race['season'])
        race_round = int(race['round'])

        for item in race['QualifyingResults']:
            # Missing or empty session times are falsy, so `or` falls through
            # to the next session. This replaces a try/except whose handler
            # appended to a non-existent 'Q' column and would itself have
            # raised KeyError.
            qualifying_time = (item.get('Q3')
                               or item.get('Q2')
                               or item.get('Q1')
                               or "00.000")

            qualifying_rows.append({
                'driver': item['Driver']['driverId'],
                'car': item['Constructor']['name'],
                'qualifying_time': qualifying_time,
                'season': race_season,
                'round': race_round,
            })

# Passing `columns` keeps the column order fixed even when no rows were collected.
qualifying_results = pd.DataFrame(qualifying_rows, columns=qualifying_columns)
qualifying_results.to_csv('qualifying_results.csv', index = False)

100%|██████████| 21/21 [04:30<00:00, 12.87s/it]
