In this notebook, we perform all the required API calls to get the data and store it in our `data` folder. An explanation of each folder can be found in `./data/readme.txt`.

Note that the loops have timers on them. This is necessary because the Ergast API only allows a certain number of API calls before temporarily restricting access to it.

In [1]:
# Necessary import
import requests
import json
import time

The code to access the API and save the data received to a json file is:

In [27]:
# Constructors filtered by standings position 1 (the `constructorStandings/1`
# segment of the URL).
constructors_request = requests.get(
    "http://ergast.com/api/f1/constructorStandings/1/constructors.json",
    timeout=30,  # never let a stalled connection hang the kernel
)
# Stop on an HTTP error (e.g. while the API is restricting access) instead of
# saving an error payload as if it were data.
constructors_request.raise_for_status()
json_constructors_data = constructors_request.json()
with open("./data/constructors-1.json", "w", encoding="utf-8") as constructors_file:
    json.dump(json_constructors_data, constructors_file, ensure_ascii=False, indent=4)

And then to access it, we also use the json module:

In [28]:
# Read the saved JSON back. The `with` block guarantees the file is closed
# (a bare `file.close` without parentheses never calls the method), and the
# explicit encoding matches the UTF-8 the file was written with.
with open("./data/constructors-1.json", encoding="utf-8") as file:
    constructors_data = json.load(file)
print(constructors_data)

{'MRData': {'xmlns': 'http://ergast.com/mrd/1.5', 'series': 'f1', 'url': 'http://ergast.com/api/f1/constructorstandings/1/constructors.json', 'limit': '30', 'offset': '0', 'total': '17', 'ConstructorTable': {'constructorStandings': '1', 'Constructors': [{'constructorId': 'benetton', 'url': 'http://en.wikipedia.org/wiki/Benetton_Formula', 'name': 'Benetton', 'nationality': 'Italian'}, {'constructorId': 'brabham-repco', 'url': 'http://en.wikipedia.org/wiki/Brabham', 'name': 'Brabham-Repco', 'nationality': 'British'}, {'constructorId': 'brawn', 'url': 'http://en.wikipedia.org/wiki/Brawn_GP', 'name': 'Brawn', 'nationality': 'British'}, {'constructorId': 'brm', 'url': 'http://en.wikipedia.org/wiki/BRM', 'name': 'BRM', 'nationality': 'British'}, {'constructorId': 'cooper-climax', 'url': 'http://en.wikipedia.org/wiki/Cooper_Car_Company', 'name': 'Cooper-Climax', 'nationality': 'British'}, {'constructorId': 'ferrari', 'url': 'http://en.wikipedia.org/wiki/Scuderia_Ferrari', 'name': 'Ferrari', '

<function TextIOWrapper.close()>

Now, let's do the same for the rest of the necessary files.

In [29]:
# All seasons (the query asks for up to 73 entries in a single page)
seasons_request = requests.get(
    url="http://ergast.com/api/f1/seasons.json?limit=73&offset=0",
    timeout=30,  # never let a stalled connection hang the kernel
)
# Stop on an HTTP error instead of saving an error payload as if it were data.
seasons_request.raise_for_status()
json_seasons_data = seasons_request.json()
with open("./data/seasons.json", "w", encoding="utf-8") as seasons_file:
    json.dump(json_seasons_data, seasons_file, ensure_ascii=False, indent=4)

In [30]:
# 2008 season, results filtered by the `results/1` URL segment
# (presumably finishing position 1, i.e. each race winner -- confirm against the API docs)
request_2008 = requests.get(
    url="http://ergast.com/api/f1/2008/results/1.json",
    timeout=30,  # never let a stalled connection hang the kernel
)
# Stop on an HTTP error instead of saving an error payload as if it were data.
request_2008.raise_for_status()
json_data_2008 = request_2008.json()
with open("./data/2008-season.json", "w", encoding="utf-8") as file_2008:
    json.dump(json_data_2008, file_2008, ensure_ascii=False, indent=4)

In [31]:
# All season results: one file per season from 1950 to 2022
for year in range(1950, 2023):
    results_response = requests.get(
        url=f"http://ergast.com/api/f1/{year}/results/1.json",
        timeout=30,  # never let a stalled connection hang the kernel
    )
    # Abort the loop on an HTTP error (e.g. while the API is restricting
    # access) rather than writing an error payload into the season's file.
    results_response.raise_for_status()
    json_results_data = results_response.json()
    with open(f"./data/all-seasons-results/{year}.json", "w", encoding="utf-8") as results_file:
        json.dump(json_results_data, results_file, ensure_ascii=False, indent=4)
    # Pause between calls so we stay under the API's request limit
    time.sleep(1)

In [32]:
# All drivers and teams standings: one file per season from 1958 to 2021
for year in range(1958, 2022):
    drivers_response = requests.get(
        url=f"http://ergast.com/api/f1/{year}/driverStandings.json",
        timeout=30,  # never let a stalled connection hang the kernel
    )
    teams_response = requests.get(
        url=f"http://ergast.com/api/f1/{year}/constructorStandings.json",
        timeout=30,
    )
    # Abort on an HTTP error from either call before anything is written,
    # so no season ends up with an error payload saved as data.
    drivers_response.raise_for_status()
    teams_response.raise_for_status()
    json_drivers_data = drivers_response.json()
    json_teams_data = teams_response.json()
    with open(f"./data/all-seasons-driverstandings/{year}.json", "w", encoding="utf-8") as drivers_file:
        json.dump(json_drivers_data, drivers_file, ensure_ascii=False, indent=4)
    with open(f"./data/all-seasons-constructorstandings/{year}.json", "w", encoding="utf-8") as teams_file:
        json.dump(json_teams_data, teams_file, ensure_ascii=False, indent=4)
    # Pause between iterations so we stay under the API's request limit
    time.sleep(1)

In [33]:
# Only the first race of every season, from 1950 to 2022
for year in range(1950, 2023):
    # The 1 after the year selects which race (round) of the season we want
    first_race_response = requests.get(
        url=f"http://ergast.com/api/f1/{year}/1/results.json",
        timeout=30,  # never let a stalled connection hang the kernel
    )
    # Abort the loop on an HTTP error rather than writing an error payload
    # into the season's file.
    first_race_response.raise_for_status()
    first_races_data = first_race_response.json()
    with open(f"./data/all-seasons-first-races/{year}.json", "w", encoding="utf-8") as first_race_file:
        json.dump(first_races_data, first_race_file, ensure_ascii=False, indent=4)
    # Pause between calls so we stay under the API's request limit
    time.sleep(1)

In [2]:
# Monza laptimes, from 2003-2021

# First find in which round of each season the Monza race occurred. The season
# is recorded alongside its round, so the two can never drift out of step even
# if some season has no Monza entry.
monza_years = []
monza_list = []
for year in range(2003, 2022):
    # `with` closes the file promptly; the explicit encoding matches the UTF-8
    # these season files are written with.
    with open(f"./data/all-seasons-results/{year}.json", encoding="utf-8") as season_file:
        season_data = json.load(season_file)
    for race in season_data["MRData"]["RaceTable"]["Races"]:
        if race["Circuit"]["circuitId"] == "monza":
            monza_years.append(year)
            monza_list.append(race["round"])

# Now use the (season, round) pairs to download the qualifying results
for year, race in zip(monza_years, monza_list):
    laptime_request = requests.get(
        url=f"http://ergast.com/api/f1/{year}/{race}/qualifying.json",
        timeout=30,  # never let a stalled connection hang the kernel
    )
    # Abort on an HTTP error rather than writing an error payload to disk.
    laptime_request.raise_for_status()
    json_laptime_data = laptime_request.json()
    with open(f"./data/2003-2021-monza-laptimes/{year}.json", "w", encoding="utf-8") as laptime_file:
        json.dump(json_laptime_data, laptime_file, ensure_ascii=False, indent=4)
    # Pause between calls so we stay under the API's request limit
    time.sleep(2)

In [7]:
# Spa laptimes, from 2004-2021

# Seasons in which the Spa race didn't happen: 2003 and 2006
abscent_years = [2003, 2006]

# Find in which round of each season the Spa race occurred. The season is
# recorded alongside its round, so the download loop below does not have to
# reconstruct the year with a separate counter.
spa_years = []
spa_list = []
for year in range(2004, 2022):
    if year in abscent_years:
        continue
    # `with` closes the file promptly; the explicit encoding matches the UTF-8
    # these season files are written with.
    with open(f"./data/all-seasons-results/{year}.json", encoding="utf-8") as season_file:
        season_data = json.load(season_file)
    for race in season_data["MRData"]["RaceTable"]["Races"]:
        if race["Circuit"]["circuitId"] == "spa":
            spa_years.append(year)
            spa_list.append(race["round"])

# Use the (season, round) pairs to download the qualifying results
for year, race in zip(spa_years, spa_list):
    laptime_request = requests.get(
        url=f"http://ergast.com/api/f1/{year}/{race}/qualifying.json",
        timeout=30,  # never let a stalled connection hang the kernel
    )
    # Abort on an HTTP error rather than writing an error payload to disk.
    laptime_request.raise_for_status()
    json_laptime_data = laptime_request.json()
    with open(f"./data/2004-2021-spa-laptimes/{year}.json", "w", encoding="utf-8") as laptime_file:
        json.dump(json_laptime_data, laptime_file, ensure_ascii=False, indent=4)
    # Pause between calls so we stay under the API's request limit
    time.sleep(2)