In [116]:
import numpy as np
import pandas as pd
import xmltodict
import requests
# Ergast API endpoints. Only `url2` (the 1980 season calendar) is requested in this cell.
url = "http://ergast.com/api/f1/drivers.json?callback=myParser"
url1 = "http://ergast.com/api/f1/drivers.json"
url2 = "http://ergast.com/api/f1/1980"
# requests applies no timeout by default; without one a stalled server would
# block the kernel indefinitely.
response = requests.get(url2, timeout=30)

In [42]:
# Checking whether the response body can be parsed as JSON.
# Trying to parse an empty response as if it were JSON would raise a JSONDecodeError,
# so response.json() is only called when the status code is not 204, the response
# headers contain a `content-type` key and its value includes application/json.

# This cell only reads data, so it must issue a GET (not a DELETE) request.
response = requests.get(url2, timeout=30)

print('response: 👉️', response)
print('response.text: 👉️', response.text)

print('response.status_code: 👉️', response.status_code)
print('response.headers: 👉️', response.headers)

# .get() with a default covers the "header is missing" case in one lookup.
if response.status_code != 204 and 'application/json' in response.headers.get('content-type', ''):
    parsed = response.json()
    print('✅ parsed response: 👉️', parsed)
else:
    # Reached whenever the body is empty or is not served as JSON.
    print('⛔️ conditions not met')

response: 👉️ <Response [200]>
response.text: 👉️ <?xml version="1.0" encoding="utf-8"?>
<?xml-stylesheet type="text/xsl" href="/schemas/mrd-1.5.xsl"?>
<MRData xmlns="http://ergast.com/mrd/1.5" series="f1" url="http://ergast.com/api/f1/1980" limit="30" offset="0" total="14">
	<RaceTable season="1980">
		<Race season="1980" round="1" url="http://en.wikipedia.org/wiki/1980_Argentine_Grand_Prix">
			<RaceName>Argentine Grand Prix</RaceName>
			<Circuit circuitId="galvez" url="http://en.wikipedia.org/wiki/Aut%C3%B3dromo_Oscar_Alfredo_G%C3%A1lvez">
				<CircuitName>Autódromo Juan y Oscar Gálvez</CircuitName>
				<Location lat="-34.6943" long="-58.4593">
					<Locality>Buenos Aires</Locality>
					<Country>Argentina</Country>
				</Location>
			</Circuit>
			<Date>1980-01-13</Date>
		</Race>
		<Race season="1980" round="2" url="http://en.wikipedia.org/wiki/1980_Brazilian_Grand_Prix">
			<RaceName>Brazilian Grand Prix</RaceName>
			<Circuit circuitId="interlagos" url="http://en.wikipedia.org/wi

In [43]:
# Exploring response attributes: print each one on its own line, in order
for attr_name in ("url", "history", "status_code", "headers"):
    print(getattr(response, attr_name))
# The content type is a single entry of the headers mapping
print(response.headers["content-type"])

http://ergast.com/api/f1/1980
[]
200
{'Date': 'Mon, 12 Sep 2022 18:14:51 GMT', 'Server': 'Apache/2.4.6 (CentOS) OpenSSL/1.0.2k-fips PHP/5.4.16', 'X-Powered-By': 'PHP/5.4.16', 'Access-Control-Allow-Origin': '*', 'Cache-Control': 'public, max-age=300', 'Content-Length': '6703', 'Keep-Alive': 'timeout=5, max=100', 'Connection': 'Keep-Alive', 'Content-Type': 'application/xml; charset=utf-8'}
application/xml; charset=utf-8


In [44]:
# Parse the XML body and build a data frame from the <Race> entries
# found under MRData -> RaceTable.
races = pd.DataFrame.from_dict(
    xmltodict.parse(response.text)["MRData"]["RaceTable"]["Race"]
)

In [45]:
# Show only the first race to inspect the column layout
races.head(n=1)

Unnamed: 0,@season,@round,@url,RaceName,Circuit,Date
0,1980,1,http://en.wikipedia.org/wiki/1980_Argentine_Gr...,Argentine Grand Prix,"{'@circuitId': 'galvez', '@url': 'http://en.wi...",1980-01-13


In [46]:
# Nested circuit record of the row labelled 1 (single label-based lookup)
races.loc[1, 'Circuit']

{'@circuitId': 'interlagos',
 '@url': 'http://en.wikipedia.org/wiki/Aut%C3%B3dromo_Jos%C3%A9_Carlos_Pace',
 'CircuitName': 'Autódromo José Carlos Pace',
 'Location': {'@lat': '-23.7036',
  '@long': '-46.6997',
  'Locality': 'São Paulo',
  'Country': 'Brazil'}}

In [97]:
# Treating races data frame

def practices_quali(i):
    '''
    Replace the session column `i` of the global `races` frame with its date.

    Each cell of a session column is expected to be a mapping such as
    ``{'Date': '2021-03-26'}``; it is replaced by the value stored under
    ``'Date'``. Cells that are missing (NaN), are not mappings, or carry no
    ``'Date'`` key become NaN. If `races` has no column `i`, the column is
    created and filled with NaN.

    Parameters
    ----------
    i : str
        Name of the column to convert, e.g. 'FirstPractice' or 'Qualifying'.

    Notes
    -----
    Works in place on the module-level `races` DataFrame and returns None.
    '''
    global races

    if i not in races.columns:
        # Seasons whose data carries no such session have no column at all.
        races[i] = np.nan
        return

    # Convert every cell on its own, so that a single missing session in one
    # row no longer turns the whole column into NaN.
    races[i] = races[i].map(
        lambda cell: cell.get('Date', np.nan) if isinstance(cell, dict) else np.nan
    )

#for season in range(1950, 2022):
def season_reader(season):
    '''
    Download one season's race calendar, flatten it and append it to `races_ap`.

    The cleaned frame is stored in the global `races` (``practices_quali``
    operates on that global) and is also concatenated onto the global
    `races_ap`, so calling this function once per season accumulates all
    seasons instead of keeping only the last one.

    Parameters
    ----------
    season : int or str
        Season identifier, interpolated into the Ergast URL.

    Returns
    -------
    pandas.DataFrame
        The cleaned calendar of `season` (the same object as the global `races`).

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status (via ``raise_for_status``).
    '''
    global races, races_ap

    # Reading the URL; bound the wait and fail loudly on HTTP errors instead
    # of trying to parse an error page as XML.
    url = "http://ergast.com/api/f1/{}".format(season)
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    races = pd.DataFrame.from_dict(xmltodict.parse(response.text)['MRData']['RaceTable']['Race'])

    # practices_quali works on the global `races`, so it has to be bound
    # before these calls.
    for col in ('FirstPractice', 'SecondPractice', 'ThirdPractice', 'Qualifying'):
        practices_quali(col)

    # Splitting the nested 'Circuit' record into plain columns
    circuit = races['Circuit']
    races['CircuitID'] = circuit.map(lambda c: c['@circuitId'])
    races['CircuitName'] = circuit.map(lambda c: c['CircuitName'])
    races['Locality'] = circuit.map(lambda c: c['Location']['Locality'])
    races['Country'] = circuit.map(lambda c: c['Location']['Country'])

    # Dropping unnecessary columns and renaming the remaining ones
    races = (
        races
        .drop(columns=['@url', 'Circuit'])
        .rename(columns={"@season": "Season", "@round": "round", "Date": "Race"}, errors="raise")
    )

    # Append this season to the accumulator. An undefined or empty accumulator
    # is simply replaced, which avoids concatenating with an empty frame.
    accumulated = globals().get('races_ap')
    if accumulated is None or accumulated.empty:
        races_ap = races.copy()
    else:
        races_ap = pd.concat([accumulated, races], ignore_index=True)

    return races


# Start from an empty accumulator frame, then process seasons 1950 through 2022.
races_ap = pd.DataFrame()
# season_reader is called for its side effects, so a plain loop is used
# rather than collecting its return values in a throwaway list.
for season in range(1950, 2023):
    season_reader(season)

In [120]:
# Load the raw 2021 calendar to inspect its columns
url = "http://ergast.com/api/f1/{}".format(2021)
# Bound the wait and surface HTTP errors before attempting to parse the body
response = requests.get(url, timeout=30)
response.raise_for_status()
races = pd.DataFrame.from_dict(xmltodict.parse(response.text)['MRData']['RaceTable']['Race'])
races
#col_list = ['FirstPractice', 'SecondPractice', 'ThirdPractice', 'Qualifying']
#[practices_quali(i) for i in col_list];

Unnamed: 0,@season,@round,@url,RaceName,Circuit,Date,Time,FirstPractice,SecondPractice,ThirdPractice,Qualifying,Sprint
0,2021,1,http://en.wikipedia.org/wiki/2021_Bahrain_Gran...,Bahrain Grand Prix,"{'@circuitId': 'bahrain', '@url': 'http://en.w...",2021-03-28,15:00:00Z,{'Date': '2021-03-26'},{'Date': '2021-03-26'},{'Date': '2021-03-27'},{'Date': '2021-03-27'},
1,2021,2,http://en.wikipedia.org/wiki/2021_Emilia_Romag...,Emilia Romagna Grand Prix,"{'@circuitId': 'imola', '@url': 'http://en.wik...",2021-04-18,13:00:00Z,{'Date': '2021-04-16'},{'Date': '2021-04-16'},{'Date': '2021-04-17'},{'Date': '2021-04-17'},
2,2021,3,http://en.wikipedia.org/wiki/2021_Portuguese_G...,Portuguese Grand Prix,"{'@circuitId': 'portimao', '@url': 'http://en....",2021-05-02,14:00:00Z,{'Date': '2021-04-30'},{'Date': '2021-04-30'},{'Date': '2021-05-01'},{'Date': '2021-05-01'},
3,2021,4,http://en.wikipedia.org/wiki/2021_Spanish_Gran...,Spanish Grand Prix,"{'@circuitId': 'catalunya', '@url': 'http://en...",2021-05-09,13:00:00Z,{'Date': '2021-05-07'},{'Date': '2021-05-07'},{'Date': '2021-05-08'},{'Date': '2021-05-08'},
4,2021,5,http://en.wikipedia.org/wiki/2021_Monaco_Grand...,Monaco Grand Prix,"{'@circuitId': 'monaco', '@url': 'http://en.wi...",2021-05-23,13:00:00Z,{'Date': '2021-05-21'},{'Date': '2021-05-21'},{'Date': '2021-05-22'},{'Date': '2021-05-22'},
5,2021,6,http://en.wikipedia.org/wiki/2021_Azerbaijan_G...,Azerbaijan Grand Prix,"{'@circuitId': 'baku', '@url': 'http://en.wiki...",2021-06-06,12:00:00Z,{'Date': '2021-06-04'},{'Date': '2021-06-04'},{'Date': '2021-06-05'},{'Date': '2021-06-05'},
6,2021,7,http://en.wikipedia.org/wiki/2021_French_Grand...,French Grand Prix,"{'@circuitId': 'ricard', '@url': 'http://en.wi...",2021-06-20,13:00:00Z,{'Date': '2021-06-18'},{'Date': '2021-06-18'},{'Date': '2021-06-19'},{'Date': '2021-06-19'},
7,2021,8,http://en.wikipedia.org/wiki/2021_Styrian_Gran...,Styrian Grand Prix,"{'@circuitId': 'red_bull_ring', '@url': 'http:...",2021-06-27,13:00:00Z,{'Date': '2021-06-25'},{'Date': '2021-06-25'},{'Date': '2021-06-26'},{'Date': '2021-06-26'},
8,2021,9,http://en.wikipedia.org/wiki/2021_Austrian_Gra...,Austrian Grand Prix,"{'@circuitId': 'red_bull_ring', '@url': 'http:...",2021-07-04,13:00:00Z,{'Date': '2021-07-02'},{'Date': '2021-07-02'},{'Date': '2021-07-03'},{'Date': '2021-07-03'},
9,2021,10,http://en.wikipedia.org/wiki/2021_British_Gran...,British Grand Prix,"{'@circuitId': 'silverstone', '@url': 'http://...",2021-07-18,14:00:00Z,{'Date': '2021-07-16'},{'Date': '2021-07-17'},,{'Date': '2021-07-16'},{'Date': '2021-07-17'}
