In [16]:
import pandas as pd
import xmltodict
import requests
url = "http://ergast.com/api/f1/drivers.json?callback=myParser"
url1 = "http://ergast.com/api/f1/drivers.json"
url2 = "http://ergast.com/api/f1/1980"
response = requests.get(url2)

In [17]:
'''
Checking if the parsed data is in a json file
Trying to parse an empty response as if it were JSON would raise a JSONDecodeError, so we have to check if the response status is not 204, 
the response headers dict has a content-type key and the value of the `content-type` header is application/json.
'''
response = requests.delete(url2)

print('response: 👉️', response)  # response: 👉️ <Response [204]>
print('response.text: 👉️', response.text)  # response.text: 👉️ ""

# response.status_code: 👉️ 204
print('response.status_code: 👉️', response.status_code)
print('response.headers: 👉️', response.headers)

if (response.status_code != 204 and 'content-type' in response.headers and 'application/json' in response.headers['content-type']):
    parsed = response.json()
    print('✅ parsed response: 👉️', parsed)
else:
# 👇️ this runs
    print('⛔️ conditions not met')

response: 👉️ <Response [200]>
response.text: 👉️ <?xml version="1.0" encoding="utf-8"?>
<?xml-stylesheet type="text/xsl" href="/schemas/mrd-1.5.xsl"?>
<MRData xmlns="http://ergast.com/mrd/1.5" series="f1" url="http://ergast.com/api/f1/1980" limit="30" offset="0" total="14">
	<RaceTable season="1980">
		<Race season="1980" round="1" url="http://en.wikipedia.org/wiki/1980_Argentine_Grand_Prix">
			<RaceName>Argentine Grand Prix</RaceName>
			<Circuit circuitId="galvez" url="http://en.wikipedia.org/wiki/Aut%C3%B3dromo_Oscar_Alfredo_G%C3%A1lvez">
				<CircuitName>Autódromo Juan y Oscar Gálvez</CircuitName>
				<Location lat="-34.6943" long="-58.4593">
					<Locality>Buenos Aires</Locality>
					<Country>Argentina</Country>
				</Location>
			</Circuit>
			<Date>1980-01-13</Date>
		</Race>
		<Race season="1980" round="2" url="http://en.wikipedia.org/wiki/1980_Brazilian_Grand_Prix">
			<RaceName>Brazilian Grand Prix</RaceName>
			<Circuit circuitId="interlagos" url="http://en.wikipedia.org/wi

In [18]:
#Exploring response attributes
print(response.url)
print(response.history)
print(response.status_code)
print(response.headers)
print(response.headers['content-type'])

http://ergast.com/api/f1/1980
[]
200
{'Date': 'Mon, 12 Sep 2022 17:44:56 GMT', 'Server': 'Apache/2.4.6 (CentOS) OpenSSL/1.0.2k-fips PHP/5.4.16', 'X-Powered-By': 'PHP/5.4.16', 'Access-Control-Allow-Origin': '*', 'Cache-Control': 'public, max-age=300', 'Content-Length': '6703', 'Keep-Alive': 'timeout=5, max=100', 'Connection': 'Keep-Alive', 'Content-Type': 'application/xml; charset=utf-8'}
application/xml; charset=utf-8


In [22]:
# Reading the XML files and converting them to data frames
races = pd.DataFrame.from_dict(xmltodict.parse(response.text)['MRData']['RaceTable']['Race'])

In [23]:
races.head(1)

Unnamed: 0,@season,@round,@url,RaceName,Circuit,Date
0,1980,1,http://en.wikipedia.org/wiki/1980_Argentine_Gr...,Argentine Grand Prix,"{'@circuitId': 'galvez', '@url': 'http://en.wi...",1980-01-13


In [24]:
races['Circuit'][1]

{'@circuitId': 'interlagos',
 '@url': 'http://en.wikipedia.org/wiki/Aut%C3%B3dromo_Jos%C3%A9_Carlos_Pace',
 'CircuitName': 'Autódromo José Carlos Pace',
 'Location': {'@lat': '-23.7036',
  '@long': '-46.6997',
  'Locality': 'São Paulo',
  'Country': 'Brazil'}}

In [25]:
# Treating races data frame

def practices_quali(i):
    races[i] = races[i].transform(lambda x: x['Date'])
# Converting 'FirstPractice' column
try:
    races['FirstPractice'] = races['FirstPractice'].transform(lambda x: x['Date'])
except:
    races['FirstPractice'] = 'NA'
# Converting 'SecondPractice' column
try:
    races['SecondPractice'] = races['SecondPractice'].transform(lambda x: x['Date'])
except:
    races['SecondPractice'] = 'NA'
# Converting 'ThirdPractice' column
try:
    races['ThirdPractice'] = races['ThirdPractice'].apply(lambda x: x if x == x else {'Date': 'NA'})
    races['ThirdPractice'] = races['ThirdPractice'].transform(lambda x: x['Date'])
except:
    races['ThirdPractice'] = 'NA'
# Converting 'Qualifying' column
try:
    races['FirstPractice'] = races['FirstPractice'].transform(lambda x: x['Date'])
except:
    races['FirstPractice'] = 'NA'
# Splitting 'Circuit' column for circuit information
races['CircuitID'] = races['Circuit'].transform(lambda x: x['@circuitId'])
races['CircuitName'] = races['Circuit'].transform(lambda x: x['CircuitName'])
races['Locality'] = races['Circuit'].transform(lambda x: x['Location']['Locality'])
races['Country'] = races['Circuit'].transform(lambda x: x['Location']['Country'])
# Dropping unnecesary columns
races.drop(columns=['@url', 'Circuit'], inplace=True)
# Renaming columns
races.rename(columns={"@season": "Season", "@round": "round", "Date": "Race"}, errors="raise", inplace=True)

KeyError: 'FirstPractice'

In [7]:
races.head(1)

Unnamed: 0,Season,round,RaceName,Race,Time,FirstPractice,SecondPractice,ThirdPractice,Qualifying,Sprint,CircuitID,CircuitName,Locality,Country
0,2021,1,Bahrain Grand Prix,2021-03-28,15:00:00Z,2021-03-26,2021-03-26,2021-03-27,2021-03-27,,bahrain,Bahrain International Circuit,Sakhir,Bahrain
