In [75]:
import pandas as pd
from datetime import datetime
import requests
import json
import warnings

from sys import platform

# Pick the base directory that every data file path in this notebook is built from.
if platform == "win32":
    path = 'C:/Users/olive/GitHub/f1-analytics/'
elif platform == "darwin":
    # path = '~/Documents/GitHub/f1-analytics/'
    path = '/Users/oliverjcarter/Documents/GitHub/f1-analytics/'
else:
    # Any other platform (e.g. Linux) previously left `path` undefined, so the
    # first cell that used it failed with a NameError. Fall back to a path
    # relative to the current working directory.
    # NOTE(review): assumes the notebook is started from the project root - confirm.
    path = './'

# Silence RuntimeWarning messages for the rest of the session.
warnings.filterwarnings("ignore", category=RuntimeWarning)
# Disable pandas' chained-assignment warning.
pd.options.mode.chained_assignment = None  # default='warn'

%matplotlib inline

In [76]:
# Read the race calendar and keep the distinct round numbers of the 2021 season.
races = pd.read_csv(path + 'data/races.csv')

is_2021 = races['season'] == 2021
rounds = races.loc[is_2021, 'round'].unique()

In [77]:
# Column-oriented accumulator: one independent, initially empty list per output column.
qual = {
    column: []
    for column in (
        'season',
        'round',
        'position',
        'circuit_id',
        'driver',
        'q1',
        'q2',
        'q3',
    )
}

In [78]:
def format_time(x):
    """Convert a lap-time value to seconds, rounded to three decimals.

    Parameters
    ----------
    x : str or number
        Either a time string such as ``'1:27.511'`` or ``'87.511'``
        (colon-separated parts, most significant unit first), a status
        string containing ``'DNF'`` or ``'DNS'``, or a plain number.

    Returns
    -------
    float or str
        The time in seconds rounded to 3 decimals, or ``x`` unchanged when it
        is a status string containing ``'DNF'`` or ``'DNS'``.

    Raises
    ------
    ValueError
        If a part of the time string is not a valid number (e.g. ``''``).
    TypeError
        If ``x`` is not a string and cannot be converted by ``float``
        (e.g. ``None``).
    """
    if not isinstance(x, str):
        # The original tested `x != 0` only after the string checks, so a
        # numeric value raised TypeError before that guard was ever reached.
        return round(float(x), 3)

    if any(status in x for status in ('DNF', 'DNS')):
        return x

    # Fold the colon-separated parts left to right, so 'm:ss.sss' and
    # 'h:mm:ss.sss' both work; a string without ':' is a single part.
    seconds = 0.0
    for part in x.split(':'):
        seconds = seconds * 60 + float(part)
    return round(seconds, 3)

In [79]:
# Query the qualifying results API for every 2021 round and collect one row
# per driver into `qual`, then build the `qualifying` DataFrame from it.

# Empty the accumulator first so re-running this cell does not append a
# second copy of every row.
for column in qual.values():
    column.clear()

url = 'http://ergast.com/api/f1/{}/{}/qualifying.json'

# (column in `qual`, key in the API result, placeholder when no usable time)
sessions = (
    ('q3', 'Q3', 'OUT'),
    ('q2', 'Q2', 'OUT'),
    ('q1', 'Q1', 'DNF'),
)

for i in rounds:
    # A timeout keeps the cell from hanging forever on a stalled connection.
    r = requests.get(url.format(2021, i), timeout=30)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    r.raise_for_status()
    # Deliberately not named `json`: that would rebind the imported module.
    payload = r.json()

    for item in payload['MRData']['RaceTable']['Races']:
        for n in item['QualifyingResults']:
            qual['season'].append(2021)
            qual['round'].append(i)
            qual['position'].append(n['position'])
            qual['driver'].append(n['Driver']['driverId'])
            qual['circuit_id'].append(item['Circuit']['circuitId'])

            for column, key, placeholder in sessions:
                try:
                    lap = format_time(n[key])
                except (KeyError, TypeError, ValueError):
                    # Session key missing from the result, or its value is
                    # not a parsable time: record the placeholder instead.
                    lap = placeholder
                qual[column].append(lap)

qualifying = pd.DataFrame(qual)

In [80]:
def format_fastest_lap(row):
    """Return the time of the first session, checked in the order q3, q2, q1,
    whose value is not that session's placeholder.

    The placeholder is 'OUT' for q3 and q2 and 'DNF' for q1. Returns None when
    all three sessions hold their placeholder.
    """
    for session, placeholder in (('q3', 'OUT'), ('q2', 'OUT'), ('q1', 'DNF')):
        lap = getattr(row, session)
        if str(lap) != placeholder:
            return lap
    return None

In [81]:
def format_q_rounds(row):
    """Return the name ('q3', 'q2' or 'q1') of the first session, checked in
    that order, whose value is not that session's placeholder.

    The placeholder is 'OUT' for q3 and q2 and 'DNF' for q1. Returns None when
    all three sessions hold their placeholder.
    """
    checks = (('q3', 'OUT'), ('q2', 'OUT'), ('q1', 'DNF'))
    reached = (
        session
        for session, placeholder in checks
        if str(getattr(row, session)) != placeholder
    )
    # The generator is lazy, so later sessions are only read when needed.
    return next(reached, None)

In [82]:
# Collapse the three per-session columns into one time plus the session it
# came from, then drop the per-session columns.
qualifying['fastest_time'] = qualifying.apply(format_fastest_lap, axis=1)
qualifying['stage'] = qualifying.apply(format_q_rounds, axis=1)

session_columns = ['q1', 'q2', 'q3']
qualifying = qualifying.drop(columns=session_columns)

In [85]:
# Show the rows belonging to round 21.
qualifying.loc[qualifying['round'] == 21]

Unnamed: 0,season,round,position,circuit_id,driver,fastest_time,stage
399,2021,21,1,jeddah,hamilton,87.511,q3
400,2021,21,2,jeddah,bottas,87.622,q3
401,2021,21,3,jeddah,max_verstappen,87.653,q3
402,2021,21,4,jeddah,leclerc,88.054,q3
403,2021,21,5,jeddah,perez,88.123,q3
404,2021,21,6,jeddah,gasly,88.125,q3
405,2021,21,7,jeddah,norris,88.18,q3
406,2021,21,8,jeddah,tsunoda,88.442,q3
407,2021,21,9,jeddah,ocon,88.647,q3
408,2021,21,10,jeddah,giovinazzi,88.754,q3
