In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import requests
import json
import warnings
from unidecode import unidecode

from sys import platform

if platform == "win32":
    path = 'C:/Users/olive/GitHub/f1-analytics/'
elif platform == "darwin":
    path = '~/Documents/GitHub/f1-analytics/'
    # path = '/Users/oliverjcarter/Documents/GitHub/f1-analytics/'

warnings.filterwarnings("ignore", category=RuntimeWarning) 
pd.options.mode.chained_assignment = None  # default='warn'

%matplotlib inline

In [2]:
races = pd.read_csv(path+'data/races.csv')

rounds = []
for year in np.array(races.season.unique()):
    rounds.append([year, list(races[races.season == year]['round'])])

In [3]:
qual = {
    'season': [],
    'round': [],
    'grid': [],
    'circuit_id':[],
    'driver': [],
    'q1': [],
    'q2': [],
    'q3': []
}

In [4]:
def format_time(x):
    if not any(i in x for i in ['DNF', 'DNS']):
        if ':' in x:
            return round(float(str(x).split(':')[1]) + (60 * float(str(x).split(':')[0])), 3) if x != 0 else 0
        else:
            return(round(float(x), 3))
    else:
        return x

In [5]:
# query results API

for x in list(range(len(rounds))):
    for i in rounds[x][1]:
        url = 'http://ergast.com/api/f1/{}/{}/qualifying.json'
        r = requests.get(url.format(rounds[x][0], i))
        json = r.json()

        for item in json['MRData']['RaceTable']['Races']:
            for n in item['QualifyingResults']:
                qual['season'].append(rounds[x][0])
                qual['round'].append(i)
                qual['grid'].append(n['position'])
                qual['driver'].append(
                    unidecode(n['Driver']['givenName'].lower()) +'_'+ unidecode(n['Driver']['familyName'].lower())
                )
                qual['circuit_id'].append(item['Circuit']['circuitId'])

                try:
                    qual['q3'].append(format_time(n['Q3']))
                except:
                    qual['q3'].append('OUT')

                try:
                    qual['q2'].append(format_time(n['Q2']))
                except:
                    qual['q2'].append('OUT')

                try:
                    qual['q1'].append(format_time(n['Q1']))
                except:
                    qual['q1'].append('DNF')

qualifying = pd.DataFrame(qual)

In [6]:
def format_fastest_lap(row):
    if str(row.q3) != 'OUT':
        return row.q3
    elif str(row.q2) != 'OUT':
        return row.q2
    elif str(row.q1) != 'DNF':
        return row.q1
    else:
        return None

In [7]:
def format_q_rounds(row):
    if str(row.q3) != 'OUT':
        return 'q3'
    elif str(row.q2) != 'OUT':
        return 'q2'
    elif str(row.q1) != 'DNF':
        return 'q1'
    else:
        return 'q1'

In [8]:
qualifying['fastest_time'] = qualifying.apply(lambda row: format_fastest_lap(row), axis=1)

qualifying['stage'] = qualifying.apply(lambda row: format_q_rounds(row), axis=1)
qualifying = qualifying.drop(['q1', 'q2', 'q3'], axis=1)

In [9]:
# calculate difference in qualifying times

qualifying = qualifying[qualifying['fastest_time'] != 0]
qualifying.sort_values(['season', 'round', 'grid'], inplace = True)
qualifying['time_difference'] = qualifying.groupby(['season', 'round']).fastest_time.diff()
qualifying['q_delta'] = qualifying.groupby(['season', 'round']).time_difference.cumsum().fillna(0)
qualifying.drop('time_difference', axis=1, inplace=True)

qualifying.q_delta = qualifying.q_delta.apply(lambda x: 0.000 if x < 0 else x)

qualifying.head()

Unnamed: 0,season,round,grid,circuit_id,driver,fastest_time,stage,q_delta
0,2014,1,1,albert_park,lewis_hamilton,104.231,q3,0.0
9,2014,1,10,albert_park,valtteri_bottas,108.147,q3,3.916
10,2014,1,11,albert_park,jenson_button,104.437,q2,0.206
11,2014,1,12,albert_park,kimi_raikkonen,104.494,q2,0.263
12,2014,1,13,albert_park,sebastian_vettel,104.688,q2,0.457


In [10]:
qualifying.to_csv(path+'data/qualifying.csv', index=False)