In [2]:
import pandas as pd
import numpy as np
from selenium import webdriver
import requests
import bs4
from bs4 import BeautifulSoup
import time

from sys import platform

# Select the local checkout of the f1-analytics repository for the current OS.
# `path` is assigned only on macOS ("darwin") and Windows ("win32"); on any
# other platform it is left undefined.
if platform == "darwin":
    path = '~/Documents/GitHub/f1-analytics/'
elif platform == "win32":
    path = 'C:/Users/olive/GitHub/f1-analytics/'

In [18]:
### Debug webscraper to ascertain if a driver made it into q3
#
# For each season: read the season's race index page, collect the per-race
# result links, switch each link to its qualifying page and stack the first
# HTML table of every page into `qualifying_results`. Each table is tagged
# with `season` and `round` (the 1-based position of its link on the index).

qualifying_results = pd.DataFrame()
season_frames = []
for year in [2020]:
    url = 'https://www.formula1.com/en/results.html/{}/races.html'
    # A timeout keeps a stalled connection from hanging the kernel, and
    # raise_for_status surfaces HTTP errors instead of silently parsing an
    # error page into an empty result.
    r = requests.get(url.format(year), timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')

    year_links = []
    for page in soup.find_all('a', attrs = {'class':"resultsarchive-filter-item-link FilterTrigger"}):
        # Anchors without an href yield None, which would break the `in` test.
        link = page.get('href') or ''
        if f'/en/results.html/{year}/races/' in link:
            year_links.append(link)

    new_url = 'https://www.formula1.com{}'

    # Collect one frame per round and concatenate once, rather than growing a
    # DataFrame inside the loop.
    round_frames = []
    for round_number, link in enumerate(year_links, start=1):
        link = link.replace('race-result.html', 'qualifying.html')
        df = pd.read_html(new_url.format(link))[0]
        # Drop the "Unnamed" columns in a single call (no mutation while
        # iterating); str() guards against non-string column labels.
        df = df.drop(columns=[col for col in df.columns if 'Unnamed' in str(col)])
        df['season'] = year
        df['round'] = round_number
        round_frames.append(df)

    year_df = pd.concat(round_frames) if round_frames else pd.DataFrame()
    if not year_df.empty:
        season_frames.append(year_df)

# pd.concat raises on an empty list, so keep the empty frame in that case.
if season_frames:
    qualifying_results = pd.concat(season_frames)

In [24]:
# Show the round-3 rows that have a Q3 time recorded.
has_q3_time = qualifying_results['Q3'].notnull()
is_round_three = qualifying_results['round'] == 3
qualifying_results[has_q3_time & is_round_three]

Unnamed: 0,Pos,No,Driver,Car,Q1,Q2,Q3,Laps,season,round
0,1,44,Lewis Hamilton HAM,Mercedes,1:14.907,1:14.261,1:13.447,22,2020,3
1,2,77,Valtteri Bottas BOT,Mercedes,1:15.474,1:14.530,1:13.554,20,2020,3
2,3,18,Lance Stroll STR,Racing Point BWT Mercedes,1:14.895,1:15.176,1:14.377,22,2020,3
3,4,11,Sergio Perez PER,Racing Point BWT Mercedes,1:14.681,1:15.394,1:14.545,22,2020,3
4,5,5,Sebastian Vettel VET,Ferrari,1:15.455,1:15.131,1:14.774,20,2020,3
5,6,16,Charles Leclerc LEC,Ferrari,1:15.793,1:15.006,1:14.817,20,2020,3
6,7,33,Max Verstappen VER,Red Bull Racing Honda,1:15.495,1:14.976,1:14.849,21,2020,3
7,8,4,Lando Norris NOR,McLaren Renault,1:15.444,1:15.085,1:14.966,20,2020,3
8,9,55,Carlos Sainz SAI,McLaren Renault,1:15.281,1:15.267,1:15.027,21,2020,3


In [None]:
### Web scraping qualifying data
#
# For each season: read the season's race index page, collect the per-race
# result links, switch each link to its starting-grid page and stack the first
# HTML table of every page into `qualifying_results`. Each table is tagged
# with `season` and `round` (the 1-based position of its link on the index).

qualifying_results = pd.DataFrame()
season_frames = []
for year in range(2020, 2022):
    url = 'https://www.formula1.com/en/results.html/{}/races.html'
    # A timeout keeps a stalled connection from hanging the kernel, and
    # raise_for_status surfaces HTTP errors instead of silently parsing an
    # error page into an empty result.
    r = requests.get(url.format(year), timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')

    year_links = []
    for page in soup.find_all('a', attrs = {'class':"resultsarchive-filter-item-link FilterTrigger"}):
        # Anchors without an href yield None, which would break the `in` test.
        link = page.get('href') or ''
        if f'/en/results.html/{year}/races/' in link:
            year_links.append(link)

    new_url = 'https://www.formula1.com{}'

    # Collect one frame per round and concatenate once, rather than growing a
    # DataFrame inside the loop.
    round_frames = []
    for round_number, link in enumerate(year_links, start=1):
        link = link.replace('race-result.html', 'starting-grid.html')
        df = pd.read_html(new_url.format(link))[0]
        # Drop the "Unnamed" columns in a single call (no mutation while
        # iterating); str() guards against non-string column labels.
        df = df.drop(columns=[col for col in df.columns if 'Unnamed' in str(col)])
        df['season'] = year
        df['round'] = round_number
        round_frames.append(df)

    year_df = pd.concat(round_frames) if round_frames else pd.DataFrame()
    if not year_df.empty:
        season_frames.append(year_df)

# pd.concat raises on an empty list, so keep the empty frame in that case.
if season_frames:
    qualifying_results = pd.concat(season_frames)

In [3]:
# Rename the scraped columns to the names used by the rest of the notebook
# and remove the `No` column.
qualifying_results = (
    qualifying_results
    .rename(columns={'Pos': 'grid', 'Driver': 'driver', 'Car': 'constructor', 'Time': 'qual_time'})
    # errors='ignore' keeps this cell re-runnable: on a second run `No` is
    # already gone and a plain drop would raise KeyError.
    .drop(columns='No', errors='ignore')
)

qualifying_results.head()

Unnamed: 0,grid,driver,constructor,qual_time,season,round
0,1,Valtteri Bottas BOT,Mercedes,1:02.939,2020,1
1,2,Max Verstappen VER,Red Bull Racing Honda,1:03.477,2020,1
2,3,Lando Norris NOR,McLaren Renault,1:03.626,2020,1
3,4,Alexander Albon ALB,Red Bull Racing Honda,1:03.868,2020,1
4,5,Lewis Hamilton HAM,Mercedes,1:02.951,2020,1


In [4]:
def format_qualifying(x):
    """Convert a lap-time value to a number of seconds.

    Parameters
    ----------
    x : str, int, float or None
        A time written as ``'m:ss.fff'`` (or ``'h:mm:ss.fff'``), a plain
        number of seconds (``'53.377'``, ``53.377``, ``0``), or a missing
        value (``None``, ``NaN``, ``'nan'``).

    Returns
    -------
    float
        Total seconds; ``NaN`` when the value is missing.

    Raises
    ------
    ValueError
        If any colon-separated component cannot be parsed as a number
        (for example an empty string).
    """
    if x is None:
        return float('nan')
    # Going through str() lets callers pass raw numbers (including NaN) as
    # well as text; the previous `':' in x` test raised TypeError on them.
    text = str(x).strip()
    seconds = 0.0
    # Fold the components left to right in base 60: 'm:ss' gives m*60 + ss,
    # 'h:mm:ss' gives (h*60 + mm)*60 + ss, and a bare number passes through.
    for part in text.split(':'):
        seconds = seconds * 60 + float(part)
    return seconds

In [5]:
# Convert every qual_time entry to seconds; each value is passed through str()
# first because format_qualifying expects text.
raw_times = qualifying_results['qual_time']
qualifying_results['qual_time'] = raw_times.apply(lambda raw: format_qualifying(str(raw)))

qualifying_results['qual_time'].describe()

count    696.000000
mean      84.047190
std       14.402147
min       53.377000
25%       76.011750
50%       81.645500
75%       91.041000
max      141.611000
Name: qual_time, dtype: float64

In [6]:
# calculate difference in qualifying times
#
# q_delta is each row's gap in qual_time to the first recorded qual_time of
# its (season, round) group, with rows ordered by grid position.

# Chaining filter -> sort -> copy produces an independent frame, so the column
# assignment below does not write into a slice of the previous frame.
qualifying_results = (
    qualifying_results[qualifying_results['qual_time'] != 0]
    .sort_values(['season', 'round', 'grid'])
    .copy()
)
# Subtract the group's reference time directly instead of accumulating
# row-to-row diffs: a missing qual_time turns two consecutive diffs into NaN,
# and the cumulative sum then silently loses that step for every later row of
# the same round.
reference_time = qualifying_results.groupby(['season', 'round'])['qual_time'].transform('first')
# Rows without a qual_time keep the existing convention of q_delta == 0.
qualifying_results['q_delta'] = (qualifying_results['qual_time'] - reference_time).fillna(0)

In [7]:
# Preview the first five rows, now including q_delta.
qualifying_results.head(n=5)

Unnamed: 0,grid,driver,constructor,qual_time,season,round,q_delta
0,1,Valtteri Bottas BOT,Mercedes,62.939,2020,1,0.0
1,2,Max Verstappen VER,Red Bull Racing Honda,63.477,2020,1,0.538
2,3,Lando Norris NOR,McLaren Renault,63.626,2020,1,0.687
3,4,Alexander Albon ALB,Red Bull Racing Honda,63.868,2020,1,0.929
4,5,Lewis Hamilton HAM,Mercedes,62.951,2020,1,0.012


In [16]:
### Checking for nulls

# Rows whose qual_time is missing.
missing_time = qualifying_results['qual_time'].isna()
qualifying_results.loc[missing_time]

Unnamed: 0,grid,driver,constructor,qual_time,season,round,q_delta
9,10,Pierre Gasly GAS,AlphaTauri Honda,,2020,3,0.0
9,10,Esteban Ocon OCO,Renault,,2020,9,0.0
9,10,Daniel Ricciardo RIC,Renault,,2020,12,0.0
14,15,Carlos Sainz SAI,McLaren Renault,,2020,15,0.0
18,19,Sergio Perez PER,Racing Point BWT Mercedes,,2020,17,0.0
...,...,...,...,...,...,...,...
15,16,Nicholas Latifi LAT,Williams Mercedes,,2021,19,0.0
16,17,George Russell RUS,Williams Mercedes,,2021,19,0.0
17,18,Mick Schumacher MSC,Haas Ferrari,,2021,19,0.0
18,19,Nikita Mazepin MAZ,Haas Ferrari,,2021,19,0.0


In [10]:
# NOTE(review): `nulls` is not assigned in any cell visible in this notebook, so
# this cell raises NameError on a fresh kernel (Restart & Run All). It was
# presumably defined in a cell that has since been removed — restore that
# definition or delete this cell. TODO confirm.
nulls

Unnamed: 0,grid,driver,constructor,qual_time,season,round,q_delta
0,1,Valtteri Bottas BOT,Mercedes,62.939,2020,1,0.000
1,2,Max Verstappen VER,Red Bull Racing Honda,63.477,2020,1,0.538
2,3,Lando Norris NOR,McLaren Renault,63.626,2020,1,0.687
3,4,Alexander Albon ALB,Red Bull Racing Honda,63.868,2020,1,0.929
4,5,Lewis Hamilton HAM,Mercedes,62.951,2020,1,0.012
...,...,...,...,...,...,...,...
14,15,Sebastian Vettel VET,Aston Martin Mercedes,84.305,2021,22,2.196
15,16,Nicholas Latifi LAT,Williams Mercedes,84.338,2021,22,2.229
16,17,George Russell RUS,Williams Mercedes,84.423,2021,22,2.314
17,18,Kimi Räikkönen RAI,Alfa Romeo Racing Ferrari,84.779,2021,22,2.670


In [8]:
# qualifying_results.to_csv(path+'data/qualifying.csv', index = False)