In [18]:
## F1 practice and grid data from 2003 to 2024. 

In [19]:
import matplotlib.pyplot as plt
import seaborn as sns
from seaborn import set_style
set_style("whitegrid")

import pandas as pd
import numpy as np
import math
import requests
from bs4 import BeautifulSoup

In [20]:
## Put all the possible years in a list.
url = 'https://pitwall.app/races'
html = requests.get(url)
soup = BeautifulSoup(html.text, 'html.parser')
ul = soup.find('ul', attrs = {'class':'options'})
possible_years = [x.text for x in ul.find_all('a')]

In [21]:
## Define a function that converts minutes to seconds.
def convert(t):
    if ':' in t:
        sec_from_min = float(t.split(':')[0]) * 60
        sec = float(t.split(':')[1])
        return sec_from_min + sec
    else:
        return float(t)

In [22]:
F1 = pd.DataFrame(columns=['Season',
                               'Grand Prix',
                               'Driver',
                               'Constructor',
                               'FP1 Time','FP1 Gap','FP1 Laps',
                               'FP2 Time','FP2 Gap','FP2 Laps',
                               'FP3 Time','FP3 Gap','FP3 Laps',
                               'Grid Position', 'Qualifying Time'])

## You can slice possible_years
for year in possible_years[:1]: # range of seasons
    url = 'https://pitwall.app/races/archive/' + year
    html = requests.get(url)
    soup = BeautifulSoup(html.text, 'html.parser')
    tbody = soup.find('tbody')
    possible_races = [x['href'] for x in tbody.find_all('a')]

    ## You can slice possible races
    for race in possible_races[:1]: # range of races in a given season
        url = 'https://pitwall.app/races/' + year + race[11:]
        html = requests.get(url)
        soup = BeautifulSoup(html.text, 'html.parser')
        print('')
        print('working on', race)
        print('')
        
        ### Get the Free Practice data ###
        
        ## Get FP1
        FP1 = soup.find('div', attrs = {'id':'free_practice_1'})
        data_1 = []
        if FP1 != None:
            FP1_table = FP1.find('table', attrs = {'class':'data-table'})
            body = FP1_table.find('tbody')
            rows_1 = body.find_all('tr')
            data_1 = []
            for row in rows_1:
                row_data = []
                for cell in row.find_all('td')[1:]:
                    row_data.append(cell.text)
                ## Name
                name = row_data[0].split(' ')[2]
                row_data[0] = name
                ## Time
                if row_data[2] != '':
                    row_data[2] = round(convert(row_data[2]), 3)
                else:
                    row_data[2] = np.nan
                ## Gap
                if row_data[3] == '':
                    row_data[3] = 0 
                else:
                    if row_data[3] != '':
                        row_data[3] = float(convert(row_data[3].replace('+','')))
                    else:
                        row_data[3] = np.nan
                ## Lap
                if row_data[4] != '':
                    row_data[4] = float(row_data[4])
                else:
                    row_data[4] = np.nan
                data_1.append(row_data)
            print(data_1)
            print('')
        else:
            print('FP1 does not exists')
            print('')
        
        ## Get FP2
        FP2 = soup. find('div', attrs = {'id':'free_practice_2'}) 
        data_2 = []
        if FP2 != None:
            FP2_table = FP2.find('table', attrs = {'class':'data-table'})
            body = FP2_table.find('tbody')
            rows_2 = body.find_all('tr')
            for row in rows_2:
                row_data = []
                for cell in row.find_all('td')[1:]:
                    row_data.append(cell.text)
                ## Name
                name = row_data[0].split(' ')[2]
                row_data[0] = name
                ## Time
                if row_data[2] != '':
                    row_data[2] = round(convert(row_data[2]), 3)
                else:
                    row_data[2] = np.nan
                ## Gap
                if row_data[3] == '':
                    row_data[3] = 0 
                else:
                    if row_data[3] != '':
                        row_data[3] = float(convert(row_data[3].replace('+','')))
                    else:
                        row_data[3] = np.nan
                ## Lap
                if row_data[4] != '':
                    row_data[4] = float(row_data[4])
                else:
                    row_data[4] = np.nan
                data_2.append(row_data)
            print(data_2)    
            print('')
            ## Update data_1
            for row in data_1:
                if row[0] in [x[0] for x in data_2]:
                    index = [x[0] for x in data_2].index(row[0])
                    for i in data_2[index][2:]:
                        row.append(i)
                else:
                    row = [y for x in [row, [np.nan, np.nan, np.nan]] for y in x]
        else:
            print('FP2 does not exists')
            print('')
        
        ## Get FP3
        FP3 = soup.find('div', attrs = {'id':'free_practice_3'})
        data_3 = []
        if FP3 != None:
            FP3_table = FP3.find('table', attrs = {'class':'data-table'})
            body = FP3_table.find('tbody')
            rows_3 = body.find_all('tr')
            for row in rows_3:
                row_data = []
                for cell in row.find_all('td')[1:]:
                        row_data.append(cell.text)
                ## Name
                name = row_data[0].split(' ')[2]
                row_data[0] = name
                ## Time
                if row_data[2] != '':
                    row_data[2] = round(convert(row_data[2]), 3)
                else:
                    row_data[2] = np.nan
                ## Gap
                if row_data[3] == '':
                    row_data[3] = 0 
                else:
                    if row_data[3] != '':
                        row_data[3] = float(convert(row_data[3].replace('+','')))
                    else:
                        row_data[3] = np.nan
                ## Lap
                if row_data[4] != '':
                    row_data[4] = float(row_data[4])
                else:
                    row_data[4] = np.nan
                data_3.append(row_data)
            print(data_3)
            print('')
            ## Update data_1
            for row in data_1:
                if row[0] in [x[0] for x in data_3]:
                    index = [x[0] for x in data_3].index(row[0])
                    for i in data_3[index][2:]:
                        row.append(i)
                else:
                    row = [y for x in [row, [np.nan, np.nan, np.nan]] for y in x]
        else:
            print('FP3 does not exists')
            print('')
        
        ### Get the Starting grid ###
        grid = soup.find('div', attrs = {'id':'qualifying'})
        if grid != None:
            grid_table = grid.find('table', attrs = {'class':'data-table'})
            body = grid_table.find('tbody')
            rows_grid = body.find_all('tr')
            row_data_names = []
            row_data_times = []
            
            for row in rows_grid:
                for cell in row.find_all('td')[1:2]:
                    name = cell.text
                    row_data_names.append(name)
            
            row_data_names = [x.split(' ')[2] for x in row_data_names]
            
            for row in rows_grid:
                for cell in row.find_all('td')[4:5]:
                    time = cell.text
                    if time == '':
                        row_data_times.append(np.nan)
                    else:
                        row_data_times.append(time)
#             row_data_times = [round(convert(x), 3) for x in row_data_times] 
            
            ## Update data_1
            for row in data_1:
                if row[0] in row_data_names:
                    index = row_data_names.index(row[0])
                    row.append(index + 1)
                else:
                    row.append(np.nan)
                if row[0] in row_data_names:
                    index = row_data_names.index(row[0])
                    row.append(row_data_times[index]) 
                else:
                    row.append(np.nan)    
        else:
            print('Grid does not exist')
            print('')
            
        
        ### Update the data frame ###
        if (FP1 != None) and (FP2 != None) and (FP3 != None) and (grid != None):
            ## Make clean dataframes for the current race and year.
            L = [year, race[12:]]
            data_1 = [L + x for x in data_1]
            df = pd.DataFrame(data_1, columns=['Season',
                                   'Grand Prix',
                                   'Driver',
                                   'Constructor',
                                   'FP1 Time','FP1 Gap','FP1 Laps',
                                   'FP2 Time','FP2 Gap','FP2 Laps',
                                   'FP3 Time','FP3 Gap','FP3 Laps',
                                   'Grid Position', 'Qualifying Time'])
            F1 = pd.concat([F1, df])
            


working on /races/2023-bahrain-grand-prix

[['Pérez', 'Red Bull', 92.758, 0, 21.0], ['Alonso', 'Aston Martin', 93.196, 0.438, 22.0], ['Verstappen', 'Red Bull', 93.375, 0.617, 21.0], ['Norris', 'McLaren', 94.165, 1.407, 21.0], ['Leclerc', 'Ferrari', 94.257, 1.499, 17.0], ['Stroll', 'Aston Martin', 94.298, 1.54, 17.0], ['Magnussen', 'Haas', 94.402, 1.644, 20.0], ['Zhou', 'Alfa Romeo', 94.575, 1.817, 18.0], ['Bottas', 'Alfa Romeo', 94.689, 1.931, 20.0], ['Hamilton', 'Mercedes', 94.917, 2.159, 13.0], ['Russell', 'Mercedes', 94.966, 2.208, 20.0], ['Piastri', 'McLaren', 94.997, 2.239, 24.0], ['Tsunoda', 'AlphaTauri', 95.015, 2.257, 20.0], ['Hülkenberg', 'Haas', 95.043, 2.285, 22.0], ['Ocon', 'Alpine', 95.105, 2.347, 20.0], ['de', 'AlphaTauri', 95.402, 2.644, 25.0], ['Gasly', 'Alpine', 95.455, 2.697, 22.0], ['Sargeant', 'Williams', 95.749, 2.991, 24.0], ['Albon', 'Williams', 96.018, 3.26, 15.0], ['Sainz', 'Ferrari', 96.072, 3.314, 21.0]]

[['Alonso', 'Aston Martin', 90.907, 0, 25.0], ['Verst

  F1 = pd.concat([F1, df])


In [23]:
F1['Qualifying Time'].fillna(value=np.nan, inplace=True)
F1 = F1.dropna()
F1['Qualifying Time'] = [convert(x) for x in F1['Qualifying Time']] 

In [24]:
F1

Unnamed: 0,Season,Grand Prix,Driver,Constructor,FP1 Time,FP1 Gap,FP1 Laps,FP2 Time,FP2 Gap,FP2 Laps,FP3 Time,FP3 Gap,FP3 Laps,Grid Position,Qualifying Time
0,2023,bahrain-grand-prix,Pérez,Red Bull,92.758,0.0,21.0,91.078,0.171,26.0,92.446,0.106,12.0,2,91.479
1,2023,bahrain-grand-prix,Alonso,Aston Martin,93.196,0.438,22.0,90.907,0.0,25.0,92.34,0.0,13.0,5,91.158
2,2023,bahrain-grand-prix,Verstappen,Red Bull,93.375,0.617,21.0,91.076,0.169,24.0,92.345,0.005,13.0,1,91.295
3,2023,bahrain-grand-prix,Norris,McLaren,94.165,1.407,21.0,91.57,0.663,27.0,93.202,0.862,16.0,11,91.652
4,2023,bahrain-grand-prix,Leclerc,Ferrari,94.257,1.499,17.0,91.367,0.46,26.0,92.624,0.284,21.0,3,91.094
5,2023,bahrain-grand-prix,Stroll,Aston Martin,94.298,1.54,17.0,91.45,0.543,28.0,92.919,0.579,16.0,8,91.184
6,2023,bahrain-grand-prix,Magnussen,Haas,94.402,1.644,20.0,92.11,1.203,18.0,93.381,1.041,21.0,17,91.892
7,2023,bahrain-grand-prix,Zhou,Alfa Romeo,94.575,1.817,18.0,91.586,0.679,27.0,93.18,0.84,13.0,13,91.615
8,2023,bahrain-grand-prix,Bottas,Alfa Romeo,94.689,1.931,20.0,91.793,0.886,28.0,93.629,1.289,14.0,12,91.504
9,2023,bahrain-grand-prix,Hamilton,Mercedes,94.917,2.159,13.0,91.543,0.636,27.0,92.555,0.215,17.0,7,91.543


In [25]:
filename = 'F1Data.csv'

In [26]:
F1.to_csv(filename, sep=',', index=False, encoding='utf-8')