In [6]:
# Imports libraries
import pandas as pd
import numpy as np
import datetime
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats

import ssl
ssl._create_default_https_context = ssl._create_unverified_context

In [58]:
# Imports 'results' and 'races' as data frames
results = pd.read_csv('f1_archive/results.csv')
races = pd.read_csv('f1_archive/races.csv')
drivers = pd.read_csv('f1_archive/drivers.csv')

drivers[drivers['code'] == 'PER']

Unnamed: 0,driverId,driverRef,number,code,forename,surname,dob,nationality,url
814,815,perez,11,PER,Sergio,Pérez,1990-01-26,Mexican,http://en.wikipedia.org/wiki/Sergio_P%C3%A9rez


In [7]:
link = 'https://backofthegrid.com/prediction-results?race=' + str(62)
df = pd.read_html(link)[0]

df.head()

Unnamed: 0.1,Unnamed: 0,Name,Fastest Qualifier,Winner,First DNF,Number of Finishers,Random DriverAntonio Giovinazzi,Score
0,1,Alistair Cornwall,Max Verstappen,Lewis Hamilton,Nikita Mazepin,16,12th,5.0
1,2,Adam Plant,Max Verstappen,Lewis Hamilton,Nikita Mazepin,16,14th,4.0
2,3,Andrew Styles,Max Verstappen,Lewis Hamilton,Nikita Mazepin,16,14th,4.0
3,4,Richard Garvey,Max Verstappen,Lewis Hamilton,Nikita Mazepin,16,13th,4.0
4,5,Scott Hempsall Jones,Max Verstappen,Lewis Hamilton,Nikita Mazepin,16,13th,4.0


In [45]:
# Creates a dataframe to check the results against
df_check = races[races['year'] == 2021][['raceId', 'round', 'name']]
df_check.sort_values(by='round', inplace=True)
df_check.drop([1051, 1052], axis=0, inplace=True)
df_check.reset_index(drop=True, inplace=True)
df_check['botgId'] = df_check.index + 62

for i in df_check.index:
    if i > 13:
        df_check.loc[i,'botgId'] += 1

df_check['winner'] = ''
df_check['dnf1'] = ''

for i in df_check.index:
    id = df_check.loc[i,'raceId']
    winner = results[(results['raceId'] == id) & (results['positionOrder'] == 1)]['driverId'].to_list()[0]
    try:
        dnf1 = results[(results['raceId'] == id) & (results['positionOrder'] == 20)]['driverId'].to_list()[0]
    except:
        dnf1 = results[(results['raceId'] == id) & (results['positionOrder'] == 19)]['driverId'].to_list()[0]
    
    winner_name = drivers[drivers['driverId'] == winner]['forename'].to_list()[0] + ' ' + drivers[drivers['driverId'] == winner]['surname'].to_list()[0]
    dnf1_name = drivers[drivers['driverId'] == dnf1]['forename'].to_list()[0] + ' ' + drivers[drivers['driverId'] == dnf1]['surname'].to_list()[0]

    df_check.loc[i,'winner'] = winner_name
    df_check.loc[i,'dnf1'] = dnf1_name

df_check.head(22)

Unnamed: 0,raceId,round,name,botgId,winner,dnf1
0,1052,1,Bahrain Grand Prix,62,Lewis Hamilton,Nikita Mazepin
1,1053,2,Emilia Romagna Grand Prix,63,Max Verstappen,Nicholas Latifi
2,1054,3,Portuguese Grand Prix,64,Lewis Hamilton,Kimi Räikkönen
3,1055,4,Spanish Grand Prix,65,Lewis Hamilton,Yuki Tsunoda
4,1056,5,Monaco Grand Prix,66,Max Verstappen,Charles Leclerc
5,1057,6,Azerbaijan Grand Prix,67,Sergio Pérez,Esteban Ocon
6,1059,7,French Grand Prix,68,Max Verstappen,Nikita Mazepin
7,1058,8,Styrian Grand Prix,69,Max Verstappen,Pierre Gasly
8,1060,9,Austrian Grand Prix,70,Max Verstappen,Esteban Ocon
9,1061,10,British Grand Prix,71,Lewis Hamilton,Max Verstappen


In [46]:
# Finds prediction data from https://backofthegrid.com/
botg_preds_dict = {}

for i in df_check['botgId']:
    link = 'https://backofthegrid.com/prediction-results?race=' + str(i)
    botg_preds_dict[i] = pd.read_html(link)[0]

In [47]:
botg_preds_dict[62].head()

Unnamed: 0.1,Unnamed: 0,Name,Fastest Qualifier,Winner,First DNF,Number of Finishers,Random DriverAntonio Giovinazzi,Score
0,1,Alistair Cornwall,Max Verstappen,Lewis Hamilton,Nikita Mazepin,16,12th,5.0
1,2,Adam Plant,Max Verstappen,Lewis Hamilton,Nikita Mazepin,16,14th,4.0
2,3,Andrew Styles,Max Verstappen,Lewis Hamilton,Nikita Mazepin,16,14th,4.0
3,4,Richard Garvey,Max Verstappen,Lewis Hamilton,Nikita Mazepin,16,13th,4.0
4,5,Scott Hempsall Jones,Max Verstappen,Lewis Hamilton,Nikita Mazepin,16,13th,4.0


In [110]:
# Adds column to the checker for evaluating the random driver
name_to_id = {}

for i in drivers.index:
    id = drivers.loc[i,'driverId']
    name = drivers.loc[i,'forename'] + ' ' + drivers.loc[i,'surname']
    name_to_id[name] = id

random_driver_dict = {}

for i in df_check['botgId']:
    random_driver = botg_preds_dict[i].columns[6][13:]
    random_driver_dict[i] = random_driver

df_check['randomDriver'] = random_driver_dict.values()
df_check['randomId'] = np.nan

for i in df_check.index:
    try:
        name = df_check.loc[i,'randomDriver']
        id = name_to_id[name]
        df_check.loc[i,'randomId'] = id
    except:
        df_check.loc[i,'randomId'] = 815

df_check['randomPos'] = np.nan

for i in df_check.index:
    driver = df_check.loc[i,'randomId']
    race = df_check.loc[i,'raceId']
    pos = results[(results['raceId'] == race) & (results['driverId'] == driver)]['positionOrder'].to_list()[0]
    df_check.loc[i,'randomPos'] = int(pos)

# Handels exceptions with wierd charachters not used in the BOTG Database
df_check.loc[5, 'winner'] = 'Sergio Perez'
df_check.loc[2, 'dnf1'] = 'Kimi Raikkonen'
df_check.loc[19, 'dnf1'] = 'Kimi Raikkonen'

df_check.to_csv('f1_archive/botg_prediction_checker.csv')

df_check.head(22)

Unnamed: 0,raceId,round,name,botgId,winner,dnf1,randomDriver,randomId,randomPos
0,1052,1,Bahrain Grand Prix,62,Lewis Hamilton,Nikita Mazepin,Antonio Giovinazzi,841.0,12.0
1,1053,2,Emilia Romagna Grand Prix,63,Max Verstappen,Nicholas Latifi,Sebastian Vettel,20.0,15.0
2,1054,3,Portuguese Grand Prix,64,Lewis Hamilton,Kimi Raikkonen,George Russell,847.0,16.0
3,1055,4,Spanish Grand Prix,65,Lewis Hamilton,Yuki Tsunoda,Daniel Ricciardo,817.0,6.0
4,1056,5,Monaco Grand Prix,66,Max Verstappen,Charles Leclerc,Lando Norris,846.0,3.0
5,1057,6,Azerbaijan Grand Prix,67,Sergio Perez,Esteban Ocon,Yuki Tsunoda,852.0,7.0
6,1059,7,French Grand Prix,68,Max Verstappen,Nikita Mazepin,Sergio Perez,815.0,3.0
7,1058,8,Styrian Grand Prix,69,Max Verstappen,Pierre Gasly,Nikita Mazepin,853.0,18.0
8,1060,9,Austrian Grand Prix,70,Max Verstappen,Esteban Ocon,Valtteri Bottas,822.0,2.0
9,1061,10,British Grand Prix,71,Lewis Hamilton,Max Verstappen,Lewis Hamilton,1.0,1.0


In [121]:
# Scores all of the results according to df_check
player_list = []

for keys in botg_preds_dict.keys():
    players = botg_preds_dict[keys]['Name'].to_list()
    player_list = player_list + players

player_list = list(set(player_list))
print(len(player_list))

scores = pd.DataFrame({'players': player_list})

for keys in botg_preds_dict.keys():
    scores[keys] = 0
    df = botg_preds_dict[keys]

    winner = df_check[df_check['botgId'] == keys]['winner'].to_list()[0]
    dnf1 = df_check[df_check['botgId'] == keys]['dnf1'].to_list()[0]
    randomPos = df_check[df_check['botgId'] == keys]['randomPos'].to_list()[0]

    for i in scores.index:
        player = scores.loc[i,'players']
        
        try:
            index = df[df['Name'] == player].index 
            score = 0

            if df.iloc[index, 3].to_list()[0] == winner:
                score += 1
            
            if df.iloc[index, 4].to_list()[0] == dnf1:
                score += 1

            if int(df.iloc[index, 6].to_list()[0][:-2]) == randomPos:
                score += 1

            scores.loc[i,keys] = score
        except:
            pass
        
scores.head()

601


Unnamed: 0,players,62,63,64,65,66,67,68,69,70,...,72,73,74,75,77,78,79,80,81,82
0,Lucy Monaghan,1,0,1,0,0,0,0,0,1,...,0,1,2,0,0,2,1,0,2,2
1,Kevin Cosgrave,0,0,1,1,1,0,1,0,1,...,0,0,2,0,0,0,0,1,1,1
2,Zacky Boyyyyyyyyy,0,0,0,1,0,0,1,0,0,...,0,0,0,0,1,0,1,1,1,0
3,Ollie Williams,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Ségolène Veyre,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0


In [124]:
scores['sum'] = scores[botg_preds_dict.keys()].sum(axis = 1)
scores.sort_values(by='sum', ascending=False, inplace=True)
scores.reset_index(drop=True, inplace=True)

scores.head(10)


Unnamed: 0,players,62,63,64,65,66,67,68,69,70,...,73,74,75,77,78,79,80,81,82,sum
0,Bruce Capstaff,1,1,0,2,1,0,2,2,1,...,1,2,0,2,1,1,1,1,2,21
1,Manu Chauhan,2,1,1,1,0,0,3,0,1,...,0,3,0,1,3,0,1,1,1,21
2,Phil Mark,1,0,0,2,1,0,3,1,1,...,0,2,0,1,1,0,1,1,3,20
3,Kirsty Bradshaw,2,0,1,1,1,0,1,2,1,...,0,3,1,0,2,0,1,1,1,19
4,Felipe Gurgel,2,1,1,2,0,1,1,2,1,...,0,2,1,0,1,2,1,1,0,19
5,rory clarke,2,1,1,1,1,0,1,2,1,...,0,1,0,0,3,0,1,1,2,18
6,Stephen barlow,1,1,0,1,1,0,3,1,2,...,1,2,0,0,2,2,1,0,0,18
7,Nate Everett,2,1,0,1,1,0,2,1,1,...,1,2,0,0,3,0,0,1,2,18
8,Stu Greenwood,2,0,1,1,0,0,2,0,3,...,0,3,0,1,2,0,1,1,0,18
9,The56 West,1,1,1,2,0,0,0,2,1,...,0,2,0,1,1,2,1,1,0,18
