In [116]:
import pandas as pd
import json
from pathlib import Path
from datetime import datetime
import plotly.express as px

In [117]:
def get_current_drivers():
    '''Load the driver ids stored in the current-drivers JSON file.

    Reads ``../data/current_drivers.json`` (relative to the working
    directory) and iterates over its entries, taking each entry's
    ``driverId`` value.

    Returns:
        list: the ``driverId`` values in file order.
    '''
    source = Path('../data/current_drivers.json')
    with open(source, 'r', encoding='utf-8') as handle:
        records = json.load(handle)
    return [record['driverId'] for record in records]

# Driver ids in alphabetical order; later cells use this list as the row order of their tables.
current_drivers = sorted(get_current_drivers())

In [118]:
def get_current_circuits(year=2022):
    '''Return the circuit ids listed in a season's schedule file.

    Args:
        year: season whose ``../data/scheduled/{year}.json`` file is read.
            Defaults to 2022, the season that used to be hard-coded here,
            so existing zero-argument calls behave as before.

    Returns:
        list: the ``circuitId`` of every schedule entry, in file order.
    '''
    schedule_path = Path(f'../data/scheduled/{year}.json')
    with open(schedule_path, 'r', encoding='utf-8') as infile:
        schedule = json.load(infile)
    return [race['Circuit']['circuitId'] for race in schedule]
# Circuit ids read from ../data/scheduled/2022.json, loaded once for the notebook.
current_circuits = get_current_circuits()

In [119]:
def get_round(year,circuitId):
    '''Find which round of a season was held at a given circuit.

    Reads ``../data/scheduled/{year}.json`` and scans its entries in file
    order for one whose ``Circuit.circuitId`` equals ``circuitId``.

    Args:
        year: season used to build the schedule file path.
        circuitId: circuit identifier to look for.

    Returns:
        The ``round`` value of the first matching entry, exactly as stored
        in the file, or ``None`` when no entry matches.
    '''
    schedule_file = Path(f'../data/scheduled/{year}.json')
    with open(schedule_file, 'r', encoding='utf-8') as infile:
        schedule = json.load(infile)
    matching_rounds = (
        race['round']
        for race in schedule
        if race['Circuit']['circuitId'] == circuitId
    )
    return next(matching_rounds, None)

In [120]:
# Takes a string lap time and converts it to seconds (despite the function's name)
def time_to_nanoseconds(raw_time):
    '''Convert an ``M:SS.fff`` lap-time string to a float number of seconds.

    The name is historical: the value is accumulated in nanoseconds and
    then divided by 1e9, so callers receive seconds.

    Args:
        raw_time: lap time formatted as ``%M:%S.%f`` (e.g. ``'1:23.456'``).

    Returns:
        float: the lap time in seconds. If ``raw_time`` is not a string
        (``None``, NaN) or does not match the format, it is returned
        unchanged so missing values pass straight through.
    '''
    try:
        parsed = datetime.strptime(raw_time, '%M:%S.%f').time()
    except (TypeError, ValueError):
        # TypeError: input is not a string (None / NaN).
        # ValueError: string does not match the expected lap-time format.
        return raw_time
    nanoseconds = (parsed.minute*6e10)+(parsed.second*1e9)+(parsed.microsecond*1e3)
    return nanoseconds/1e9

In [121]:
# Signed symmetric percent difference between a driver's time and the average time
def percent_difference(driver_time,average_time):
    '''Return the percent difference between two times, signed by who is faster.

    The magnitude is the absolute difference divided by the midpoint of the
    two values, times 100.

    Args:
        driver_time: the driver's lap time.
        average_time: the reference (average) lap time.

    Returns:
        The magnitude, negated when ``driver_time`` is greater than
        ``average_time`` (slower than average) and positive otherwise.
    '''
    midpoint = (driver_time + average_time)/2
    magnitude = abs((driver_time - average_time)/midpoint)*100
    return -magnitude if driver_time > average_time else magnitude

In [122]:
def sort_scores(score_list):
    '''Order ``(label, score)`` entries by score, highest first, NaN entries last.

    Entries are sorted on their second element. An entry is treated as
    missing, and moved to the end in its original relative order, when
    either element is NaN. Checking the score is what keeps NaN out of the
    sort key; previously only the first element was checked, so a NaN score
    was passed to the sort, where its comparisons are always false.

    Args:
        score_list: iterable of two-element sequences ``(label, score)``.

    Returns:
        list: non-NaN entries sorted by score descending (ties keep input
        order), followed by the NaN entries.
    '''
    def _has_nan(entry):
        # The str() comparison matches float('nan') and numpy NaN alike and
        # never raises on string labels.
        return str(entry[0]) == 'nan' or str(entry[1]) == 'nan'

    ranked = []
    missing = []
    for entry in score_list:
        (missing if _has_nan(entry) else ranked).append(entry)

    ranked.sort(key=lambda entry: entry[1], reverse=True)
    return ranked + missing

In [123]:
def get_score(year,round,current_drivers):
    '''Score each current driver's lap times against the field for one race.

    Reads ``../data/races/{year}/{round}.json``, builds a table with one row
    per driver who has a timing on the first lap and one column per lap,
    converts the lap-time strings with ``time_to_nanoseconds`` and, for each
    requested driver, sums ``percent_difference(driver lap time, mean lap
    time of the field)`` over all laps.

    Args:
        year: season, used to build the file path.
        round: round within the season, used to build the file path. The
            name shadows the builtin inside this function; it is kept so
            keyword callers keep working.
        current_drivers: iterable of driver ids to score. The result follows
            its order.

    Returns:
        list: one entry per driver in ``current_drivers``. The entry is NaN
        when the driver has no timing on the first lap of the race.
        NOTE(review): a lap with no timing for a driver is stored as a
        missing value, which appears to propagate through the sum and make
        that driver's whole race score NaN. Confirm this is intended.
    '''
    filepath = Path(f'../data/races/{year}/{round}.json')
    with open(filepath, 'r', encoding='utf-8') as infile:
        jsondata = json.load(infile)

    laps = jsondata['Laps']

    # Drivers with a timing on the first lap, alphabetically; they define the rows
    drivers = sorted(timing['driverId'] for timing in laps[0]['Timings'])

    # One column of raw lap-time strings per lap; None where a driver has no timing
    columns = {'Drivers': drivers}
    for lap in laps:
        times_by_driver = {}
        for timing in lap['Timings']:
            # setdefault keeps the first timing if a driver id is repeated
            times_by_driver.setdefault(timing.get('driverId'), timing.get('time'))
        columns[f"Lap {lap['number']}"] = [times_by_driver.get(driver) for driver in drivers]

    working_df = pd.DataFrame(columns)
    lap_columns = working_df.columns[1:]

    # Convert each string lap time to its numeric equivalent
    for col in lap_columns:
        working_df[col] = working_df[col].apply(time_to_nanoseconds)

    # The field average of a lap is the same for every driver, so it is
    # computed once, and only when at least one requested driver took part
    lap_means = None

    score_list = []
    for driver in current_drivers:
        driver_row = working_df.loc[working_df['Drivers'] == driver]
        if driver_row.empty:
            score_list.append(float('nan'))
            continue

        if lap_means is None:
            lap_means = {col: working_df[col].mean() for col in lap_columns}

        score = 0
        for col in lap_columns:
            score += percent_difference(driver_row[col].values[0], lap_means[col])
        score_list.append(score)
    return score_list
        


In [124]:
# One row per current driver, one score column per season in which the
# 'albert_park' circuit appears on the schedule.
df = pd.DataFrame()
df['Drivers'] = current_drivers

for year in range(2012,2022):
    race_round = get_round(year,'albert_park')
    if race_round is None:
        # Circuit not on that season's schedule: no column for the year
        continue
    # get_score already returns one score per current driver, so a single
    # call per season is enough (it used to be repeated once per driver).
    df[f'Season {year}'] = get_score(year,race_round,current_drivers)
df


Unnamed: 0,Drivers,Season 2012,Season 2013,Season 2014,Season 2015,Season 2016,Season 2017,Season 2018,Season 2019
0,albon,,,,,,,,
1,alonso,50.513853,93.760324,78.702832,,,,22.798652,
2,bottas,,,66.528682,,98.465566,128.093221,8.467517,124.060371
3,de_vries,,,,,,,,
4,gasly,,,,,,,,
5,hamilton,77.129723,59.452027,,107.772428,183.285822,129.312156,55.989086,100.584347
6,hulkenberg,,,62.738009,,97.966368,,10.862832,
7,kevin_magnussen,,,88.930735,,-79.022493,,,26.746444
8,latifi,,,,,,,,
9,leclerc,,,,,,,-39.169362,59.000959


In [125]:
# Total each driver's per-season scores; seasons without a score are skipped.
# 'Total' is excluded from the sum so that re-running this cell does not fold
# a previously computed total back into the new one.
season_scores = df.drop(columns=['Drivers', 'Total'], errors='ignore')
totals = season_scores.sum(axis=1, skipna=True)

df_list = totals.tolist()
df['Total'] = df_list

score_list = sort_scores(list(zip(df['Drivers'], df_list)))
for name, total in score_list:
    print(f'{name: <20} Score:{total}')

hamilton             Score:713.5255892012759
vettel               Score:680.7378048817968
bottas               Score:425.6153562661578
ricciardo            Score:310.7732637308615
max_verstappen       Score:307.0140912703562
alonso               Score:245.77566058119692
hulkenberg           Score:171.56721046197316
perez                Score:141.96747447381543
sainz                Score:107.65391170968486
kevin_magnussen      Score:36.65468565071083
leclerc              Score:19.8315971969484
albon                Score:0.0
de_vries             Score:0.0
gasly                Score:0.0
latifi               Score:0.0
mick_schumacher      Score:0.0
norris               Score:0.0
russell              Score:0.0
tsunoda              Score:0.0
zhou                 Score:0.0
ocon                 Score:-24.164497054296177
stroll               Score:-43.86864213086298


In [126]:
# Rank drivers by total score, best first. Rebinding the name (instead of
# inplace=True) leaves the frame object shown by earlier cells untouched.
df = df.sort_values('Total', ascending=False)
df

Unnamed: 0,Drivers,Season 2012,Season 2013,Season 2014,Season 2015,Season 2016,Season 2017,Season 2018,Season 2019,Total
5,hamilton,77.129723,59.452027,,107.772428,183.285822,129.312156,55.989086,100.584347,713.525589
20,vettel,83.93291,83.064553,,71.457901,178.355029,140.715804,63.620114,59.591494,680.737805
2,bottas,,,66.528682,,98.465566,128.093221,8.467517,124.060371,425.615356
15,ricciardo,29.066592,,93.304965,,140.353086,,48.048621,,310.773264
10,max_verstappen,,,,,83.17845,109.063294,15.983404,98.788943,307.014091
1,alonso,50.513853,93.760324,78.702832,,,,22.798652,,245.775661
6,hulkenberg,,,62.738009,,97.966368,,10.862832,,171.56721
14,perez,31.507634,19.839858,27.257294,,71.561436,,-8.198747,,141.967474
17,sainz,,,,,112.205946,,-4.552035,,107.653912
7,kevin_magnussen,,,88.930735,,-79.022493,,,26.746444,36.654686


In [129]:
# Bar chart of each driver's total score, in the frame's current row order.
# A title and axis labels are set so the figure can be read on its own.
fig = px.bar(
    df,
    x='Drivers',
    y='Total',
    title='Total lap-time score by driver (albert_park)',
    labels={'Drivers': 'Driver', 'Total': 'Total score'},
)
fig.show()