In [238]:
import pandas as pd
import numpy as np
import json
from pathlib import Path
from datetime import datetime, timedelta
import time
import plotly.express as px
import plotly.graph_objects as go

In [239]:
filepath = Path('../data/races/2012/1.json')
with open(filepath, 'r', encoding='utf-8') as infile: jsondata = json.load(infile)

Dataframe

In [240]:
# Get list of drivers
drivers = []
for driver in jsondata['Laps'][0]['Timings']:
    drivers.append(driver['driverId'])

drivers.sort()
drivers

['alonso',
 'bruno_senna',
 'button',
 'glock',
 'grosjean',
 'hamilton',
 'kobayashi',
 'kovalainen',
 'maldonado',
 'massa',
 'michael_schumacher',
 'perez',
 'petrov',
 'pic',
 'raikkonen',
 'resta',
 'ricciardo',
 'rosberg',
 'vergne',
 'vettel',
 'webber']

In [241]:
df = pd.DataFrame()
df['Drivers'] = drivers
df

Unnamed: 0,Drivers
0,alonso
1,bruno_senna
2,button
3,glock
4,grosjean
5,hamilton
6,kobayashi
7,kovalainen
8,maldonado
9,massa


In [242]:
jsondata

{'season': '2012',
 'round': '1',
 'url': 'http://en.wikipedia.org/wiki/2012_Australian_Grand_Prix',
 'raceName': 'Australian Grand Prix',
 'Circuit': {'circuitId': 'albert_park',
  'url': 'http://en.wikipedia.org/wiki/Melbourne_Grand_Prix_Circuit',
  'circuitName': 'Albert Park Grand Prix Circuit',
  'Location': {'lat': '-37.8497',
   'long': '144.968',
   'locality': 'Melbourne',
   'country': 'Australia'}},
 'date': '2012-03-18',
 'time': '06:00:00Z',
 'Laps': [{'number': '1',
   'Timings': [{'driverId': 'button', 'position': '1', 'time': '1:39.264'},
    {'driverId': 'hamilton', 'position': '2', 'time': '1:40.622'},
    {'driverId': 'michael_schumacher', 'position': '3', 'time': '1:42.002'},
    {'driverId': 'rosberg', 'position': '4', 'time': '1:42.666'},
    {'driverId': 'vettel', 'position': '5', 'time': '1:43.029'},
    {'driverId': 'grosjean', 'position': '6', 'time': '1:43.730'},
    {'driverId': 'maldonado', 'position': '7', 'time': '1:44.212'},
    {'driverId': 'alonso', 'p

In [243]:
for lap in jsondata['Laps']:
    lap_data = []
    
    for _ in range(0, len(drivers)):
        try:
            driver = next(item for item in lap['Timings'] if item['driverId']==drivers[_])
            # Create a tuple of driver and lap time
            lap_data.append([driver['driverId'],driver['time']])
        except:
            lap_data.append([drivers[_],None])
    # Sort it so it matches the rows
    lap_data.sort(key = lambda x: x[0])
    #print(lap_data)
    # Add new column of lap time
    time_list = list(list(zip(*lap_data))[1])
    #print(len(time_list))
    df[f"Lap {lap['number']}"] = time_list

In [244]:
df


Unnamed: 0,Drivers,Lap 1,Lap 2,Lap 3,Lap 4,Lap 5,Lap 6,Lap 7,Lap 8,Lap 9,...,Lap 49,Lap 50,Lap 51,Lap 52,Lap 53,Lap 54,Lap 55,Lap 56,Lap 57,Lap 58
0,alonso,1:44.733,1:35.866,1:34.081,1:34.186,1:34.220,1:35.651,1:34.207,1:34.465,1:33.893,...,1:31.317,1:31.140,1:30.506,1:30.277,1:30.593,1:30.675,1:30.894,1:31.025,1:31.019,1:33.838
1,bruno_senna,2:16.893,1:42.348,1:36.241,1:36.368,1:35.740,1:35.524,1:35.434,1:35.925,1:35.029,...,1:30.855,1:31.999,1:33.469,1:31.119,,,,,,
2,button,1:39.264,1:33.414,1:33.350,1:33.131,1:32.984,1:33.117,1:33.244,1:33.124,1:33.394,...,1:30.081,1:29.858,1:30.049,1:30.047,1:30.124,1:29.697,1:29.645,1:29.187,1:29.738,1:30.846
3,glock,1:50.819,1:38.975,1:38.691,1:37.576,1:37.679,1:37.845,1:39.003,1:37.121,1:36.844,...,1:34.497,1:34.407,1:34.723,1:34.784,1:34.451,1:39.855,1:37.174,1:39.441,1:49.259,
4,grosjean,1:43.730,,,,,,,,,...,,,,,,,,,,
5,hamilton,1:40.622,1:34.297,1:33.566,1:33.347,1:33.446,1:33.380,1:33.315,1:33.461,1:33.561,...,1:30.179,1:30.019,1:30.052,1:29.927,1:30.188,1:30.041,1:29.696,1:29.641,1:29.538,1:29.867
6,kobayashi,1:46.880,1:37.177,1:35.312,1:37.945,1:34.491,1:34.858,1:34.529,1:34.347,1:35.434,...,1:31.767,1:31.235,1:31.086,1:32.677,1:32.316,1:31.699,1:30.620,1:31.190,1:32.628,1:35.335
7,kovalainen,1:53.018,1:37.690,1:38.084,1:37.656,1:37.540,1:37.799,1:35.634,1:35.239,1:35.140,...,,,,,,,,,,
8,maldonado,1:44.212,1:36.857,1:34.569,1:34.068,1:40.441,1:34.096,1:34.874,1:34.983,1:34.510,...,1:31.460,1:31.243,1:30.641,1:30.316,1:30.254,1:30.873,1:30.921,1:30.713,1:31.075,
9,massa,1:46.714,1:36.908,1:35.111,1:35.243,1:35.208,1:34.631,1:34.628,1:35.261,1:36.496,...,,,,,,,,,,


In [245]:
test_df = df.copy()
test_df

Unnamed: 0,Drivers,Lap 1,Lap 2,Lap 3,Lap 4,Lap 5,Lap 6,Lap 7,Lap 8,Lap 9,...,Lap 49,Lap 50,Lap 51,Lap 52,Lap 53,Lap 54,Lap 55,Lap 56,Lap 57,Lap 58
0,alonso,1:44.733,1:35.866,1:34.081,1:34.186,1:34.220,1:35.651,1:34.207,1:34.465,1:33.893,...,1:31.317,1:31.140,1:30.506,1:30.277,1:30.593,1:30.675,1:30.894,1:31.025,1:31.019,1:33.838
1,bruno_senna,2:16.893,1:42.348,1:36.241,1:36.368,1:35.740,1:35.524,1:35.434,1:35.925,1:35.029,...,1:30.855,1:31.999,1:33.469,1:31.119,,,,,,
2,button,1:39.264,1:33.414,1:33.350,1:33.131,1:32.984,1:33.117,1:33.244,1:33.124,1:33.394,...,1:30.081,1:29.858,1:30.049,1:30.047,1:30.124,1:29.697,1:29.645,1:29.187,1:29.738,1:30.846
3,glock,1:50.819,1:38.975,1:38.691,1:37.576,1:37.679,1:37.845,1:39.003,1:37.121,1:36.844,...,1:34.497,1:34.407,1:34.723,1:34.784,1:34.451,1:39.855,1:37.174,1:39.441,1:49.259,
4,grosjean,1:43.730,,,,,,,,,...,,,,,,,,,,
5,hamilton,1:40.622,1:34.297,1:33.566,1:33.347,1:33.446,1:33.380,1:33.315,1:33.461,1:33.561,...,1:30.179,1:30.019,1:30.052,1:29.927,1:30.188,1:30.041,1:29.696,1:29.641,1:29.538,1:29.867
6,kobayashi,1:46.880,1:37.177,1:35.312,1:37.945,1:34.491,1:34.858,1:34.529,1:34.347,1:35.434,...,1:31.767,1:31.235,1:31.086,1:32.677,1:32.316,1:31.699,1:30.620,1:31.190,1:32.628,1:35.335
7,kovalainen,1:53.018,1:37.690,1:38.084,1:37.656,1:37.540,1:37.799,1:35.634,1:35.239,1:35.140,...,,,,,,,,,,
8,maldonado,1:44.212,1:36.857,1:34.569,1:34.068,1:40.441,1:34.096,1:34.874,1:34.983,1:34.510,...,1:31.460,1:31.243,1:30.641,1:30.316,1:30.254,1:30.873,1:30.921,1:30.713,1:31.075,
9,massa,1:46.714,1:36.908,1:35.111,1:35.243,1:35.208,1:34.631,1:34.628,1:35.261,1:36.496,...,,,,,,,,,,


In [246]:
def time_to_nanoseconds(raw_time):
    try:
        dirty = datetime.strptime(raw_time, '%M:%S.%f').time()
        #clean = timedelta(minutes=dirty.minute, seconds=dirty.second, microseconds=dirty.microsecond)
        nanoseconds = (dirty.minute*6e10)+(dirty.second*1e9)+(dirty.microsecond*1e3)
        #nanoseconds = (dirty.microsecond*1000)
        return nanoseconds/1e9
    # Catch NaaN
    except:
        return raw_time

In [247]:
test_df = df.copy()

for col in test_df.columns[1:]:
    test_df[col] = test_df[col].apply(lambda x : time_to_nanoseconds(x))

test_df

Unnamed: 0,Drivers,Lap 1,Lap 2,Lap 3,Lap 4,Lap 5,Lap 6,Lap 7,Lap 8,Lap 9,...,Lap 49,Lap 50,Lap 51,Lap 52,Lap 53,Lap 54,Lap 55,Lap 56,Lap 57,Lap 58
0,alonso,104.733,95.866,94.081,94.186,94.22,95.651,94.207,94.465,93.893,...,91.317,91.14,90.506,90.277,90.593,90.675,90.894,91.025,91.019,93.838
1,bruno_senna,136.893,102.348,96.241,96.368,95.74,95.524,95.434,95.925,95.029,...,90.855,91.999,93.469,91.119,,,,,,
2,button,99.264,93.414,93.35,93.131,92.984,93.117,93.244,93.124,93.394,...,90.081,89.858,90.049,90.047,90.124,89.697,89.645,89.187,89.738,90.846
3,glock,110.819,98.975,98.691,97.576,97.679,97.845,99.003,97.121,96.844,...,94.497,94.407,94.723,94.784,94.451,99.855,97.174,99.441,109.259,
4,grosjean,103.73,,,,,,,,,...,,,,,,,,,,
5,hamilton,100.622,94.297,93.566,93.347,93.446,93.38,93.315,93.461,93.561,...,90.179,90.019,90.052,89.927,90.188,90.041,89.696,89.641,89.538,89.867
6,kobayashi,106.88,97.177,95.312,97.945,94.491,94.858,94.529,94.347,95.434,...,91.767,91.235,91.086,92.677,92.316,91.699,90.62,91.19,92.628,95.335
7,kovalainen,113.018,97.69,98.084,97.656,97.54,97.799,95.634,95.239,95.14,...,,,,,,,,,,
8,maldonado,104.212,96.857,94.569,94.068,100.441,94.096,94.874,94.983,94.51,...,91.46,91.243,90.641,90.316,90.254,90.873,90.921,90.713,91.075,
9,massa,106.714,96.908,95.111,95.243,95.208,94.631,94.628,95.261,96.496,...,,,,,,,,,,


In [248]:
def percent_difference(driver_time,average_time,lap):
    # find percent different between driver time and average time
    diff = abs((driver_time - average_time)/((driver_time + average_time)/2))*100
    #print(f'{lap}\tTime: {driver_time*1e-9}\tAvg: {average_time*1e-9}\tDiff {diff}')
    if driver_time > average_time:
        #print(-abs(diff))
        return -abs(diff)
    #print(diff)
    return diff

In [249]:
score = 50
driver = test_df.loc[test_df['Drivers'] == 'massa']

for lap in driver.columns[1:]:
    score += percent_difference(driver[lap].values[0],test_df[lap].median(),lap)
score

nan

In [250]:
def sort_scores(score_list):
    cleaned_list = []
    nan_list = []
    
    for score in score_list:
        if str(score[0]) != 'nan':
            cleaned_list.append(score)
        else:
            nan_list.append(score)
    
    cleaned_list = sorted(cleaned_list,key=lambda x: x[0], reverse=True)
    if cleaned_list[-1][0] < 0:
        diff = round(abs(cleaned_list[-1][0])+cleaned_list[0][0],5)
    else:
        diff = round(abs(cleaned_list[-1][0]-cleaned_list[0][0]),5)
    cleaned_list.extend(nan_list)
    return [cleaned_list,diff]

### Consistency Score
> Driver Current Lap / Driver Previous Lap

In [288]:
fig = go.Figure()

for driver in drivers:
    score=0
    score_per_lap = []
    driver_row = test_df.loc[test_df['Drivers'] == driver]

    for lap in range(4,len(driver_row.columns[1:])):
        try: score += percent_difference(driver_row[f'Lap {lap}'].values[0],driver_row[f'Lap {lap-1}'].values[0],lap) 
        except: score = 0
        score_per_lap.append(score)

    if driver == 'button':
        fig.add_trace(go.Line(x=driver_row.columns[4:],y=score_per_lap, name=f'{driver}', mode='lines'))


fig.show()


Plotly

In [278]:
button = test_df.loc[test_df['Drivers'] == 'button']

In [279]:
ricciardo = test_df.loc[test_df['Drivers'] == 'ricciardo']

In [281]:
median = test_df[1:].median(axis=0, skipna=True).tolist()
average = test_df[1:].mean(axis=0, skipna=True).tolist()

fig = go.Figure()

# Average Line
fig.add_trace(go.Line(x=ricciardo.columns[1:].values.flatten().tolist(),y=average, name='Average', mode='lines'))
fig.add_trace(go.Line(x=ricciardo.columns[1:].values.flatten().tolist(),y=median, name='Median', mode='lines'))
#temp = test_df.loc[test_df['Drivers'] == 'pic']
fig.add_trace(go.Line(x=ricciardo.columns[1:].values.flatten().tolist(),y=alonso.iloc[0].values[1:].flatten().tolist(), name='Ricciardo'))
fig.add_trace(go.Line(x=button.columns[1:].values.flatten().tolist(),y=button.iloc[0].values[1:].flatten().tolist(), name='Button'))

# # Plot each driver
# for driver in drivers:
#     temp = test_df.loc[test_df['Drivers'] == driver]
#     fig.add_trace(go.Line(x=temp.columns[1:].values.flatten().tolist(),y=temp.iloc[0].values[1:].flatten().tolist(), name=driver))

fig.show()






plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.




### Performance Score
> Driver Lap / Median of all times on that Lap

In [282]:
median = test_df[1:].median(axis=0, skipna=True).tolist()

fig = go.Figure()

for driver in drivers:
    score=0
    score_per_lap = []
    driver_row = test_df.loc[test_df['Drivers'] == driver]

    for lap in range(1,len(driver_row.columns[1:])+1):
        score += percent_difference(driver_row[f'Lap {lap}'].values[0],median[lap-1],lap)
        score_per_lap.append(score)

    fig.add_trace(go.Line(x=driver_row.columns[1:],y=score_per_lap,name=f'{driver}', mode='lines'))

fig.show()




plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.




In [283]:
averageList = []
averageValue = sum(average)/len(average)

for _ in range(len(average)):
    averageList.append(averageValue)

In [284]:
# TODO Update yaxis

fig = go.Figure()
average = test_df[1:].mean(axis=0, skipna=True).tolist()

fig.add_trace(go.Box(x=drivers,y=averageList, name='Average'))
for driver in drivers:
    temp = test_df.loc[test_df['Drivers'] == driver]
    lap_time = temp.iloc[0].values[1:].flatten().tolist()
    fig.add_trace(go.Box(y=lap_time, name=driver))
fig.update_layout(yaxis_range = [80,180])

fig.show()



