# Objective
 
To explore and visualize F1 pit stop data (1950-2021) provided by Ergast Developer API (https://ergast.com/mrd/), while answering the following questions:

* How did pit stop durations change over time?
* Is there a relationship between pit stop durations and constructor?
* Is there a relationship between pit stop durations and race circuit?
* What is the ratio between time spent in the pit lane and race duration?
* Which team has the best pit stop performance?

# Data Setup

In [26]:
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
import os

"fnames = []\nfpaths = []\nfor dirname, _, filenames in os.walk(''):\n    for filename in filenames:\n        print(filename)\n        fnames.append(filename.split('.')[0])\n        fpaths.append(os.path.join(dirname, filename))\n        print(os.path.join(dirname, filename))\n"

In [27]:
# Read csv data
# All tables are CSV files under `fpath`. Where `na_values=r'\N'` is given,
# the literal string \N is parsed as NaN; `index_col=0` makes the first
# column (the table's id) the index.
fpath = 'data/'
circuits = pd.read_csv(f'{fpath}circuits.csv', index_col=0, na_values=r'\N')
constructorResults = pd.read_csv(f'{fpath}constructor_results.csv', index_col=0, na_values=r'\N')
constructors = pd.read_csv(f'{fpath}constructors.csv', index_col=0, na_values=r'\N')
constructorStandings = pd.read_csv(f'{fpath}constructor_standings.csv', index_col=0, na_values=r'\N')
drivers = pd.read_csv(f'{fpath}drivers.csv', index_col=0, na_values=r'\N')
driverStandings = pd.read_csv(f'{fpath}driver_standings.csv', index_col=0, na_values=r'\N')
# lapTimes and pitStops are loaded with pandas defaults (no id index, no \N handling).
lapTimes = pd.read_csv(f'{fpath}lap_times.csv')
pitStops = pd.read_csv(f'{fpath}pit_stops.csv')
qualifying = pd.read_csv(f'{fpath}qualifying.csv', index_col=0, na_values=r'\N')
races = pd.read_csv(f'{fpath}races.csv', na_values=r'\N')
results = pd.read_csv(f'{fpath}results.csv', index_col=0, na_values=r'\N')
seasons = pd.read_csv(f'{fpath}seasons.csv', index_col=0, na_values=r'\N')
status = pd.read_csv(f'{fpath}status.csv', index_col=0, na_values=r'\N')

# reformat previously read data for easier usage
# Prefix generic column names (name, url, nationality, ...) with their entity
# so they stay distinguishable once the tables are merged together.
circuits = circuits.rename(columns={'name':'circuitName','location':'circuitLocation','country':'circuitCountry','url':'circuitUrl'})
drivers = drivers.rename(columns={'nationality':'driverNationality','url':'driverUrl'})
drivers['driverName'] = drivers['forename']+' '+drivers['surname']
constructors = constructors.rename(columns={'name':'constructorName','nationality':'constructorNationality','url':'constructorUrl'})

# NOTE(review): the next two statements assume `races` came back from read_csv
# with a ten-level index -- presumably because the data rows of races.csv carry
# more fields than its header, so pandas moved the leading fields into the
# index. Confirm against the data file before changing this.
# The ten index levels are named, every regular column is dropped (`races[[]]`),
# the index levels are turned back into columns, and the first eight are kept.
races.index = races.index.set_names(['raceId','year','round','circuitId','raceName','date','time','raceUrl','a','b'])
races = races[[]].reset_index()[['raceId','year','round','circuitId','raceName','date','time','raceUrl']]
races.set_index('raceId',inplace=True)
# Vectorized parse of the ISO dates; a missing date becomes NaT instead of
# raising inside a per-row strptime call.
races['date'] = pd.to_datetime(races['date'], format='%Y-%m-%d')

# `time` is renamed on the pit stop side, presumably to keep it apart from the
# race start `time` column after merging.
pitStops = pitStops.rename(columns={'time':'pitTime'})
# Durations in seconds, derived from the millisecond columns (NaN stays NaN).
pitStops['seconds'] = pitStops['milliseconds'] / 1000
results['seconds'] = results['milliseconds'] / 1000

In [28]:
# Constructor color mapping
# (constructor name, hex colour) pairs in display order. The resulting dict is
# passed to plotly as `color_discrete_map` so a team keeps the same colour in
# every chart. Renamed teams that share a colour are listed separately.
_constructor_colors = (
    ('Toro Rosso', '#0000FF'),
    ('Mercedes', '#6CD3BF'),
    ('Red Bull', '#1E5BC6'),
    ('Ferrari', '#ED1C24'),
    ('Williams', '#37BEDD'),
    ('Force India', '#FF80C7'),
    ('Virgin', '#c82e37'),
    ('Renault', '#FFD800'),
    ('McLaren', '#F58020'),
    ('Sauber', '#006EFF'),
    ('Lotus', '#FFB800'),
    ('HRT', '#b2945e'),
    ('Caterham', '#0b361f'),
    ('Lotus F1', '#FFB800'),
    ('Marussia', '#6E0000'),
    ('Manor Marussia', '#6E0000'),
    ('Haas F1 Team', '#B6BABD'),
    ('Racing Point', '#F596C8'),
    ('Aston Martin', '#2D826D'),
    ('Alfa Romeo', '#B12039'),
    ('AlphaTauri', '#4E7C9B'),
    ('Alpine F1 Team', '#2293D1'),
)
constructor_color_map = dict(_constructor_colors)

# Pit Stop Data

<b>Note:</b> In the context of the data I'm using, pit stop durations include the total time in the pit lane and not only when the car is stationary.  

In [29]:
# Preview the pit stop table: one row per stop, keyed by raceId/driverId/stop.
pitStops

Unnamed: 0,raceId,driverId,stop,lap,pitTime,duration,milliseconds,seconds
0,841,153,1,1,17:05:23,26.898,26898,26.898
1,841,30,1,1,17:05:52,25.021,25021,25.021
2,841,17,1,11,17:20:48,23.426,23426,23.426
3,841,4,1,12,17:22:34,23.251,23251,23.251
4,841,13,1,13,17:24:10,23.842,23842,23.842
...,...,...,...,...,...,...,...,...
8882,1074,840,3,45,19:20:32,25.039,25039,25.039
8883,1074,846,3,45,19:20:59,24.975,24975,24.975
8884,1074,849,3,45,19:21:07,24.658,24658,24.658
8885,1074,825,3,46,19:21:29,26.051,26051,26.051


In [30]:
# Summary statistics of the numeric pit stop columns, one row per column.
# Compare the `max` of seconds with its 75% quantile to judge how heavy the
# tail of very long stops is.
pitStops.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
raceId,8887.0,945.217621,69.807472,841.0,882.0,943.0,1004.0,1074.0
driverId,8887.0,495.26038,394.115984,1.0,17.0,814.0,828.0,855.0
stop,8887.0,1.769664,0.922955,1.0,1.0,2.0,2.0,6.0
lap,8887.0,25.152357,14.496368,1.0,13.0,25.0,36.0,78.0
milliseconds,8887.0,67550.097108,248002.120977,12897.0,21915.5,23546.0,26099.0,2077164.0
seconds,8887.0,67.550097,248.002121,12.897,21.9155,23.546,26.099,2077.164


In [31]:
# Denormalise the results table: attach the race, circuit, constructor and
# driver attributes to every result row. Each lookup table is indexed by its
# id, hence `right_index=True`; left joins keep every result row even when a
# lookup entry is missing.
newResults = (
    results
    .merge(races, how='left', left_on='raceId', right_index=True)
    .merge(circuits, how='left', left_on='circuitId', right_index=True)
    .merge(constructors, how='left', left_on='constructorId', right_index=True)
    .merge(drivers, how='left', left_on='driverId', right_index=True)
)
newResults

Unnamed: 0_level_0,raceId,driverId,constructorId,number_x,grid,position,positionText,positionOrder,points,laps,...,constructorUrl,driverRef,number_y,code,forename,surname,dob,driverNationality,driverUrl,driverName
resultId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,18,1,1,22.0,1,1.0,1,1,10.0,58,...,http://en.wikipedia.org/wiki/McLaren,hamilton,44.0,HAM,Lewis,Hamilton,1985-01-07,British,http://en.wikipedia.org/wiki/Lewis_Hamilton,Lewis Hamilton
2,18,2,2,3.0,5,2.0,2,2,8.0,58,...,http://en.wikipedia.org/wiki/BMW_Sauber,heidfeld,,HEI,Nick,Heidfeld,1977-05-10,German,http://en.wikipedia.org/wiki/Nick_Heidfeld,Nick Heidfeld
3,18,3,3,7.0,7,3.0,3,3,6.0,58,...,http://en.wikipedia.org/wiki/Williams_Grand_Pr...,rosberg,6.0,ROS,Nico,Rosberg,1985-06-27,German,http://en.wikipedia.org/wiki/Nico_Rosberg,Nico Rosberg
4,18,4,4,5.0,11,4.0,4,4,5.0,58,...,http://en.wikipedia.org/wiki/Renault_in_Formul...,alonso,14.0,ALO,Fernando,Alonso,1981-07-29,Spanish,http://en.wikipedia.org/wiki/Fernando_Alonso,Fernando Alonso
5,18,5,1,23.0,3,5.0,5,5,4.0,58,...,http://en.wikipedia.org/wiki/McLaren,kovalainen,,KOV,Heikki,Kovalainen,1981-10-19,Finnish,http://en.wikipedia.org/wiki/Heikki_Kovalainen,Heikki Kovalainen
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25421,1074,849,3,6.0,20,16.0,16,16,0.0,57,...,http://en.wikipedia.org/wiki/Williams_Grand_Pr...,latifi,6.0,LAT,Nicholas,Latifi,1995-06-29,Canadian,http://en.wikipedia.org/wiki/Nicholas_Latifi,Nicholas Latifi
25422,1074,807,117,27.0,17,17.0,17,17,0.0,57,...,http://en.wikipedia.org/wiki/Aston_Martin_in_F...,hulkenberg,27.0,HUL,Nico,Hülkenberg,1987-08-19,German,http://en.wikipedia.org/wiki/Nico_H%C3%BClkenberg,Nico Hülkenberg
25423,1074,815,9,11.0,4,18.0,18,18,0.0,56,...,http://en.wikipedia.org/wiki/Red_Bull_Racing,perez,11.0,PER,Sergio,Pérez,1990-01-26,Mexican,http://en.wikipedia.org/wiki/Sergio_P%C3%A9rez,Sergio Pérez
25424,1074,830,9,1.0,2,19.0,19,19,0.0,54,...,http://en.wikipedia.org/wiki/Red_Bull_Racing,max_verstappen,33.0,VER,Max,Verstappen,1997-09-30,Dutch,http://en.wikipedia.org/wiki/Max_Verstappen,Max Verstappen


In [32]:
# Enrich every pit stop with its race and circuit attributes, then with the
# driver name and constructor taken from that driver's result in the same race.
driverTeamLookup = newResults[['raceId','driverId','driverName','constructorId','constructorName']]
newPitStops = (
    pitStops
    .merge(races, how='left', left_on='raceId', right_index=True)
    .merge(circuits, how='left', left_on='circuitId', right_index=True)
    # No `how` is given, so this last join is pandas' default inner join:
    # stops without a matching (raceId, driverId) result row are dropped.
    .merge(driverTeamLookup, on=['raceId','driverId'])
)
newPitStops

Unnamed: 0,raceId,driverId,stop,lap,pitTime,duration,milliseconds,seconds,year,round,...,circuitName,circuitLocation,circuitCountry,lat,lng,alt,circuitUrl,driverName,constructorId,constructorName
0,841,153,1,1,17:05:23,26.898,26898,26.898,2011,1,...,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.9680,10.0,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,Jaime Alguersuari,5,Toro Rosso
1,841,30,1,1,17:05:52,25.021,25021,25.021,2011,1,...,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.9680,10.0,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,Michael Schumacher,131,Mercedes
2,841,17,1,11,17:20:48,23.426,23426,23.426,2011,1,...,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.9680,10.0,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,Mark Webber,9,Red Bull
3,841,4,1,12,17:22:34,23.251,23251,23.251,2011,1,...,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.9680,10.0,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,Fernando Alonso,6,Ferrari
4,841,13,1,13,17:24:10,23.842,23842,23.842,2011,1,...,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.9680,10.0,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,Felipe Massa,6,Ferrari
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8882,1074,840,3,45,19:20:32,25.039,25039,25.039,2022,1,...,Bahrain International Circuit,Sakhir,Bahrain,26.0325,50.5106,7.0,http://en.wikipedia.org/wiki/Bahrain_Internati...,Lance Stroll,117,Aston Martin
8883,1074,846,3,45,19:20:59,24.975,24975,24.975,2022,1,...,Bahrain International Circuit,Sakhir,Bahrain,26.0325,50.5106,7.0,http://en.wikipedia.org/wiki/Bahrain_Internati...,Lando Norris,1,McLaren
8884,1074,849,3,45,19:21:07,24.658,24658,24.658,2022,1,...,Bahrain International Circuit,Sakhir,Bahrain,26.0325,50.5106,7.0,http://en.wikipedia.org/wiki/Bahrain_Internati...,Nicholas Latifi,3,Williams
8885,1074,825,3,46,19:21:29,26.051,26051,26.051,2022,1,...,Bahrain International Circuit,Sakhir,Bahrain,26.0325,50.5106,7.0,http://en.wikipedia.org/wiki/Bahrain_Internati...,Kevin Magnussen,210,Haas F1 Team


In [33]:
# Per-driver, per-race totals of every numeric pit stop column. Note that this
# also sums identifier-like columns (year, round, circuitId, ...); only the
# duration totals are used below.
pitTotals = newPitStops.groupby(
    by=['raceId','raceName','constructorName','driverId','driverName']
).sum(numeric_only=True)

# Attach the totals to the race results. Columns present on both sides get the
# default suffixes: `_x` comes from newResults, `_y` from the pit stop totals.
raceResults = newResults.merge(
    pitTotals,
    how='left',
    left_on=['raceId','driverId'],
    right_on=['raceId','driverId'],
)

# Share of the driver's race time spent in the pit lane, in percent. It is NaN
# whenever either the pit total or the race time is missing.
raceResults['pitPercentage'] = raceResults['milliseconds_y'].div(raceResults['milliseconds_x']).mul(100)
raceResults

Unnamed: 0,raceId,driverId,constructorId_x,number_x,grid,position,positionText,positionOrder,points,laps,...,milliseconds_y,seconds_y,year_y,round_y,circuitId_y,lat_y,lng_y,alt_y,constructorId_y,pitPercentage
0,18,1,1,22.0,1,1.0,1,1,10.0,58,...,,,,,,,,,,
1,18,2,2,3.0,5,2.0,2,2,8.0,58,...,,,,,,,,,,
2,18,3,3,7.0,7,3.0,3,3,6.0,58,...,,,,,,,,,,
3,18,4,4,5.0,11,4.0,4,4,5.0,58,...,,,,,,,,,,
4,18,5,1,23.0,3,5.0,5,5,4.0,58,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25415,1074,849,3,6.0,20,16.0,16,16,0.0,57,...,73893.0,73.893,6066.0,3.0,9.0,78.0975,151.5318,21.0,9.0,1.249168
25416,1074,807,117,27.0,17,17.0,17,17,0.0,57,...,79825.0,79.825,6066.0,3.0,9.0,78.0975,151.5318,21.0,351.0,1.348985
25417,1074,815,9,11.0,4,18.0,18,18,0.0,56,...,74166.0,74.166,6066.0,3.0,9.0,78.0975,151.5318,21.0,27.0,
25418,1074,830,9,1.0,2,19.0,19,19,0.0,54,...,74236.0,74.236,6066.0,3.0,9.0,78.0975,151.5318,21.0,27.0,


# Exploratory Data Analysis

## How did pit stop durations change over time?

Main observations
* Average pit times had a meaningful increase from 2013 to 2014
* Average pit times have been fairly stable from 2014 onwards
* Majority of pit times are clustered around 20-35s
* Pit durations appear to have more variance in recent years

In [34]:
# Line chart of the mean pit stop duration per season and constructor.
# Only stops shorter than 50 s are considered (presumably to drop outliers).
shortStops = newPitStops.loc[newPitStops['seconds']<50]
seasonMeans = (
    shortStops
    .groupby(by=['year','constructorName'])
    .mean(numeric_only=True)
    .reset_index()
)
fig = px.line(
    seasonMeans,
    x='year',
    y='seconds',
    color='constructorName',
    color_discrete_map=constructor_color_map,
)
fig.update_layout(title_text='Average Pit Stop Durations by Constructor')
fig.show()

In [35]:
# Scatter of every individual stop shorter than 50 s against its race date,
# coloured by constructor.
shortStops = newPitStops.loc[newPitStops['seconds']<50]
fig = px.scatter(
    shortStops,
    x='date',
    y='seconds',
    color='constructorName',
    color_discrete_map=constructor_color_map,
)
fig.update_layout(title_text='Pit Stop Durations over Time by Constructor')
fig.show()

In [36]:
# Same data as a box plot: one box per race date and constructor, showing the
# spread of stop durations (stops of 50 s or more excluded).
shortStops = newPitStops.loc[newPitStops['seconds']<50]
fig = px.box(
    shortStops,
    x='date',
    y='seconds',
    color='constructorName',
    color_discrete_map=constructor_color_map,
)
fig.update_layout(title_text='Pit Stop Durations over Time by Constructor')
fig.show()

## Is there a relationship between pit stop durations and constructors?

Main observations:
* Constructors on average are fairly similar in pit durations
* No significant performance discrepancy. Minor performance variations.

In [37]:
# One data point per (race, constructor): the mean duration of that team's
# stops under 50 s in that race. The box plot then shows, per constructor, how
# those race averages are distributed.
raceTeamMeans = (
    newPitStops.loc[newPitStops['seconds']<50]
    .groupby(by=['raceId','raceName','date','constructorName'])
    .mean(numeric_only=True)
    .reset_index()
    .sort_values(by='seconds',ascending=True)
)
fig = px.box(
    raceTeamMeans,
    x='constructorName',
    y='seconds',
    color='constructorName',
    color_discrete_map=constructor_color_map,
)
fig.update_layout(title_text='Pit Stop Durations by Constructor from 2011 to date')
fig.show()

In [38]:
# Same per-race constructor averages, restricted to a single season.
# `year` stays a notebook-level variable; later cells read it.
year = 2021
isShort = newPitStops['seconds']<50
inSeason = newPitStops['year']==year
seasonTeamMeans = (
    newPitStops.loc[isShort & inSeason]
    .groupby(by=['raceId','raceName','date','constructorName'])
    .mean(numeric_only=True)
    .reset_index()
    .sort_values(by='seconds',ascending=True)
)
fig = px.box(
    seasonTeamMeans,
    x='constructorName',
    y='seconds',
    color='constructorName',
    color_discrete_map=constructor_color_map,
)
fig.update_layout(title_text=f'Pit Stop Durations by Constructor for {year} Season')
fig.show()

## Is there a relationship between pit stop durations and race circuit?

Main Observations:
* Race circuits appear to have a more significant impact on overall pit duration
* Race circuits appear to have an impact on total pit time over the course of the race
* Some circuits have larger variances, but on average the variance from track to track appears to be fairly consistent

In [39]:
# One data point per race: the mean duration of all stops under 50 s in that
# race. Boxes group those race averages by circuit.
raceMeans = (
    newPitStops.loc[newPitStops['seconds']<50]
    .groupby(by=['raceId','raceName','circuitName'])
    .mean(numeric_only=True)
    .reset_index()
    .sort_values(by='seconds',ascending=True)
)
fig = px.box(raceMeans, x='circuitName', y='seconds')
fig.update_layout(title_text='Pit Stop Durations by Race Circuit')
fig.show()

In [40]:
# A single mean duration per circuit (over all stops under 50 s), sorted from
# the fastest to the slowest circuit.
circuitMeans = (
    newPitStops.loc[newPitStops['seconds']<50]
    .groupby(by=['circuitName'])
    .mean(numeric_only=True)
    .reset_index()
    .sort_values(by='seconds',ascending=True)
)
fig = px.scatter(circuitMeans, x='circuitName', y='seconds')
fig.update_layout(title_text='Average Race Pit Stop Durations by Circuit')
fig.show()

In [41]:
# Per-race constructor averages (stops under 50 s), boxed by circuit and
# split by constructor colour.
raceTeamCircuitMeans = (
    newPitStops.loc[newPitStops['seconds']<50]
    .groupby(by=['raceId','raceName','circuitName','constructorName'])
    .mean(numeric_only=True)
    .reset_index()
    .sort_values(by='seconds',ascending=True)
)
fig = px.box(
    raceTeamCircuitMeans,
    x='circuitName',
    y='seconds',
    color='constructorName',
    color_discrete_map=constructor_color_map,
)
fig.update_layout(title_text='Average Race Pit Stop Durations by Race Circuit')
fig.show()

## Total Time in the Pit Lane

In [42]:
# Step 1: total of every numeric column per driver and race (stops under 50 s).
# Step 2: average those driver totals within each race.
# Identifier-like columns (year, round, circuitId, ...) are summed and averaged
# too and should be ignored when reading the table.
driverRaceTotals = (
    newPitStops.loc[newPitStops['seconds']<50]
    .groupby(by=['raceId','circuitName','driverId'])
    .sum(numeric_only=True)
)
driverRaceTotals.groupby(by=['raceId','circuitName']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,stop,lap,milliseconds,seconds,year,round,circuitId,lat,lng,alt,constructorId
raceId,circuitName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
841,Albert Park Grand Prix Circuit,3.666667,51.047619,52163.190476,52.163190,4309.285714,2.142857,2.142857,-81.106500,310.645714,21.428571,82.380952
842,Sepang International Circuit,5.272727,68.818182,65528.545455,65.528545,5393.136364,5.363636,5.363636,7.404044,272.842818,48.272727,128.590909
843,Shanghai International Circuit,4.375000,62.625000,53889.833333,53.889833,4859.916667,7.250000,41.083333,75.735675,292.948333,12.083333,139.916667
844,Istanbul Park,8.217391,96.565217,79556.695652,79.556696,7082.217391,14.086957,17.608696,144.221204,103.556739,457.826087,173.217391
845,Circuit de Barcelona-Catalunya,6.958333,91.416667,69196.041667,69.196042,6451.958333,16.041667,12.833333,133.370417,7.254395,349.708333,167.583333
...,...,...,...,...,...,...,...,...,...,...,...,...
1070,Autódromo Hermanos Rodríguez,1.944444,43.611111,33364.055556,33.364056,2806.944444,25.000000,44.444444,26.950278,-137.625972,3093.055556,122.833333
1071,Autódromo José Carlos Pace,10.000000,77.850000,83650.900000,83.650900,7982.950000,75.050000,71.100000,-93.629220,-184.463815,3100.750000,365.850000
1072,Jeddah Corniche Circuit,1.727273,16.454545,25475.545455,25.475545,2204.727273,22.909091,84.000000,23.598436,42.659345,16.363636,99.363636
1073,Yas Marina Circuit,2.333333,50.055556,35141.000000,35.141000,3256.055556,35.444444,38.666667,39.419378,87.971661,4.833333,138.555556


In [43]:
# One data point per driver and race: the summed duration of that driver's
# stops under 50 s. Boxes group those totals by circuit.
driverRaceTotals = (
    newPitStops.loc[newPitStops['seconds']<50]
    .groupby(by=['raceId','circuitName','driverId'])
    .sum(numeric_only=True)
    .reset_index()
    .sort_values(by='seconds',ascending=True)
)
fig = px.box(driverRaceTotals, x='circuitName', y='seconds')
fig.update_layout(title_text='Total Time Spent in Pit Lane by Circuit')
fig.show()

## Percentage of race spent in the pit lane

Main Observations:
* Findings from pit percentage unsurprisingly are very similar to the average pit time with its correlation to circuit  
* There doesn't appear to be much of a correlation between percentage of time in the pit and the race outcome
* Pit percentage does not appear to yield any interesting insights

In [44]:
# Box plot of the share of race time spent in the pit lane, averaged per race
# and constructor, grouped by circuit.
# Rows with a pit percentage of 10 or more are excluded; rows with a NaN
# percentage fail the comparison and are excluded as well.
pitShare = raceResults.loc[raceResults['pitPercentage']<10]

# Computed once so the reference line and its label always show the same value.
avgPitShare = pitShare['pitPercentage'].mean()

raceTeamShare = (
    pitShare
    .groupby(by=['raceId','raceName','circuitName','constructorName'])
    .mean(numeric_only=True)
    .reset_index()
    .sort_values(by='pitPercentage',ascending=True)
)
fig = px.box(
    raceTeamShare,
    x='circuitName',
    y='pitPercentage',
    color='constructorName',
    color_discrete_map=constructor_color_map,
)
fig.update_layout(
    title_text='Average Race Percentage in the Pit Lane by Race Circuit',
)
fig.add_hline(
    y=avgPitShare,
    line_dash='dash',
    annotation_text=f"Average pit percentage: {avgPitShare:.2f}%",
)
fig.show()

In [45]:
# Scatter of pit-lane share against finishing order, one point per race and
# constructor (both values are means over that constructor's rows in the
# race). Rows with a pit percentage of 10 or more, or NaN, are excluded.
raceTeamShare = (
    raceResults.loc[raceResults['pitPercentage']<10]
    .groupby(by=['raceId','raceName','circuitName','constructorName'])
    .mean(numeric_only=True)
    .reset_index()
)
fig = px.scatter(
    raceTeamShare,
    x='pitPercentage',
    y='positionOrder',
    color='constructorName',
    color_discrete_map=constructor_color_map,
)
fig.update_layout(
    # The title now describes the plotted axes; it previously repeated the
    # "... by Race Circuit" title of the preceding chart.
    title_text='Average Finishing Position vs. Race Percentage in the Pit Lane',
)
fig.show()

## What is a "good" pit stop?

After exploring some relationships that pit stops might have with other features, we find that the circuit has a meaningful effect on the pit times measured. To get a good reference for what a "good" pit stop is, we can take a look at the distribution of all the pit stops available.

There may be some minor normalization based on the circuit to provide a more meaningful comparison with respect to a particular pit time.

In [46]:
# Histogram of all stops shorter than 50 s with three reference lines:
# the mean (solid) and the 10% / 90% quantiles (dashed).
shortStops = newPitStops[(newPitStops['seconds']<50)]
shortSeconds = shortStops['seconds']
meanSeconds = shortSeconds.mean()
fastDecile = shortSeconds.quantile(0.1)   # 10% of stops are at or below this
slowDecile = shortSeconds.quantile(0.9)   # 10% of stops are above this

fig = px.histogram(shortStops, x='seconds')
fig.update_layout(title_text='Pit Stop Duration Distribution')
fig.add_vline(x=meanSeconds, annotation_text=f"Average: {meanSeconds:.2f}s")
fig.add_vline(x=fastDecile, line_dash='dash', annotation_text=f"Top Decile: {fastDecile:.2f}s")
fig.add_vline(x=slowDecile, line_dash='dash', annotation_text=f"Bottom Decile: {slowDecile:.2f}s")
fig.update_traces(opacity=0.9)
fig.show()

### Circuit Specific Benchmarks

Double click on the circuit to see the circuit specific pit time distribution.

In [47]:
# Same histogram split by circuit (overlaid traces). The reference lines are
# the overall mean and 10% / 90% quantiles across all circuits.
shortStops = newPitStops[(newPitStops['seconds']<50)]
shortSeconds = shortStops['seconds']
meanSeconds = shortSeconds.mean()
fastDecile = shortSeconds.quantile(0.1)
slowDecile = shortSeconds.quantile(0.9)

fig = px.histogram(shortStops, x='seconds', color='circuitName')
fig.update_layout(
    title_text='Pit Stop Duration Distribution by Circuit',
    barmode='overlay',
)
fig.add_vline(
    x=meanSeconds,
    annotation_text=f"Average: {meanSeconds:.2f}s",
    annotation_position='top',
)
fig.add_vline(
    x=fastDecile,
    line_dash='dash',
    annotation_text=f"Top Decile: {fastDecile:.2f}s",
    annotation_position='top left',
)
fig.add_vline(
    x=slowDecile,
    line_dash='dash',
    annotation_text=f"Bottom Decile: {slowDecile:.2f}s",
    annotation_position='bottom right',
)
fig.update_traces(opacity=0.9)
fig.show()

# So who does it best?

As far as performance is concerned, it seems like speed and consistency are the two main factors that indicate a great team. Average pit time will provide both an indication of speed and consistency as an expected performance metric. Obviously the lower average the better. Standard deviation is another aspect that we can look at to evaluate the consistency of a team's ability to perform.

### Ranking on Average Pit Time 

In [48]:
# Per-constructor summary of stop durations for one season (stops under 50 s),
# ranked by mean duration, fastest first.
year = 2021
seasonStops = newPitStops.loc[(newPitStops['seconds']<50)&(newPitStops['year']==year)]
constructorStats = seasonStops.groupby(by='constructorName')['seconds'].describe()
constructorStats.sort_values(by='mean')

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
constructorName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Red Bull,78.0,23.978026,4.623024,15.277,21.392,22.744,25.38925,44.608
Mercedes,75.0,24.118133,4.659454,15.432,21.5555,22.68,25.5,40.266
Ferrari,63.0,24.301857,4.996534,15.092,21.522,23.064,26.379,42.786
McLaren,66.0,24.527,4.924235,14.994,21.4925,23.5255,26.5325,38.267
Aston Martin,66.0,24.7995,5.382271,14.945,21.4905,23.55,26.13175,43.124
Williams,67.0,24.93691,5.009406,18.153,21.9615,23.681,26.507,46.315
Alpine F1 Team,61.0,24.983246,5.237372,15.432,21.452,23.844,29.116,40.8
Alfa Romeo,68.0,25.003559,5.171887,14.881,21.7985,24.0195,28.4005,37.19
AlphaTauri,68.0,25.212441,5.380044,14.943,21.75,24.3225,28.79725,40.74
Haas F1 Team,69.0,25.39829,5.636702,15.054,22.113,24.293,27.186,49.729


### Ranking on Consistency

In [49]:
# Same per-constructor summary, ranked by standard deviation (most consistent
# first). Relies on `year` having been set by an earlier cell.
seasonStops = newPitStops.loc[(newPitStops['seconds']<50)&(newPitStops['year']==year)]
constructorStats = seasonStops.groupby(by='constructorName')['seconds'].describe()
constructorStats.sort_values(by='std')

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
constructorName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Red Bull,78.0,23.978026,4.623024,15.277,21.392,22.744,25.38925,44.608
Mercedes,75.0,24.118133,4.659454,15.432,21.5555,22.68,25.5,40.266
McLaren,66.0,24.527,4.924235,14.994,21.4925,23.5255,26.5325,38.267
Ferrari,63.0,24.301857,4.996534,15.092,21.522,23.064,26.379,42.786
Williams,67.0,24.93691,5.009406,18.153,21.9615,23.681,26.507,46.315
Alfa Romeo,68.0,25.003559,5.171887,14.881,21.7985,24.0195,28.4005,37.19
Alpine F1 Team,61.0,24.983246,5.237372,15.432,21.452,23.844,29.116,40.8
AlphaTauri,68.0,25.212441,5.380044,14.943,21.75,24.3225,28.79725,40.74
Aston Martin,66.0,24.7995,5.382271,14.945,21.4905,23.55,26.13175,43.124
Haas F1 Team,69.0,25.39829,5.636702,15.054,22.113,24.293,27.186,49.729


### Constructor Specific Performance

In [50]:
# Histogram of the selected season's stops (under 50 s), one overlaid trace
# per constructor. Relies on `year` having been set by an earlier cell.
isShort = newPitStops['seconds']<50
seasonStops = newPitStops[isShort&(newPitStops['year']==year)]

# NOTE(review): the reference lines below are computed over ALL seasons, not
# just `year`, while the histogram shows a single season -- confirm this
# all-time benchmark is intended.
allShortSeconds = newPitStops[isShort]['seconds']
meanSeconds = allShortSeconds.mean()
fastDecile = allShortSeconds.quantile(0.1)
slowDecile = allShortSeconds.quantile(0.9)

fig = px.histogram(
    seasonStops,
    x='seconds',
    color='constructorName',
    color_discrete_map=constructor_color_map,
)
fig.update_layout(
    title_text='Pit Stop Duration Distribution by Constructor',
    barmode='overlay',
)
fig.add_vline(
    x=meanSeconds,
    annotation_text=f"Average: {meanSeconds:.2f}s",
    annotation_position='top',
)
fig.add_vline(
    x=fastDecile,
    line_dash='dash',
    annotation_text=f"Top Decile: {fastDecile:.2f}s",
    annotation_position='top left',
)
fig.add_vline(
    x=slowDecile,
    line_dash='dash',
    annotation_text=f"Bottom Decile: {slowDecile:.2f}s",
    annotation_position='bottom right',
)

fig.update_traces(opacity=0.5)
fig.show()

# Conclusions

Pit stops are cool and are an integral part of F1, but optimizing them is probably a waste of time (at least in the sport's current state). The pit time on average accounts for less than 1% of the race time (0.83%). Your efforts are likely better spent in other areas.

### How did pit stop durations change over time?
Average pit stop durations increased after 2013 and have stayed relatively stable since then. Pit stop times have also increased in variance over the past couple of years.

### Is there a relationship between pit stop durations and constructor?
Not meaningful. There are some differences between the constructors; however, they don't appear to have a significant effect on the race outcome.

### Is there a relationship between pit stop durations and race circuit?
Yes. The circuits have an impact on the overall time spent in the pit lane, whether through the number of stops, the track layout, or the length of the pit lane.

### What is the time spent in the pit lane as a percentage of the race?
Average time spent in the pit lane is about 0.83% of the race time (less than 1%).

### Who is the best constructor on pit stop performance (for 2021)?
1. Red Bull
2. Mercedes
3. Ferrari