In [1]:
import pandas as pd
import missingno as msno
import seaborn as sns

# Standard plotly imports
from _plotly_future_ import v4_subplots
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode

# Using plotly + cufflinks in offline mode.
# cufflinks is imported for the DataFrame.iplot() call used by the plotting cell
# further down; both calls below are run purely for their side effects.
import cufflinks
# NOTE(review): connected=True presumably makes the notebook load plotly.js from
# the CDN instead of embedding it — confirm against the installed versions.
cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)

In [2]:
# Raise the row limit pandas uses when rendering a DataFrame to 100.
pd.set_option("display.max_rows", 100)

In [3]:
DIR = 'formula-1-race-data-19502017/'
!ls 'formula-1-race-data-19502017/'

[31mcircuits.csv[m[m             [31mdrivers.csv[m[m              [31mresults.csv[m[m
[31mconstructorResults.csv[m[m   [31mlapTimes.csv[m[m             [31mseasons.csv[m[m
[31mconstructorStandings.csv[m[m [31mpitStops.csv[m[m             [31mstatus.csv[m[m
[31mconstructors.csv[m[m         [31mqualifying.csv[m[m
[31mdriverStandings.csv[m[m      [31mraces.csv[m[m


In [4]:
def _load_table(filename):
    """Read one CSV file located under DIR, decoding it as ISO-8859-1."""
    return pd.read_csv(DIR + filename, encoding="ISO-8859-1")


pitstops = _load_table('pitStops.csv')
races = _load_table('races.csv')
laptimes = _load_table('lapTimes.csv')
drivers = _load_table('drivers.csv')

In [5]:
# Preview the pit stops ordered by race, then by driver within each race.
pitstops.sort_values(["raceId", "driverId"])

Unnamed: 0,raceId,driverId,stop,lap,time,duration,milliseconds
11,841,1,1,16,17:28:24,23.227,23227
33,841,1,2,36,17:59:29,23.199,23199
10,841,2,1,15,17:27:41,22.994,22994
29,841,2,2,30,17:51:32,25.098,25098
13,841,3,1,16,17:29:00,23.716,23716
...,...,...,...,...,...,...,...
6229,988,840,1,11,17:23:10,21.613,21613
6244,988,840,2,29,17:55:13,21.543,21543
6250,988,840,3,35,18:06:11,21.908,21908
6245,988,842,1,30,17:56:53,21.519,21519


In [6]:
# Drop lap times belonging to races that have no pit-stop records at all.
has_pitstop_data = laptimes["raceId"].isin(pitstops["raceId"])
laptimes = laptimes[has_pitstop_data]
laptimes.sort_values(["raceId", "driverId"])

Unnamed: 0,raceId,driverId,lap,position,time,milliseconds
58,841,1,1,2,1:40.573,100573
59,841,1,2,2,1:33.774,93774
60,841,1,3,2,1:32.900,92900
61,841,1,4,2,1:32.582,92582
62,841,1,5,2,1:32.471,92471
...,...,...,...,...,...,...
426520,988,843,50,15,1:44.204,104204
426521,988,843,51,15,1:44.171,104171
426522,988,843,52,15,1:43.897,103897
426523,988,843,53,15,1:44.249,104249


In [7]:
# raceId of the single race analysed in the rest of the notebook
# (the plotting cell titles it "Monaco GP 2017").
RACE_ID = 974

# Narrow the lap times to that race *before* joining: raceId is one of the join
# keys, so the result is the same as filtering afterwards, but the merge and
# sort only touch one race's rows.
race_laps = laptimes.loc[laptimes["raceId"] == RACE_ID]

# Build the frame in one chain so no column is ever assigned onto a filtered
# slice (which is what raises pandas' SettingWithCopyWarning).
df = (
    race_laps
    # Left join keeps every lap; the pit-stop columns are NaN on laps without a stop.
    # Columns present in both tables get the _lt (lap time) / _ps (pit stop) suffix.
    .merge(
        pitstops,
        on=["raceId", "driverId", "lap"],
        how="left",
        suffixes=('_lt', '_ps')
    )
    .sort_values(by=["raceId", "driverId", "lap"])
    # raceId is stored as a string from here on, exactly as before.
    .assign(raceId=lambda frame: frame["raceId"].astype(str))
    # Attach driver details (e.g. driverRef, used for labelling later).
    .merge(drivers, on="driverId")
)

In [22]:
# Keep only the laps on which a pit stop was recorded (stop number of 1 or more),
# then show the columns relevant to the stop itself.
pit_columns = ["lap", "driverRef", "position", "duration"]
stops = df[df["stop"].ge(1)]
stops[pit_columns]

Unnamed: 0,lap,driverRef,position,duration
45,46,hamilton,7,24.155
111,34,raikkonen,3,24.833
193,38,massa,16,24.353
217,62,massa,12,25.123
234,1,button,20,24.465
272,39,button,19,26.057
329,39,vettel,1,24.306
408,40,grosjean,9,24.66
477,16,perez,16,31.313
524,63,perez,11,25.026


In [29]:
# Line chart with one trace per driver: race position on every lap.
fig = df.iplot(
    x='lap',
    y='position',
    categories='driverRef',
    xTitle="Lap",
    yTitle="Position",
    title="Race position over laps (Monaco GP 2017)",
    mode="lines",
    asFigure=True
)

# Overlay a marker on each lap where a driver pitted. The trace is called
# "pit duration", so the hover label now carries the driver and the recorded
# duration of that stop; the previous hoverinfo=None left the default hover
# (coordinates only) in place and the duration was never shown.
fig.add_scatter(
    x=stops["lap"],
    y=stops["position"],
    mode="markers",
    name="pit duration",
    text=stops["driverRef"] + ": " + stops["duration"].astype(str),
    hoverinfo="x+y+text"
)

iplot(fig)