#### First, we'll import the libraries we need, including `fastf1`, a Formula 1 data analysis library, and enable its data cache for lower latency.

In [1]:
import os

import fastf1 as ff1
from fastf1 import plotting
from fastf1 import utils

import numpy as np
import pandas as pd

# Directory (relative to the notebook's working directory) where FastF1 stores
# downloaded session data so later runs can reuse it.
CACHE_DIR = 'cache'

# FastF1 expects the cache directory to exist already and raises otherwise,
# so create it first; this keeps a fresh checkout working on a clean run.
os.makedirs(CACHE_DIR, exist_ok=True)
ff1.Cache.enable_cache(CACHE_DIR)

#### Next, we'll set the year, event, and session we're interested in, then load that session's data.

In [2]:
# Which session to analyse: season, event name, and session code
# ('R' selects the race, as the load log confirms).
year = 2021
grand_prix = 'Monza'
session = 'R'

# Look up the session, then fetch (or read from cache) its data.
race = ff1.get_session(year, grand_prix, session)
race.load()

core           INFO 	Loading data for Italian Grand Prix - Race [v2.3.0]
api            INFO 	Using cached data for driver_info
Traceback (most recent call last):
  File "/Users/jddannunzio/opt/anaconda3/lib/python3.9/site-packages/urllib3/connection.py", line 174, in _new_conn
    conn = connection.create_connection(
  File "/Users/jddannunzio/opt/anaconda3/lib/python3.9/site-packages/urllib3/util/connection.py", line 72, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
  File "/Users/jddannunzio/opt/anaconda3/lib/python3.9/socket.py", line 954, in getaddrinfo
    for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno 8] nodename nor servname provided, or not known

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/jddannunzio/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_re

#### Now, let's set the drivers we're interested in and assign their lap data to variables, then select each driver's fastest lap from that data.

In [3]:
# Three-letter codes of the two drivers being compared.
driver_1 = 'RIC'
driver_2 = 'NOR'

# Laps belonging to each driver, selected from the session's full lap table.
laps_driver_1, laps_driver_2 = (
    race.laps.pick_driver(code) for code in (driver_1, driver_2)
)

# The fastest lap out of each driver's laps.
fastest_driver_1 = laps_driver_1.pick_fastest()
fastest_driver_2 = laps_driver_2.pick_fastest()

In [4]:
# Print the (rows, columns) dimensions, then display the first five laps
# as the cell's rich output.
print(laps_driver_1.shape)
laps_driver_1.head(n=5)

(53, 27)


Unnamed: 0,Time,DriverNumber,LapTime,LapNumber,PitOutTime,PitInTime,Sector1Time,Sector2Time,Sector3Time,Sector1SessionTime,...,Compound,TyreLife,FreshTyre,Stint,LapStartTime,Team,Driver,TrackStatus,IsAccurate,LapStartDate
0,0 days 01:03:52.443000,3,NaT,1.0,0 days 00:25:09.902000,NaT,NaT,0 days 00:00:29.470000,0 days 00:00:38.561000,NaT,...,MEDIUM,1.0,True,1,0 days 01:02:14.632000,McLaren,RIC,26,False,2021-09-12 13:03:16.241
1,0 days 01:05:23.188000,3,0 days 00:01:30.745000,2.0,NaT,NaT,0 days 00:00:32.860000,0 days 00:00:29.263000,0 days 00:00:28.622000,0 days 01:04:25.303000,...,MEDIUM,2.0,True,1,0 days 01:03:52.443000,McLaren,RIC,67,False,2021-09-12 13:04:54.052
2,0 days 01:06:49.514000,3,0 days 00:01:26.326000,3.0,NaT,NaT,0 days 00:00:28.357000,0 days 00:00:29.409000,0 days 00:00:28.560000,0 days 01:05:51.545000,...,MEDIUM,3.0,True,1,0 days 01:05:23.188000,McLaren,RIC,1,True,2021-09-12 13:06:24.797
3,0 days 01:08:15.827000,3,0 days 00:01:26.313000,4.0,NaT,NaT,0 days 00:00:28.325000,0 days 00:00:29.399000,0 days 00:00:28.589000,0 days 01:07:17.839000,...,MEDIUM,4.0,True,1,0 days 01:06:49.514000,McLaren,RIC,1,True,2021-09-12 13:07:51.123
4,0 days 01:09:42.237000,3,0 days 00:01:26.410000,5.0,NaT,NaT,0 days 00:00:28.499000,0 days 00:00:29.329000,0 days 00:00:28.582000,0 days 01:08:44.326000,...,MEDIUM,5.0,True,1,0 days 01:08:15.827000,McLaren,RIC,1,True,2021-09-12 13:09:17.436


#### Okay, now to do the actual data cleaning. I'm not sure replacing these `NaN` values is a *good* idea, but we can certainly do it. Here's the `SpeedI1` column of `laps_driver_1` before the change:

In [8]:
# Show the SpeedI1 column as-is, including its NaN entries, before any cleaning.
print(laps_driver_1['SpeedI1'])

0     314.0
1     312.0
2     306.0
3     305.0
4     307.0
5       NaN
6     306.0
7     309.0
8     308.0
9       NaN
10    310.0
11    309.0
12    311.0
13    310.0
14      NaN
15      NaN
16    311.0
17      NaN
18      NaN
19    310.0
20    309.0
21      NaN
22    312.0
23    315.0
24    309.0
25    311.0
26    156.0
27    120.0
28    153.0
29    197.0
30    303.0
31    308.0
32      NaN
33    306.0
34    308.0
35    318.0
36    305.0
37    309.0
38      NaN
39    310.0
40      NaN
41    311.0
42    312.0
43    311.0
44    309.0
45    310.0
46      NaN
47      NaN
48      NaN
49    311.0
50      NaN
51    312.0
52    311.0
Name: SpeedI1, dtype: float64


#### Let's replace the `NaN` values with an empty string. Here's the `SpeedI1` column from a copy of the same DataFrame after that change (note that the column's dtype goes from `float64` to `object`):

In [20]:
# Work on a copy so `laps_driver_1` keeps its original NaN values.
laps_d1_copy = laps_driver_1.copy()

# Assign the filled column back explicitly instead of calling
# `fillna(..., inplace=True)` on `laps_d1_copy.SpeedI1`: that chained form acts
# on an intermediate Series, which pandas warns about and which does not update
# the DataFrame when copy-on-write is enabled.
# Mixing '' with floats turns the column into dtype `object` (no longer numeric).
laps_d1_copy['SpeedI1'] = laps_d1_copy['SpeedI1'].fillna('')
print(laps_d1_copy['SpeedI1'])

0     314.0
1     312.0
2     306.0
3     305.0
4     307.0
5          
6     306.0
7     309.0
8     308.0
9          
10    310.0
11    309.0
12    311.0
13    310.0
14         
15         
16    311.0
17         
18         
19    310.0
20    309.0
21         
22    312.0
23    315.0
24    309.0
25    311.0
26    156.0
27    120.0
28    153.0
29    197.0
30    303.0
31    308.0
32         
33    306.0
34    308.0
35    318.0
36    305.0
37    309.0
38         
39    310.0
40         
41    311.0
42    312.0
43    311.0
44    309.0
45    310.0
46         
47         
48         
49    311.0
50         
51    312.0
52    311.0
Name: SpeedI1, dtype: object
