Interacting with FastF1 Package

In [1]:
import numpy as np
import pandas as pd
import fastf1
import os
import warnings
# Silence FutureWarning for the whole kernel so deprecation notices
# do not clutter the cell outputs.
warnings.simplefilter('ignore', FutureWarning)

In [2]:
# Local cache directory - optional, but repeated loads run much faster with it.
# LOCALAPPDATA is only defined on Windows; elsewhere os.getenv returns None and
# `None + str` would raise TypeError, so fall back to ~/.cache.
cache_root = os.getenv('LOCALAPPDATA') or os.path.join(os.path.expanduser('~'), '.cache')
cache_dir = os.path.join(cache_root, 'pip', 'cache', 'fastF1')
os.makedirs(cache_dir, exist_ok=True)  # enable_cache rejects a directory that does not exist yet
fastf1.Cache.enable_cache(cache_dir)

Loading session data for first race from 2022 competition

In [3]:
# Round 1 of the 2022 season, 'R' = the race session (Bahrain Grand Prix per the log below).
session = fastf1.get_session(2022, 1, 'R')
# Populate the session object; laps, weather, race control messages, etc. are read from it afterwards.
session.load()

core           INFO 	Loading data for Bahrain Grand Prix - Race [v2.3.0]
api            INFO 	Using cached data for driver_info
api            INFO 	Using cached data for timing_data
api            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
api            INFO 	Using cached data for session_status_data
api            INFO 	Using cached data for track_status_data
api            INFO 	Using cached data for car_data
api            INFO 	Using cached data for position_data
api            INFO 	Using cached data for weather_data
api            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '3', '27', '22', '24', '23', '4', '47', '31', '10', '63', '14', '20', '77', '44', '11', '55', '1', '18', '6']


Visualizing dataframe

In [4]:
# Preview the lap table: one row per lap.
session.laps.head()

Unnamed: 0,Time,DriverNumber,LapTime,LapNumber,PitOutTime,PitInTime,Sector1Time,Sector2Time,Sector3Time,Sector1SessionTime,...,Compound,TyreLife,FreshTyre,Stint,LapStartTime,Team,Driver,TrackStatus,IsAccurate,LapStartDate
0,0 days 01:04:14.256000,16,NaT,1,0 days 00:24:54.765000,NaT,NaT,0 days 00:00:42.120000,0 days 00:00:23.984000,NaT,...,SOFT,1.0,True,1,0 days 01:02:34.872000,Ferrari,LEC,1,False,2022-03-20 15:03:34.889
1,0 days 01:05:52.109000,16,0 days 00:01:37.853000,2,NaT,NaT,0 days 00:00:31.582000,0 days 00:00:42.159000,0 days 00:00:24.112000,0 days 01:04:45.838000,...,SOFT,2.0,True,1,0 days 01:04:14.256000,Ferrari,LEC,1,True,2022-03-20 15:05:14.273
2,0 days 01:07:30.381000,16,0 days 00:01:38.272000,3,NaT,NaT,0 days 00:00:31.635000,0 days 00:00:42.404000,0 days 00:00:24.233000,0 days 01:06:23.744000,...,SOFT,3.0,True,1,0 days 01:05:52.109000,Ferrari,LEC,1,True,2022-03-20 15:06:52.126
3,0 days 01:09:08.795000,16,0 days 00:01:38.414000,4,NaT,NaT,0 days 00:00:31.619000,0 days 00:00:42.482000,0 days 00:00:24.313000,0 days 01:08:02,...,SOFT,4.0,True,1,0 days 01:07:30.381000,Ferrari,LEC,1,True,2022-03-20 15:08:30.398
4,0 days 01:10:47.266000,16,0 days 00:01:38.471000,5,NaT,NaT,0 days 00:00:31.590000,0 days 00:00:42.565000,0 days 00:00:24.316000,0 days 01:09:40.385000,...,SOFT,5.0,True,1,0 days 01:09:08.795000,Ferrari,LEC,1,True,2022-03-20 15:10:08.812


In [5]:
# Column dtypes: the timing columns are timedelta64[ns], which motivates the numeric conversion that follows.
session.laps.dtypes

Time                  timedelta64[ns]
DriverNumber                   object
LapTime               timedelta64[ns]
LapNumber                       int64
PitOutTime            timedelta64[ns]
PitInTime             timedelta64[ns]
Sector1Time           timedelta64[ns]
Sector2Time           timedelta64[ns]
Sector3Time           timedelta64[ns]
Sector1SessionTime    timedelta64[ns]
Sector2SessionTime    timedelta64[ns]
Sector3SessionTime    timedelta64[ns]
SpeedI1                       float64
SpeedI2                       float64
SpeedFL                       float64
SpeedST                       float64
IsPersonalBest                   bool
Compound                       object
TyreLife                      float64
FreshTyre                      object
Stint                           int64
LapStartTime          timedelta64[ns]
Team                           object
Driver                         object
TrackStatus                    object
IsAccurate                     object
LapStartDate

LapTime is a timedelta64 type column. Transforming it to numeric with a millisecond representation will help with data visualization and models.

In [6]:
# Express each lap time as a whole number of milliseconds.
# Laps with no recorded time (NaT) come out as NaN, hence the float64 result.
session.laps['LapTime(ms)'] = session.laps['LapTime'].floordiv(np.timedelta64(1, 'ms'))
session.laps['LapTime(ms)'].head()

0        NaN
1    97853.0
2    98272.0
3    98414.0
4    98471.0
dtype: float64

As we can see, the first row has no numeric value (NaN). That is fine, because this row will be excluded by the following criterion:

'IsAccurate' value is set to False

In [7]:
# Side-by-side check: in this preview the lap with a missing LapTime(ms) is the one flagged IsAccurate == False.
session.laps[['LapNumber','LapTime(ms)','IsAccurate']].head()

Unnamed: 0,LapNumber,LapTime(ms),IsAccurate
0,1,,False
1,2,97853.0,True
2,3,98272.0,True
3,4,98414.0,True
4,5,98471.0,True


Getting event data

In [8]:
# Name of the event this session belongs to; reused later to label derived rows.
session.event.EventName

'Bahrain Grand Prix'

In [9]:
# Event date, returned as a pandas Timestamp.
session.event.EventDate

Timestamp('2022-03-20 20:00:00')

We will need to compile this data for every race in the 2022 competition. So, for each session loaded, we specify the actions below:

Loading weather data

In [10]:
# Preview the weather samples; in this preview they are spaced about one minute apart in session time.
session.weather_data.head()

Unnamed: 0,Time,AirTemp,Humidity,Pressure,Rainfall,TrackTemp,WindDirection,WindSpeed
0,0 days 00:01:03.204000,25.6,17.0,1010.2,False,32.3,346,0.5
1,0 days 00:02:03.202000,25.7,17.0,1010.2,False,32.3,347,0.6
2,0 days 00:03:03.205000,25.7,17.0,1010.0,False,32.2,359,0.4
3,0 days 00:04:03.220000,25.7,17.0,1010.2,False,32.2,8,0.4
4,0 days 00:05:03.218000,25.6,17.0,1010.0,False,32.1,16,0.5


The weather data carries no information about which event it belongs to. We add an 'EventName' column so we can distinguish between events in the resulting dataframe that combines all weather data.

In [11]:
# Tag every weather sample with its event so rows from different races remain
# distinguishable once they are combined into a single dataframe.
# Read the explicit `EventName` field, as the other cells do: on a pandas
# Series `.name` is the label attribute, so relying on it to return the event
# name depends on FastF1's legacy property (NOTE(review): believed deprecated;
# its FutureWarning would be hidden by the warning filter set at the top).
session.weather_data['EventName'] = session.event.EventName
session.weather_data.head()

Unnamed: 0,Time,AirTemp,Humidity,Pressure,Rainfall,TrackTemp,WindDirection,WindSpeed,EventName
0,0 days 00:01:03.204000,25.6,17.0,1010.2,False,32.3,346,0.5,Bahrain Grand Prix
1,0 days 00:02:03.202000,25.7,17.0,1010.2,False,32.3,347,0.6,Bahrain Grand Prix
2,0 days 00:03:03.205000,25.7,17.0,1010.0,False,32.2,359,0.4,Bahrain Grand Prix
3,0 days 00:04:03.220000,25.7,17.0,1010.2,False,32.2,8,0.4,Bahrain Grand Prix
4,0 days 00:05:03.218000,25.6,17.0,1010.0,False,32.1,16,0.5,Bahrain Grand Prix


In [12]:
# Preview race control messages (flags, DRS status, other notices), each with a wall-clock timestamp.
session.race_control_messages.head()

Unnamed: 0,Time,Category,Message,Status,Flag,Scope,Sector,RacingNumber
0,2022-03-20 14:20:01,Flag,GREEN LIGHT - PIT EXIT OPEN,,GREEN,Track,,
1,2022-03-20 14:30:01,Other,PIT EXIT CLOSED,,,,,
2,2022-03-20 14:45:05,Other,RISK OF RAIN FOR F1 RACE IS 0%,,,,,
3,2022-03-20 14:57:07,Drs,DRS DISABLED,DISABLED,,,,
4,2022-03-20 15:03:35,Flag,GREEN LIGHT - PIT EXIT OPEN,,GREEN,Track,,


In [13]:
# Driver numbers are strings, not ints - they are passed quoted to get_driver / pick_driver below.
np.array(session.drivers)

array(['16', '3', '27', '22', '24', '23', '4', '47', '31', '10', '63',
       '14', '20', '77', '44', '11', '55', '1', '18', '6'], dtype='<U2')

We also have driver specific data

In [14]:
# Identity and result fields for driver number '16'.
session.get_driver('16')

DriverNumber                  16
BroadcastName          C LECLERC
Abbreviation                 LEC
TeamName                 Ferrari
TeamColor                 ed1c24
FirstName                Charles
LastName                 Leclerc
FullName         Charles Leclerc
Position                     0.0
GridPosition                 0.0
Q1                           NaT
Q2                           NaT
Q3                           NaT
Time                         NaT
Status                          
Points                       0.0
Name: Charles, dtype: object

In [15]:
# Field names available on a driver record.
np.array(session.get_driver('16').index)

array(['DriverNumber', 'BroadcastName', 'Abbreviation', 'TeamName',
       'TeamColor', 'FirstName', 'LastName', 'FullName', 'Position',
       'GridPosition', 'Q1', 'Q2', 'Q3', 'Time', 'Status', 'Points'],
      dtype=object)

And telemetry for each lap. To access these we need to specify driver and lap beforehand.

In [16]:
# Despite its name, `session_16` holds laps, not a session: the laps of driver '16',
# truncated to the first five rows by head().
session_16 = session.laps.pick_driver('16').head()
session_16

Unnamed: 0,Time,DriverNumber,LapTime,LapNumber,PitOutTime,PitInTime,Sector1Time,Sector2Time,Sector3Time,Sector1SessionTime,...,TyreLife,FreshTyre,Stint,LapStartTime,Team,Driver,TrackStatus,IsAccurate,LapStartDate,LapTime(ms)
0,0 days 01:04:14.256000,16,NaT,1,0 days 00:24:54.765000,NaT,NaT,0 days 00:00:42.120000,0 days 00:00:23.984000,NaT,...,1.0,True,1,0 days 01:02:34.872000,Ferrari,LEC,1,False,2022-03-20 15:03:34.889,
1,0 days 01:05:52.109000,16,0 days 00:01:37.853000,2,NaT,NaT,0 days 00:00:31.582000,0 days 00:00:42.159000,0 days 00:00:24.112000,0 days 01:04:45.838000,...,2.0,True,1,0 days 01:04:14.256000,Ferrari,LEC,1,True,2022-03-20 15:05:14.273,97853.0
2,0 days 01:07:30.381000,16,0 days 00:01:38.272000,3,NaT,NaT,0 days 00:00:31.635000,0 days 00:00:42.404000,0 days 00:00:24.233000,0 days 01:06:23.744000,...,3.0,True,1,0 days 01:05:52.109000,Ferrari,LEC,1,True,2022-03-20 15:06:52.126,98272.0
3,0 days 01:09:08.795000,16,0 days 00:01:38.414000,4,NaT,NaT,0 days 00:00:31.619000,0 days 00:00:42.482000,0 days 00:00:24.313000,0 days 01:08:02,...,4.0,True,1,0 days 01:07:30.381000,Ferrari,LEC,1,True,2022-03-20 15:08:30.398,98414.0
4,0 days 01:10:47.266000,16,0 days 00:01:38.471000,5,NaT,NaT,0 days 00:00:31.590000,0 days 00:00:42.565000,0 days 00:00:24.316000,0 days 01:09:40.385000,...,5.0,True,1,0 days 01:09:08.795000,Ferrari,LEC,1,True,2022-03-20 15:10:08.812,98471.0


We will use driver 16's first lap in this example

In [17]:
# Lap numbers start at 1 while row positions start at 0, so lap number 1 sits at position 0.
lap = 1 - 1
telemetry_lap = session_16.iloc[lap].get_telemetry()
telemetry_lap.head()

Unnamed: 0,Date,SessionTime,DriverAhead,DistanceToDriverAhead,Time,RPM,Speed,nGear,Throttle,Brake,DRS,Source,Distance,RelativeDistance,Status,X,Y,Z
2,2022-03-20 15:03:34.889,0 days 01:02:34.872000,,0.0,0 days 00:00:00,10002,0,1,34,True,1,interpolation,0.006942,1.347057e-06,OnTrack,-281,3527,-159
3,2022-03-20 15:03:34.961,0 days 01:02:34.944000,,0.0,0 days 00:00:00.072000,10002,0,1,34,True,1,pos,0.003243,6.293407e-07,OnTrack,-281,3527,-159
4,2022-03-20 15:03:34.988,0 days 01:02:34.971000,,0.0,0 days 00:00:00.099000,10002,0,1,34,True,1,car,0.0,0.0,OnTrack,-280,3526,-158
5,2022-03-20 15:03:35.188,0 days 01:02:35.171000,,0.0,0 days 00:00:00.299000,9592,0,1,34,False,1,car,0.0,0.0,OnTrack,-280,3526,-158
6,2022-03-20 15:03:35.321,0 days 01:02:35.304000,,0.0,0 days 00:00:00.432000,8186,5,1,34,False,1,pos,0.178311,3.46026e-05,OnTrack,-281,3527,-159


There is data for each small interval of time, usually less than a fifth of a second. RPM, Speed, Throttle, Brake and DRS are not clearly specified in the documentation, so we will instead compile the approximate relative distance covered in each gear. The 'RelativeDistance' column holds the estimated fraction of the lap completed up to each sample, so the difference between consecutive samples gives the share of the lap covered during that time interval.

In [18]:
# Share of the lap driven in each gear.
df_tel = (
    telemetry_lap['RelativeDistance']
    .diff()                            # lap fraction covered since the previous sample
    .groupby(telemetry_lap['nGear'])   # bucket by the gear recorded on each sample
    .sum()
    .reset_index()
)
df_tel


Unnamed: 0,nGear,RelativeDistance
0,1,0.008317
1,2,0.034742
2,3,0.093897
3,4,0.161845
4,5,0.205396
5,6,0.281518
6,7,0.213158


In [19]:
# Attach the identifying keys (driver, lap number, event) to every per-gear row.
df_tel = df_tel.assign(
    DriverNumber='16',
    LapNumber=lap + 1,  # back from 0-based row position to 1-based lap number
    EventName=session.event.EventName,
)
df_tel

Unnamed: 0,nGear,RelativeDistance,DriverNumber,LapNumber,EventName
0,1,0.008317,16,1,Bahrain Grand Prix
1,2,0.034742,16,1,Bahrain Grand Prix
2,3,0.093897,16,1,Bahrain Grand Prix
3,4,0.161845,16,1,Bahrain Grand Prix
4,5,0.205396,16,1,Bahrain Grand Prix
5,6,0.281518,16,1,Bahrain Grand Prix
6,7,0.213158,16,1,Bahrain Grand Prix


This tells us that driver 16 completed the first lap covering approximately 21.3% of it in 7th gear, and so on. We can check that the shares sum to almost 1. The difference is due to imprecision and rounding.

In [20]:
# Sanity check: the per-gear shares should add up to roughly 1, i.e. one full lap.
df_tel['RelativeDistance'].sum()

0.9988737768257536

session.laps represents each lap as a row. Compiling the data for all gears into a single row will help us combine the two sources of information.

In [21]:
# Reshape to one row per (driver, lap, event) with one column per gear.
# No `values` argument is given, so the remaining column (RelativeDistance)
# becomes the top level of a two-level column index.
df_tel = df_tel.pivot(index=['DriverNumber','LapNumber','EventName'],columns = ['nGear'])
df_tel


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,RelativeDistance,RelativeDistance,RelativeDistance,RelativeDistance,RelativeDistance,RelativeDistance,RelativeDistance
Unnamed: 0_level_1,Unnamed: 1_level_1,nGear,1,2,3,4,5,6,7
DriverNumber,LapNumber,EventName,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
16,1,Bahrain Grand Prix,0.008317,0.034742,0.093897,0.161845,0.205396,0.281518,0.213158


In [22]:
# Inspect the two-level column index produced by the pivot.
df_tel.columns

MultiIndex([('RelativeDistance', 1),
            ('RelativeDistance', 2),
            ('RelativeDistance', 3),
            ('RelativeDistance', 4),
            ('RelativeDistance', 5),
            ('RelativeDistance', 6),
            ('RelativeDistance', 7)],
           names=[None, 'nGear'])

In [23]:
# Relabel the top column level so the flattened names read "Gear_<n>" rather than "RelativeDistance_<n>".
df_tel = df_tel.rename(columns={'RelativeDistance': "Gear"})
df_tel.columns

MultiIndex([('Gear', 1),
            ('Gear', 2),
            ('Gear', 3),
            ('Gear', 4),
            ('Gear', 5),
            ('Gear', 6),
            ('Gear', 7)],
           names=[None, 'nGear'])

In [24]:
# Collapse the two-level column index into flat names such as "Gear_3".
# Only missing or empty level labels are skipped. A plain truthiness test would
# also drop a gear value of 0 and turn ('Gear', 0) into "Gear" instead of "Gear_0".
df_tel.columns = [
    '_'.join(str(level).strip() for level in col if level is not None and level != '')
    for col in df_tel.columns
]
df_tel.columns


Index(['Gear_1', 'Gear_2', 'Gear_3', 'Gear_4', 'Gear_5', 'Gear_6', 'Gear_7'], dtype='object')

In [25]:
# Show the frame with flattened column names; the keys are still in the index at this point.
df_tel

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Gear_1,Gear_2,Gear_3,Gear_4,Gear_5,Gear_6,Gear_7
DriverNumber,LapNumber,EventName,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
16,1,Bahrain Grand Prix,0.008317,0.034742,0.093897,0.161845,0.205396,0.281518,0.213158


In [26]:
# Move DriverNumber / LapNumber / EventName out of the index and back into regular columns.
df_tel = df_tel.reset_index()
df_tel

Unnamed: 0,DriverNumber,LapNumber,EventName,Gear_1,Gear_2,Gear_3,Gear_4,Gear_5,Gear_6,Gear_7
0,16,1,Bahrain Grand Prix,0.008317,0.034742,0.093897,0.161845,0.205396,0.281518,0.213158


Creating empty dataframes to store our data

In [27]:
# One independent, empty accumulator frame per kind of data collected.
(
    df_races,
    df_weather_data,
    df_races_control_data,
    df_drivers_info,
    df_telemetries,
) = (pd.DataFrame() for _ in range(5))

In [28]:
# Round numbers that the loading loop will iterate over.
print(list(range(1, 23)))

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]


In [29]:
# Races 1 to 22 can be loaded using the loop below (range(1, 23) stops at 22).
# Each pass rebinds `session`, so once the loop finishes that name refers to
# the last round loaded.

for i in range(1,23):
    session = fastf1.get_session(2022, i, 'R')
    session.load()


core           INFO 	Loading data for Bahrain Grand Prix - Race [v2.3.0]
api            INFO 	Using cached data for driver_info
api            INFO 	Using cached data for timing_data
api            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
api            INFO 	Using cached data for session_status_data
api            INFO 	Using cached data for track_status_data
api            INFO 	Using cached data for car_data
api            INFO 	Using cached data for position_data
api            INFO 	Using cached data for weather_data
api            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '3', '27', '22', '24', '23', '4', '47', '31', '10', '63', '14', '20', '77', '44', '11', '55', '1', '18', '6']
core           INFO 	Loading data for Saudi Arabian Grand Prix - Race [v2.3.0]
api            INFO 	Using cached data for driver_info
api            INFO 	Using cached data for timi