This notebook uses FastF1 to scrape the X/Y position coordinates of every driver for every race of the 2020 to 2024 seasons (2024 only partially).

In [2]:
import fastf1 as ff1
import numpy as np
import pandas as pd

Get all GPs from the years 2020 to 2024 (for 2024, only the first part of the schedule is kept)

In [3]:
# Build one small frame per season holding the round numbers of that
# season's schedule, tag it with the season, and stack all seasons.
all_gps = []
for year in range(2020, 2025):
    season_rounds = pd.DataFrame(ff1.get_event_schedule(year)["RoundNumber"])
    season_rounds["year"] = year
    # For 2024 only the schedule rows with index label <= 8 are kept
    # (label-based, inclusive slice).
    # NOTE(review): presumably the later rounds had not been run yet when
    # this was written -- confirm before extending the cut-off.
    all_gps.append(season_rounds.loc[:8] if year == 2024 else season_rounds)

df_all_gps = pd.concat(all_gps)



Drop testing events (rows whose round number is 0)

In [4]:
# Remove the rows with round number 0 (the testing events, per the cell
# description): every 0 in the frame becomes a missing value, rows that
# contain a missing value are dropped, and the index is renumbered from 0.
df_all_gps = (
    df_all_gps
    .replace({0: None})
    .dropna()
    .reset_index(drop=True)
)

In [5]:
# Show the (RoundNumber, year) pairs that remain after the filtering above.
df_all_gps

Unnamed: 0,RoundNumber,year
0,1,2020
1,2,2020
2,3,2020
3,4,2020
4,5,2020
...,...,...
86,4,2024
87,5,2024
88,6,2024
89,7,2024


Scrape the position data of every driver for every race

In [6]:
# Helper that rotates position data into the desired orientation.
def rotate(xy, *, angle):
    """Rotate 2D points by ``angle`` using a 2x2 rotation matrix.

    Parameters
    ----------
    xy : array-like
        Point(s) whose last axis holds the two coordinates; a single point
        of shape ``(2,)`` or a set of points of shape ``(n, 2)``.
    angle : float
        Rotation angle in radians (keyword-only).

    Returns
    -------
    numpy.ndarray
        ``xy`` right-multiplied by ``[[cos, sin], [-sin, cos]]``, so the
        point ``(1, 0)`` is mapped to ``(cos(angle), sin(angle))``.
    """
    cos_a = np.cos(angle)
    sin_a = np.sin(angle)
    rotation = np.array([[cos_a, sin_a], [-sin_a, cos_a]])
    return np.matmul(xy, rotation)

In [7]:
# Collect, for every race in df_all_gps and every driver of that race, a
# frame with the rotated X/Y positions plus identifying columns
# (round_number, year, driver_number, date, pos_index). All per-driver frames
# are concatenated into df_track_data at the end.
all_driver_data = []
for _, gp_data in df_all_gps.iterrows():
    year = gp_data["year"]
    gp = gp_data["RoundNumber"]

    session = ff1.get_session(year, gp, "R")
    session.load()

    # The circuit rotation is the same for every driver of a session, so it
    # is looked up once per race instead of once per driver.
    try:
        circuit_info = session.get_circuit_info()
        # Convert the rotation angle from degrees to radians.
        track_angle = circuit_info.rotation / 180 * np.pi
    except Exception as exc:
        # Best effort: without a rotation angle no driver of this race can be
        # processed, so report it once and move on to the next race.
        print(f"No data available for {year} round {gp} (circuit info): {exc!r}")
        continue

    for driver in session.drivers:
        try:
            df_pos_data_driver = session.laps.pick_drivers(driver).get_pos_data()
            pos_data_driver = df_pos_data_driver.loc[:, ("X", "Y")].to_numpy()

            # Rotate the raw coordinates by the circuit's rotation angle.
            rotated_track = rotate(pos_data_driver, angle=track_angle)

            df_temp_race_data_driver = pd.DataFrame(rotated_track, columns=["x", "y"])

            temp_driver_info = session.get_driver(driver)

            df_temp_race_data_driver["round_number"] = gp
            df_temp_race_data_driver["year"] = year
            df_temp_race_data_driver["driver_number"] = temp_driver_info["DriverNumber"]
            # Assign positionally: the new frame has a fresh 0..n-1 index, so
            # assigning the Series itself would align on index labels and
            # could shift or blank the dates if the position frame's index
            # does not start at 0.
            df_temp_race_data_driver["date"] = df_pos_data_driver["Date"].to_numpy()
            # Keep the per-driver sample position; the index repeats after
            # the final concat.
            df_temp_race_data_driver["pos_index"] = df_temp_race_data_driver.index

            all_driver_data.append(df_temp_race_data_driver)

        except Exception as exc:
            # Best effort: skip drivers without usable data, but say which
            # one and why. `Exception` (not a bare except) keeps
            # KeyboardInterrupt working during this long-running loop.
            print(f"No data available for {year} round {gp}, driver {driver}: {exc!r}")

df_track_data = pd.concat(all_driver_data)



core           INFO 	Loading data for Austrian Grand Prix - Race [v3.3.7]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['77', '16', '4', '44', '55', '11', '10', '31', '99', '5', '6', '26', '23', '7', '63', '8', '20', '18', '3', '33']
core           INFO 	Loading data for Styrian Grand Prix - Race

No data available
No data available
No data available
No data available
No data available
No data available
No data available
No data available
No data available
No data available
No data available
No data available
No data available
No data available
No data available
No data available
No data available
No data available
No data available
No data available


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['33', '44', '77', '10', '16', '14', '55', '11', '31', '4', '3', '18', '5', '99', '88', '6', '63', '47', '22', '9']
core           INFO 	Loading data for Italian Grand Prix - Race [v3.3.7]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data


No data available


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '55', '44', '63', '20', '77', '31', '22', '14', '24', '47', '18', '23', '3', '4', '6', '27', '11', '1', '10']
core           INFO 	Loading data for Saudi Arabian Grand Prix - Race [v3.3.7]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_statu

No data available
No data available


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '11', '63', '44', '4', '3', '31', '77', '10', '23', '24', '18', '47', '20', '22', '6', '14', '1', '5', '55']
core           INFO 	Loading data for Emilia Romagna Grand Prix - Race [v3.3.7]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_statu

No data available


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '81', '16', '44', '55', '63', '14', '31', '10', '40', '22', '24', '27', '20', '23', '2', '18', '11', '77']
core           INFO 	Loading data for Qatar Grand Prix - Race [v3.3.7]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data


No data available


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '55', '11', '63', '10', '18', '22', '23', '2', '27', '77', '24', '20', '3', '14', '81', '31', '44', '16']
core           INFO 	Loading data for Mexico City Grand Prix - Race [v3.3.7]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_

In [8]:
# Shrink the identifier columns to the narrowest integer types used for them.
# The casts are applied column by column, in this order.
compact_dtypes = {
    "round_number": np.int8,
    "pos_index": np.uint16,
    "year": np.int16,
    "driver_number": np.int8,
}
for column_name, target_dtype in compact_dtypes.items():
    df_track_data[column_name] = df_track_data[column_name].astype(target_dtype)

In [9]:
# Print the column dtypes and the memory footprint of the combined frame.
df_track_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 40804266 entries, 0 to 9444
Data columns (total 7 columns):
 #   Column         Dtype         
---  ------         -----         
 0   x              float64       
 1   y              float64       
 2   round_number   int8          
 3   year           int16         
 4   driver_number  int8          
 5   date           datetime64[ns]
 6   pos_index      uint16        
dtypes: datetime64[ns](1), float64(2), int16(1), int8(2), uint16(1)
memory usage: 1.4 GB


In [11]:
# Export step, currently disabled: uncomment the next line to write the
# combined frame to a Parquet file in the working directory.
#df_track_data.to_parquet("./race_data.parquet")