### This Notebook scrapes all tracks and saves the pos data into a parquet-file

In [86]:
import fastf1 as ff1
import numpy as np
import pandas as pd
from tqdm import tqdm

In [107]:
data = ff1.get_event_schedule(2020)

In [109]:
data["EventName"].unique

<bound method Series.unique of 0               Pre-Season Test 1
1               Pre-Season Test 2
2             Austrian Grand Prix
3              Styrian Grand Prix
4            Hungarian Grand Prix
5              British Grand Prix
6     70th Anniversary Grand Prix
7              Spanish Grand Prix
8              Belgian Grand Prix
9              Italian Grand Prix
10              Tuscan Grand Prix
11             Russian Grand Prix
12               Eifel Grand Prix
13          Portuguese Grand Prix
14      Emilia Romagna Grand Prix
15             Turkish Grand Prix
16             Bahrain Grand Prix
17              Sakhir Grand Prix
18           Abu Dhabi Grand Prix
Name: EventName, dtype: object>

In [89]:
data["Location"].unique

<bound method Series.unique of 0             Barcelona
1             Barcelona
2             Spielberg
3             Spielberg
4              Budapest
5           Silverstone
6           Silverstone
7             Barcelona
8     Spa-Francorchamps
9                 Monza
10              Mugello
11                Sochi
12          Nürburgring
13             Portimão
14                Imola
15             Istanbul
16               Sakhir
17               Sakhir
18           Yas Island
Name: Location, dtype: object>

Get all GPs from the years 2020 to 2024

In [90]:
all_gps = []
for year in range(2020,2025):
    temp_df = pd.DataFrame(ff1.get_event_schedule(year)[["RoundNumber", "EventName"]])
    temp_df["year"] = year
    all_gps.append(temp_df)


df_all_gps = pd.concat(all_gps)

In [91]:
# remove one-off races
remove = ['70th Anniversary Grand Prix', 'Styrian Grand Prix', 'Sakhir Grand Prix']
df_all_gps = df_all_gps.loc[~df_all_gps["EventName"].isin(remove)]


Drop testing events

In [92]:
df_all_gps = df_all_gps.replace({0:None},regex=True)
df_all_gps = df_all_gps.dropna()

Scrape the track data for every track

In [93]:
# Hilfsfunktion um Positionen gleich richtig zu rotieren
def rotate(xy, *, angle):
    rot_mat = np.array(
        [[np.cos(angle), np.sin(angle)], [-np.sin(angle), np.cos(angle)]]
    )
    return np.matmul(xy, rot_mat)

In [94]:
df_all_gps.head()

Unnamed: 0,RoundNumber,EventName,year
2,1,Austrian Grand Prix,2020
4,3,Hungarian Grand Prix,2020
5,4,British Grand Prix,2020
7,6,Spanish Grand Prix,2020
8,7,Belgian Grand Prix,2020


In [95]:
all_track_data = []
for _,gp_data in tqdm(df_all_gps.iterrows()):
    year = gp_data["year"]
    gp = gp_data["RoundNumber"]
    gp_name = gp_data["EventName"]

    session = ff1.get_session(year,gp,"Q")
    session.load()
    df_temp_pos_data = session.laps.pick_fastest().get_pos_data()
    # add the first row to end to make the lap complete
    temp_first_row = df_temp_pos_data.iloc[:1,:]
    df_temp_pos_data = pd.concat([df_temp_pos_data,temp_first_row])
    
    track = df_temp_pos_data.loc[:, ("X", "Y")].to_numpy()

    circuit_info = session.get_circuit_info()
    # Convert the rotation angle from degrees to radian.
    track_angle = circuit_info.rotation / 180 * np.pi

    # Rotate and plot the track map.
    rotated_track = rotate(track, angle=track_angle)
    
    df_temp_track_data = pd.DataFrame(rotated_track, columns=["x", "y"])
    
    df_temp_track_data["round_number"] = gp
    df_temp_track_data["year"] = year
    df_temp_track_data["gp_name"] = gp_name
    all_track_data.append(df_temp_track_data)

df_track_data = pd.concat(all_track_data)



0it [00:00, ?it/s]core           INFO 	Loading data for Austrian Grand Prix - Qualifying [v3.3.7]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['77', '44', '33', '4', '23', '11', '16', '55', '18', '3', '5', '10', '26', '31', '8', '20', '63', '99', '7', '6']
1it [00:01,  1.16s/it]core           INFO 	Loading data for Hungarian Grand Prix - Qualifyin

KeyError: 'DriverNumber'

In [96]:
df_track_data = pd.concat(all_track_data)

Add the circuit keys

In [103]:
gp_to_circuit = {'Abu Dhabi Grand Prix' : 'yas_marina' ,
 'Australian Grand Prix' : 'albert_park',
 'Austrian Grand Prix' : 'red_bull_ring',
 'Azerbaijan Grand Prix': 'baku',
 'Bahrain Grand Prix': 'bahrain',
 'Belgian Grand Prix': 'spa',
 'British Grand Prix': 'silverstone',
 'Canadian Grand Prix': 'villeneuve',
 'Chinese Grand Prix': 'shanghai',
 'Dutch Grand Prix': 'zandvoort',
 'Eifel Grand Prix': 'nurburgring',
 'Emilia Romagna Grand Prix': 'imola',
 'French Grand Prix': 'ricard',
 'Hungarian Grand Prix': 'hungaroring',
 'Italian Grand Prix': 'monza',
 'Japanese Grand Prix': 'suzuka',
 'Las Vegas Grand Prix': 'vegas',
 'Mexico City Grand Prix': 'rodriguez',
 'Miami Grand Prix': 'miami',
 'Monaco Grand Prix': 'monaco',
 'Portuguese Grand Prix': 'portimao',
 'Qatar Grand Prix': 'losail',
 'Russian Grand Prix': 'sochi',
 'Saudi Arabian Grand Prix': 'jeddah',
 'Singapore Grand Prix': 'marina_bay',
 'Spanish Grand Prix': 'catalunya',
 'São Paulo Grand Prix': 'interlagos',
 'Turkish Grand Prix': 'istanbul',
 'Tuscan Grand Prix': 'mugello',
 'United States Grand Prix': 'americas'}

In [104]:
df_track_data['circuit_code'] = df_track_data['gp_name'].map(gp_to_circuit)

Save to parquet

In [106]:
df_track_data.to_parquet("../static/data/all_tracks_new.parquet",index=False)