### This notebook scrapes the layout of every track and saves the position data to a Parquet file

In [85]:
import fastf1 as ff1
import numpy as np
import pandas as pd
from tqdm import tqdm
from fastf1.ergast import Ergast
ergast = Ergast()

In [86]:
# Fetch the 2024 event schedule.
# NOTE(review): `data` is not referenced by any later cell visible in this
# notebook - confirm whether this call is still needed.
data = ff1.get_event_schedule(2024)

Get all GPs from the years 2020 to 2024

In [87]:
# One frame per season (2020-2024) holding the round number and event name,
# each tagged with the season it belongs to.
all_gps = [
    pd.DataFrame(
        ff1.get_event_schedule(season)[["RoundNumber", "EventName"]]
    ).assign(year=season)
    for season in range(2020, 2025)
]

# Stack the per-season frames; the original per-season index is kept as-is.
df_all_gps = pd.concat(all_gps)

In [88]:
# Events that are excluded in every season.
remove = ['70th Anniversary Grand Prix', 'Sakhir Grand Prix']

# The only 2024 events that are kept; every other 2024 event is dropped.
keep_2024 = [
    "Bahrain Grand Prix", "Saudi Arabian Grand Prix", "Australian Grand Prix",
    "Japanese Grand Prix", "Chinese Grand Prix", "Miami Grand Prix",
    "Emilia Romagna Grand Prix", "Monaco Grand Prix",
]

event_name = df_all_gps["EventName"]
season = df_all_gps["year"]

# A row is dropped as soon as any one of these conditions holds.
drop_mask = (
    event_name.isin(remove)
    | ((event_name == "Austrian Grand Prix") & (season == 2020))
    | ((event_name == "Styrian Grand Prix") & (season == 2021))
    | (~event_name.isin(keep_2024) & (season == 2024))
)

df_all_gps = df_all_gps.loc[~drop_mask]

Drop testing events

In [89]:
# Testing events are the schedule rows with RoundNumber 0; keep only real rounds.
# Filtering on that column directly replaces the former frame-wide
# `replace({0: None}, regex=True)`, which would also null a 0 in any other
# column and coerce dtypes. `dropna()` is kept so rows with missing values are
# still discarded exactly as before.
df_all_gps = df_all_gps.loc[df_all_gps["RoundNumber"] != 0].dropna()

Scrape the track data for every track

In [90]:
# Helper that rotates positions into the correct orientation right away.
def rotate(xy, *, angle):
    """Rotate 2-D points with a 2x2 rotation matrix.

    Parameters
    ----------
    xy : array-like
        Points whose last dimension holds the (x, y) pair.
    angle : float
        Rotation angle in radians (keyword-only).

    Returns
    -------
    numpy.ndarray
        ``xy`` multiplied from the right by ``[[cos, sin], [-sin, cos]]``.
    """
    cos_a = np.cos(angle)
    sin_a = np.sin(angle)
    rotation = np.array([[cos_a, sin_a], [-sin_a, cos_a]])
    return np.matmul(xy, rotation)

In [91]:
# Display the filtered event list that the scraping loop iterates over.
df_all_gps

Unnamed: 0,RoundNumber,EventName,year
3,2,Styrian Grand Prix,2020
4,3,Hungarian Grand Prix,2020
5,4,British Grand Prix,2020
7,6,Spanish Grand Prix,2020
8,7,Belgian Grand Prix,2020
...,...,...,...
4,4,Japanese Grand Prix,2024
5,5,Chinese Grand Prix,2024
6,6,Miami Grand Prix,2024
7,7,Emilia Romagna Grand Prix,2024


In [92]:
# Collect one DataFrame of rotated track coordinates per Grand Prix.
all_track_data = []

# iterrows() is a generator without a length, so tqdm needs `total` to render
# a real progress bar instead of a bare "0it" counter.
for _, gp_data in tqdm(df_all_gps.iterrows(), total=len(df_all_gps)):
    year = gp_data["year"]
    gp = gp_data["RoundNumber"]
    gp_name = gp_data["EventName"]

    # Load the "Q" session and take the position data of its fastest lap.
    session = ff1.get_session(year, gp, "Q")
    session.load()
    df_temp_pos_data = session.laps.pick_fastest().get_pos_data()

    # Append the first sample again so the outline ends where it started.
    temp_first_row = df_temp_pos_data.iloc[:1, :]
    df_temp_pos_data = pd.concat([df_temp_pos_data, temp_first_row])

    # Keep only the planar coordinates as an (n, 2) array.
    track = df_temp_pos_data.loc[:, ["X", "Y"]].to_numpy()

    circuit_info = session.get_circuit_info()
    # Convert the rotation angle from degrees to radians.
    track_angle = circuit_info.rotation / 180 * np.pi

    # Rotate the track outline (nothing is plotted here).
    rotated_track = rotate(track, angle=track_angle)

    df_temp_track_data = pd.DataFrame(rotated_track, columns=["x", "y"])

    # Tag every coordinate with the event it belongs to.
    df_temp_track_data["round_number"] = gp
    df_temp_track_data["year"] = year
    df_temp_track_data["gp_name"] = gp_name
    all_track_data.append(df_temp_track_data)

# Stack all tracks; each track keeps its own 0-based row index.
df_track_data = pd.concat(all_track_data)

0it [00:00, ?it/s]core           INFO 	Loading data for Styrian Grand Prix - Qualifying [v3.3.7]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
Request for URL https://ergast.com/api/f1/2020/2/qualifying.json failed; using cached response
Traceback (most recent call last):
  File "/opt/homebrew/lib/python3.12/site-packages/requests_cache/session.py", line 285, in _resend
    response = self._send_and_cache(request, actions, cached_response, **kwargs)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.12/site-packages/requests_cache/session.py", line 253, in _send_and_cache
    response = super().send(request, **kwargs)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.12/site-packages/fastf1/req.py", line 125, in send
    lim.limit()
  File "/opt/homebrew/lib/python3.12/site-packages/fastf1/req.py", line 102, in limit
    raise R

In [93]:
# Build the combined frame from the per-track list.
# NOTE(review): this repeats the last statement of the scraping cell;
# presumably it was re-run after that cell was interrupted - TODO confirm.
df_track_data = pd.concat(all_track_data)

In [94]:
# Display the combined track coordinates of all scraped events.
df_track_data

Unnamed: 0,x,y,round_number,year,gp_name
0,1171.680323,-1174.730276,2,2020,Styrian Grand Prix
1,1011.455297,-1220.533564,2,2020,Styrian Grand Prix
2,864.175935,-1263.110428,2,2020,Styrian Grand Prix
3,699.986423,-1310.983222,2,2020,Styrian Grand Prix
4,534.779612,-1357.873620,2,2020,Styrian Grand Prix
...,...,...,...,...,...
264,-10471.544323,299.106168,8,2024,Monaco Grand Prix
265,-10386.691509,405.172186,8,2024,Monaco Grand Prix
266,-10229.713803,587.605735,8,2024,Monaco Grand Prix
267,-10098.899049,736.805266,8,2024,Monaco Grand Prix


Add the circuit keys

In [95]:
# Lookup from event name to circuit key; two events may share one circuit
# (both the Austrian and the Styrian Grand Prix map to 'red_bull_ring').
gp_to_circuit = {
    'Abu Dhabi Grand Prix': 'yas_marina',
    'Australian Grand Prix': 'albert_park',
    'Austrian Grand Prix': 'red_bull_ring',
    'Styrian Grand Prix': 'red_bull_ring',
    'Azerbaijan Grand Prix': 'baku',
    'Bahrain Grand Prix': 'bahrain',
    'Belgian Grand Prix': 'spa',
    'British Grand Prix': 'silverstone',
    'Canadian Grand Prix': 'villeneuve',
    'Chinese Grand Prix': 'shanghai',
    'Dutch Grand Prix': 'zandvoort',
    'Eifel Grand Prix': 'nurburgring',
    'Emilia Romagna Grand Prix': 'imola',
    'French Grand Prix': 'ricard',
    'Hungarian Grand Prix': 'hungaroring',
    'Italian Grand Prix': 'monza',
    'Japanese Grand Prix': 'suzuka',
    'Las Vegas Grand Prix': 'vegas',
    'Mexico City Grand Prix': 'rodriguez',
    'Miami Grand Prix': 'miami',
    'Monaco Grand Prix': 'monaco',
    'Portuguese Grand Prix': 'portimao',
    'Qatar Grand Prix': 'losail',
    'Russian Grand Prix': 'sochi',
    'Saudi Arabian Grand Prix': 'jeddah',
    'Singapore Grand Prix': 'marina_bay',
    'Spanish Grand Prix': 'catalunya',
    'São Paulo Grand Prix': 'interlagos',
    'Turkish Grand Prix': 'istanbul',
    'Tuscan Grand Prix': 'mugello',
    'United States Grand Prix': 'americas',
}

In [96]:
# Attach the circuit key of each event; event names missing from
# gp_to_circuit end up as NaN.
df_track_data = df_track_data.assign(
    circuit_code=df_track_data['gp_name'].map(gp_to_circuit)
)

In [97]:
# Preview the first rows to check the new circuit_code column.
df_track_data.head()

Unnamed: 0,x,y,round_number,year,gp_name,circuit_code
0,1171.680323,-1174.730276,2,2020,Styrian Grand Prix,red_bull_ring
1,1011.455297,-1220.533564,2,2020,Styrian Grand Prix,red_bull_ring
2,864.175935,-1263.110428,2,2020,Styrian Grand Prix,red_bull_ring
3,699.986423,-1310.983222,2,2020,Styrian Grand Prix,red_bull_ring
4,534.779612,-1357.87362,2,2020,Styrian Grand Prix,red_bull_ring


Add the track geo information

In [98]:
# Gather the circuits of the 2020-2024 seasons, keeping each circuitId only
# the first time it shows up in a season listing.
circuit_columns = ["circuitId", "circuitName", "long", "lat", "locality", "country"]

all_circuits = []
unique_circuit_ids = set()

for year in range(2020, 2025):
    season_circuits = ergast.get_circuits(season=year, result_type='pandas')[circuit_columns]

    # Drop circuits already collected from an earlier season.
    not_seen_yet = ~season_circuits['circuitId'].isin(unique_circuit_ids)
    temp_df = season_circuits[not_seen_yet]

    unique_circuit_ids.update(temp_df['circuitId'])
    all_circuits.append(temp_df)

# NOTE(review): all_circuits_df is not referenced by later visible cells;
# presumably circuit_geo.csv was exported from it - TODO confirm.
all_circuits_df = pd.concat(all_circuits).reset_index(drop=True)


Request for URL https://ergast.com/api/f1/2020/circuits.json failed; using cached response
Traceback (most recent call last):
  File "/opt/homebrew/lib/python3.12/site-packages/requests_cache/session.py", line 285, in _resend
    response = self._send_and_cache(request, actions, cached_response, **kwargs)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.12/site-packages/requests_cache/session.py", line 253, in _send_and_cache
    response = super().send(request, **kwargs)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.12/site-packages/fastf1/req.py", line 125, in send
    lim.limit()
  File "/opt/homebrew/lib/python3.12/site-packages/fastf1/req.py", line 102, in limit
    raise RateLimitExceededError(self._info)
fastf1.req.RateLimitExceededError: ergast.com: 200 calls/h
Request for URL https://ergast.com/api/f1/2021/circuits.json failed; using cached response
Traceback (most recent call 

In [107]:
# Load the circuit geo table and the saved track coordinates from disk.
# NOTE(review): all_tracks_new.parquet is written only by the final cell of
# this notebook, so this cell relies on that file already existing - confirm
# the intended cell order.
df_circuits_geo = pd.read_csv("../static/data/circuit_geo.csv")
df_circuits = pd.read_parquet("../static/data/all_tracks_new.parquet")

# One row per event: the track table repeats these keys for every coordinate.
event_keys = df_circuits[["round_number", "year", "gp_name", "circuit_code"]].drop_duplicates()

# The right join keeps every event, even when no geo row matches its circuit_code.
circuit_data = df_circuits_geo.merge(
    event_keys,
    how='right',
    left_on='circuitId',
    right_on='circuit_code',
)

In [108]:
# Display the circuit geo table loaded from circuit_geo.csv.
df_circuits_geo

Unnamed: 0,circuitId,circuitName,long,lat,locality,country
0,bahrain,Bahrain International Circuit,50.5106,26.0325,Sakhir,Bahrain
1,catalunya,Circuit de Barcelona-Catalunya,2.26111,41.57,Montmeló,Spain
2,hungaroring,Hungaroring,19.2486,47.5789,Budapest,Hungary
3,imola,Autodromo Enzo e Dino Ferrari,11.7167,44.3439,Imola,Italy
4,istanbul,Istanbul Park,29.405,40.9517,Istanbul,Turkey
5,monza,Autodromo Nazionale di Monza,9.28111,45.6156,Monza,Italy
6,mugello,Autodromo Internazionale del Mugello,11.3719,43.9975,Mugello,Italy
7,nurburgring,Nürburgring,6.9475,50.3356,Nürburg,Germany
8,portimao,Autódromo Internacional do Algarve,-8.6267,37.227,Portimão,Portugal
9,red_bull_ring,Red Bull Ring,14.7647,47.2197,Spielberg,Austria


In [109]:
# Order the events chronologically: by season first, then by round.
sort_keys = ["year", "round_number"]
circuit_data = circuit_data.sort_values(sort_keys)

In [110]:
# Write the merged event/circuit table to CSV without the index column.
circuit_data.to_csv("../static/data/circuit_data.csv", index=False)

In [111]:
# Read the CSV back in to check what was written.
pd.read_csv("../static/data/circuit_data.csv")


Unnamed: 0,circuitId,circuitName,long,lat,locality,country,round_number,year,gp_name,circuit_code
0,red_bull_ring,Red Bull Ring,14.76470,47.2197,Spielberg,Austria,2,2020,Styrian Grand Prix,red_bull_ring
1,hungaroring,Hungaroring,19.24860,47.5789,Budapest,Hungary,3,2020,Hungarian Grand Prix,hungaroring
2,silverstone,Silverstone Circuit,-1.01694,52.0786,Silverstone,UK,4,2020,British Grand Prix,silverstone
3,catalunya,Circuit de Barcelona-Catalunya,2.26111,41.5700,Montmeló,Spain,6,2020,Spanish Grand Prix,catalunya
4,spa,Circuit de Spa-Francorchamps,5.97139,50.4372,Spa,Belgium,7,2020,Belgian Grand Prix,spa
...,...,...,...,...,...,...,...,...,...,...
82,suzuka,Suzuka Circuit,136.54100,34.8431,Suzuka,Japan,4,2024,Japanese Grand Prix,suzuka
83,shanghai,Shanghai International Circuit,121.22000,31.3389,Shanghai,China,5,2024,Chinese Grand Prix,shanghai
84,miami,Miami International Autodrome,-80.23890,25.9581,Miami,USA,6,2024,Miami Grand Prix,miami
85,imola,Autodromo Enzo e Dino Ferrari,11.71670,44.3439,Imola,Italy,7,2024,Emilia Romagna Grand Prix,imola


Save to parquet


In [102]:
# Persist the track coordinates (without the index) as a Parquet file.
# NOTE(review): the circuit-geo merge cell earlier in this notebook reads this
# same file, so on a fresh run it must already exist - confirm the cell order.
df_track_data.to_parquet("../static/data/all_tracks_new.parquet",index=False)