### This notebook scrapes the position data of all races and saves it to a Parquet file

In [None]:
import fastf1 as ff1
import numpy as np
import pandas as pd
from tqdm import tqdm

Get all GPs from the years 2020 to 2024

In [None]:
# Build one frame per season (2020 through 2024) holding the round numbers of
# that season's event schedule, tagged with the season year.
all_gps = []
for year in range(2020, 2025):
    season_rounds = pd.DataFrame(ff1.get_event_schedule(year)["RoundNumber"])
    season_rounds["year"] = year
    if year == 2024:
        # Label-based slice (end inclusive): keep only index labels 0 through 8.
        # NOTE(review): presumably because the 2024 season was still running
        # when this was written - confirm before re-running.
        season_rounds = season_rounds.loc[:8]
    all_gps.append(season_rounds)

# Stack all seasons into a single frame (the per-season index is kept as is).
df_all_gps = pd.concat(all_gps)

Drop testing events

In [None]:
# Turn every 0 into a missing value, drop the rows that now contain one and
# renumber the index from 0. Per the cell heading, the rows holding a 0 are
# the testing events.
df_all_gps = (
    df_all_gps
    .replace({0: None})
    .dropna()
    .reset_index(drop=True)
)

In [None]:
# Display the schedule frame for a visual check.
df_all_gps

Scrape the track data for every track

In [None]:
def rotate(xy, *, angle):
    """Rotate 2D positions so they come out in the right orientation.

    Args:
        xy: Array-like of points whose last axis holds the (x, y) pair.
        angle: Rotation angle in radians (keyword-only).

    Returns:
        The result of multiplying ``xy`` from the right with the 2x2 rotation
        matrix ``[[cos, sin], [-sin, cos]]``; the point (1, 0) is mapped to
        (cos(angle), sin(angle)).
    """
    cos_a = np.cos(angle)
    sin_a = np.sin(angle)
    rotation = np.array([[cos_a, sin_a], [-sin_a, cos_a]])
    return np.matmul(xy, rotation)

In [None]:
# Collect, for every race in df_all_gps and every driver of that race, the
# rotated X/Y position samples together with identifying metadata.
all_driver_data = []
for _, gp_data in df_all_gps.iterrows():
    year = gp_data["year"]
    gp = gp_data["RoundNumber"]

    session = ff1.get_session(year, gp, "R")
    session.load()

    # The circuit rotation is the same for every driver of a session, so it is
    # looked up once per race instead of once per driver.
    try:
        circuit_info = session.get_circuit_info()
        # Convert the rotation angle from degrees to radians.
        track_angle = circuit_info.rotation / 180 * np.pi
    except Exception as exc:
        # Best effort: skip races without circuit information, but say why.
        print(f"No data available for year {year}, round {gp}: {exc!r}")
        continue

    for driver in session.drivers:
        # Catch Exception rather than everything, so that Ctrl-C
        # (KeyboardInterrupt) can still stop the long-running scrape.
        try:
            df_pos_data_driver = session.laps.pick_driver(driver).get_pos_data()
            pos_data_driver = df_pos_data_driver.loc[:, ["X", "Y"]].to_numpy()

            # Rotate the raw positions into the circuit's map orientation.
            rotated_track = rotate(pos_data_driver, angle=track_angle)
            df_temp_race_data_driver = pd.DataFrame(rotated_track, columns=["x", "y"])

            temp_driver_info = session.get_driver(driver)

            df_temp_race_data_driver["round_number"] = gp
            df_temp_race_data_driver["year"] = year
            df_temp_race_data_driver["driver_number"] = temp_driver_info["DriverNumber"]
            # Assign positionally: the new frame has a fresh 0..n-1 index, so
            # the source index is reset to avoid label alignment producing
            # missing values when the source index is not zero-based.
            # NOTE(review): the column name "data" looks like a typo for
            # "date"; it is kept because it is part of the saved schema.
            df_temp_race_data_driver["data"] = df_pos_data_driver["Date"].reset_index(drop=True)
            df_temp_race_data_driver["pos_index"] = df_temp_race_data_driver.index
        except Exception as exc:
            # Best effort: skip drivers without usable data, but say why.
            print(
                f"No data available for driver {driver} "
                f"(year {year}, round {gp}): {exc!r}"
            )
            continue

        all_driver_data.append(df_temp_race_data_driver)

# Stack the per-driver frames; each keeps its own 0..n-1 index, which is also
# stored in the "pos_index" column.
df_track_data = pd.concat(all_driver_data)



In [None]:
# Downcast the metadata columns to compact integer types.
# NOTE(review): int8 tops out at 127 and uint16 at 65535 - confirm that round
# numbers, driver numbers and per-driver sample counts stay within range.
compact_dtypes = {
    "round_number": np.int8,
    "pos_index": np.uint16,
    "year": np.int16,
    "driver_number": np.int8,
}
for column_name, target_dtype in compact_dtypes.items():
    df_track_data[column_name] = df_track_data[column_name].astype(target_dtype)

In [None]:
# Print a summary (columns, dtypes, memory usage) of the combined frame.
df_track_data.info()

In [None]:
# Write the combined position data to a Parquet file.
output_path = "./static/data/race_data.parquet"
df_track_data.to_parquet(output_path)