## IMPORT LIBRARIES AND CACHE


In [1]:
import fastf1
import fastf1.plotting
from fastf1.core import Laps
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import plotly.express as px
import uuid
from datetime import datetime
import os

In [2]:
# FastF1 expects the cache directory to exist before caching is enabled,
# so create it on a fresh checkout instead of failing.
os.makedirs('cache', exist_ok=True)
fastf1.Cache.enable_cache('cache')

## SELECT RACE AND YEAR

In [None]:
# Season and Grand Prix to analyse; later cells read these module-level names.
year = 2024
race_name = "Bahrain"

# Fetch the race ('R') session of the selected event and load its data.
race = fastf1.get_session(year, race_name, 'R')
race.load()

## CREATE TABLE DRIVERS AND GPs

Using the year selected in the previous cell, store every driver who has raced in that season in one CSV file, and every Grand Prix held so far in another CSV file. Both tables are built in a single function so that each race session is loaded only once.

FIELDS: GP: ID,Name,Year,Location,Country,EventDate
        Drivers: ID,DriverNumber,FullName,Abbreviation

In [None]:
import fastf1
import pandas as pd
from datetime import datetime

def obtener_datos_anuales(year):
    """Build the driver table and the Grand Prix table for one season.

    Loads the race session of every event in the ``year`` schedule whose
    ``EventDate`` is not later than the current time. Each driver is recorded
    the first time they appear in a race; each successfully loaded event adds
    one Grand Prix row.

    Args:
        year: Season passed to ``fastf1.get_event_schedule``.

    Returns:
        A tuple ``(pilotos_df, GP_df)`` of DataFrames with the columns
        ``ID, DriverNumber, FullName, Abbreviation`` and
        ``ID, Name, Year, Location, Country, EventDate`` respectively.
        IDs are sequential and start at 0. Events that fail to load are
        reported on stdout and left out of ``GP_df``.
    """
    pilotos_data = []
    GP_data = []
    seen_drivers = set()  # driver identifiers already recorded, to avoid duplicates

    calendario = fastf1.get_event_schedule(year, include_testing=False)
    # Only events that have already taken place can be loaded.
    calendario = calendario[calendario['EventDate'] <= datetime.now()]

    for _, evento in calendario.iterrows():
        try:
            sesion = fastf1.get_session(year, evento['RoundNumber'], 'R')
            sesion.load()

            for driver in sesion.drivers:
                if driver in seen_drivers:
                    continue
                info_piloto = sesion.get_driver(driver)
                pilotos_data.append({
                    'ID': len(pilotos_data),
                    'DriverNumber': info_piloto['DriverNumber'],
                    'FullName': info_piloto['FullName'],
                    'Abbreviation': info_piloto['Abbreviation'],
                })
                seen_drivers.add(driver)

            GP_data.append({
                'ID': len(GP_data),
                'Name': evento['EventName'],
                'Year': year,
                'Location': evento['Location'],
                'Country': evento['Country'],
                'EventDate': evento['EventDate'],
            })
        except Exception as e:
            # Best effort: report the failing event and keep going. The name
            # column is 'EventName' (as used for the GP rows above); reading a
            # non-existent column here would raise inside the handler and
            # abort the whole season.
            print(f"Error con el evento {evento['EventName']}: {e}")
            continue

    pilotos_df = pd.DataFrame(pilotos_data)
    GP_df = pd.DataFrame(GP_data)

    return pilotos_df, GP_df

# Build both season tables in a single pass over the calendar.
pilotos, eventos = obtener_datos_anuales(year)

# Write under the folder of the selected season rather than a fixed one, and
# create it first: to_csv does not create missing directories.
season_dir = f'./{year}'
os.makedirs(season_dir, exist_ok=True)
pilotos.to_csv(os.path.join(season_dir, 'pilotos.csv'), index=False)
eventos.to_csv(os.path.join(season_dir, 'GP.csv'), index=False)



### CALCULATED VARIABLES
These tables hold the per-driver statistics calculated for each race. Each table is saved as "(GP name)_Grand_Prix" in the calculated variables folder.

FIELDS: DriverNumber,Driver,AvgLapTime,InitialPosition,FinalPosition,AvgSector1,AvgSector2,AvgSector3,LapTimeSTD


In [None]:
def calcular_variables_gran_premio(year):
    """Compute per-driver race statistics for every past event of a season.

    For each event in the ``year`` schedule whose ``EventDate`` is not later
    than the current time, the race and qualifying sessions are loaded and one
    CSV file named after the event is written to
    ``./<year>/calculated_variables``. Columns:
    ``DriverNumber, Driver, AvgLapTime, InitialPosition, FinalPosition,
    AvgSector1, AvgSector2, AvgSector3, LapTimeSTD`` (times in seconds).

    ``InitialPosition`` is the driver's ``Position`` in the qualifying
    results and ``FinalPosition`` is the ``Position`` of the driver's last
    recorded race lap.
    NOTE(review): grid penalties are therefore not reflected in
    ``InitialPosition`` - confirm whether that is intended.

    Args:
        year: Season passed to ``fastf1.get_event_schedule``; also selects
            the output folder.

    Returns:
        None. Events that fail are reported on stdout and skipped.
    """
    calendario = fastf1.get_event_schedule(year, include_testing=False)
    calendario = calendario[calendario['EventDate'] <= datetime.now()]

    # Follow the requested season instead of a fixed folder.
    output_dir = f'./{year}/calculated_variables'
    driver_columns = ['Driver', 'InitialPosition', 'FinalPosition',
                      'AvgSector1', 'AvgSector2', 'AvgSector3']

    for _, evento in calendario.iterrows():
        try:
            sesion = fastf1.get_session(year, evento['RoundNumber'], 'R')
            sesion.load()
            race_laps = sesion.laps

            # Mean lap time per driver, in seconds.
            driver_avg_lap_times_df = (
                race_laps.groupby(['DriverNumber', 'Driver'])['LapTime']
                .mean()
                .dt.total_seconds()
                .round(3)
                .reset_index()
            )
            driver_avg_lap_times_df.columns = ['DriverNumber', 'Driver', 'AvgLapTime']

            # Standard deviation of lap time per driver, in seconds.
            lap_time_std_df = (
                race_laps['LapTime'].dt.total_seconds()
                .groupby(race_laps['Driver'])
                .std()
                .round(3)
                .reset_index()
            )
            lap_time_std_df.columns = ['Driver', 'LapTimeSTD']

            qualy = fastf1.get_session(year, evento['RoundNumber'], 'Q')
            qualy.load()
            # Driver number -> qualifying position, for a safe lookup below.
            qualy_position = dict(zip(qualy.results['DriverNumber'],
                                      qualy.results['Position']))

            # One pass per driver; rows are collected in a list so the table
            # is built once instead of concatenating inside the loop.
            driver_rows = []
            for driver in sesion.drivers:
                laps = race_laps.pick_drivers(driver)
                # A driver missing from qualifying or without race laps gets
                # NaN instead of making the whole event fail.
                if laps.empty:
                    final_position = float('nan')
                else:
                    final_position = laps.iloc[-1]['Position']

                driver_rows.append({
                    'Driver': sesion.get_driver(driver)['Abbreviation'],
                    'InitialPosition': qualy_position.get(driver, float('nan')),
                    'FinalPosition': final_position,
                    'AvgSector1': laps['Sector1Time'].mean().total_seconds(),
                    'AvgSector2': laps['Sector2Time'].mean().total_seconds(),
                    'AvgSector3': laps['Sector3Time'].mean().total_seconds(),
                })

            per_driver_df = pd.DataFrame(driver_rows, columns=driver_columns)

            final_df = driver_avg_lap_times_df.merge(per_driver_df, on='Driver')
            final_df = final_df.merge(lap_time_std_df, on='Driver')

            os.makedirs(output_dir, exist_ok=True)
            output_file = os.path.join(output_dir, f"{evento['EventName']}.csv")
            final_df.to_csv(output_file, index=False)

            print(f"Archivo guardado: {output_file}")

        except Exception as e:
            # Best effort: report the failing event and continue with the next.
            print(f"Error con el evento {evento['EventName']}: {e}")
            continue

# Generate the calculated-variables tables for the season selected above.
calcular_variables_gran_premio(year=year)

## LAP DATA OF ALL LAPS AND ALL DRIVERS
Each lap produces the fields listed below, which are stored in a separate CSV file for each Grand Prix.

FIELDS: DriverNumber,Driver,LapNumber,Compound,TyreLife,Sector1Time,Sector2Time,Sector3Time,LapTime

In [None]:
def calcular_datos_carreras(year):
    """Export the lap-by-lap data of every past race of a season.

    For each event in the ``year`` schedule whose ``EventDate`` is not later
    than the current time, the race session is loaded and every lap of every
    driver is written to ``./<year>/lapdata/<event name>.csv`` (spaces in the
    event name replaced by underscores). Columns:
    ``DriverNumber, Driver, LapNumber, Compound, TyreLife, Sector1Time,
    Sector2Time, Sector3Time, LapTime``; times are seconds rounded to three
    decimals.

    Args:
        year: Season passed to ``fastf1.get_event_schedule``; also selects
            the output folder.

    Returns:
        None. Events that fail are reported on stdout and skipped.
    """
    lap_columns = ['LapNumber', 'Compound', 'TyreLife']
    time_columns = ['Sector1Time', 'Sector2Time', 'Sector3Time', 'LapTime']
    output_columns = ['DriverNumber', 'Driver'] + lap_columns + time_columns

    calendario = fastf1.get_event_schedule(year, include_testing=False)
    calendario = calendario[calendario['EventDate'] <= datetime.now()]

    for _, evento in calendario.iterrows():
        try:
            sesion = fastf1.get_session(year, evento['RoundNumber'], 'R')
            sesion.load()

            # Collect one frame per driver and concatenate once at the end.
            frames = []
            for driver in sesion.drivers:
                # pick_drivers replaces the deprecated pick_driver.
                laps = sesion.laps.pick_drivers(driver)
                if laps.empty:
                    continue

                info_piloto = sesion.get_driver(driver)

                temp_df = laps[lap_columns + time_columns].copy()
                temp_df['DriverNumber'] = info_piloto['DriverNumber']
                temp_df['Driver'] = info_piloto['Abbreviation']

                # Timedeltas are exported as plain seconds.
                for column in time_columns:
                    temp_df[column] = temp_df[column].dt.total_seconds()

                frames.append(temp_df)

            if frames:
                datos_pilotos = pd.concat(frames, ignore_index=True)[output_columns]
            else:
                # No laps at all: still write a header-only file.
                datos_pilotos = pd.DataFrame(columns=output_columns)

            datos_pilotos[time_columns] = datos_pilotos[time_columns].round(3)

            output_dir = f'./{year}/lapdata'
            os.makedirs(output_dir, exist_ok=True)

            # Replace spaces with underscores for the file name.
            event_name = evento['EventName'].replace(" ", "_")
            output_file = os.path.join(output_dir, f"{event_name}.csv")
            datos_pilotos.to_csv(output_file, index=False)

            print(f"Archivo guardado: {output_file}")

        except Exception as e:
            # Best effort: report the failing event and continue with the next.
            print(f"Error con el evento {evento['EventName']}: {e}")
            continue

# Use the season selected at the top of the notebook rather than a fixed
# year, so this cell stays in step with the other exports.
calcular_datos_carreras(year)


## TIME SERIES
Collect the telemetry of the selected race and store it in a single table containing the data of all drivers in that race.

FIELDS: Date,RPM,Speed,nGear,Throttle,Brake,DRS,Time,SessionTime,DriverAhead,DistanceToDriverAhead,X,Y,LapNumber,TrackStatus,DriverNumber,Driver,ID

In [15]:
# Export the telemetry of every driver of the selected race to a single CSV.
# Each sample is tagged with the lap it belongs to, that lap's track status,
# the driver, and a running row ID.
output_dir = f"./{year}/timeSeries/"
output_file = os.path.join(output_dir, f"{race_name}_time_series.csv")
os.makedirs(output_dir, exist_ok=True)

telemetry_columns = ['Date', 'RPM', 'Speed', 'nGear', 'Throttle', 'Brake', 'DRS',
                     'Time', 'SessionTime', 'DriverAhead', 'DistanceToDriverAhead', 'X', 'Y']

all_data = []
idx_counter = 0

for driver in race.drivers:
    # pick_drivers replaces the deprecated pick_driver; select the laps once.
    lap_data = race.laps.pick_drivers(driver)

    # Laps without a start time cannot delimit telemetry; a driver with none
    # is skipped.
    timed_laps = lap_data.dropna(subset=['LapStartTime']).sort_values('LapStartTime')
    if timed_laps.empty:
        continue

    driver_data = lap_data.get_telemetry().add_driver_ahead()
    data = driver_data[telemetry_columns].copy()

    # Assign each sample to the last lap whose start time is <= its
    # SessionTime. This is done by position in the sorted lap starts, because
    # the index labels of the driver's laps come from the full session table
    # and are not positions. Samples recorded before the first lap start are
    # attributed to the first lap.
    lap_starts = timed_laps['LapStartTime'].to_numpy()
    lap_idx = lap_starts.searchsorted(data['SessionTime'].to_numpy(), side='right') - 1
    lap_idx = lap_idx.clip(min=0)

    data['LapNumber'] = timed_laps['LapNumber'].to_numpy()[lap_idx]
    data['TrackStatus'] = timed_laps['TrackStatus'].to_numpy()[lap_idx]
    data['DriverNumber'] = driver
    data['Driver'] = race.get_driver(driver)['Abbreviation']
    data['ID'] = range(idx_counter, idx_counter + len(data))

    idx_counter += len(data)
    all_data.append(data)

all_data_df = pd.concat(all_data, ignore_index=True)
all_data_df.to_csv(output_file, index=False)
print(f"Datos exportados exitosamente a {output_file}")

Datos exportados exitosamente a ./2024/timeSeries/Bahrain_time_series.csv
