In [1]:
#import
import fastf1
import os
import pandas as pd

ModuleNotFoundError: No module named 'fastf1'

In [None]:
# Store downloaded session data in FastF1's on-disk cache so later runs reuse it.
# NOTE(review): r"\cache" has no drive letter, so on Windows it presumably
# resolves to the root of the current drive — consider a project-relative
# directory instead; confirm the intended location.
cache_dir = r"\cache"
# FastF1 expects the cache directory to already exist, so create it up front
# (no-op when it is already there).
os.makedirs(cache_dir, exist_ok=True)
fastf1.Cache.enable_cache(cache_dir)

# Seasons to analyse (2021-2024).
years = [2021, 2022, 2023, 2024]

# Per-season containers filled by the following cells:
laps_data = {}           # year -> laps as loaded from the session
cleaned_laps_data = {}   # year -> laps after cleaning
preprocessed_laps = []   # one preprocessed frame per season

In [None]:
# Fetch all the race data from 2021-2024
def _load_race_laps(year):
    """Load the Abu Dhabi race session for `year` and return its laps."""
    print(f"Loading Abu Dhabi {year} GP session...")
    race = fastf1.get_session(year, 'Abu Dhabi', 'R')
    race.load()
    return race.laps


# Store each season's laps under its year.
for year in years:
    laps_data[year] = _load_race_laps(year)

print("\nAll sessions loaded.")

Loading Abu Dhabi 2021 GP session...


core           INFO 	Loading data for Abu Dhabi Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['33', '44', '55', '22', '10', '77', '4', '14', '31', '16', '5', '3', '18', '47', '11', '6', '99', '63', '7', '9']
core           INFO 	Loading data for Abu Dhabi Grand Prix - R

Loading Abu Dhabi 2022 GP session...


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '16', '11', '55', '63', '4', '31', '18', '3', '5', '22', '24', '23', '10', '77', '47', '20', '44', '6', '14']


Loading Abu Dhabi 2023 GP session...


core           INFO 	Loading data for Abu Dhabi Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '16', '63', '11', '4', '81', '14', '22', '44', '18', '3', '31', '10', '23', '27', '2', '24', '55', '77', '20']


Loading Abu Dhabi 2024 GP session...


core           INFO 	Loading data for Abu Dhabi Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '55', '16', '44', '63', '1', '10', '27', '14', '81', '23', '22', '24', '18', '61', '20', '30', '77', '43', '11']



All sessions loaded.


## Data Cleaning

In [None]:
# Clean every season's laps: keep only timed, accurate laps and label
# missing tyre compounds as 'UNKNOWN'.
for year in years:
    print(f"Cleaning laps for {year}")
    laps = laps_data[year]

    # A lap is kept when it has a lap time AND the dataset flags it as accurate.
    # .eq(True) treats anything that is not exactly True as "not accurate".
    keep = laps['LapTime'].notna() & laps['IsAccurate'].eq(True)

    # Filtering returns a new frame and assign() sets the column on a fresh
    # copy, so laps_data[year] is left untouched and nothing is written into a
    # filtered slice (the usual source of SettingWithCopyWarning).
    df = laps[keep].assign(
        Compound=lambda kept: kept['Compound'].fillna('UNKNOWN')
    )

    cleaned_laps_data[year] = df
    print(f"   -> Cleaned laps: {df.shape[0]} rows\n")


print("\nAll laps cleaned.\n")

Cleaning laps for 2021
   -> Cleaned laps: 818 rows

Cleaning laps for 2022
   -> Cleaned laps: 1032 rows

Cleaning laps for 2023
   -> Cleaned laps: 1063 rows

Cleaning laps for 2024
   -> Cleaned laps: 921 rows


All laps cleaned.



## Data Preprocessing

In [None]:
# Rebuild the list from scratch on every run of this cell. Appending to a list
# created in another cell means a re-run would add each season a second time
# and duplicate rows in the combined data.
preprocessed_laps = []

for year in years:
    print(f"Preprocessing cleaned laps for {year}\n")
    # Work on a copy so the cleaned laps stay unchanged.
    df = cleaned_laps_data[year].copy()

    # Lap time as a plain number of seconds.
    df['LapTimeSeconds'] = df['LapTime'].dt.total_seconds()

    # Tag every lap with its season.
    df['Year'] = year

    preprocessed_laps.append(df)

print("Preprocessing complete.\n")

Preprocessing cleaned laps for 2021

Preprocessing cleaned laps for 2022

Preprocessing cleaned laps for 2023

Preprocessing cleaned laps for 2024

Preprocessing complete.



In [None]:
# Directory the combined CSV is written to.
# NOTE(review): absolute, machine-specific path — consider a project-relative
# data directory so the notebook runs on other machines.
output_dir = r"C:\data"
# Actually create the directory: to_csv does not create missing parent
# folders and would fail if it is absent. No-op when it already exists.
os.makedirs(output_dir, exist_ok=True)

# Stack all preprocessed seasons into a single frame with a fresh index.
combined_df = pd.concat(preprocessed_laps, ignore_index=True)

# Keep only the columns needed downstream.
combined_df = combined_df[[
    'Year', 'Driver', 'Stint', 'LapNumber', 'Compound',
    'LapTimeSeconds', 'TrackStatus', 'PitInTime', 'PitOutTime'
]]

# Save the data to a CSV (without the index column).
output_path = os.path.join(output_dir, "lap_time.csv")
combined_df.to_csv(output_path, index=False)

print(f"Preprocessed data saved to: {output_path}")