In [7]:
# Importing all packages
import os
import fastf1
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error
import shutil

# Enable FastF1's on-disk cache so repeated session loads reuse already-downloaded data
custom_cache_folder = "f1_cache"
os.makedirs(custom_cache_folder, exist_ok=True)  # create the folder on first run; no error if it already exists
fastf1.Cache.enable_cache(custom_cache_folder)

# Load the rookie lookup table; it is joined to the race results on ["Year", "Abbreviation"] below
is_rookie = pd.read_csv("Dataset/isRookie.csv")
#print(is_rookie)

# Race to analyse - hard-coded for now, later this will be a user input
circuit_name = "Abu Dhabi"

# Function that formats data
def get_race_data(year, circuit):
    """Build a per-driver results table for one race, enriched with weather and fastest lap.

    Loads the race ('R') session for ``circuit`` in ``year`` through FastF1,
    takes the classification columns from ``session.results``, adds the
    session-wide weather averages as constant columns, and joins each
    driver's fastest lap time.

    Args:
        year: Season passed to ``fastf1.get_session``.
        circuit: Event identifier passed to ``fastf1.get_session``.

    Returns:
        A DataFrame with the columns ``FullName``, ``Abbreviation``,
        ``TeamName``, ``GridPosition``, ``Position``, ``Status``, ``Year``,
        ``Humidity``, ``Temperature``, ``Rain`` and ``FastestLap``.
        ``FastestLap`` is in seconds and is missing for drivers without a
        valid timed lap.
    """
    # Load race data
    session = fastf1.get_session(year, circuit, 'R')
    session.load()

    weather_data = session.weather_data
    laps = session.laps

    # Find the fastest lap of every driver that appears in the lap data
    fastest_laps = []
    for driver in laps["Driver"].unique():
        fastest_lap = laps.pick_drivers(driver).pick_fastest()
        # pick_fastest() has been seen to return None or a lap without a
        # usable LapTime; getattr() folds "no lap" and "no LapTime field"
        # into None, and pd.isna() additionally catches NaT/NaN.
        lap_time = getattr(fastest_lap, "LapTime", None)
        fastest_lap_seconds = None if pd.isna(lap_time) else lap_time.total_seconds()
        fastest_laps.append([driver, fastest_lap_seconds])

    # Convert it to a DataFrame for an easier merge with the result table
    fastest_lap_df = pd.DataFrame(fastest_laps, columns=["Abbreviation", "FastestLap"])

    # assign() returns a new frame, so the extra columns are not written into
    # a slice of session.results (which raised SettingWithCopyWarning).
    results = session.results[
        ["FullName", "Abbreviation", "TeamName", "GridPosition", "Position", "Status"]
    ].assign(
        Year=year,
        Humidity=weather_data["Humidity"].mean(),
        Temperature=weather_data["AirTemp"].mean(),
        Rain=weather_data["Rainfall"].mean(),
    )

    # NOTE(review): this is an inner join, so a driver listed in the results
    # but absent from the lap data would be dropped here - confirm that is intended.
    results = results.merge(fastest_lap_df, on="Abbreviation")

    return results

# Fetch the same race for each season of interest (evaluated in order 2022 -> 2024)
session_2022, session_2023, session_2024 = (
    get_race_data(season, circuit_name) for season in (2022, 2023, 2024)
)

# Stack the three seasons into a single table with a fresh 0..n-1 index
all_sessions = pd.concat(
    [session_2022, session_2023, session_2024],
    ignore_index=True,
)

# Attach the rookie information by season and driver abbreviation
final_sessions = pd.merge(all_sessions, is_rookie, on=["Year", "Abbreviation"])
final_sessions



core           INFO 	Loading data for Abu Dhabi Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '16', '11', '55', '63', '4', '31', '18', '3', '5', '22', '24', '23', '10', '77', '47', '20', '44', '6', '14']
A value is trying to be set on a copy of a slice from a DataF

Error deleting f1_cache\fastf1_http_cache.sqlite: [WinError 32] The process cannot access the file because it is being used by another process: 'f1_cache\\fastf1_http_cache.sqlite'
