In [8]:
# Importing all packages
import os
import fastf1
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error
import shutil

# Keep FastF1 downloads in a local folder so later runs can reuse them
custom_cache_folder = "f1_cache"
os.makedirs(custom_cache_folder, exist_ok=True)
fastf1.Cache.enable_cache(custom_cache_folder)

# Race to analyse - later this will be a user input
circuit_name = "Abu Dhabi"

# Rookie lookup table, loaded from CSV
is_rookie = pd.read_csv("Dataset/isRookie.csv")

# Function that formats data
def get_race_data(year, circuit) -> pd.DataFrame:
    """Build a per-driver results table for one race session.

    Loads the race ('R') session for ``circuit`` in ``year`` through FastF1 and
    combines the classification with session-wide weather averages and each
    driver's fastest lap time.

    Args:
        year: Season passed to ``fastf1.get_session``; also written to the
            ``Year`` column of the returned table.
        circuit: Event identifier passed to ``fastf1.get_session``.

    Returns:
        A DataFrame with one row per entry in ``session.results`` and the
        columns ``FullName``, ``Abbreviation``, ``TeamName``, ``GridPosition``,
        ``Position``, ``Status``, ``Year``, ``Humidity``, ``Temperature``,
        ``Rain`` and ``FastestLap``. The weather columns hold the mean of the
        corresponding ``session.weather_data`` column and are therefore the
        same for every row. ``FastestLap`` is in seconds and is missing for
        drivers without a valid timed lap.
    """
    # Load race data
    session = fastf1.get_session(year, circuit, 'R')
    session.load()

    laps = session.laps
    weather_data = session.weather_data

    # Find fastest laps of all drivers from that session
    fastest_laps = []
    for driver in laps["Driver"].unique():
        fastest_lap = laps.pick_drivers(driver).pick_fastest()
        lap_seconds = None
        # pick_fastest() does not always return a usable lap, so only convert
        # when a non-null LapTime is actually present.
        if fastest_lap is not None and hasattr(fastest_lap, 'LapTime'):
            lap_time = fastest_lap['LapTime']
            if not pd.isna(lap_time):
                lap_seconds = lap_time.total_seconds()
        fastest_laps.append([driver, lap_seconds])

    # Convert it to DataFrame for an easier merge with result table
    fastest_lap_df = pd.DataFrame(fastest_laps, columns=["Abbreviation", "FastestLap"])

    # Work on an explicit copy: assigning columns to a slice of session.results
    # raises SettingWithCopyWarning and may write into the session's own table.
    results = session.results[
        ["FullName", "Abbreviation", "TeamName", "GridPosition", "Position", "Status"]
    ].copy()
    results["Year"] = year
    results["Humidity"] = weather_data["Humidity"].mean()
    results["Temperature"] = weather_data["AirTemp"].mean()
    results["Rain"] = weather_data["Rainfall"].mean()

    # Left join so that classified drivers with no entry in session.laps are
    # kept (with a missing FastestLap) instead of being silently dropped.
    return results.merge(fastest_lap_df, on="Abbreviation", how="left")

# Collect the race table for each season of interest, in chronological order
session_2022, session_2023, session_2024 = (
    get_race_data(season, circuit_name) for season in (2022, 2023, 2024)
)

# Stack the seasons into a single table with a fresh index
all_sessions = pd.concat(
    [session_2022, session_2023, session_2024],
    ignore_index=True,
)

# Attach the rookie table, matching on season and driver abbreviation
final_sessions = pd.merge(all_sessions, is_rookie, on=["Year", "Abbreviation"])
final_sessions



core           INFO 	Loading data for Abu Dhabi Grand Prix - Race [v3.4.4]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for lap_count. Loading data...
_api           INFO 	Fetching lap count data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
req            INFO 	Data has been written to cache!
req            INFO 	No 

Unnamed: 0,FullName,Abbreviation,TeamName,GridPosition,Position,Status,Year,Humidity,Temperature,Rain,FastestLap,isRookie
0,Max Verstappen,VER,Red Bull Racing,1.0,1.0,Finished,2022,61.683871,28.575484,0.0,89.392,False
1,Charles Leclerc,LEC,Ferrari,3.0,2.0,Finished,2022,61.683871,28.575484,0.0,89.719,False
2,Sergio Perez,PER,Red Bull Racing,2.0,3.0,Finished,2022,61.683871,28.575484,0.0,88.972,False
3,Carlos Sainz,SAI,Ferrari,4.0,4.0,Finished,2022,61.683871,28.575484,0.0,88.879,False
4,George Russell,RUS,Mercedes,6.0,5.0,Finished,2022,61.683871,28.575484,0.0,88.836,False
5,Lando Norris,NOR,McLaren,7.0,6.0,Finished,2022,61.683871,28.575484,0.0,88.391,False
6,Esteban Ocon,OCO,Alpine,8.0,7.0,Finished,2022,61.683871,28.575484,0.0,89.333,False
7,Lance Stroll,STR,Aston Martin,14.0,8.0,Finished,2022,61.683871,28.575484,0.0,89.62,False
8,Daniel Ricciardo,RIC,McLaren,13.0,9.0,Finished,2022,61.683871,28.575484,0.0,90.785,False
9,Sebastian Vettel,VET,Aston Martin,9.0,10.0,Finished,2022,61.683871,28.575484,0.0,90.312,False
