In [3]:
import pandas as pd
import numpy as np
import sklearn
import fastf1

In [6]:
pip install tqdm

Collecting tqdm
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Using cached tqdm-4.67.1-py3-none-any.whl (78 kB)
Installing collected packages: tqdm
Successfully installed tqdm-4.67.1
Note: you may need to restart the kernel to use updated packages.


In [7]:
from tqdm import tqdm

In [11]:
def get_session_data(year, session_type, event_list=None):
    all_data = []

    # Use full schedule if available
    if event_list is None:
        schedule = fastf1.get_event_schedule(year)
        event_list = schedule['EventName'].tolist()

    for gp_name in tqdm(event_list, desc=f"{year} {session_type}"):
        try:
            session = fastf1.get_session(year, gp_name, session_type)
            session.load()

            laps = session.laps
            if laps.empty:
                continue

            laps['Year'] = year
            laps['EventName'] = gp_name
            laps['SessionType'] = session_type
            all_data.append(laps)

        except Exception as e:
            print(f"Skipped {year} {gp_name} {session_type}: {e}")
            continue

    return pd.concat(all_data, ignore_index=True) if all_data else pd.DataFrame()


In [12]:
df_q_2024 = get_session_data(2024, 'Q')
df_r_2024 = get_session_data(2024, 'R')

core           INFO 	Loading data for Singapore Grand Prix - Qualifying [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '44', '63', '81', '27', '14', '22', '16', '55', '23', '43', '11', '20', '31', '3', '18', '10', '77', '24']
2024 Q:   4%|█▍                                  | 1/25 [00:00<00:18,  1.28it/s]core           INFO 	Loadi

Skipped 2024 United States Grand Prix R: The data you are trying to access has not been loaded yet. See `Session.load`


req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	No cached data found for lap_count. Loading data...
_api           INFO 	Fetching lap count data...
req            INFO 	No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
req            INFO 	No cached data found for _extended_timing_data. Loading data...
_api           INFO 	Fetching timing data...
req            INFO 	No cached data found for car_data. Loading data...
_api           INFO 	Fetching car data...
req            INFO 	No cached data found for weather_data. Loading data...
_api           INFO 	Fetching weather data...
req            INFO 	No cached data found for race_control_messages. Loading data...
_api           INFO 	Fetching race contr

Skipped 2024 Mexico City Grand Prix R: The data you are trying to access has not been loaded yet. See `Session.load`


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...
2024 R:  88%|██████████████████████████████▊    | 22/25 [00:30<00:04,  1.62s/it]

Skipped 2024 São Paulo Grand Prix R: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...
2024 R:  92%|████████████████████████████████▏  | 23/25 [00:31<00:02,  1.36s/it]

Skipped 2024 Las Vegas Grand Prix R: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...
2024 R:  96%|█████████████████████████████████▌ | 24/25 [00:31<00:01,  1.17s/it]

Skipped 2024 Qatar Grand Prix R: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...
2024 R: 100%|███████████████████████████████████| 25/25 [00:32<00:00,  1.31s/it]

Skipped 2024 Abu Dhabi Grand Prix R: Failed to load any schedule data.





In [13]:
completed_2025_races = [
    'Bahrain', 'Saudi Arabia', 'Australia', 'Japan',
    'China', 'Miami', 'Emilia Romagna', 'Monaco',
    'Canada', 'Spain', 'Austria', 'Great Britain'
]

In [14]:
df_q_2025 = get_session_data(2025, 'Q', completed_2025_races)
df_r_2025 = get_session_data(2025, 'R', completed_2025_races)

req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...
2025 Q:   8%|███                                 | 1/12 [00:00<00:05,  1.95it/s]

Skipped 2025 Bahrain Q: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...
2025 Q:  17%|██████                              | 2/12 [00:01<00:06,  1.53it/s]

Skipped 2025 Saudi Arabia Q: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...
2025 Q:  25%|█████████                           | 3/12 [00:02<00:06,  1.44it/s]

Skipped 2025 Australia Q: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...
2025 Q:  33%|████████████                        | 4/12 [00:02<00:05,  1.39it/s]

Skipped 2025 Japan Q: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...
2025 Q:  42%|███████████████                     | 5/12 [00:03<00:05,  1.37it/s]

Skipped 2025 China Q: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...
2025 Q:  50%|██████████████████                  | 6/12 [00:04<00:04,  1.36it/s]

Skipped 2025 Miami Q: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...
2025 Q:  58%|█████████████████████               | 7/12 [00:05<00:03,  1.35it/s]

Skipped 2025 Emilia Romagna Q: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...
2025 Q:  67%|████████████████████████            | 8/12 [00:05<00:02,  1.34it/s]

Skipped 2025 Monaco Q: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...
2025 Q:  75%|███████████████████████████         | 9/12 [00:06<00:02,  1.34it/s]

Skipped 2025 Canada Q: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...
2025 Q:  83%|█████████████████████████████▏     | 10/12 [00:07<00:01,  1.34it/s]

Skipped 2025 Spain Q: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...
2025 Q:  92%|████████████████████████████████   | 11/12 [00:08<00:00,  1.34it/s]

Skipped 2025 Austria Q: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...
2025 Q: 100%|███████████████████████████████████| 12/12 [00:08<00:00,  1.37it/s]


Skipped 2025 Great Britain Q: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...
2025 R:   8%|███                                 | 1/12 [00:00<00:08,  1.35it/s]

Skipped 2025 Bahrain R: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...
2025 R:  17%|██████                              | 2/12 [00:01<00:07,  1.34it/s]

Skipped 2025 Saudi Arabia R: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...
2025 R:  25%|█████████                           | 3/12 [00:02<00:06,  1.34it/s]

Skipped 2025 Australia R: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...
2025 R:  33%|████████████                        | 4/12 [00:02<00:05,  1.33it/s]

Skipped 2025 Japan R: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...
2025 R:  42%|███████████████                     | 5/12 [00:03<00:05,  1.33it/s]

Skipped 2025 China R: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...
2025 R:  50%|██████████████████                  | 6/12 [00:04<00:04,  1.33it/s]

Skipped 2025 Miami R: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...
2025 R:  58%|█████████████████████               | 7/12 [00:05<00:03,  1.33it/s]

Skipped 2025 Emilia Romagna R: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...
2025 R:  67%|████████████████████████            | 8/12 [00:05<00:02,  1.34it/s]

Skipped 2025 Monaco R: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...
2025 R:  75%|███████████████████████████         | 9/12 [00:06<00:02,  1.33it/s]

Skipped 2025 Canada R: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...
2025 R:  83%|█████████████████████████████▏     | 10/12 [00:07<00:01,  1.33it/s]

Skipped 2025 Spain R: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...
2025 R:  92%|████████████████████████████████   | 11/12 [00:08<00:00,  1.33it/s]

Skipped 2025 Austria R: Failed to load any schedule data.


req            INFO 	No cached data found for season_schedule. Loading data...
_api           INFO 	Fetching season schedule...
2025 R: 100%|███████████████████████████████████| 12/12 [00:08<00:00,  1.33it/s]

Skipped 2025 Great Britain R: Failed to load any schedule data.





In [18]:
df_all = pd.concat([df_q_2024, df_r_2024, df_q_2025, df_r_2025], ignore_index=True)

In [19]:
df_all.to_csv('f1_2024_2025_q_r.csv', index=False)

In [21]:
df_all.head()

Unnamed: 0,Time,Driver,DriverNumber,LapTime,LapNumber,Stint,PitOutTime,PitInTime,Sector1Time,Sector2Time,...,LapStartDate,TrackStatus,Position,Deleted,DeletedReason,FastF1Generated,IsAccurate,Year,EventName,SessionType
0,0 days 00:17:36.482000,NOR,4,NaT,1.0,1.0,0 days 00:15:24.715000,NaT,NaT,0 days 00:00:51.613000,...,2024-09-21 13:01:38.552,1,,False,,False,False,2024,Pre-Season Testing,Q
1,0 days 00:19:07.206000,NOR,4,0 days 00:01:30.724000,2.0,1.0,NaT,NaT,0 days 00:00:26.863000,0 days 00:00:38.252000,...,2024-09-21 13:03:50.319,1,,False,,False,True,2024,Pre-Season Testing,Q
2,0 days 00:21:48.149000,NOR,4,NaT,3.0,1.0,NaT,0 days 00:21:12.899000,0 days 00:00:36.760000,0 days 00:00:53.201000,...,2024-09-21 13:05:21.043,1,,False,,False,False,2024,Pre-Season Testing,Q
3,0 days 00:26:55.559000,NOR,4,NaT,4.0,2.0,0 days 00:24:57.106000,NaT,NaT,0 days 00:00:50.524000,...,2024-09-21 13:08:01.986,1,,False,,False,False,2024,Pre-Season Testing,Q
4,0 days 00:28:25.561000,NOR,4,0 days 00:01:30.002000,5.0,2.0,NaT,NaT,0 days 00:00:26.735000,0 days 00:00:37.824000,...,2024-09-21 13:13:09.396,1,,False,,False,True,2024,Pre-Season Testing,Q


In [24]:
df_all.describe()

Unnamed: 0,Time,LapTime,LapNumber,Stint,PitOutTime,PitInTime,Sector1Time,Sector2Time,Sector3Time,Sector1SessionTime,...,Sector3SessionTime,SpeedI1,SpeedI2,SpeedFL,SpeedST,TyreLife,LapStartTime,LapStartDate,Position,Year
count,29324,26595,29324.0,29324.0,2996,2994,26900,29188,28742,26853,...,28742,25841.0,29187.0,26313.0,26849.0,29321.0,29324,29314,21441.0,29324.0
mean,0 days 01:27:21.208331196,0 days 00:01:33.997582665,24.986427,2.266505,0 days 00:52:15.720604138,0 days 00:56:35.817704074,0 days 00:00:29.350149182,0 days 00:00:35.699851205,0 days 00:00:28.660994920,0 days 01:29:52.762306669,...,0 days 01:27:49.158981977,252.699934,244.86028,271.913655,287.648255,11.76979,0 days 01:25:24.911316634,2024-06-30 07:45:05.632425728,9.939089,2024.0
min,0 days 00:05:49.596000,0 days 00:01:04.314000,1.0,1.0,0 days 00:04:17.809000,0 days 00:15:19.946000,0 days 00:00:16.251000,0 days 00:00:17.289000,0 days 00:00:17.503000,0 days 00:06:17.920000,...,0 days 00:05:49.775000,45.0,26.0,1.0,25.0,1.0,0 days 00:04:05.608000,2024-03-01 16:00:11.226000,1.0,2024.0
25%,0 days 01:00:05.225250,0 days 00:01:21.561000,9.0,1.0,0 days 00:26:05.572250,0 days 00:32:03.392000,0 days 00:00:25.645000,0 days 00:00:29.481000,0 days 00:00:23.791000,0 days 01:03:47.417000,...,0 days 01:00:25.524000,214.0,211.0,252.0,282.0,3.0,0 days 00:57:18.985000,2024-05-05 20:56:54.634000128,5.0,2024.0
50%,0 days 01:26:39.062000,0 days 00:01:32.296000,20.0,2.0,0 days 00:48:00.754000,0 days 00:53:33.982000,0 days 00:00:28.927500,0 days 00:00:34.781500,0 days 00:00:27.133500,0 days 01:30:31.180000,...,0 days 01:27:29.444500,269.0,256.0,275.0,298.0,8.0,0 days 01:24:56.201000,2024-06-30 13:09:53.039500032,10.0,2024.0
75%,0 days 01:58:32.159500,0 days 00:01:39.727500,39.0,3.0,0 days 01:11:28.035000,0 days 01:14:48.272250,0 days 00:00:31.553250,0 days 00:00:41.975000,0 days 00:00:30.526000,0 days 02:00:14.422000,...,0 days 01:59:18.344250,288.0,272.0,290.0,311.0,17.0,0 days 01:57:03.156250,2024-08-31 14:17:37.221999872,15.0,2024.0
max,0 days 03:19:21.748000,0 days 00:42:06.253000,78.0,8.0,0 days 03:11:24.136000,0 days 03:10:59.688000,0 days 00:01:39.110000,0 days 00:01:30.308000,0 days 00:01:37.875000,0 days 03:18:26.157000,...,0 days 03:19:21.804000,357.0,344.0,357.0,357.0,78.0,0 days 03:18:06.123000,2024-12-07 15:03:47.162000,20.0,2024.0
std,0 days 00:39:48.015479706,0 days 00:01:00.587069416,18.526038,1.221898,0 days 00:31:27.312103619,0 days 00:29:26.562635075,0 days 00:00:07.183371615,0 days 00:00:08.377517168,0 days 00:00:07.959559263,0 days 00:39:05.429777585,...,0 days 00:39:55.721695126,47.109084,45.214469,30.25289,43.089262,11.259875,0 days 00:40:11.935732125,,5.507747,0.0


In [28]:
df_all.drop(columns=['SpeedI1','SpeedI2', 'SpeedST', 'SpeedFL'], inplace=True)


In [29]:
df_all.dtypes

Time                  timedelta64[ns]
Driver                         object
DriverNumber                   object
LapTime               timedelta64[ns]
LapNumber                     float64
Stint                         float64
PitOutTime            timedelta64[ns]
PitInTime             timedelta64[ns]
Sector1Time           timedelta64[ns]
Sector2Time           timedelta64[ns]
Sector3Time           timedelta64[ns]
Sector1SessionTime    timedelta64[ns]
Sector2SessionTime    timedelta64[ns]
Sector3SessionTime    timedelta64[ns]
IsPersonalBest                 object
Compound                       object
TyreLife                      float64
FreshTyre                        bool
Team                           object
LapStartTime          timedelta64[ns]
LapStartDate           datetime64[ns]
TrackStatus                    object
Position                      float64
Deleted                        object
DeletedReason                  object
FastF1Generated                  bool
IsAccurate  

In [32]:
df_all.drop(columns=['LapNumber','Stint','PitOutTime','PitInTime','Sector1Time',
                     'Sector2Time','Sector3Time','Sector1SessionTime','Sector2SessionTime',
                     'Sector3SessionTime','FreshTyre','TrackStatus','Deleted','DeletedReason',
                     'FastF1Generated','IsAccurate','LapStartTime','LapStartDate'],inplace=True)

In [36]:
df_all.drop(columns=['Time','DriverNumber','IsPersonalBest'],inplace=True)

In [37]:
df_all.dtypes

Driver                  object
LapTime        timedelta64[ns]
Compound                object
TyreLife               float64
Team                    object
Position               float64
Year                     int64
EventName               object
SessionType             object
dtype: object

In [38]:
df_all['LapTimeSeconds'] = df_all['LapTime'].dt.total_seconds()
df_all.drop(columns=['LapTime'], inplace=True)

In [43]:
driver_dummies = pd.get_dummies(df_all['Driver'], prefix='driver')
df_all = pd.concat([df_all, driver_dummies], axis=1)

In [44]:
team_dummies = pd.get_dummies(df_all['Team'], prefix='team')
df_all = pd.concat([df_all, team_dummies], axis=1)

In [45]:
compound_dummies = pd.get_dummies(df_all['Compound'], prefix='compound')
df_all = pd.concat([df_all, compound_dummies], axis=1)

In [46]:
sessiontype_dummies = pd.get_dummies(df_all['SessionType'], prefix='sessiontype')
df_all = pd.concat([df_all, sessiontype_dummies], axis=1)

In [47]:
df_all.dtypes

Driver                    object
Compound                  object
TyreLife                 float64
Team                      object
Position                 float64
Year                       int64
EventName                 object
SessionType               object
LapTimeSeconds           float64
driver_ALB                  bool
driver_ALO                  bool
driver_BEA                  bool
driver_BOT                  bool
driver_COL                  bool
driver_DOO                  bool
driver_GAS                  bool
driver_HAM                  bool
driver_HUL                  bool
driver_LAW                  bool
driver_LEC                  bool
driver_MAG                  bool
driver_NOR                  bool
driver_OCO                  bool
driver_PER                  bool
driver_PIA                  bool
driver_RIC                  bool
driver_RUS                  bool
driver_SAI                  bool
driver_SAR                  bool
driver_STR                  bool
driver_TSU

In [49]:
df_all.drop(['Driver'],axis=1,inplace=True)
df_all.drop(['Compound'],axis=1,inplace=True)
df_all.drop(['SessionType'],axis=1,inplace=True)
df_all.drop(['Team'],axis=1,inplace=True)

In [50]:
from sklearn.preprocessing import LabelEncoder

In [51]:
le_event = LabelEncoder()
df_all['EventEncoded'] = le_event.fit_transform(df_all['EventName'])

In [52]:
df_all.drop(columns=['EventName'],axis=1, inplace=True)

In [53]:
df_all.dtypes

TyreLife                 float64
Position                 float64
Year                       int64
LapTimeSeconds           float64
driver_ALB                  bool
driver_ALO                  bool
driver_BEA                  bool
driver_BOT                  bool
driver_COL                  bool
driver_DOO                  bool
driver_GAS                  bool
driver_HAM                  bool
driver_HUL                  bool
driver_LAW                  bool
driver_LEC                  bool
driver_MAG                  bool
driver_NOR                  bool
driver_OCO                  bool
driver_PER                  bool
driver_PIA                  bool
driver_RIC                  bool
driver_RUS                  bool
driver_SAI                  bool
driver_SAR                  bool
driver_STR                  bool
driver_TSU                  bool
driver_VER                  bool
driver_ZHO                  bool
team_Alpine                 bool
team_Aston Martin           bool
team_Ferra

In [56]:

bool_cols = df_all.select_dtypes(include='bool').columns
df_all[bool_cols] = df_all[bool_cols].astype(int)


In [58]:
df_all.head()

Unnamed: 0,TyreLife,Position,Year,LapTimeSeconds,driver_ALB,driver_ALO,driver_BEA,driver_BOT,driver_COL,driver_DOO,...,team_Williams,compound_HARD,compound_INTERMEDIATE,compound_MEDIUM,compound_None,compound_SOFT,compound_WET,sessiontype_Q,sessiontype_R,EventEncoded
0,1.0,,2024,,0,0,0,0,0,0,...,0,0,0,0,0,1,0,1,0,18
1,2.0,,2024,90.724,0,0,0,0,0,0,...,0,0,0,0,0,1,0,1,0,18
2,3.0,,2024,,0,0,0,0,0,0,...,0,0,0,0,0,1,0,1,0,18
3,4.0,,2024,,0,0,0,0,0,0,...,0,0,0,0,0,1,0,1,0,18
4,5.0,,2024,90.002,0,0,0,0,0,0,...,0,0,0,0,0,1,0,1,0,18


In [60]:
df_all = df_all.dropna(subset=['Position'])

In [61]:
df_all = df_all.dropna(subset=['LapTimeSeconds'])

In [62]:
df_all.isna().sum()

TyreLife                 0
Position                 0
Year                     0
LapTimeSeconds           0
driver_ALB               0
driver_ALO               0
driver_BEA               0
driver_BOT               0
driver_COL               0
driver_DOO               0
driver_GAS               0
driver_HAM               0
driver_HUL               0
driver_LAW               0
driver_LEC               0
driver_MAG               0
driver_NOR               0
driver_OCO               0
driver_PER               0
driver_PIA               0
driver_RIC               0
driver_RUS               0
driver_SAI               0
driver_SAR               0
driver_STR               0
driver_TSU               0
driver_VER               0
driver_ZHO               0
team_Alpine              0
team_Aston Martin        0
team_Ferrari             0
team_Haas F1 Team        0
team_Kick Sauber         0
team_McLaren             0
team_Mercedes            0
team_RB                  0
team_Red Bull Racing     0
t

In [69]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix,accuracy_score

In [70]:
X = df_all.drop(columns=['Position'])  
y = df_all['Position']

In [71]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [67]:
model = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42
)

model.fit(X_train, y_train)

0,1,2
,loss,'log_loss'
,learning_rate,0.1
,n_estimators,100
,subsample,1.0
,criterion,'friedman_mse'
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_depth,3
,min_impurity_decrease,0.0


In [73]:
y_pred = model.predict(X_test)

In [74]:
print("✅ Accuracy:", accuracy_score(y_test, y_pred))
print("\n📊 Classification Report:\n", classification_report(y_test, y_pred, digits=3))
print("\n📉 Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

✅ Accuracy: 0.4647127784290739

📊 Classification Report:
               precision    recall  f1-score   support

         1.0      0.701     0.775     0.736       227
         2.0      0.641     0.706     0.672       228
         3.0      0.598     0.617     0.607       227
         4.0      0.530     0.586     0.556       227
         5.0      0.429     0.388     0.407       227
         6.0      0.588     0.601     0.594       228
         7.0      0.377     0.395     0.385       228
         8.0      0.446     0.493     0.469       227
         9.0      0.460     0.427     0.443       227
        10.0      0.427     0.493     0.458       227
        11.0      0.377     0.367     0.372       226
        12.0      0.412     0.301     0.348       226
        13.0      0.460     0.387     0.420       225
        14.0      0.393     0.422     0.407       225
        15.0      0.345     0.360     0.352       225
        16.0      0.311     0.297     0.304       222
        17.0      0.344

In [75]:
import joblib

In [94]:
fastf1.Cache.enable_cache('cache_dir')

session = fastf1.get_session(2025, 'British Grand Prix', 'Q')
session.load()

laps = session.laps
drivers = laps['Driver'].unique()

quickest_laps = []
for drv in drivers:
    drv_laps = laps[laps['Driver'] == drv]
    drv_laps = drv_laps.dropna(subset=['LapTime']) 
    if not drv_laps.empty:
        fastest = drv_laps.loc[drv_laps['LapTime'].idxmin()]
        quickest_laps.append(fastest)

df = pd.DataFrame(quickest_laps)

df = df[['Driver', 'Team', 'Compound', 'TyreLife', 'LapTime']].copy()
df['LapTimeSeconds'] = df['LapTime'].dt.total_seconds()
df['SessionType'] = 1  
df['Year'] = 2025
df['EventName'] = session.event['EventName']

model = joblib.load('gbc_model.pkl')
event_encoder = joblib.load('label_encoder_eventname.pkl')
train_columns = joblib.load('train_columns.pkl')

df['EventEncoded'] = event_encoder.transform([df['EventName'].iloc[0]])[0]

df.drop(columns=['LapTime', 'EventName'], inplace=True)

df_encoded = pd.get_dummies(df, columns=['Driver', 'Team', 'Compound'], drop_first=False)

for col in train_columns:
    if col not in df_encoded.columns:
        df_encoded[col] = 0
df_encoded = df_encoded[train_columns]

y_pred = model.predict(df_encoded)
df['PredictedPosition'] = y_pred

podium = df.sort_values(by='PredictedPosition').head(3)
print("🏁 Predicted Podium for 2025 British Grand Prix:")
print(podium[['Driver', 'Team', 'PredictedPosition']])



core           INFO 	Loading data for British Grand Prix - Qualifying [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '81', '4', '63', '44', '16', '12', '87', '14', '10', '55', '22', '6', '23', '31', '30', '5', '18', '27', '43']


🏁 Predicted Podium for 2025 British Grand Prix:
    Driver             Team  PredictedPosition
16     VER  Red Bull Racing               17.0
271    STR     Aston Martin               17.0
265    BOR      Kick Sauber               17.0
