In [41]:
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tabpfn import TabPFNClassifier
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim

In [42]:
# Read the three pre-split Grand Prix feature tables in train / val / test order.
train, val, test = [
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/grandprix_features_train.csv",
        "../data/grandprix_features_val.csv",
        "../data/grandprix_features_test.csv",
    )
]

# Preview the first rows of the test split.
test.head()


Unnamed: 0,year,driver_id,constructor_id,circuit_id,grid_position,quali_delta,quali_tm_delta,season_pts_driver,season_pts_team,last_3_avg,is_street_circuit,is_wet,points_scored
0,2025,norris,mclaren,melbourne,1,0.0,-0.084,0.0,0.0,0.0,1,1,1
1,2025,verstappen,red_bull_racing,melbourne,3,0.385,-1.613,0.0,0.0,0.0,1,1,1
2,2025,russell,mercedes,melbourne,4,0.45,-0.979,0.0,0.0,0.0,1,1,1
3,2025,antonelli,mercedes,melbourne,16,1.429,0.979,0.0,15.0,0.0,1,1,1
4,2025,albon,williams,melbourne,6,0.641,-0.194,0.0,0.0,0.0,1,1,1


In [43]:
# Categorical feature columns; the notebook label-encodes these to integer codes.
CAT_COLS = ["driver_id", "constructor_id", "circuit_id"]

# Remaining feature columns, passed to the model with the values read from the CSVs.
NUM_COLS = [
    "grid_position", "quali_delta", "quali_tm_delta",
    "season_pts_driver", "season_pts_team", "last_3_avg",
    "is_street_circuit", "is_wet",
]

# Name of the label column used as the prediction target.
TARGET = "points_scored"
# Label-encode each categorical column in place. The vocabulary is pooled from
# all three splits, so a given value receives the same integer code in each one.
for col in CAT_COLS:
    all_vals = pd.concat([frame[col] for frame in (train, val, test)], axis=0)
    # Distinct values of the pooled column, in the order pandas assigns to categories.
    codes = all_vals.astype("category").cat.categories
    # value -> position of that value within `codes`
    mapping = dict(zip(codes, range(len(codes))))
    for df in (train, val, test):
        encoded = df[col].map(mapping)
        df[col] = encoded.astype("int64")
        
# Feature matrix layout: categorical columns first, then the numeric ones.
feature_cols = CAT_COLS + NUM_COLS

X_train = train[feature_cols]
y_train = train[TARGET]

X_val = val[feature_cols]
y_val = val[TARGET]

X_test = test[feature_cols]
y_test = test[TARGET]

# Display the encoded training features.
X_train

Unnamed: 0,driver_id,constructor_id,circuit_id,grid_position,quali_delta,quali_tm_delta,season_pts_driver,season_pts_team,last_3_avg,is_street_circuit,is_wet
0,20,12,14,1,0.000,-0.594,0.0,0.0,0.000000,1,0
1,43,12,14,2,0.594,0.594,0.0,25.0,0.000000,1,0
2,55,5,14,4,1.415,-0.048,0.0,0.0,0.000000,1,0
3,31,21,14,3,1.391,-0.078,0.0,0.0,0.000000,1,0
4,34,19,14,11,2.473,-2.576,0.0,0.0,0.000000,1,0
...,...,...,...,...,...,...,...,...,...,...,...
3735,47,21,36,20,1.087,0.234,1.0,26.0,0.000000,0,0
3736,57,0,36,19,1.669,0.326,6.0,16.0,0.000000,0,0
3737,46,5,36,16,1.293,1.154,178.0,363.0,9.333333,0,0
3738,6,0,36,18,1.343,-0.326,10.0,16.0,0.000000,0,0


In [44]:
# Positional index of every categorical column within X_train.
cat_idx = list(map(X_train.columns.get_loc, CAT_COLS))

In [45]:
# Use CUDA when torch reports it as available; otherwise run on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# cat_idx marks which feature positions TabPFN should treat as categorical.
clf = TabPFNClassifier(
    categorical_features_indices=cat_idx,
    device=device,
)

# Fit on the training split; the returned estimator is the cell's displayed output.
clf.fit(X_train, y_train)

0,1,2
,n_estimators,8
,categorical_features_indices,"[0, 1, ...]"
,softmax_temperature,0.9
,balance_probabilities,False
,average_before_softmax,False
,model_path,'auto'
,device,device(type='cuda')
,ignore_pretraining_limits,False
,inference_precision,'auto'
,fit_mode,'fit_preprocessors'


In [46]:
# Predict labels for the validation split and report plain accuracy.
predictions = clf.predict(X_val)
val_accuracy = accuracy_score(y_val, predictions)
print("Accuracy", val_accuracy)

Accuracy 0.8350730688935282
