In [81]:
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tabpfn import TabPFNClassifier
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline

In [82]:
def _load_split(path):
    """Read one feature split from CSV, discarding saved-index columns.

    A CSV written with its DataFrame index comes back from ``pd.read_csv``
    with an extra ``Unnamed: 0`` column (visible in the preview of the test
    split). That column is only a row counter, so it is removed here to keep
    it from being mistaken for a feature.

    Parameters
    ----------
    path : str
        Location of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The file contents without any ``Unnamed: N`` columns.
    """
    frame = pd.read_csv(path)
    # Matching on the name prefix (rather than index_col=0) keeps this safe
    # for a file that happens not to contain the saved-index column.
    stray_index_cols = [col for col in frame.columns if str(col).startswith("Unnamed:")]
    return frame.drop(columns=stray_index_cols)


train = _load_split("../data/grandprix_features_train.csv")
val = _load_split("../data/grandprix_features_val.csv")
test = _load_split("../data/grandprix_features_test.csv")

test.head()

Unnamed: 0,year,driver_id,constructor_id,circuit_id,grid_position,quali_delta,quali_tm_delta,season_pts_driver,season_pts_team,last_3_avg,is_street_circuit,is_wet,points_scored
0,2025,norris,mclaren,melbourne,1,0.0,-0.084,0.0,0.0,0.0,1,1,1
1,2025,verstappen,red_bull_racing,melbourne,3,0.385,-1.613,0.0,0.0,0.0,1,1,1
2,2025,russell,mercedes,melbourne,4,0.45,-0.979,0.0,0.0,0.0,1,1,1
3,2025,antonelli,mercedes,melbourne,16,1.429,0.979,0.0,15.0,0.0,1,1,1
4,2025,albon,williams,melbourne,6,0.641,-0.194,0.0,0.0,0.0,1,1,1


In [83]:
# Column roles used by the preprocessing step and the model.
# CAT_COLS hold string identifiers; NUM_COLS are passed to the model as-is.
CAT_COLS = ["driver_id", "constructor_id", "circuit_id"]
NUM_COLS = [
    "grid_position",
    "quali_delta",
    "quali_tm_delta",
    "season_pts_driver",
    "season_pts_team",
    "last_3_avg",
    # The two flags below are grouped with the numeric columns.
    "is_street_circuit",
    "is_wet",
]

TARGET = "points_scored"


def _features_and_target(frame):
    """Return ``(features, target)`` selected from ``frame``.

    Features are the categorical columns followed by the numeric columns, in
    that order; the target is the ``TARGET`` column.
    """
    return frame[CAT_COLS + NUM_COLS], frame[TARGET]


X_train, y_train = _features_and_target(train)
X_val, y_val = _features_and_target(val)
X_test, y_test = _features_and_target(test)

In [84]:
# Categorical identifiers are one-hot encoded; numeric columns pass through
# untouched. handle_unknown="ignore" lets transform() accept categories that
# were not present when the encoder was fitted instead of raising.
one_hot_encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False)

preprocess = ColumnTransformer(
    transformers=[
        ("cat", one_hot_encoder, CAT_COLS),
        ("num", "passthrough", NUM_COLS),
    ],
    # Any column not listed in CAT_COLS / NUM_COLS is discarded.
    remainder="drop",
    # A threshold of 0 makes the combined output a dense array.
    sparse_threshold=0.0,
)

In [85]:
# Prefer the GPU when one is visible to PyTorch, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hand the selected device to TabPFN explicitly. Previously `device` was
# computed but never used, so the classifier ran on whatever its own default
# was, regardless of the selection above.
clf = TabPFNClassifier(device=device)

# Preprocessing and classifier are chained so that fit/predict always apply
# the same encoding to whatever frame is passed in.
model = Pipeline([("prep", preprocess), ("clf", clf)])

In [86]:
# Fit on the training split and report accuracy on the validation split.
# X_train is the same CAT_COLS + NUM_COLS selection of `train` that was
# previously rebuilt inline here; using it keeps fit and predict symmetric.
model.fit(X_train, y_train)
predictions = model.predict(X_val)
print("Accuracy", accuracy_score(y_val, predictions))

Accuracy 0.8308977035490606
