In [2]:
# import autosklearn.classification
import sklearn.datasets
import sklearn.metrics
from pprint import pprint
from tabpfn import TabPFNClassifier
import numpy as np
from pathlib import Path
import pandas as pd
import time
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

In [None]:
# Load the breast-cancer data as a (features, labels) pair, then split it into
# train and test partitions; random_state is pinned so the split is the same on every run.
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [None]:
# The notebook's import cell has `import autosklearn.classification` commented out,
# so the package is imported here; without this the cell raises NameError on a
# fresh kernel (Restart & Run All).
import autosklearn.classification

# Configure an auto-sklearn search limited to the classifiers and feature
# preprocessors listed in `include`, with a final ensemble of a single member.
automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=120,  # overall budget for the search
    per_run_time_limit=30,  # budget for a single candidate run
    tmp_folder="/tmp/autosklearn_interpretable_models_example_tmp",
    include={
        "classifier": ["decision_tree", "lda", "sgd"],
        "feature_preprocessor": [
            "no_preprocessing",
            "polynomial",
            "select_percentile_classification",
        ],
    },
    ensemble_kwargs={"ensemble_size": 1},
)
# Run the search on the training split only; the test split is kept for scoring.
automl.fit(X_train, y_train, dataset_name="breast_cancer")

In [None]:
# Pretty-print whatever `automl.show_models()` returns for the fitted search.
pprint(automl.show_models(), indent=4)

In [None]:
# Predict on the held-out split with the fitted auto-sklearn estimator and
# report the fraction of labels it gets right.
predictions = automl.predict(X_test)
print(
    "Accuracy score:",
    accuracy_score(y_true=y_test, y_pred=predictions),
)

In [None]:
# torch is imported in this cell because the notebook's import cell does not import it.
import torch

# Use the GPU when torch can see one and fall back to the CPU otherwise, so the
# notebook also runs on machines without CUDA instead of failing here.
device = "cuda" if torch.cuda.is_available() else "cpu"

# N_ensemble_configurations defines how many estimators are averaged; it is bounded
# by #features * #classes. More ensemble members are slower, but more accurate.
classifier = TabPFNClassifier(device=device, N_ensemble_configurations=4)

In [None]:
# Time the TabPFN run with a monotonic, high-resolution clock (perf_counter) so the
# measurement cannot be skewed by system clock adjustments.
# Note: the measured span covers both `fit` and `predict`, even though the label
# printed below says "Prediction time".
start = time.perf_counter()
classifier.fit(X_train, y_train)
# With return_winning_probability=True, predict returns two values: the predicted
# labels (y_eval) and a second array (p_eval) that a later cell formats as "p=...".
y_eval, p_eval = classifier.predict(X_test, return_winning_probability=True)
elapsed = time.perf_counter() - start
print("Prediction time: ", elapsed, "Accuracy", accuracy_score(y_test, y_eval))

In [None]:
# The classifier also offers the `predict_proba` interface; this cell displays the
# shape of what it returns for the test split.
classifier.predict_proba(X_test).shape

In [None]:
# Build a display table: the test features rendered as strings, plus a
# "prediction" column that pairs each predicted label with its p_eval value
# formatted to two decimals.
out_table = pd.DataFrame(X_test.copy().astype(str)).assign(
    prediction=[
        f"{label} (p={prob:.2f})" for label, prob in zip(y_eval, p_eval)
    ]
)
out_table