In [3]:
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import train_test_split

from tabpfn import TabPFNClassifier

# Load data
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=42
)

# Initialize a classifier
clf = TabPFNClassifier()
clf.fit(X_train, y_train)

# Predict probabilities
prediction_probabilities = clf.predict_proba(X_test)
print("ROC AUC:", roc_auc_score(y_test, prediction_probabilities[:, 1]))

# Predict labels
predictions = clf.predict(X_test)
print("Accuracy", accuracy_score(y_test, predictions))

  model, _, config_ = load_model_criterion_config(


tabpfn-v2-classifier.ckpt:   0%|          | 0.00/29.0M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/37.0 [00:00<?, ?B/s]

Consider using a GPU or the tabpfn-client API: https://github.com/PriorLabs/tabpfn-client


ROC AUC: 0.9981992797118848
Accuracy 0.9824561403508771


In [5]:
# XGBoost Example
import xgboost as xgb
from sklearn.metrics import accuracy_score, classification_report
import time

# Load the breast cancer dataset
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Create and train XGBoost classifier
start_time = time.time()
xgb_clf = xgb.XGBClassifier(
    n_estimators=100,
    max_depth=3,
    learning_rate=0.1,
    use_label_encoder=False,
    eval_metric="logloss",
    random_state=42,
)

# Train the model
xgb_clf.fit(X_train, y_train)

# Make predictions
y_pred = xgb_clf.predict(X_test)
y_pred_proba = xgb_clf.predict_proba(X_test)[:, 1]

# Calculate metrics
accuracy = accuracy_score(y_test, y_pred)
training_time = time.time() - start_time

print(f"Training time: {training_time:.2f} seconds")
print(f"Accuracy: {accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Feature importance
print("\nFeature Importance:")
for i, importance in enumerate(xgb_clf.feature_importances_):
    print(f"Feature {i}: {importance:.4f}")

Training time: 0.05 seconds
Accuracy: 0.9561

Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.93      0.94        43
           1       0.96      0.97      0.97        71

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114


Feature Importance:
Feature 0: 0.0284
Feature 1: 0.0198
Feature 2: 0.0000
Feature 3: 0.0136
Feature 4: 0.0094
Feature 5: 0.0053
Feature 6: 0.0060
Feature 7: 0.3079
Feature 8: 0.0001
Feature 9: 0.0063
Feature 10: 0.0093
Feature 11: 0.0089
Feature 12: 0.0168
Feature 13: 0.0119
Feature 14: 0.0113
Feature 15: 0.0087
Feature 16: 0.0220
Feature 17: 0.0043
Feature 18: 0.0036
Feature 19: 0.0040
Feature 20: 0.0578
Feature 21: 0.0276
Feature 22: 0.1538
Feature 23: 0.0360
Feature 24: 0.0072
Feature 25: 0.0000
Feature 26: 0.0295
Feature 27: 0.1860
Feature 28: 0.0049
Feature 29: 0.0000


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
