### Imports:

In [8]:
import numpy as np
from pathlib import Path
import pandas as pd
import time
import torch.nn as nn
import torch

import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from torch.utils.data import DataLoader, Dataset

from sklearn.metrics import accuracy_score
from sklearn.datasets import load_breast_cancer, load_iris, load_wine
from sklearn.model_selection import train_test_split
from tabpfn.scripts.decision_boundary import DecisionBoundaryDisplay
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from sklearn.preprocessing import LabelEncoder


from tabpfn.scripts.transformer_prediction_interface import TabPFNClassifier, load_model_workflow, transformer_predict, get_params_from_config
import torch.optim as optim

In [9]:
class SklearnDataset(Dataset):
    """Expose a feature container and its labels as a torch ``Dataset``.

    Each item is a ``(features, label)`` pair of newly created tensors: the
    features as ``float32`` and the label as ``long``.
    """

    def __init__(self, data, target):
        # Kept exactly as given; tensors are only built per item on access.
        self.data, self.target = data, target

    def __len__(self):
        """Return the number of samples, measured on ``data``."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the ``(features, label)`` tensor pair stored at ``index``."""
        features = torch.tensor(self.data[index], dtype=torch.float32)
        label = torch.tensor(self.target[index], dtype=torch.long)
        return features, label


In [22]:
# Fit TabPFN on one shuffled mini-batch per epoch and score it on the
# remaining mini-batches of the breast-cancer training split.
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

train_dataset = SklearnDataset(X_train, y_train)
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)

test_dataset = SklearnDataset(X_test, y_test)
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=True)

classifier = TabPFNClassifier(device='cpu', N_ensemble_configurations=4, only_inference=True)

epochs = 20

# Descriptive loop names (instead of `e`, `i`, `x`, `y`) keep this loop from
# overwriting the full label array `y` and the `i` / `e` values that the
# model-loading cell passes to `load_model_workflow`.
for epoch in range(epochs):
    for batch_idx, (x_batch, y_batch) in enumerate(train_dataloader):
        if batch_idx == 0:
            # The first batch of every epoch is the one the classifier is fitted on.
            classifier.fit(x_batch, y_batch)
        else:
            # Every later batch is only predicted; the timer wraps predict() alone.
            start = time.time()
            y_eval, p_eval = classifier.predict(x_batch, return_winning_probability=True)
            print('Prediction time: ', time.time() - start, 'Accuracy', accuracy_score(y_batch, y_eval))



Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
Prediction time:  0.3629920482635498 Accuracy 0.96875
Prediction time:  0.3910393714904785 Accuracy 1.0
Prediction time:  0.38788270950317383 Accuracy 0.984375
Prediction time:  0.43715500831604004 Accuracy 0.953125
Prediction time:  0.4869370460510254 Accuracy 0.890625
Prediction time:  0.4944422245025635 Accuracy 1.0
Prediction time:  0.35973501205444336 Accuracy 1.0
Prediction time:  0.4930539131164551 Accuracy 1.0
Prediction time:  0.49391794204711914 Accuracy 0.9375
Prediction time:  0.4987359046936035 Accuracy 0.953125
Prediction time:  0.4889719486236572 Accuracy 0.9375
Prediction time:  0.4104743003845215 Accuracy 0.984375
Prediction time:  0.38995957374572754 Accuracy 1.0
Prediction time:  0.2539186477661133 Accuracy 1.0
Prediction time:  0.3635904788970947 Accuracy 1.0
Prediction time:  0.4484696388244629 Accuracy 0.9375
Prediction time:  0.4317033290863037 Accuracy 0.953125
Predicti

KeyboardInterrupt: 

### Sample One Batch From Iris Dataset

In [4]:
# Small fixed split of the iris data: 40 training rows and 10 test rows.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=40,
    test_size=10,
    random_state=42,
)

classifier = TabPFNClassifier(
    device='cpu',
    N_ensemble_configurations=4,
    only_inference=True,
)

# The timed span covers fit() as well as predict().
start = time.time()
classifier.fit(X_train, y_train)
y_eval, p_eval = classifier.predict(X_test, return_winning_probability=True)
print(
    'Prediction time: ',
    time.time() - start,
    'Accuracy',
    accuracy_score(y_test, y_eval),
)

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
Prediction time:  0.19298958778381348 Accuracy 0.8


### Config Setup

In [None]:
# Settings read by the model-loading cell.
model_string = ''      # forwarded to load_model_workflow as `add_name`
i = '8x_lr0.0003'      # first positional argument of load_model_workflow
e = 4                  # second positional argument of load_model_workflow
base_path = '.'
device = 'cpu'

### Load TabPFN Model

In [None]:
# Load the model described by the config values. Later cells use `model[2]`
# as the module whose parameters are optimised and pass `c` to
# `get_params_from_config`.
model, c, results_file = load_model_workflow(
    i,
    e,
    add_name=model_string,
    base_path=base_path,
    device=device,
    eval_addition='',
    only_inference=False,
)

##### Meta Training Loop

In [None]:
# Fine-tune the loaded model on one small breast-cancer split by
# back-propagating the loss of its predictions for the held-out rows.
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=40, test_size=10, random_state=42)

# From here on `X` / `y` hold the validated *training* rows only.
X, y = check_X_y(X_train, y_train, force_all_finite=False)

y = np.asarray(y, dtype=np.float64, order="C")
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Stack training and test rows into a single sequence. Rows from
# `eval_position` onwards are the ones to predict, so they get zero labels as
# placeholders: one per *test* row, so that `y_full` and `X_full` have the
# same number of rows (the placeholder used to be sized from the training
# matrix instead).
X_test = check_array(X_test, force_all_finite=False)
X_full = np.concatenate([X, X_test], axis=0)
X_full = torch.tensor(X_full, device=device, requires_grad=True).float().unsqueeze(1)
y_full = np.concatenate([y, np.zeros_like(X_test[:, 0])], axis=0)
y_full = torch.tensor(y_full, device=device, requires_grad=True).float().unsqueeze(1)
eval_position = X.shape[0]

# nn.CrossEntropyLoss takes class-index targets as int64; .long() is a no-op
# when the labels already are int64 and a fix on platforms where they are int32.
y_test = torch.from_numpy(y_test).long()

criterion = nn.CrossEntropyLoss()
model[2].train()
optimizer = optim.Adam(model[2].parameters(), lr=0.001)
# The loop variable is `epoch` rather than `e`, so the `e` that the
# model-loading cell passes to `load_model_workflow` is not overwritten.
for epoch in range(10):
    optimizer.zero_grad()
    # NOTE(review): device is hard-coded to 'cpu' here although the tensors
    # above are created on `device`; confirm before changing the config.
    prediction = transformer_predict(model[2], X_full, y_full, eval_position,
                                     device='cpu',
                                     style=None,
                                     inference_mode=False,
                                     N_ensemble_configurations=3,
                                     softmax_temperature=None, **get_params_from_config(c))

    # NOTE(review): nn.CrossEntropyLoss expects unnormalised logits; confirm
    # that transformer_predict returns logits rather than probabilities here.
    loss = criterion(prediction.squeeze(0), y_test)
    print(epoch, '|', loss)
    loss.backward()
    optimizer.step()

In [None]:
# Classifier created with only_inference=False; the following training cell
# optimises the parameters of `classifier.model[2]`.
classifier = TabPFNClassifier(
    device='cpu',
    N_ensemble_configurations=4,
    only_inference=False,
)

In [None]:
# Fine-tune the module behind `classifier` on one small breast-cancer split,
# going through the classifier's fit / predict interface.
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=40, test_size=10, random_state=42)

classifier.model[2].train()

# nn.CrossEntropyLoss takes class-index targets as int64; .long() is a no-op
# when the labels already are int64 and a fix on platforms where they are int32.
y_test = torch.from_numpy(y_test).long()
optimizer = optim.Adam(classifier.model[2].parameters(), lr=0.0001)
criterion = nn.CrossEntropyLoss()

# The loop variable is `epoch` rather than `e`, so the `e` that the
# model-loading cell passes to `load_model_workflow` is not overwritten.
for epoch in range(100):
    optimizer.zero_grad()
    classifier.fit(X_train, y_train)
    # NOTE(review): `predict_proba2` is not defined in this notebook;
    # presumably a variant of predict_proba that keeps the autograd graph.
    # Confirm against the installed tabpfn package.
    prediction = classifier.predict_proba2(X_test)
    prediction = prediction.squeeze(0)
    loss = criterion(prediction, y_test)
    print(loss)
    loss.backward()
    optimizer.step()


In [None]:
# Sanity check: shapes of the last prediction and of the targets, one per line.
print(prediction.shape, y_test.shape, sep='\n')

In [None]:
# List every parameter of the classifier's module with its requires_grad flag.
for name, params in classifier.model[2].named_parameters():
    # The comma after "|" was missing, so Python merged the two adjacent
    # literals into "|params :"; they are separate fields again.
    print('name :', name, "|", 'params :', params.requires_grad)

In [None]:
# Fit on X_train and score on X_test; the timed span covers fit() and predict().
start = time.time()
classifier.fit(X_train, y_train)
y_eval, p_eval = classifier.predict(X_test, return_winning_probability=True)
print(
    'Prediction time: ',
    time.time() - start,
    'Accuracy',
    accuracy_score(y_test, y_eval),
)