In [1]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
from tabpfn.scripts.decision_boundary import DecisionBoundaryDisplay
from tabpfn import TabPFNClassifier 
from ensemble_tabpfn import EnsembleTabPFN
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

In [2]:
zero_noise = pd.read_csv("data/zero_noise.csv")
low_noise = pd.read_csv("data/low_noise.csv")
high_noise = pd.read_csv("data/high_noise.csv")
low_noise.drop(columns=["data_type"], inplace=True)
high_noise.drop(columns=["data_type"], inplace=True)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
def encode(v, class_values):
    return class_values.index(v)

def encode_target(v, class_values):
    return class_values.index(v)

class_values_zero = list(zero_noise.era.unique())
class_values_zero_target_5 = list(zero_noise.target_5_val.unique())
class_values_zero_target_10 = list(zero_noise.target_10_val.unique())
zero_noise["era_label"] = zero_noise["era"].apply(encode, args=(class_values_zero,))
zero_noise["target_5_val_label"] = zero_noise["target_5_val"].apply(encode_target, args=(class_values_zero_target_5,))
zero_noise["target_10_val_label"] = zero_noise["target_10_val"].apply(encode_target, args=(class_values_zero_target_10,))

class_values_low = list(low_noise.era.unique())
class_values_low_target_5 = list(low_noise.target_5_val.unique())
class_values_low_target_10 = list(low_noise.target_10_val.unique())
low_noise["era_label"] = low_noise["era"].apply(encode, args=(class_values_low,))
low_noise["target_5_val_label"] = low_noise["target_5_val"].apply(encode_target, args=(class_values_low_target_5,))
low_noise["target_10_val_label"] = low_noise["target_10_val"].apply(encode_target, args=(class_values_low_target_10,))

class_values_high = list(high_noise.era.unique())
class_values_high_target_5 = list(high_noise.target_5_val.unique())
class_values_high_target_10 = list(high_noise.target_10_val.unique())
high_noise["era_label"] = high_noise["era"].apply(encode, args=(class_values_high,))
high_noise["target_5_val_label"] = high_noise["target_5_val"].apply(encode_target, args=(class_values_high_target_5,))
high_noise["target_10_val_label"] = high_noise["target_10_val"].apply(encode_target, args=(class_values_high_target_10,))

dataset = low_noise
target_column = "target_10_val_label"
output_classes = 12 if target_column=="era_label" else 5

In [4]:
class CustomDataset(Dataset):
    def __init__(self, X, y, transform=None):
        self.X = X
        self.y = y
        self.transform = transform

    def __len__(self):
        return len(self.X)

    def __getitem__(self, index):
        return (
            torch.tensor(self.X[index], dtype=torch.float32),
            torch.tensor(self.y[index], dtype=torch.long),
        )

In [5]:
train_size = int(0.8 * len(dataset))
val_size = int(0.1 * len(dataset))

dataset = dataset.sample(frac=1).reset_index(drop=True)
train_dataset = dataset.iloc[:train_size]
val_dataset = dataset.iloc[train_size:train_size+val_size]
test_dataset = dataset.iloc[train_size+val_size:]

train_X = train_dataset.iloc[:, :-8].values
train_y = train_dataset.loc[:, target_column].values

val_X = val_dataset.iloc[:, :-8].values
val_y = val_dataset.loc[:, target_column].values

test_X = test_dataset.iloc[:, :-8].values
test_y = test_dataset.loc[:, target_column].values

In [6]:
train_dataset = CustomDataset(train_X, train_y)
val_dataset = CustomDataset(val_X, val_y)
test_dataset = CustomDataset(test_X, test_y)

train_dataloader = DataLoader(train_dataset, batch_size=500, shuffle=False)
val_dataloader = DataLoader(val_dataset, batch_size=500, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=500, shuffle=False)

In [7]:
class MLP(nn.Module):
    def __init__(self, layers):
        super().__init__()
        self.layers = layers
        self.linear = nn.Linear(output_classes, 1)

    def forward(self, x):

        for layer in self.layers[:-1]:
            x = layer(x)

        logits = self.layers[-1](x).to(torch.float64).to(device)
        x1 = self.linear(x)
        tau = torch.sigmoid(x1).to(torch.float64).to(device)

        return logits, tau
    
layers = nn.ModuleList([
        nn.Linear(24, 64),
        nn.ReLU(),
        nn.Linear(64, 128),
        nn.ReLU(),
        nn.Linear(128, 32),
        nn.ReLU(),
        nn.Linear(32, output_classes),
        nn.Softmax(dim=1),
    ])

In [8]:
model = MLP(layers).to(device)

learning_rate = 0.0025
learning_rate_auto = 0.001
weight_decay = 0.0001
epochs = 25
epochs_auto = 25

criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

In [9]:
def train(model, optimizer, loss_fn, epochs, train_dataloader, val_dataloader,
          bagging_mode=False, validation_mode=True, NAL_loss=False, debug_mode=False):
    
    batch=0
    val_losses=[]
    val_accuracies=[]
    train_losses=[]
    train_accuracies=[]
    num_batches=int(len(train_dataloader)*0.9)
    for epoch in range(epochs):
        
        train_accuracy=0.0
        train_loss=0.0
        model.train()
        batch=0
        for feature, label in iter(train_dataloader):
            optimizer.zero_grad()
            feature = feature.to(device)
            label = label.to(device)

            if NAL_loss:
                output, tau = model(feature)
                loss = loss_fn(output, label, tau)
            else:
                output, tau = model(feature)
                loss = loss_fn(output, label)

            loss.backward()
            optimizer.step()
            
            train_loss+=(loss.item())/len(label)
            train_accuracy+=torch.sum(torch.argmax(output, dim=1) == label)/len(label)

            batch+=1
            if bagging_mode:
                if batch==num_batches:
                    break
            if debug_mode:
                break
            
        train_losses.append(train_loss/len(train_dataloader))
        train_accuracies.append(train_accuracy/len(train_dataloader))

        print(f"Training loss: {train_loss}")
        print(f"Training accuracy: {train_accuracy/len(train_dataloader)}")
        print("-----------------------------------------------")
        
        if validation_mode:
            val_loss, val_acc = validation(model, val_dataloader, loss_fn)
            val_losses.append(val_loss)
            val_accuracies.append(val_acc)
        batch=0

    if validation_mode:
        return train_losses, train_accuracies, val_losses, val_accuracies
    else:
        return train_losses, train_accuracies

def validation(model, val_dataloader, criterion):
    model.eval()
    val_accuracy = 0
    val_loss = 0
    with torch.no_grad():
        for feature, label in iter(val_dataloader):
            feature = feature.to(device)
            label = label.to(device)
            output = model(feature)
            val_loss += criterion(output, label).item()
            val_accuracy += torch.sum(torch.argmax(output, dim=1) == label)/len(label)
            
        print(f"Validation loss: {val_loss/len(val_dataloader)}")
        print(f"Validation accuracy: {val_accuracy/len(val_dataloader)}")

    return val_loss/len(val_dataloader), val_accuracy/len(val_dataloader)

def test(model, test_dataloader):
    model.eval()
    with torch.no_grad():
        accuracy = 0
        for feature, label in iter(test_dataloader):
            feature, label = feature.to(device), label.to(device)
            outputs, _ = model(feature)
            accuracy += torch.sum(torch.argmax(outputs, dim=1) == label)/len(label)
        print(f"Test accuracy: {accuracy/len(test_dataloader)}")

def plot(train, val, mode):
    if mode == "accuracy":
        for i in range(len(train)):
            train[i] = train[i].cpu().numpy()
    
    plt.figure(figsize=(10, 5))
    plt.plot(train, label=f"Training {mode}")
    plt.plot(val, label=f"Validation {mode}")
    plt.title(f"Training and Validation {mode} Curve")

    plt.xlabel("Epochs")
    plt.ylabel(f"{mode}")
    plt.legend()
    plt.show()

In [10]:
classifier = TabPFNClassifier(device='cpu', N_ensemble_configurations=4)
# classifier.model

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters


In [11]:
# X = torch.tensor(train_X, dtype=torch.float32)
# y = torch.tensor(train_y, dtype=torch.long)
# X_test = torch.tensor(test_X, dtype=torch.float32)

# classifier.fit(X, y, overwrite_warning=True)
# y_eval, p_eval = classifier.predict(X_test, return_winning_probability=True)

In [12]:
# print(f"Accuracy: {accuracy_score(test_y, y_eval)}")

In [10]:
evals = []
models = []
X_test = torch.tensor(test_X, dtype=torch.float32)

for (feature, label) in tqdm(train_dataloader):
    classifier = TabPFNClassifier(device=device, N_ensemble_configurations=4)
    classifier.fit(feature, label, overwrite_warning=True)
    y_eval, p_eval = classifier.predict(X_test, return_winning_probability=True)
    evals.append(y_eval)
    models.append(classifier)
    # classifier.remove_models_from_memory()

  0%|          | 0/500 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters




OutOfMemoryError: CUDA out of memory. Tried to allocate 954.00 MiB. GPU 0 has a total capacty of 4.00 GiB of which 0 bytes is free. Of the allocated memory 2.33 GiB is allocated by PyTorch, and 757.73 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [67]:
test_X.shape

(31200, 24)

In [69]:
X_test = torch.tensor(test_X, dtype=torch.float32)
X_test.shape

torch.Size([31200, 24])