In [None]:
from __future__ import annotations

import os 

from torch.utils.data import random_split

from mmpfn.datasets.pad_ufes_20 import PADUFES20Dataset

import os 
import torch 
import numpy as np 

from sklearn.metrics import accuracy_score

from mmpfn.models.tabpfn_v2 import TabPFNClassifier
from mmpfn.models.tabpfn_v2.constants import ModelInterfaceConfig
from mmpfn.models.tabpfn_v2.preprocessing import PreprocessorConfig



In [None]:
# data_path = os.path.join(os.getenv('HOME'), "workspace/works/tabular_image/MultiModalPFN/mmpfn/data/pad_ufes_20")
data_path = os.path.join(os.getenv('HOME'), "works/research/MultiModalPFN/mmpfn/data/pad_ufes_20")
dataset = PADUFES20Dataset(data_path)

In [3]:
accuracy_scores = []
for seed in range(10):
    torch.manual_seed(seed)
    print(f"Finetuning with seed: {seed}")
    
    train_len = int(len(dataset) * 0.8)
    test_len = len(dataset) - train_len

    train_dataset, test_dataset = random_split(dataset, [train_len, test_len])

    X_train = train_dataset.dataset.x[train_dataset.indices]
    y_train = train_dataset.dataset.y[train_dataset.indices]
    X_test = test_dataset.dataset.x[test_dataset.indices]
    y_test = test_dataset.dataset.y[test_dataset.indices]

    for i in range(X_train.shape[1]):
        col = X_train[:, i]
        col[np.isnan(col)] = np.nanmin(col) - 1
    for i in range(X_test.shape[1]):
        col = X_test[:, i]
        col[np.isnan(col)] = np.nanmin(col) - 1

    torch.cuda.empty_cache()

    model_path = "/home/wall/works/research/MultiModalPFN/mmpfn/parameters/tabpfn-v2-classifier.ckpt"

    # disables preprocessing at inference time to match fine-tuning
    no_preprocessing_inference_config = ModelInterfaceConfig(
        FINGERPRINT_FEATURE=False,
        PREPROCESS_TRANSFORMS=[PreprocessorConfig(name='none')]
    )

    # Evaluate on Test Data
    model_finetuned = TabPFNClassifier(
        model_path=model_path,
        inference_config=no_preprocessing_inference_config,
        ignore_pretraining_limits=True,
    )

    clf_finetuned = model_finetuned.fit(X_train, y_train)
    acc_score = accuracy_score(y_test, clf_finetuned.predict(X_test))
    print("accuracy_score (Finetuned):", acc_score)
    accuracy_scores.append(acc_score)

Finetuning with seed: 0
accuracy_score (Finetuned): 0.8282608695652174
Finetuning with seed: 1
accuracy_score (Finetuned): 0.8065217391304348
Finetuning with seed: 2
accuracy_score (Finetuned): 0.8043478260869565
Finetuning with seed: 3
accuracy_score (Finetuned): 0.8347826086956521
Finetuning with seed: 4
accuracy_score (Finetuned): 0.808695652173913
Finetuning with seed: 5
accuracy_score (Finetuned): 0.8456521739130435
Finetuning with seed: 6
accuracy_score (Finetuned): 0.8
Finetuning with seed: 7
accuracy_score (Finetuned): 0.8152173913043478
Finetuning with seed: 8
accuracy_score (Finetuned): 0.8130434782608695
Finetuning with seed: 9
accuracy_score (Finetuned): 0.8260869565217391


In [4]:
# get mean and std of accuracy scores
mean_accuracy = np.mean(accuracy_scores)
std_accuracy = np.std(accuracy_scores)
print("Mean Accuracy:", mean_accuracy)
print("Std Accuracy:", std_accuracy)

Mean Accuracy: 0.8182608695652174
Std Accuracy: 0.014061705764170623
