In [1]:
from __future__ import annotations

import os 

from torch.utils.data import random_split

from mmpfn.datasets.airbnb import AirbnbDataset

import os 
import torch 
import numpy as np 
import pandas as pd

from sklearn.metrics import accuracy_score, log_loss, roc_auc_score, root_mean_squared_error

from mmpfn.models.tabpfn_v2 import TabPFNClassifier
from mmpfn.models.dino_v2.models.vision_transformer import vit_base
from mmpfn.models.tabpfn_v2.constants import ModelInterfaceConfig
from mmpfn.models.tabpfn_v2.preprocessing import PreprocessorConfig
from mmpfn.scripts_finetune.finetune_tabpfn_main import fine_tune_tabpfn

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
data_path = os.path.join(os.getenv('HOME'), "works/research/MultiModalPFN/mmpfn/data/airbnb")
dataset = AirbnbDataset(data_path)

  df = pd.read_csv(os.path.join(data_path, FILENAME))


In [3]:
accuracy_scores = []
for seed in range(5):
    torch.manual_seed(seed)
    # np.random.seed(seed)
    # print(f"Finetuning with seed: {seed}")
    
    train_len = int(len(dataset) * 0.8)
    test_len = len(dataset) - train_len

    train_dataset, test_dataset = random_split(dataset, [train_len, test_len])

    X_train = train_dataset.dataset.x[train_dataset.indices]
    y_train = train_dataset.dataset.y[train_dataset.indices]
    X_test = test_dataset.dataset.x[test_dataset.indices]
    y_test = test_dataset.dataset.y[test_dataset.indices]

    for i in range(X_train.shape[1]):
        col = X_train[:, i]
        col[np.isnan(col)] = np.nanmin(col) - 1
    for i in range(X_test.shape[1]):
        col = X_test[:, i]
        col[np.isnan(col)] = np.nanmin(col) - 1

    torch.cuda.empty_cache()

    save_path_to_fine_tuned_model = "./finetuned_tabpfn_pad_ufes_20.ckpt"
    
    fine_tune_tabpfn(
        # path_to_base_model="auto",
        save_path_to_fine_tuned_model=save_path_to_fine_tuned_model,
        # Finetuning HPs
        time_limit=60,
        finetuning_config={"learning_rate": 0.00001, "batch_size": 1, "max_steps": 100},
        validation_metric="log_loss",
        # Input Data
        X_train=pd.DataFrame(X_train),
        y_train=pd.Series(y_train),
        categorical_features_index=None,
        device="cuda",  # use "cpu" if you don't have a GPU
        task_type="multiclass",
        # Optional
        show_training_curve=False,  # Shows a final report after finetuning.
        logger_level=0,  # Shows all logs, higher values shows less
    )

    # disables preprocessing at inference time to match fine-tuning
    no_preprocessing_inference_config = ModelInterfaceConfig(
        FINGERPRINT_FEATURE=False,
        PREPROCESS_TRANSFORMS=[PreprocessorConfig(name='none')]
    )

    # Evaluate on Test Data
    model_finetuned = TabPFNClassifier(
        model_path=save_path_to_fine_tuned_model,
        inference_config=no_preprocessing_inference_config,
        ignore_pretraining_limits=True,
    )

    clf_finetuned = model_finetuned.fit(X_train, y_train)
    acc_score = accuracy_score(y_test, clf_finetuned.predict(X_test))
    print("accuracy_score (Finetuned):", acc_score)
    accuracy_scores.append(acc_score)

Fine-tuning Steps:   5%|▌         | 5/100 [00:20<08:02,  5.08s/it, Best Val. Loss=1.38, Best Val. Score=-1.38, Training Loss=1.37, Val. Loss=1.38, Patience=46, Utilization=0, Grad Norm=3.82][2025-09-08 23:19:47,618] INFO - 
Optimizer step skipped due to NaNs/infs in grad scaling.
Fine-tuning Steps: 101it [08:31,  5.12s/it, Best Val. Loss=1.35, Best Val. Score=-1.35, Training Loss=1.38, Val. Loss=1.36, Patience=-49, Utilization=0, Grad Norm=4.04]                         
[2025-09-08 23:27:55,733] INFO - Initial Validation Loss: 1.381251710737002 Best Validation Loss: 1.3527545409354047 Total Steps: 101 Best Step: 61 Total Time Spent: 513.2224526405334


accuracy_score (Finetuned): 0.46891002194586684


Fine-tuning Steps:  12%|█▏        | 12/100 [00:26<03:28,  2.37s/it, Best Val. Loss=1.37, Best Val. Score=-1.37, Training Loss=1.44, Val. Loss=1.37, Patience=39, Utilization=0, Grad Norm=3.85][2025-09-08 23:28:30,861] INFO - 
Optimizer step skipped due to NaNs/infs in grad scaling.
Fine-tuning Steps: 101it [03:52,  2.33s/it, Best Val. Loss=1.36, Best Val. Score=-1.36, Training Loss=1.34, Val. Loss=1.36, Patience=-49, Utilization=0, Grad Norm=3.37]                         
[2025-09-08 23:31:56,040] INFO - Initial Validation Loss: 1.3848089620547392 Best Validation Loss: 1.3593912010892568 Total Steps: 101 Best Step: 100 Total Time Spent: 233.68022513389587


accuracy_score (Finetuned): 0.4747622531089978


Fine-tuning Steps:   2%|▏         | 2/100 [00:02<03:56,  2.42s/it, Best Val. Loss=1.38, Best Val. Score=-1.38, Training Loss=1.41, Val. Loss=1.38, Patience=49, Utilization=0, Grad Norm=3.33][2025-09-08 23:32:03,975] INFO - 
Optimizer step skipped due to NaNs/infs in grad scaling.
Fine-tuning Steps:   4%|▍         | 4/100 [00:07<04:05,  2.55s/it, Best Val. Loss=1.37, Best Val. Score=-1.37, Training Loss=1.41, Val. Loss=1.37, Patience=48, Utilization=0, Grad Norm=5.69]


KeyboardInterrupt: 

In [None]:
# get mean and std of accuracy scores
mean_accuracy = np.mean(accuracy_scores)
std_accuracy = np.std(accuracy_scores)
print("Mean Accuracy:", mean_accuracy)
print("Std Accuracy:", std_accuracy)

Mean Accuracy: 0.8304347826086957
Std Accuracy: 0.01816225962974384
