In [1]:
from __future__ import annotations

import os 

from torch.utils.data import random_split

from mmpfn.datasets.cloth import ClothDataset

import os 
import torch 
import numpy as np 
import pandas as pd

from sklearn.metrics import accuracy_score
from mmpfn.models.mmpfn_v2 import MMPFNClassifier
from mmpfn.models.mmpfn_v2.constants import ModelInterfaceConfig
from mmpfn.models.mmpfn_v2.preprocessing import PreprocessorConfig
from mmpfn.scripts_finetune_mm.finetune_tabpfn_main import fine_tune_mmpfn

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the cloth dataset from a checkout located under the current user's home directory.
# os.path.expanduser("~") is used instead of os.getenv('HOME'): getenv returns None when
# HOME is not set (e.g. on Windows), and os.path.join(None, ...) then raises a TypeError.
# Alternative checkout location:
# data_path = os.path.join(os.getenv('HOME'), "workspace/works/tabular_image/MultiModalPFN/mmpfn/data/cloth")
data_path = os.path.join(os.path.expanduser("~"), "works/research/MultiModalPFN/mmpfn/data/cloth")
dataset = ClothDataset(data_path)
# The return value is not needed here; the call is made for its effect on `dataset`
# (presumably it populates `dataset.embeddings`, which later cells index — TODO confirm).
_ = dataset.get_embeddings()

Load embeddings from embeddings/cloth/cloth.pt


In [3]:
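# Reference result from an earlier run labelled "MGM 1"
# (presumably mixer_type='MGM' with mgm_heads=1 — TODO confirm):
#   Mean Accuracy: 0.5998674326115776
#   Std Accuracy:  0.006042772572327586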

In [4]:
# Image-only MMPFN fine-tuning on the cloth dataset, repeated over 5 seeds.
# The mixer settings are defined once so that fine-tuning and the inference-time
# classifier are guaranteed to be built with the same configuration.
MIXER_TYPE = 'MGM+CQAM'  # MGM, MGM+CQAM
MGM_HEADS = 16
CQAM_HEADS = 8

# The checkpoint name encodes the dataset and mixer configuration, so that a failed run
# cannot leave behind a checkpoint from a different configuration (or from another
# dataset's notebook) under the path this cell later loads.
save_path_to_fine_tuned_model = (
    f"./finetuned_mmpfn_cloth_{MIXER_TYPE}_mgm{MGM_HEADS}_cqam{CQAM_HEADS}.ckpt"
)

# 80/20 train/test split sizes; these do not depend on the seed.
train_len = int(len(dataset) * 0.8)
test_len = len(dataset) - train_len

accuracy_scores = []
for seed in range(5):
    # random_split draws its permutation from torch's global RNG, so seeding it fixes the split.
    torch.manual_seed(seed)
    train_dataset, test_dataset = random_split(dataset, [train_len, test_len])

    # Only labels and image embeddings are used in this experiment: the tabular features
    # are deliberately left out (X_train=None / fit(None, ...) below), so they are neither
    # extracted nor NaN-imputed here. Embeddings are passed as stored (no .unsqueeze(1)).
    y_train = dataset.y[train_dataset.indices]
    y_test = dataset.y[test_dataset.indices]
    image_train = dataset.embeddings[train_dataset.indices]
    image_test = dataset.embeddings[test_dataset.indices]

    # Drop the references to the model of the previous seed (or of a previously executed
    # cell) *before* emptying the cache; while they are alive, any GPU memory they hold
    # cannot be returned by empty_cache().
    model_finetuned = clf_finetuned = None
    torch.cuda.empty_cache()

    fine_tune_mmpfn(
        # path_to_base_model="auto",
        save_path_to_fine_tuned_model=save_path_to_fine_tuned_model,
        # Finetuning HPs
        time_limit=60,
        finetuning_config={"learning_rate": 0.00001, "batch_size": 1, "max_steps": 100},
        validation_metric="log_loss",
        # Input Data
        X_train=None,
        image_train=image_train,
        y_train=pd.Series(y_train),
        categorical_features_index=None,
        device="cuda",  # use "cpu" if you don't have a GPU
        task_type="multiclass",
        # Optional
        show_training_curve=False,  # Shows a final report after finetuning.
        logger_level=0,  # Shows all logs, higher values shows less
        freeze_input=True,  # Freeze the input layers (encoder and y_encoder) during finetuning
        mixer_type=MIXER_TYPE,
        mgm_heads=MGM_HEADS,
        cqam_heads=CQAM_HEADS,
    )

    # disables preprocessing at inference time to match fine-tuning
    no_preprocessing_inference_config = ModelInterfaceConfig(
        FINGERPRINT_FEATURE=False,
        PREPROCESS_TRANSFORMS=[PreprocessorConfig(name='none')]
    )

    # Evaluate on Test Data
    model_finetuned = MMPFNClassifier(
        model_path=save_path_to_fine_tuned_model,
        inference_config=no_preprocessing_inference_config,
        ignore_pretraining_limits=True,
        mixer_type=MIXER_TYPE,
        mgm_heads=MGM_HEADS,
        cqam_heads=CQAM_HEADS,
    )

    clf_finetuned = model_finetuned.fit(None, image_train, y_train)
    acc_score = accuracy_score(y_test, clf_finetuned.predict(None, image_test))
    print("accuracy_score (Finetuned):", acc_score)
    accuracy_scores.append(acc_score)

Fine-tuning Steps:   1%|          | 1/100 [00:00<?, ?it/s][2025-09-21 00:56:50,303] INFO - 
Optimizer step skipped due to NaNs/infs in grad scaling.
Fine-tuning Steps: 101it [13:47,  8.28s/it, Best Val. Loss=0.84, Best Val. Score=-0.84, Training Loss=0.879, Val. Loss=0.84, Patience=-49, Utilization=0, Grad Norm=0.845]                           
[2025-09-21 01:10:32,382] INFO - Initial Validation Loss: 1.226486097523351 Best Validation Loss: 0.8397275896332557 Total Steps: 101 Best Step: 100 Total Time Spent: 832.5075809955597


accuracy_score (Finetuned): 0.6380910296067167


Fine-tuning Steps:   1%|          | 1/100 [00:00<?, ?it/s][2025-09-21 01:11:21,455] INFO - 
Optimizer step skipped due to NaNs/infs in grad scaling.
Fine-tuning Steps:   2%|▏         | 2/100 [00:05<09:19,  5.71s/it, Best Val. Loss=1.24, Best Val. Score=-1.24, Training Loss=1.25, Val. Loss=1.24, Patience=50, Utilization=0, Grad Norm=nan][2025-09-21 01:11:27,259] INFO - 
Optimizer step skipped due to NaNs/infs in grad scaling.
Fine-tuning Steps: 101it [13:38,  8.19s/it, Best Val. Loss=0.859, Best Val. Score=-0.859, Training Loss=0.898, Val. Loss=0.859, Patience=-48, Utilization=0, Grad Norm=1.28]                         
[2025-09-21 01:24:54,677] INFO - Initial Validation Loss: 1.2396382165848059 Best Validation Loss: 0.85908805863119 Total Steps: 101 Best Step: 100 Total Time Spent: 821.5920803546906


accuracy_score (Finetuned): 0.6447193990278391


Fine-tuning Steps:   1%|          | 1/100 [00:00<?, ?it/s][2025-09-21 01:25:42,891] INFO - 
Optimizer step skipped due to NaNs/infs in grad scaling.
Fine-tuning Steps:   2%|▏         | 2/100 [00:05<09:18,  5.70s/it, Best Val. Loss=1.34, Best Val. Score=-1.34, Training Loss=1.27, Val. Loss=1.34, Patience=50, Utilization=0, Grad Norm=nan][2025-09-21 01:25:48,686] INFO - 
Optimizer step skipped due to NaNs/infs in grad scaling.
Fine-tuning Steps:   3%|▎         | 3/100 [00:11<09:18,  5.76s/it, Best Val. Loss=1.34, Best Val. Score=-1.34, Training Loss=1.26, Val. Loss=1.34, Patience=50, Utilization=0, Grad Norm=nan][2025-09-21 01:25:54,450] INFO - 
Optimizer step skipped due to NaNs/infs in grad scaling.
Fine-tuning Steps: 101it [13:32,  8.12s/it, Best Val. Loss=0.859, Best Val. Score=-0.859, Training Loss=0.909, Val. Loss=0.859, Patience=-47, Utilization=0, Grad Norm=1.91]                         
[2025-09-21 01:39:09,524] INFO - Initial Validation Loss: 1.335107493382765 Best Validation L

accuracy_score (Finetuned): 0.6303579319487406


Fine-tuning Steps:   1%|          | 1/100 [00:00<?, ?it/s][2025-09-21 01:39:58,130] INFO - 
Optimizer step skipped due to NaNs/infs in grad scaling.
Fine-tuning Steps: 101it [13:37,  8.17s/it, Best Val. Loss=0.858, Best Val. Score=-0.858, Training Loss=0.886, Val. Loss=0.858, Patience=-49, Utilization=0, Grad Norm=1.38]                         
[2025-09-21 01:53:29,871] INFO - Initial Validation Loss: 1.214977630950318 Best Validation Loss: 0.8581024257558638 Total Steps: 101 Best Step: 100 Total Time Spent: 820.0435707569122


accuracy_score (Finetuned): 0.638753866548829


Fine-tuning Steps:   1%|          | 1/100 [00:00<?, ?it/s][2025-09-21 01:54:18,107] INFO - 
Optimizer step skipped due to NaNs/infs in grad scaling.
Fine-tuning Steps:   2%|▏         | 2/100 [00:05<09:16,  5.68s/it, Best Val. Loss=1.26, Best Val. Score=-1.26, Training Loss=1.24, Val. Loss=1.26, Patience=50, Utilization=0, Grad Norm=nan][2025-09-21 01:54:23,920] INFO - 
Optimizer step skipped due to NaNs/infs in grad scaling.
Fine-tuning Steps: 101it [13:38,  8.19s/it, Best Val. Loss=0.859, Best Val. Score=-0.859, Training Loss=0.885, Val. Loss=0.859, Patience=-48, Utilization=0, Grad Norm=2.34]                         
[2025-09-21 02:07:51,207] INFO - Initial Validation Loss: 1.258643369487801 Best Validation Loss: 0.8585917368111191 Total Steps: 101 Best Step: 100 Total Time Spent: 821.3760113716125


accuracy_score (Finetuned): 0.6283694211224039


In [5]:
# Summarise the per-seed test accuracies of the MGM+CQAM / mgm_heads=16 run above.
# get mean and std of accuracy scores
if len(accuracy_scores) == 0:
    # np.mean / np.std of an empty list only emit RuntimeWarnings and return nan;
    # report the situation explicitly instead of printing a silent nan.
    mean_accuracy = std_accuracy = float("nan")
    print("No accuracy scores recorded - the experiment cell above did not complete any seed.")
else:
    mean_accuracy = np.mean(accuracy_scores)
    # np.std defaults to ddof=0, i.e. the population standard deviation over the seeds.
    std_accuracy = np.std(accuracy_scores)
    print("Mean Accuracy:", mean_accuracy)
    print("Std Accuracy:", std_accuracy)

Mean Accuracy: 0.6360583296509058
Std Accuracy: 0.005967005268189257


In [6]:
# Image-only MMPFN fine-tuning on the cloth dataset, repeated over 5 seeds,
# this time with 32 MGM heads.
# The mixer settings are defined once so that fine-tuning and the inference-time
# classifier are guaranteed to be built with the same configuration.
MIXER_TYPE = 'MGM+CQAM'  # MGM, MGM+CQAM
MGM_HEADS = 32
CQAM_HEADS = 8

# The checkpoint name encodes the dataset and mixer configuration, so that a failed run
# cannot leave behind a checkpoint from a different configuration (or from another
# dataset's notebook) under the path this cell later loads.
save_path_to_fine_tuned_model = (
    f"./finetuned_mmpfn_cloth_{MIXER_TYPE}_mgm{MGM_HEADS}_cqam{CQAM_HEADS}.ckpt"
)

# 80/20 train/test split sizes; these do not depend on the seed.
train_len = int(len(dataset) * 0.8)
test_len = len(dataset) - train_len

accuracy_scores = []
for seed in range(5):
    # random_split draws its permutation from torch's global RNG, so seeding it fixes the split.
    torch.manual_seed(seed)
    train_dataset, test_dataset = random_split(dataset, [train_len, test_len])

    # Only labels and image embeddings are used in this experiment: the tabular features
    # are deliberately left out (X_train=None / fit(None, ...) below), so they are neither
    # extracted nor NaN-imputed here. Embeddings are passed as stored (no .unsqueeze(1)).
    y_train = dataset.y[train_dataset.indices]
    y_test = dataset.y[test_dataset.indices]
    image_train = dataset.embeddings[train_dataset.indices]
    image_test = dataset.embeddings[test_dataset.indices]

    # Drop the references to the model of the previous seed (or of a previously executed
    # cell) *before* emptying the cache; while they are alive, any GPU memory they hold
    # cannot be returned by empty_cache().
    model_finetuned = clf_finetuned = None
    torch.cuda.empty_cache()

    fine_tune_mmpfn(
        # path_to_base_model="auto",
        save_path_to_fine_tuned_model=save_path_to_fine_tuned_model,
        # Finetuning HPs
        time_limit=60,
        finetuning_config={"learning_rate": 0.00001, "batch_size": 1, "max_steps": 100},
        validation_metric="log_loss",
        # Input Data
        X_train=None,
        image_train=image_train,
        y_train=pd.Series(y_train),
        categorical_features_index=None,
        device="cuda",  # use "cpu" if you don't have a GPU
        task_type="multiclass",
        # Optional
        show_training_curve=False,  # Shows a final report after finetuning.
        logger_level=0,  # Shows all logs, higher values shows less
        freeze_input=True,  # Freeze the input layers (encoder and y_encoder) during finetuning
        mixer_type=MIXER_TYPE,
        mgm_heads=MGM_HEADS,
        cqam_heads=CQAM_HEADS,
    )

    # disables preprocessing at inference time to match fine-tuning
    no_preprocessing_inference_config = ModelInterfaceConfig(
        FINGERPRINT_FEATURE=False,
        PREPROCESS_TRANSFORMS=[PreprocessorConfig(name='none')]
    )

    # Evaluate on Test Data
    model_finetuned = MMPFNClassifier(
        model_path=save_path_to_fine_tuned_model,
        inference_config=no_preprocessing_inference_config,
        ignore_pretraining_limits=True,
        mixer_type=MIXER_TYPE,
        mgm_heads=MGM_HEADS,
        cqam_heads=CQAM_HEADS,
    )

    clf_finetuned = model_finetuned.fit(None, image_train, y_train)
    acc_score = accuracy_score(y_test, clf_finetuned.predict(None, image_test))
    print("accuracy_score (Finetuned):", acc_score)
    accuracy_scores.append(acc_score)

Fine-tuning Steps: 101it [25:34, 15.35s/it, Best Val. Loss=0.813, Best Val. Score=-0.813, Training Loss=0.842, Val. Loss=0.813, Patience=-50, Utilization=0, Grad Norm=0.915]                         
[2025-09-21 02:34:12,529] INFO - Initial Validation Loss: 1.1488419810416064 Best Validation Loss: 0.8129818799517735 Total Steps: 101 Best Step: 100 Total Time Spent: 1537.8710236549377


accuracy_score (Finetuned): 0.643172779496244


Fine-tuning Steps: 101it [25:10, 15.10s/it, Best Val. Loss=0.838, Best Val. Score=-0.838, Training Loss=0.84, Val. Loss=0.838, Patience=-50, Utilization=0, Grad Norm=1.21]                           
[2025-09-21 03:01:17,810] INFO - Initial Validation Loss: 1.161577433740172 Best Validation Loss: 0.8375542915552114 Total Steps: 101 Best Step: 100 Total Time Spent: 1513.7210013866425


accuracy_score (Finetuned): 0.6586389748121962


Fine-tuning Steps: 101it [26:44, 16.04s/it, Best Val. Loss=0.813, Best Val. Score=-0.813, Training Loss=0.873, Val. Loss=0.813, Patience=-50, Utilization=0, Grad Norm=0.975]                         
[2025-09-21 03:31:05,646] INFO - Initial Validation Loss: 1.1533559477498656 Best Validation Loss: 0.813131201035461 Total Steps: 101 Best Step: 100 Total Time Spent: 1607.126315832138


accuracy_score (Finetuned): 0.6478126380910296


Fine-tuning Steps:   7%|▋         | 7/100 [01:03<16:28, 10.63s/it, Best Val. Loss=1.06, Best Val. Score=-1.06, Training Loss=1.16, Val. Loss=1.06, Patience=44, Utilization=0, Grad Norm=1.67] [2025-09-21 03:34:13,680] INFO - 
Optimizer step skipped due to NaNs/infs in grad scaling.
Fine-tuning Steps: 101it [23:55, 14.36s/it, Best Val. Loss=0.818, Best Val. Score=-0.818, Training Loss=0.846, Val. Loss=0.818, Patience=-49, Utilization=0, Grad Norm=0.981]                         
[2025-09-21 03:56:58,003] INFO - Initial Validation Loss: 1.160625210789518 Best Validation Loss: 0.818443738526786 Total Steps: 101 Best Step: 100 Total Time Spent: 1442.7635006904602


accuracy_score (Finetuned): 0.6533362792752982


Fine-tuning Steps: 101it [25:19, 15.20s/it, Best Val. Loss=0.822, Best Val. Score=-0.822, Training Loss=0.833, Val. Loss=0.822, Patience=-50, Utilization=0, Grad Norm=0.935]                         
[2025-09-21 04:25:23,735] INFO - Initial Validation Loss: 1.159164211994054 Best Validation Loss: 0.8222821993148459 Total Steps: 101 Best Step: 100 Total Time Spent: 1522.6345643997192


accuracy_score (Finetuned): 0.6464869642068052


In [7]:
# Summarise the per-seed test accuracies of the MGM+CQAM / mgm_heads=32 run above.
# get mean and std of accuracy scores
if len(accuracy_scores) == 0:
    # np.mean / np.std of an empty list only emit RuntimeWarnings and return nan;
    # report the situation explicitly instead of printing a silent nan.
    mean_accuracy = std_accuracy = float("nan")
    print("No accuracy scores recorded - the experiment cell above did not complete any seed.")
else:
    mean_accuracy = np.mean(accuracy_scores)
    # np.std defaults to ddof=0, i.e. the population standard deviation over the seeds.
    std_accuracy = np.std(accuracy_scores)
    print("Mean Accuracy:", mean_accuracy)
    print("Std Accuracy:", std_accuracy)

Mean Accuracy: 0.6498895271763147
Std Accuracy: 0.0054669650885417215


In [8]:
# Image-only MMPFN fine-tuning on the cloth dataset, repeated over 5 seeds,
# using the plain MGM mixer with 64 heads.
# NOTE: in the recorded run this configuration aborted with "CUDA error: out of memory"
# during the first seed. Releasing the model left over from the previous cell (see the
# loop below) may help, but the configuration may simply not fit on the GPU used.
# The mixer settings are defined once so that fine-tuning and the inference-time
# classifier are guaranteed to be built with the same configuration.
MIXER_TYPE = 'MGM'  # MGM, MGM+CQAM
MGM_HEADS = 64
CQAM_HEADS = 8

# The checkpoint name encodes the dataset and mixer configuration, so that a failed run
# (such as the OOM above) cannot leave behind a checkpoint from a different configuration
# under the path this cell later loads.
save_path_to_fine_tuned_model = (
    f"./finetuned_mmpfn_cloth_{MIXER_TYPE}_mgm{MGM_HEADS}_cqam{CQAM_HEADS}.ckpt"
)

# 80/20 train/test split sizes; these do not depend on the seed.
train_len = int(len(dataset) * 0.8)
test_len = len(dataset) - train_len

accuracy_scores = []
for seed in range(5):
    # random_split draws its permutation from torch's global RNG, so seeding it fixes the split.
    torch.manual_seed(seed)
    train_dataset, test_dataset = random_split(dataset, [train_len, test_len])

    # Only labels and image embeddings are used in this experiment: the tabular features
    # are deliberately left out (X_train=None / fit(None, ...) below), so they are neither
    # extracted nor NaN-imputed here. Embeddings are passed as stored (no .unsqueeze(1)).
    y_train = dataset.y[train_dataset.indices]
    y_test = dataset.y[test_dataset.indices]
    image_train = dataset.embeddings[train_dataset.indices]
    image_test = dataset.embeddings[test_dataset.indices]

    # Drop the references to the model of the previous seed (or of a previously executed
    # cell) *before* emptying the cache; while they are alive, any GPU memory they hold
    # cannot be returned by empty_cache().
    model_finetuned = clf_finetuned = None
    torch.cuda.empty_cache()

    fine_tune_mmpfn(
        # path_to_base_model="auto",
        save_path_to_fine_tuned_model=save_path_to_fine_tuned_model,
        # Finetuning HPs
        time_limit=60,
        finetuning_config={"learning_rate": 0.00001, "batch_size": 1, "max_steps": 100},
        validation_metric="log_loss",
        # Input Data
        X_train=None,
        image_train=image_train,
        y_train=pd.Series(y_train),
        categorical_features_index=None,
        device="cuda",  # use "cpu" if you don't have a GPU
        task_type="multiclass",
        # Optional
        show_training_curve=False,  # Shows a final report after finetuning.
        logger_level=0,  # Shows all logs, higher values shows less
        freeze_input=True,  # Freeze the input layers (encoder and y_encoder) during finetuning
        mixer_type=MIXER_TYPE,
        mgm_heads=MGM_HEADS,
        cqam_heads=CQAM_HEADS,
    )

    # disables preprocessing at inference time to match fine-tuning
    no_preprocessing_inference_config = ModelInterfaceConfig(
        FINGERPRINT_FEATURE=False,
        PREPROCESS_TRANSFORMS=[PreprocessorConfig(name='none')]
    )

    # Evaluate on Test Data
    model_finetuned = MMPFNClassifier(
        model_path=save_path_to_fine_tuned_model,
        inference_config=no_preprocessing_inference_config,
        ignore_pretraining_limits=True,
        mixer_type=MIXER_TYPE,
        mgm_heads=MGM_HEADS,
        cqam_heads=CQAM_HEADS,
    )

    clf_finetuned = model_finetuned.fit(None, image_train, y_train)
    acc_score = accuracy_score(y_test, clf_finetuned.predict(None, image_test))
    print("accuracy_score (Finetuned):", acc_score)
    accuracy_scores.append(acc_score)

Fine-tuning Steps:   1%|          | 1/100 [00:00<?, ?it/s][2025-09-21 04:41:22,525] INFO - 
Optimizer step skipped due to NaNs/infs in grad scaling.
Fine-tuning Steps:   2%|▏         | 2/100 [13:30<22:03:56, 810.57s/it, Best Val. Loss=1.4, Best Val. Score=-1.4, Training Loss=1.28, Val. Loss=1.4, Patience=50, Utilization=0, Grad Norm=nan][2025-09-21 05:02:59,817] INFO - 
Optimizer step skipped due to NaNs/infs in grad scaling.
Fine-tuning Steps:   3%|▎         | 3/100 [35:07<29:33:17, 1096.88s/it, Best Val. Loss=1.4, Best Val. Score=-1.4, Training Loss=1.32, Val. Loss=1.4, Patience=50, Utilization=0, Grad Norm=nan][2025-09-21 05:21:02,521] INFO - 
Optimizer step skipped due to NaNs/infs in grad scaling.
Fine-tuning Steps:   4%|▍         | 4/100 [1:05:57<35:10:51, 1319.29s/it, Best Val. Loss=1.4, Best Val. Score=-1.4, Training Loss=1.25, Val. Loss=1.4, Patience=50, Utilization=0, Grad Norm=nan]


RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
# Summarise the per-seed test accuracies of the MGM / mgm_heads=64 run above.
# get mean and std of accuracy scores
if len(accuracy_scores) == 0:
    # The experiment cell above resets accuracy_scores to [] before training, so a run that
    # aborts in the first seed leaves it empty. np.mean / np.std would then only emit
    # RuntimeWarnings and return nan; report the situation explicitly instead.
    mean_accuracy = std_accuracy = float("nan")
    print("No accuracy scores recorded - the experiment cell above did not complete any seed.")
else:
    mean_accuracy = np.mean(accuracy_scores)
    # np.std defaults to ddof=0, i.e. the population standard deviation over the seeds.
    std_accuracy = np.std(accuracy_scores)
    print("Mean Accuracy:", mean_accuracy)
    print("Std Accuracy:", std_accuracy)

In [None]:
# Image-only MMPFN fine-tuning on the cloth dataset, repeated over 5 seeds,
# using the plain MGM mixer with 128 heads.
# NOTE(review): this cell has no recorded output; the 64-head variant ran out of CUDA
# memory in the recorded run, so this larger setting may need a bigger GPU — confirm.
# The mixer settings are defined once so that fine-tuning and the inference-time
# classifier are guaranteed to be built with the same configuration.
MIXER_TYPE = 'MGM'  # MGM, MGM+CQAM
MGM_HEADS = 128
CQAM_HEADS = 8

# The checkpoint name encodes the dataset and mixer configuration, so that a failed run
# cannot leave behind a checkpoint from a different configuration (or from another
# dataset's notebook) under the path this cell later loads.
save_path_to_fine_tuned_model = (
    f"./finetuned_mmpfn_cloth_{MIXER_TYPE}_mgm{MGM_HEADS}_cqam{CQAM_HEADS}.ckpt"
)

# 80/20 train/test split sizes; these do not depend on the seed.
train_len = int(len(dataset) * 0.8)
test_len = len(dataset) - train_len

accuracy_scores = []
for seed in range(5):
    # random_split draws its permutation from torch's global RNG, so seeding it fixes the split.
    torch.manual_seed(seed)
    train_dataset, test_dataset = random_split(dataset, [train_len, test_len])

    # Only labels and image embeddings are used in this experiment: the tabular features
    # are deliberately left out (X_train=None / fit(None, ...) below), so they are neither
    # extracted nor NaN-imputed here. Embeddings are passed as stored (no .unsqueeze(1)).
    y_train = dataset.y[train_dataset.indices]
    y_test = dataset.y[test_dataset.indices]
    image_train = dataset.embeddings[train_dataset.indices]
    image_test = dataset.embeddings[test_dataset.indices]

    # Drop the references to the model of the previous seed (or of a previously executed
    # cell) *before* emptying the cache; while they are alive, any GPU memory they hold
    # cannot be returned by empty_cache().
    model_finetuned = clf_finetuned = None
    torch.cuda.empty_cache()

    fine_tune_mmpfn(
        # path_to_base_model="auto",
        save_path_to_fine_tuned_model=save_path_to_fine_tuned_model,
        # Finetuning HPs
        time_limit=60,
        finetuning_config={"learning_rate": 0.00001, "batch_size": 1, "max_steps": 100},
        validation_metric="log_loss",
        # Input Data
        X_train=None,
        image_train=image_train,
        y_train=pd.Series(y_train),
        categorical_features_index=None,
        device="cuda",  # use "cpu" if you don't have a GPU
        task_type="multiclass",
        # Optional
        show_training_curve=False,  # Shows a final report after finetuning.
        logger_level=0,  # Shows all logs, higher values shows less
        freeze_input=True,  # Freeze the input layers (encoder and y_encoder) during finetuning
        mixer_type=MIXER_TYPE,
        mgm_heads=MGM_HEADS,
        cqam_heads=CQAM_HEADS,
    )

    # disables preprocessing at inference time to match fine-tuning
    no_preprocessing_inference_config = ModelInterfaceConfig(
        FINGERPRINT_FEATURE=False,
        PREPROCESS_TRANSFORMS=[PreprocessorConfig(name='none')]
    )

    # Evaluate on Test Data
    model_finetuned = MMPFNClassifier(
        model_path=save_path_to_fine_tuned_model,
        inference_config=no_preprocessing_inference_config,
        ignore_pretraining_limits=True,
        mixer_type=MIXER_TYPE,
        mgm_heads=MGM_HEADS,
        cqam_heads=CQAM_HEADS,
    )

    clf_finetuned = model_finetuned.fit(None, image_train, y_train)
    acc_score = accuracy_score(y_test, clf_finetuned.predict(None, image_test))
    print("accuracy_score (Finetuned):", acc_score)
    accuracy_scores.append(acc_score)

In [None]:
# Summarise the per-seed test accuracies of the MGM / mgm_heads=128 run above.
# get mean and std of accuracy scores
if len(accuracy_scores) == 0:
    # np.mean / np.std of an empty list only emit RuntimeWarnings and return nan;
    # report the situation explicitly instead of printing a silent nan.
    mean_accuracy = std_accuracy = float("nan")
    print("No accuracy scores recorded - the experiment cell above did not complete any seed.")
else:
    mean_accuracy = np.mean(accuracy_scores)
    # np.std defaults to ddof=0, i.e. the population standard deviation over the seeds.
    std_accuracy = np.std(accuracy_scores)
    print("Mean Accuracy:", mean_accuracy)
    print("Std Accuracy:", std_accuracy)