In [1]:
from __future__ import annotations

import os 

from torch.utils.data import random_split

from mmpfn.datasets.cbis_ddsm import CBISDDSMDataset

import os 
import torch 
import numpy as np 
import pandas as pd

from sklearn.metrics import accuracy_score, roc_auc_score
from mmpfn.models.mmpfn import MMPFNClassifier
from mmpfn.models.mmpfn.constants import ModelInterfaceConfig
from mmpfn.models.mmpfn.preprocessing import PreprocessorConfig
from mmpfn.scripts_finetune_mm.finetune_tabpfn_main import fine_tune_mmpfn

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Expose only GPU index 0 to CUDA for this process. This has to be in the
# environment before CUDA is first initialised to take effect.
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

In [3]:
# Load the CBIS-DDSM test and train splits and their image embeddings.

# data_path = os.path.join(os.getenv('HOME'), "workspace/works/tabular_image/MultiModalPFN/mmpfn/data/cbis_ddsm")
# os.getenv('HOME') is None when HOME is unset (e.g. a default Windows setup),
# which makes os.path.join raise TypeError. Keep HOME when it is set and
# otherwise fall back to the platform's home-directory lookup.
home_dir = os.environ.get('HOME') or os.path.expanduser('~')
data_path = os.path.join(home_dir, "works/research/MultiModalPFN/mmpfn/data/cbis_ddsm")

kind = 'mass'  # mass calc
image_type = 'all' # all full crop roi


def load_cbis_ddsm_split(split):
    """Build the dataset for one split and load its embeddings.

    Args:
        split: 'train' or 'test'. Selects the case-description CSV
            (csv/{kind}_case_description_{split}_set.csv) and is passed to
            ``get_embeddings`` as ``mode``.

    Returns:
        The CBISDDSMDataset for that split, after ``get_embeddings`` has been
        called on it.
    """
    dataset = CBISDDSMDataset(
        data_path=data_path,
        data_name=f'csv/{kind}_case_description_{split}_set.csv',
        kind=kind,
        image_type=image_type,
    )
    # dataset.get_images()
    # The return value is not needed here; later cells read dataset.embeddings.
    dataset.get_embeddings(mode=split)
    return dataset


# Same order as before: test split first, then train.
test_dataset = load_cbis_ddsm_split('test')
train_dataset = load_cbis_ddsm_split('train')

Load embeddings from embeddings/cbis_ddsm/mass_test_all.pt
Load embeddings from embeddings/cbis_ddsm/mass_train_all.pt


In [None]:
# Fine-tune MMPFN on the training split and score it on the test split, once per
# seed. Per-seed accuracies are collected in `accuracy_scores`; the AUC lists stay
# empty while the roc_auc_score lines at the bottom of the loop are commented out.
accuracy_scores, auc_ovrs, auc_ovos = [], [], []

# NOTE(review): the checkpoint name mentions "pad_ufes" although this notebook
# loads CBIS-DDSM; the path is kept unchanged. Every seed overwrites the same file,
# which is fine because it is read back within the same iteration.
save_path_to_fine_tuned_model = "./finetuned_mmpfn_pad_ufes_20.ckpt"

# Fall back to CPU so the cell still runs on a machine without a usable GPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Mixer settings shared by fine-tuning and inference so the two cannot drift apart.
mixer_kwargs = dict(
    mixer_type='MGM+CAP',  # MGM MGM+CAP
    mgm_heads=8,
    cap_heads=4,
)

# The data does not depend on the seed, so it is prepared once, outside the loop.
y_train = train_dataset.y
y_test = test_dataset.y
image_train = train_dataset.embeddings
image_test = test_dataset.embeddings

# Missing tabular values are replaced by a per-column sentinel: one below the
# smallest non-NaN *training* value of that column. The same sentinel is applied
# to the test split so "missing" is encoded identically at fit and predict time
# (previously the test split used its own column minimum). np.where builds new
# arrays, so the dataset objects themselves are left unmodified.
X_train_raw = np.asarray(train_dataset.x)
X_test_raw = np.asarray(test_dataset.x)
missing_fill = np.nanmin(X_train_raw, axis=0) - 1
X_train = np.where(np.isnan(X_train_raw), missing_fill, X_train_raw)
X_test = np.where(np.isnan(X_test_raw), missing_fill, X_test_raw)

for seed in range(5):
    # Seed torch and NumPy's global RNG; the latter in case the fine-tuning code
    # draws from it (not visible from this notebook).
    torch.manual_seed(seed)
    np.random.seed(seed)

    torch.cuda.empty_cache()

    fine_tune_mmpfn(
        # path_to_base_model="auto",
        save_path_to_fine_tuned_model=save_path_to_fine_tuned_model,
        # Finetuning HPs
        time_limit=60,
        finetuning_config={"learning_rate": 0.00001, "batch_size": 1, "max_steps": 100},
        validation_metric="log_loss",
        # Input Data
        X_train=pd.DataFrame(X_train),
        image_train=image_train,
        y_train=pd.Series(y_train),
        categorical_features_index=None,
        device=device,
        task_type="multiclass",
        # Optional
        show_training_curve=False,  # Shows a final report after finetuning.
        logger_level=0,  # Shows all logs, higher values shows less
        freeze_input=True,  # Freeze the input layers (encoder and y_encoder) during finetuning
        **mixer_kwargs,
    )

    # disables preprocessing at inference time to match fine-tuning
    no_preprocessing_inference_config = ModelInterfaceConfig(
        FINGERPRINT_FEATURE=False,
        PREPROCESS_TRANSFORMS=[PreprocessorConfig(name='none')]
    )

    # Evaluate on Test Data
    model_finetuned = MMPFNClassifier(
        model_path=save_path_to_fine_tuned_model,
        inference_config=no_preprocessing_inference_config,
        ignore_pretraining_limits=True,
        **mixer_kwargs,
    )

    clf_finetuned = model_finetuned.fit(X_train, image_train, y_train)
    acc_score = accuracy_score(y_test, clf_finetuned.predict(X_test, image_test))
    print("accuracy_score (Finetuned):", acc_score)
    accuracy_scores.append(acc_score)

    # auc_ovr = roc_auc_score(y_test, clf_finetuned.predict_proba(X_test, image_test), multi_class='ovr')
    # auc_ovrs.append(auc_ovr)

    # auc_ovo = roc_auc_score(y_test, clf_finetuned.predict_proba(X_test, image_test), multi_class='ovo')
    # auc_ovos.append(auc_ovo)

Fine-tuning Steps:  19%|█▉        | 19/100 [00:07<00:32,  2.48it/s, Best Val. Loss=0.249, Best Val. Score=-0.249, Training Loss=0.399, Val. Loss=0.249, Patience=32, Utilization=0, Grad Norm=4.67][2025-09-12 23:42:18,929] INFO - 
Optimizer step skipped due to NaNs/infs in grad scaling.
Fine-tuning Steps:  22%|██▏       | 22/100 [00:08<00:27,  2.86it/s, Best Val. Loss=0.249, Best Val. Score=-0.249, Training Loss=0.307, Val. Loss=0.25, Patience=30, Utilization=0, Grad Norm=8.19] [2025-09-12 23:42:19,840] INFO - 
Optimizer step skipped due to NaNs/infs in grad scaling.
Fine-tuning Steps: 101it [00:35,  2.79it/s, Best Val. Loss=0.249, Best Val. Score=-0.249, Training Loss=0.296, Val. Loss=0.268, Patience=-48, Utilization=0, Grad Norm=5.01]                         
[2025-09-12 23:42:46,959] INFO - Initial Validation Loss: 0.27744797343325245 Best Validation Loss: 0.248988481589646 Total Steps: 101 Best Step: 18 Total Time Spent: 36.65427112579346


accuracy_score (Finetuned): 0.6851851851851852


Fine-tuning Steps:   4%|▍         | 4/100 [00:01<00:50,  1.90it/s, Best Val. Loss=0.268, Best Val. Score=-0.268, Training Loss=0.342, Val. Loss=0.268, Patience=47, Utilization=0, Grad Norm=4.19]


KeyboardInterrupt: 

In [None]:
# Summarise the per-seed accuracies collected by the fine-tuning loop:
# mean and standard deviation (NumPy's default, ddof=0).
seed_accuracies = np.asarray(accuracy_scores)
mean_accuracy = seed_accuracies.mean()
std_accuracy = seed_accuracies.std()
print("Mean Accuracy:", mean_accuracy)
print("Std Accuracy:", std_accuracy)

In [None]:
# AUC summary (disabled). auc_ovrs / auc_ovos are only filled by the
# roc_auc_score lines in the fine-tuning loop, which are commented out there,
# so both lists are empty; re-enable this cell together with those lines.

# mean_ovr = np.mean(auc_ovrs)
# std_ovr = np.std(auc_ovrs)

# mean_ovo = np.mean(auc_ovos)
# std_ovo = np.std(auc_ovos)

# print("Mean AUC OVR:", mean_ovr)
# print("Std AUC OVR:", std_ovr)
# print("Mean AUC OVO:", mean_ovo)
# print("Std AUC OVO:", std_ovo)