In [1]:
from __future__ import annotations

from mmpfn.datasets.cbis_ddsm import CBISDDSMDataset

import os 
import numpy as np 
import pandas as pd

import torch 
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split, TensorDataset


from sklearn.metrics import accuracy_score, roc_auc_score
from mmpfn.models.mmpfn import MMPFNClassifier
from mmpfn.models.mmpfn.constants import ModelInterfaceConfig
from mmpfn.models.mmpfn.preprocessing import PreprocessorConfig
from mmpfn.scripts_finetune_mm.finetune_tabpfn_main import fine_tune_tabpfn



In [2]:
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [3]:
emb_type='cls' # patch cls
# data_path = os.path.join(os.getenv('HOME'), "workspace/works/tabular_image/MultiModalPFN/mmpfn/data/cbis_ddsm")
data_path = os.path.join(os.getenv('HOME'), "works/research/MultiModalPFN/mmpfn/data/cbis_ddsm")

kind = 'mass'  # mass calc
train_dataset = CBISDDSMDataset(data_path=data_path, data_name=f'csv/{kind}_case_description_train_set.csv', kind=kind, image_type='ROI')
# _ = train_dataset.get_images()s
_ = train_dataset.get_embeddings(emb_type=emb_type, mode='train')
test_dataset = CBISDDSMDataset(data_path=data_path, data_name=f'csv/{kind}_case_description_test_set.csv', kind=kind, image_type='ROI')
# _ = test_dataset.get_images()
_ = test_dataset.get_embeddings(emb_type=emb_type, mode='test')

['MALIGNANT' 'BENIGN']
Load embeddings from embeddings/cbis_ddsm/mass_cls_train_ROI.pt
['MALIGNANT' 'BENIGN']
Load embeddings from embeddings/cbis_ddsm/mass_cls_test_ROI.pt


In [4]:
class ClassifierHead(nn.Module):
	def __init__(self, in_dim:int, n_classes:int, h_dim:int, p_drop:float):
		super().__init__()
		self.classifier = nn.Sequential(
			nn.LayerNorm(in_dim),
			nn.Linear(in_dim, h_dim),
			nn.GELU(),
			nn.Dropout(p=p_drop),
			nn.Linear(h_dim, n_classes),
		)
	def forward(self, x):
		return self.classifier(x)

In [None]:
accuracy_scores = []
n_epochs = 200
for seed in range(5):
    torch.manual_seed(seed)

    train_len = int(len(train_dataset) * 0.85)
    valid_len = len(train_dataset) - train_len
 
    train_dataset_, valid_dataset = random_split(train_dataset, [train_len, valid_len])
 
    y_train = train_dataset_.dataset.y[train_dataset_.indices]
    y_valid = valid_dataset.dataset.y[valid_dataset.indices]
    y_test = test_dataset.y
    
    image_train = train_dataset_.dataset.embeddings[train_dataset_.indices]
    image_valid = valid_dataset.dataset.embeddings[valid_dataset.indices]
    image_test = test_dataset.embeddings

    n_classes = len(set(y_train))

    torch.cuda.empty_cache()

    y_train = torch.from_numpy(y_train)
    y_valid = torch.from_numpy(y_valid)
    y_test = torch.from_numpy(y_test)

    dataset_train = TensorDataset(image_train, y_train)
    dataset_valid = TensorDataset(image_valid, y_valid)
    dataset_test = TensorDataset(image_test, y_test)

    batch_size = 16
    dataloader_train = DataLoader(dataset_train, batch_size = batch_size, shuffle=True)
    dataloader_valid = DataLoader(dataset_valid, batch_size = 1, shuffle=True)
    dataloader_test = DataLoader(dataset_test, batch_size = 1, shuffle=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = ClassifierHead(
        in_dim = image_train.shape[-1], 
        n_classes = n_classes,
        h_dim = int(image_train.shape[-1] / 2),
        p_drop=0.1,
    ).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    best_val_acc = 0.
    best_model_path = "best_header.pth"

    for epoch in range(n_epochs):
        model.train()
        total_loss = 0.
        for images, ys in dataloader_train:
            optimizer.zero_grad()
            preds = model(images.to(device))
            loss = criterion(preds, ys.to(device))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for images, ys in dataloader_valid:
                preds = model(images.to(device))
                predicted = preds.argmax(dim=1)
                correct += (predicted.cpu() == ys).sum().item()
                total += ys.size(0)
                val_acc = correct / total
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), best_model_path)

    best_model = ClassifierHead(
        in_dim=image_train.shape[-1],
        n_classes=n_classes,
        h_dim=int(image_train.shape[-1]/2),
        p_drop=0,
    ).to(device)
    best_model.load_state_dict(torch.load(best_model_path))

    best_model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, ys in dataloader_test:
            preds = model(images.to(device))
            predicted = preds.argmax(dim=1)
            correct += (predicted.cpu() == ys).sum().item()
            total += ys.size(0)
    test_acc = correct / total

    print("accuracy", test_acc)
    accuracy_scores.append(test_acc)


accuracy 0.6322751322751323
accuracy 0.626984126984127


KeyboardInterrupt: 

In [None]:
# get mean and std of accuracy scores
mean_accuracy = np.mean(accuracy_scores)
std_accuracy = np.std(accuracy_scores)
print("Mean Accuracy:", mean_accuracy)
print("Std Accuracy:", std_accuracy)

Mean Accuracy: 0.8380434782608696
Std Accuracy: 0.01706902477604662


In [None]:
# mean_ovr = np.mean(auc_ovrs)
# std_ovr = np.std(auc_ovrs)

# mean_ovo = np.mean(auc_ovos)
# std_ovo = np.std(auc_ovos)

# print("Mean AUC OVR:", mean_ovr)
# print("Std AUC OVR:", std_ovr)
# print("Mean AUC OVO:", mean_ovo)
# print("Std AUC OVO:", std_ovo)