In [1]:
import os
import sys

# Make the project root importable so the `src` and `config` imports below resolve.
# NOTE(review): this assumes the kernel's working directory sits one level
# below the project root -- confirm when launching from elsewhere.
notebook_dir = os.getcwd()
project_root_path = os.path.dirname(notebook_dir)
# Only prepend when the root is not already the first entry, so re-running
# this cell does not keep stacking duplicate entries onto sys.path.
if sys.path[:1] != [project_root_path]:
    sys.path.insert(0, project_root_path)

from src.preprocessing.Derm7pt import preprocessing_Derm7pt
from src.utils import *
from config import PROJECT_ROOT
from src.training import run_epoch_x_to_c

from src.utils import find_class_imbalance
from config import DERM7PT_CONFIG
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

In [2]:
# Concept count read from the Derm7pt config. It sizes the unweighted
# per-concept loss list, is passed as `n_concepts` to the evaluation run, and
# fixes the column count of the exported prediction matrix.
N_TRIMMED_CONCEPTS = DERM7PT_CONFIG['N_TRIMMED_CONCEPTS']

In [3]:
# Release cached MPS allocator memory before loading the data and the model.
# Guarded because torch.mps.empty_cache() fails on machines without an MPS
# backend, while the device-selection cell also supports CUDA and CPU.
if torch.backends.mps.is_available():
    torch.mps.empty_cache()

In [4]:
# Seed torch's global RNG before the data pipeline is built.
# NOTE(review): presumably this makes any torch-driven shuffling/splitting
# inside preprocessing_Derm7pt repeatable -- confirm against that function.
torch.manual_seed(42)

# The preprocessing call hands back the concept labels plus one loader per split.
(
    concept_labels,
    train_loader,
    val_loader,
    test_loader,
) = preprocessing_Derm7pt(
    class_concepts=False,
    verbose=True,
)

Found 34 classes.
Found labels for 2022 images.
Generated one-hot matrix with shape: (2022, 34)
Total number of concept columns: 28
Found 2013 images.
Processing in 63 batches of size 32 (for progress reporting)...


Processing batches: 100%|███████████████████████| 63/63 [00:13<00:00,  4.57it/s]



Finished processing.
Successfully transformed: 2013 images.
Labels shape: (2013, 34)
Concepts shape: (2013, 28)
Image tensors length: 2013
Dataset initialized with 826 pre-sorted items.
Dataset initialized with 406 pre-sorted items.
Dataset initialized with 790 pre-sorted items.


**Select the device to run the model on (CUDA GPU, Apple MPS, or CPU).**

In [5]:
# Pick the compute device in order of preference: CUDA, then Apple MPS,
# falling back to the CPU when neither accelerator is available.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

print(f"Using device: {device}")

Using device: mps


### Loss


In [6]:
# Build a list of binary cross-entropy criteria, either weighted by the
# values returned from find_class_imbalance or left unweighted.
use_weighted_loss = True  # flip to False for the plain, unweighted loss

if not use_weighted_loss:
    # Unweighted: one default criterion for each of the N_TRIMMED_CONCEPTS concepts.
    attr_criterion = [nn.BCEWithLogitsLoss() for _ in range(N_TRIMMED_CONCEPTS)]
else:
    # Weighted: one criterion per value returned by find_class_imbalance, each
    # value wrapped in a 1-element float tensor living on the selected device.
    # NOTE(review): `weight` rescales the whole per-element loss; if the goal
    # is to up-weight positive examples only, BCEWithLogitsLoss also offers
    # `pos_weight` -- confirm which one is intended.
    concept_weights = find_class_imbalance(concept_labels)
    attr_criterion = [
        nn.BCEWithLogitsLoss(weight=weight_tensor)
        for weight_tensor in (
            torch.tensor([ratio], device=device, dtype=torch.float)
            for ratio in concept_weights
        )
    ]

In [7]:
def get_outputs_as_array(outputs, n_concepts):
    """Stack per-batch model outputs into one 2-D NumPy array.

    Args:
        outputs: Sequence of torch tensors, one per batch. The first
            dimension of each tensor is the batch size; every remaining
            dimension is flattened per instance.
        n_concepts: Number of columns in the returned array.

    Returns:
        A float64 array of shape (total_instances, n_concepts). Rows follow
        the order of the batches and of the instances inside each batch.
        An empty `outputs` yields an array of shape (0, n_concepts).

    Raises:
        ValueError: If the flattened per-instance values of a batch cannot
            be broadcast into `n_concepts` columns.
    """
    # np.vstack rejects an empty list, so answer the empty case directly.
    if len(outputs) == 0:
        return np.zeros((0, n_concepts))

    batch_results = []
    for batch in outputs:
        batch_size = batch.shape[0]

        # Fixed-width float64 buffer so every batch stacks consistently.
        batch_matrix = np.zeros((batch_size, n_concepts))

        if batch_size:
            # Move the whole batch to host memory once, then flatten each
            # instance to a single row instead of copying row by row.
            values = batch.detach().cpu().numpy()
            batch_matrix[:] = values.reshape(batch_size, -1)

        batch_results.append(batch_matrix)

    return np.vstack(batch_results)

# Load instance-based model

In [8]:
# SECURITY: weights_only=False unpickles arbitrary Python objects, so only
# load checkpoints that come from a trusted source.
# NOTE(review): the checkpoint path is relative to the current working directory.
model = torch.load('x_to_c_best_model.pth', map_location=device, weights_only=False)

# The model is only used for inference below, so put dropout / batch-norm
# layers into evaluation mode. The trailing ';' keeps the cell from echoing
# the full model repr.
model.eval();

In [17]:
def get_outputs(loader, split_name):
    """Evaluate the loaded model on one split and save labels and predictions.

    Three files are written to ``<PROJECT_ROOT>/output/Derm7pt/``:

    * ``C_<split_name>.npy``: concept labels collected from the loader,
    * ``Y_<split_name>.npy``: image labels collected from the loader,
    * ``C_hat_sigmoid_<split_name>.npy``: the outputs returned by
      ``run_epoch_x_to_c`` (called with ``return_outputs='sigmoid'``),
      stacked into a 2-D array.

    Args:
        loader: Iterable of batches. Each batch must unpack into four items,
            of which the second is a tensor of concept labels and the third
            a tensor of image labels.
        split_name: Tag used in the output file names (e.g. 'train').

    Returns:
        None. When `loader` is None or empty, nothing is evaluated or written.
    """
    # Guard clause: without it an empty/missing loader would skip evaluation
    # and then fail on the undefined results further down.
    if not loader:
        print(f"Skipping '{split_name}': loader is empty or missing.")
        return

    with torch.no_grad():
        concept_batches = []
        label_batches = []

        # First pass over the loader: gather the ground-truth labels.
        # NOTE(review): the loader is iterated again inside run_epoch_x_to_c.
        # If the loader shuffles, the two passes may see different orders and
        # the saved labels would not line up row-for-row with the saved
        # predictions -- confirm the loaders used here do not shuffle.
        for _, batch_concepts, batch_labels, _ in loader:
            concept_batches.append(batch_concepts)
            label_batches.append(batch_labels)

        # Merge the per-batch tensors along the instance dimension.
        shuffled_concept_labels = torch.cat(concept_batches, dim=0)
        shuffled_img_labels = torch.cat(label_batches, dim=0)

        # optimizer=None: evaluation only, no parameter updates are requested.
        test_loss, test_acc, outputs = run_epoch_x_to_c(
            model, loader, attr_criterion, optimizer=None, n_concepts=N_TRIMMED_CONCEPTS, device=device,
            return_outputs='sigmoid', verbose=True
        )

    # Make sure the target folder exists before writing into it.
    output_dir = os.path.join(PROJECT_ROOT, 'output', 'Derm7pt')
    os.makedirs(output_dir, exist_ok=True)

    # Convert explicitly so saving also works for tensors that are not on the CPU.
    np.save(os.path.join(output_dir, f'C_{split_name}.npy'), shuffled_concept_labels.cpu().numpy())
    np.save(os.path.join(output_dir, f'Y_{split_name}.npy'), shuffled_img_labels.cpu().numpy())
    print(f'Best Model Summary   | Loss: {test_loss:.4f} | Acc: {test_acc:.3f}')

    output_array = get_outputs_as_array(outputs, N_TRIMMED_CONCEPTS)
    print(f"Final shape: {output_array.shape}")

    np.save(os.path.join(output_dir, f'C_hat_sigmoid_{split_name}.npy'), output_array)

In [18]:
# Evaluate on the training split and save its labels and sigmoid concept predictions.
get_outputs(train_loader, 'train')

                                                                                

Best Model Summary   | Loss: 18.8948 | Acc: 83.535
Final shape: (826, 28)




In [19]:
# Evaluate on the validation split and save its labels and sigmoid concept predictions.
get_outputs(val_loader, 'val')

                                                                                

Best Model Summary   | Loss: 30.5621 | Acc: 82.002
Final shape: (406, 28)




In [20]:
# Evaluate on the test split and save its labels and sigmoid concept predictions.
get_outputs(test_loader, 'test')

                                                                                

Best Model Summary   | Loss: 32.6961 | Acc: 82.505
Final shape: (790, 28)


