In [1]:
# model loader - imagenet

import sys
sys.path.append("../.")
sys.path.append("../latent-diffusion")
sys.path.append('../taming-transformers')

import torch
from omegaconf import OmegaConf

from ldm.util import instantiate_from_config


def load_model_from_config(config, ckpt):
    """Instantiate the model described by ``config`` and load weights from ``ckpt``.

    Args:
        config: Configuration object exposing a ``model`` attribute, which is
            handed to ``instantiate_from_config``.
        ckpt: Path of a checkpoint readable by ``torch.load`` whose top-level
            mapping contains a ``"state_dict"`` entry.

    Returns:
        tuple: ``(model, sd, pl_sd)`` -- the model in eval mode (moved to CUDA
        when CUDA is available), the state dict taken from the checkpoint, and
        the complete checkpoint object.
    """
    print(f"Loading model from {ckpt}")
    # NOTE(review): weights_only=False unpickles arbitrary objects -- only use
    # this loader on checkpoints from a trusted source.
    pl_sd = torch.load(ckpt, weights_only=False, map_location=torch.device('cpu'))
    sd = pl_sd["state_dict"]
    model = instantiate_from_config(config.model)
    # Load the weights straight from memory; a round trip through a temporary
    # file on disk adds I/O and leaves a stray file behind without changing
    # the values that end up in the model.
    # strict=False tolerates missing / unexpected keys in the checkpoint.
    model.load_state_dict(sd, strict=False)
    if torch.cuda.is_available():
        model.cuda()
    model.eval()
    return model, sd, pl_sd


def get_model(model_config_path, model_ckpt_path):
    """Load a config file with OmegaConf and build the model from a checkpoint.

    Args:
        model_config_path: Path of the configuration file passed to
            ``OmegaConf.load``.
        model_ckpt_path: Path of the checkpoint passed on to
            ``load_model_from_config``.

    Returns:
        tuple: The ``(model, sd, pl_sd)`` triple produced by
        ``load_model_from_config``.
    """
    loaded = load_model_from_config(OmegaConf.load(model_config_path), model_ckpt_path)
    model, state_dict, checkpoint = loaded
    return model, state_dict, checkpoint


In [2]:
# confusion matrix plotter

from sklearn.metrics import confusion_matrix
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

def labels_to_human_labels(labels, human_labels_list):
    """Map each label in ``labels`` to its entry in ``human_labels_list``.

    Args:
        labels: Iterable of tensor-like labels; each element must support
            ``.detach().cpu().numpy()`` and convert to an ``int`` index.
        human_labels_list: Sequence indexed by the integer label value.

    Returns:
        list: One element of ``human_labels_list`` per input label, in order.
    """
    readable = []
    for label in labels:
        class_idx = int(label.detach().cpu().numpy())
        readable.append(human_labels_list[class_idx])
    return readable

def plot_confusion_matrix(true_labels, labels_pred, human_labels_list):
    """Draw a confusion matrix of predictions against true labels.

    Both label sequences are first converted to readable names with
    ``labels_to_human_labels``; the matrix is computed over
    ``human_labels_list`` (which also fixes the order of the displayed
    classes) and shown on a 12x12 inch figure.

    Args:
        true_labels: Ground-truth labels, passed as the first argument to
            ``confusion_matrix``.
        labels_pred: Predicted labels, passed as the second argument.
        human_labels_list: Readable class names, indexed by label value.
    """
    readable_true = labels_to_human_labels(true_labels, human_labels_list)
    readable_pred = labels_to_human_labels(labels_pred, human_labels_list)
    matrix = confusion_matrix(readable_true, readable_pred, labels=human_labels_list)

    fig, ax = plt.subplots(figsize=(12,12))
    display = ConfusionMatrixDisplay(confusion_matrix=matrix,
                                     display_labels=human_labels_list)
    display.plot(ax=ax)
    plt.show()

ModuleNotFoundError: No module named 'sklearn'

In [6]:
import numpy as np
import torch
# Fix the random seed for both NumPy and PyTorch. The value lives in a single
# named constant so the two libraries cannot be seeded with different numbers
# by accident when the seed is changed.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x7fc5b1e324f0>

In [7]:
# utilities

import os
from enum import Enum
import pickle

from diffusion_classifier.ldm_classifier_imagenet import LdmClassifier

def get_training_ckpt_files(output_dir, training_name):
  """List the checkpoint files of a training run whose name contains 'epoch'.

  Args:
    output_dir: Directory that holds the training run directories.
    training_name: Name of the training run directory inside ``output_dir``.

  Returns:
    list: Paths (``<output_dir>/<training_name>/checkpoints/<file>``) of every
    entry whose file name contains ``'epoch'``, sorted by file name.
  """
  ckpt_dir = os.path.join(output_dir, training_name, 'checkpoints')
  # os.listdir returns entries in arbitrary order; sort so the result (and the
  # order in which callers process checkpoints) is deterministic.
  ckpt_names = sorted(ckpt for ckpt in os.listdir(ckpt_dir) if 'epoch' in ckpt)
  return [os.path.join(ckpt_dir, ckpt) for ckpt in ckpt_names]

def strip_epoch_num_from_ckpt(ckpt_full_path):
  """Return the 1-based epoch number encoded in a checkpoint file name.

  The file name must contain ``epoch=<digits>`` (e.g. ``epoch=000004.ckpt`` or
  ``epoch=3-step=100.ckpt``); the digits are the 0-based epoch index.

  Args:
    ckpt_full_path: Path (or bare file name) of the checkpoint.

  Returns:
    int: The epoch index found in the name, plus one.

  Raises:
    ValueError: If the file name contains no ``epoch=<digits>`` part.
  """
  import re  # local import so this block does not depend on the import cell

  # basename instead of splitting on '/' so other path separators also work.
  ckpt_name = os.path.basename(ckpt_full_path)
  # Match the epoch field explicitly; taking "whatever follows the last '='"
  # would pick up the wrong number in names with further fields.
  match = re.search(r'epoch=(\d+)', ckpt_name)
  if match is None:
    raise ValueError(f"no 'epoch=<number>' found in checkpoint name: {ckpt_name!r}")
  return 1 + int(match.group(1))

def get_training_cfg_file(output_dir, training_name):
  """Locate the single 'project' config file of a training run.

  Looks in ``<output_dir>/<training_name>/configs`` for entries whose name
  contains ``'project'``.

  Args:
    output_dir: Directory that holds the training run directories.
    training_name: Name of the training run directory inside ``output_dir``.

  Returns:
    str: Path of the one matching config file.

  Raises:
    ValueError: If no entry matches, or if more than one entry matches.
  """
  cfg_dir = os.path.join(output_dir, training_name, 'configs')
  candidates = list(filter(lambda name: 'project' in name, os.listdir(cfg_dir)))
  n_candidates = len(candidates)

  if n_candidates == 0:
    raise ValueError("configs dir empty, you may manualy pass the config file instead")
  if n_candidates > 1:
    raise ValueError("more than 1 config file in configs dir, you may manualy pass the config file instead")

  only_cfg = candidates[0]
  return os.path.join(cfg_dir, only_cfg)


def evaluate_accuracy_over_epochs(output_dir, 
                                  training_name, 
                                  dataset, 
                                  t_sampling_stride = 50,
                                  n_trials = 1
                                 ):
    """Classify ``dataset`` with every epoch checkpoint of a training run.

    For each checkpoint found by ``get_training_ckpt_files`` the model is
    loaded, wrapped in an ``LdmClassifier`` and run over ``dataset``. Results
    are accumulated in a dict keyed by epoch number, and that dict is
    re-pickled to ``<output_dir>/<training_name>/classification/predictions_<k>``
    after every checkpoint, where ``k`` is the number of entries already in
    that directory when the function starts.

    Args:
        output_dir: Directory that holds the training run directories.
        training_name: Name of the training run directory inside ``output_dir``.
        dataset: Dataset passed to ``LdmClassifier.classify_dataset``; it is
            also stored in the results under the ``'dataset'`` key.
        t_sampling_stride: Forwarded to ``classify_dataset``.
        n_trials: Forwarded to ``classify_dataset``.

    Returns:
        dict: ``{'dataset': dataset, <epoch_num>: {'true_labels': ...,
        'l1_pred_labels': ..., 'l2_pred_labels': ...}, ...}``.
    """
    # create classification results dir under the training dir
    clf_dir = os.path.join(output_dir, training_name, 'classification')
    os.makedirs(clf_dir, exist_ok=True)
    # Counted once, before anything is written, so every iteration below
    # overwrites the same predictions file for this run.
    n_pred_files = len(os.listdir(clf_dir))
    predictions_path = os.path.join(clf_dir, f'predictions_{n_pred_files}')

    clf_res_per_epoch = {'dataset': dataset}
    # prepare files
    training_ckpt_files = get_training_ckpt_files(output_dir, training_name)
    cfg_file = get_training_cfg_file(output_dir, training_name)

    # loop over ckpts in increasing epoch order
    for ckpt_file in sorted(training_ckpt_files, key=strip_epoch_num_from_ckpt):
        epoch_num = strip_epoch_num_from_ckpt(ckpt_file)
        # load model
        model, _, _ = get_model(cfg_file, ckpt_file)
        # instantiate ldm classifier
        ldm_clf = LdmClassifier(model)
        # run classification on the dataset that was passed in (not on a
        # notebook-level global)
        l2_labels_pred, l1_labels_pred, true_labels = ldm_clf.classify_dataset(dataset=dataset,
                                                                               batch_size=1,
                                                                               n_trials=n_trials,
                                                                               t_sampling_stride=t_sampling_stride)
        # save results
        clf_res_per_epoch[epoch_num] = {
          'true_labels': true_labels,
          'l1_pred_labels': l1_labels_pred,
          'l2_pred_labels': l2_labels_pred,
        }
        # Re-dump after every checkpoint so partial results survive an
        # interrupted run.
        with open(predictions_path, 'wb') as f:
            pickle.dump(clf_res_per_epoch, f)

        # drop references so the model can be freed before the next one loads
        del model
        del ldm_clf

    # Return the collected results; the classifier has been deleted by now
    # (and never exists when there are no checkpoints).
    return clf_res_per_epoch



In [8]:
# more imports
import sys
sys.path.append('./latent-diffusion/ldm/data/')
from ct_rsna import CTDataset, CTSubset
from torchvision import transforms
import torch
from matplotlib import pyplot as plt
import numpy as np

In [None]:
# --- data ---------------------------------------------------------------
# Directories of the CT-RSNA splits (only the validation split is used below).
train_dir = './data/ct-rsna/train'
val_dir = './data/ct-rsna/validation'

# Number of samples requested from the validation subset.
subset_len = 1
ds = CTSubset(
    data_dir=val_dir,
    labels_file='validation_set_dropped_nans.csv',
    size=256,
    flip_prob=0.,
    subset_len=subset_len,
)

# --- LDM classifier parameters ------------------------------------------
t_sampling_stride = 50
n_trials = 1

# --- training run to evaluate -------------------------------------------
output_dir = './data/outputs'
training_name = '2024-05-10T17-04-36_imagenet-1024'

# --- evaluate every epoch checkpoint of that run -------------------------
clf_res_per_epoch = evaluate_accuracy_over_epochs(
    output_dir=output_dir,
    training_name=training_name,
    dataset=ds,
    t_sampling_stride=t_sampling_stride,
    n_trials=n_trials,
)


Data source: validation_set_dropped_nans.csv
    Class none: 0.0%
    Class epidural: 0.0%
    Class intraparenchymal: 0.0%
    Class intraventricular: 100.0%
    Class subarachnoid: 0.0%
    Class subdural: 0.0%
Loading model from ./data/outputs/2024-05-10T17-04-36_imagenet-1024/checkpoints/epoch=000004.ckpt
LatentDiffusion: Running in eps-prediction mode
DiffusionWrapper has 400.92 M params.
making attention of type 'vanilla' with 512 in_channels
Working with z of shape (1, 3, 64, 64) = 12288 dimensions.
making attention of type 'vanilla' with 512 in_channels


dataset samples:   0%|          | 0/1 [00:00<?, ?it/s]

class hypothsis:   0%|          | 0/6 [00:00<?, ?it/s]

diffusion sampling:   0%|          | 0/2 [00:00<?, ?it/s]

diffusion sampling:   0%|          | 0/2 [00:00<?, ?it/s]

diffusion sampling:   0%|          | 0/2 [00:00<?, ?it/s]

diffusion sampling:   0%|          | 0/2 [00:00<?, ?it/s]

diffusion sampling:   0%|          | 0/2 [00:00<?, ?it/s]

diffusion sampling:   0%|          | 0/2 [00:00<?, ?it/s]

Loading model from ./data/outputs/2024-05-10T17-04-36_imagenet-1024/checkpoints/epoch=000009.ckpt
