In [1]:
!nvidia-smi

Fri Mar  3 14:11:00 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.57.02    Driver Version: 470.57.02    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-DGXS...  On   | 00000000:07:00.0  On |                    0 |
| N/A   52C    P0   214W / 300W |  27304MiB / 32505MiB |     99%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla V100-DGXS...  On   | 00000000:08:00.0 Off |                    0 |
| N/A   57C    P0   288W / 300W |  19592MiB / 32508MiB |     91%      Default |
|       

In [2]:
import sys
sys.path.append('../')

In [3]:
import os
import sys
import json
import time
import rich
import numpy as np
import pickle
import wandb
import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F

from configs.finetune import FinetuneConfig
from tasks.classification import Classification

from models.backbone.base import calculate_out_features
from models.backbone.densenet import DenseNetBackbone
from models.backbone.resnet import build_resnet_backbone
from models.head.projector import MLPHead
from models.head.classifier import LinearClassifier

from datasets.brain import BrainProcessor, Brain, BrainMoCo
from datasets.transforms import make_transforms, compute_statistics

from utils.logging import get_rich_logger
from utils.gpu import set_gpu

from easydict import EasyDict as edict
from torch.utils.data import DataLoader, Subset

import matplotlib.pyplot as plt
from matplotlib import colors
import seaborn as sns

import nibabel as nib
from skimage.transform import resize

from copy import deepcopy

from sklearn.ensemble import RandomForestClassifier   

In [4]:
hashs =[("2022-07-02_08-00-31", "2022-07-03_13-41-32"),
        ("2022-07-02_08-00-57", "2022-07-03_13-37-29"),
        ("2022-07-02_09-38-52", "2022-07-03_13-33-23"),
        ("2022-07-02_09-40-42", "2022-07-03_13-29-10"),
        ("2022-07-02_11-17-38", "2022-07-03_13-25-05"),
        ("2022-07-02_11-20-21", "2022-07-03_13-21-00"),
        ("2022-07-02_17-15-14", "2022-07-03_13-16-54"),
        ("2022-07-02_17-15-34", "2022-07-03_13-12-44"),
        ("2022-07-02_18-53-46", "2022-07-03_13-08-35"),
        ("2022-07-02_18-54-27", "2022-07-03_13-04-32")]

In [11]:
from torchmetrics.functional.classification import accuracy, auroc
from torchmetrics.functional import precision, recall

from sklearn.metrics import roc_auc_score, confusion_matrix
from sklearn.metrics import roc_curve


def classification_result(y_true, y_pred, mc):
    auroc_ = roc_auc_score(y_true, y_pred[:, 1])

    fpr, tpr, thresholds = roc_curve(y_true, y_pred[:, 1])
    dist = fpr ** 2 + (1 - tpr) ** 2
    threshold = thresholds[np.argmin(dist)]

    # total
    cm = confusion_matrix(y_true=y_true,
                          y_pred=y_pred[:, 1] > threshold,
                          labels=[0, 1])
    n00, n01, n10, n11 = cm.reshape(-1, ).tolist()

    accuracy_ = (n00 + n11) / (n00 + n01 + n10 + n11)
    sensitivity_ = n11 / (n11 + n10 + 1e-7)
    specificity_ = n00 / (n00 + n01 + 1e-7)
    precision_ = n11 / (n11 + n01 + 1e-7)
    f1_ = (2 * precision_ * sensitivity_) / (precision_ + sensitivity_ + 1e-7)
    gmean_ = np.sqrt(sensitivity_ * specificity_)

    result_total = dict(acc=accuracy_, auroc=auroc_, sens=sensitivity_, spec=specificity_, prec=precision_,
                        f1=f1_, gmean=gmean_, threshold=threshold)
    
    
    # below
    auroc_ = roc_auc_score(y_true[test['mc'] < 37], y_pred[test['mc'] < 37, 1], labels=[0, 1])
    
    # total
    cm = confusion_matrix(y_true=y_true[test['mc'] < 37],
                          y_pred=y_pred[test['mc'] < 37, 1] > threshold,
                          labels=[0, 1])
    n00, n01, n10, n11 = cm.reshape(-1, ).tolist()

    accuracy_ = (n00 + n11) / (n00 + n01 + n10 + n11)
    sensitivity_ = n11 / (n11 + n10 + 1e-7)
    specificity_ = n00 / (n00 + n01 + 1e-7)
    precision_ = n11 / (n11 + n01 + 1e-7)
    f1_ = (2 * precision_ * sensitivity_) / (precision_ + sensitivity_ + 1e-7)
    gmean_ = np.sqrt(sensitivity_ * specificity_)

    result_below = dict(acc=accuracy_, auroc=auroc_, sens=sensitivity_, spec=specificity_, prec=precision_,
                        f1=f1_, gmean=gmean_, threshold=threshold)

    auroc_ = roc_auc_score(y_true[test['mc'] >= 37], y_pred[test['mc'] >= 37, 1], labels=[0, 1])
    
    # above
    cm = confusion_matrix(y_true=y_true[test['mc'] >= 37],
                          y_pred=y_pred[test['mc'] >= 37, 1] > threshold,
                          labels=[0, 1])
    n00, n01, n10, n11 = cm.reshape(-1, ).tolist()

    accuracy_ = (n00 + n11) / (n00 + n01 + n10 + n11)
    sensitivity_ = n11 / (n11 + n10 + 1e-7)
    specificity_ = n00 / (n00 + n01 + 1e-7)
    precision_ = n11 / (n11 + n01 + 1e-7)
    f1_ = (2 * precision_ * sensitivity_) / (precision_ + sensitivity_ + 1e-7)
    gmean_ = np.sqrt(sensitivity_ * specificity_)

    result_above = dict(acc=accuracy_, auroc=auroc_, sens=sensitivity_, spec=specificity_, prec=precision_,
                        f1=f1_, gmean=gmean_, threshold=threshold)
    
    return result_total, result_below, result_above

In [12]:
gpus = ['3']
server = 'dgx'

In [13]:
result = {
    'SMoCo': {'total': [], '<=37': [], '>37': []},
    'MC+Hippocampus': {'total': [], '<=37': [], '>37': []}
}
os.makedirs('scatter2', exist_ok=True)

for hash in hashs:
    print(hash)
    
    #######################
    config = edict()
    config.server = server
    config.gpus = gpus
    local_rank = 0

    config.finetune_file = os.path.join(f'../checkpoints/pet-supmoco/resnet/{hash[0]}/finetune/{hash[1]}/ckpt.last.pth.tar')
    finetune_config = os.path.join(f'../checkpoints/pet-supmoco/resnet/{hash[0]}/finetune/{hash[1]}/configs.json')
    with open(finetune_config, 'rb') as fb:
        finetune_config = json.load(fb)

    finetune_config_names = [
        # data_parser
        'data_type', 'root', 'data_info', 'mci_only', 'n_splits', 'n_cv',
        'image_size', 'small_kernel', 'random_state',
        'intensity', 'crop', 'crop_size', 'rotate', 'flip', 'affine', 'blur', 'blur_std', 'prob',
        # model_parser
        'backbone_type', 'init_features', 'growth_rate', 'block_config', 'bn_size', 'dropout_rate',
        'arch', 'no_max_pool',
        # train
        'batch_size',
        # moco / supmoco
        'alphas',
        # others
        'task', 'projector_dim'
    ]

    for name in finetune_config_names:
        if name in finetune_config.keys():
            setattr(config, name, finetune_config[name])

    #########################################
    set_gpu(config)
    np.random.seed(config.random_state)
    torch.manual_seed(config.random_state)
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.deterministic = False
    torch.backends.cudnn.allow_tf32 = True
    torch.cuda.set_device(local_rank)

    # Networks
    if config.backbone_type == 'densenet':
        backbone = DenseNetBackbone(in_channels=1,
                                    init_features=config.init_features,
                                    growth_rate=config.growth_rate,
                                    block_config=config.block_config,
                                    bn_size=config.bn_size,
                                    dropout_rate=config.dropout_rate,
                                    semi=False)
        activation = True
    elif config.backbone_type == 'resnet':
        backbone = build_resnet_backbone(arch=config.arch,
                                         no_max_pool=config.no_max_pool,
                                         in_channels=1,
                                         semi=False)
        activation = False
    else:
        raise NotImplementedError

    if config.small_kernel:
        backbone._fix_first_conv()

    if config.crop:
        out_dim = calculate_out_features(backbone=backbone, in_channels=1, image_size=config.crop_size)
    else:
        out_dim = calculate_out_features(backbone=backbone, in_channels=1, image_size=config.image_size)
    classifier = LinearClassifier(in_channels=out_dim, num_classes=2, activation=activation)

    backbone.load_weights_from_checkpoint(path=config.finetune_file, key='backbone')
    classifier.load_weights_from_checkpoint(path=config.finetune_file, key='classifier')
    
    # load finetune data
    data_processor = BrainProcessor(root=config.root,
                                    data_info=config.data_info,
                                    data_type=config.data_type,
                                    mci_only=config.mci_only,
                                    random_state=config.random_state)
    datasets = data_processor.process(n_splits=config.n_splits, n_cv=config.n_cv)

    # intensity normalization
    assert config.intensity in [None, 'scale', 'minmax']
    mean_std, min_max = (None, None), (None, None)
    if config.intensity is None:
        pass
    elif config.intensity == 'scale':
        pass
    elif config.intensity == 'minmax':
        with open(os.path.join(config.root, 'labels/minmax.pkl'), 'rb') as fb:
            minmax_stats = pickle.load(fb)
            min_max = (minmax_stats[config.data_type]['min'], minmax_stats[config.data_type]['max'])
    else:
        raise NotImplementedError

    train_transform, test_transform = make_transforms(image_size=config.image_size,
                                                      intensity=config.intensity,
                                                      min_max=min_max,
                                                      crop_size=config.crop_size,
                                                      rotate=config.rotate,
                                                      flip=config.flip,
                                                      affine=config.affine,
                                                      blur_std=config.blur_std,
                                                      prob=config.prob)

    #########################################
    train_set = Brain(dataset=datasets['train'], data_type=config.data_type, transform=test_transform)
    test_set = Brain(dataset=datasets['test'], data_type=config.data_type, transform=test_transform)

    train_loader = DataLoader(dataset=train_set, batch_size=16, drop_last=False)
    test_loader = DataLoader(dataset=test_set, batch_size=16, drop_last=False)

    #########################
    train = {'y': [], 'mc': [], 'hippocampus': []}
    test = {'y': [], 'mc': [], 'hippocampus': [], 'logit': []}

    import pandas as pd
    mc_table = pd.read_excel('/raidWorkspace/mingu/Data/ADNI/labels/AV45_FBP_SUVR.xlsx', sheet_name='list_id_SUVR_RSF')
    mri_table = pd.read_csv('/raidWorkspace/mingu/Data/ADNI/labels/MRI_BAI_features.csv')
    mri_table_filenames = mri_table['Filename'].str.split('/', expand=True).iloc[:, 1].values

    ########################
    backbone.to(local_rank)
    classifier.to(local_rank)
    
    with torch.no_grad():
        for mode, dset, loader in zip(['train', 'test'], [train_set, test_set], [train_loader, test_loader]):
            for batch in tqdm.tqdm(loader):
                
                y = batch['y'].long().to(local_rank)
                
                if mode == 'train':
                    train['y'] += [batch['y'].detach().cpu().numpy()]
                else:
                    test['y'] += [batch['y'].detach().cpu().numpy()]

                idx = batch['idx'].cpu().numpy()        
                for i in idx:
                    mri_id = dset.mri[i].split('/')[-1].replace('.pkl', '')
                    pet_id = dset.pet[i].split('/')[-1].replace('.pkl', '')

                    # hippocampus volume            
                    temp = mri_table.iloc[np.where(mri_table_filenames == mri_id)[0].item(), :]
                    volume = temp['Left-Hippocampus'] + temp['Right-Hippocampus']

                    # NC
                    temp = mc_table.iloc[np.where(mc_table['ID'] == pet_id)[0].item(), :]
                    mc = temp['MC']
                    mc = 53.6 * mc - 43.2

                    if mode == 'train':
                        train['hippocampus'].append(volume)
                        train['mc'].append(mc)
                    else:
                        test['hippocampus'].append(volume)
                        test['mc'].append(mc)
                
                # logits (test-only)
                if mode == 'test':
                    logit = classifier(backbone(batch['x'].float().to(local_rank)))
                    test['logit'].append(logit.detach().cpu().numpy())
                
    for k, v in train.items():
        try:
            train[k] = np.concatenate(v)
        except:
            train[k] = np.array(v)

    for k, v in test.items():
        try:
            test[k] = np.concatenate(v)
        except:
            test[k] = np.array(v)
    
    # SMoCo
    y_test_pred_smoco = torch.tensor(test['logit']).softmax(dim=1).numpy()
    
    # Random Forest - RF + Hippocampus
    rf = RandomForestClassifier()
    
    X_train = np.concatenate([train['mc'].reshape(-1, 1), train['hippocampus'].reshape(-1, 1)], axis=1)
    X_test = np.concatenate([test['mc'].reshape(-1, 1), test['hippocampus'].reshape(-1, 1)], axis=1)

    rf.fit(X_train, train['y'])
    y_test_pred_rf = rf.predict_proba(X_test)
    
    # classification result
    result_smoco_total, result_smoco_below, result_smoco_above = classification_result(y_true=test['y'],
                                                                                       y_pred=y_test_pred_smoco,
                                                                                       mc=test['mc'])
    result_rf_total, result_rf_below, result_rf_above = classification_result(y_true=test['y'],
                                                                              y_pred=y_test_pred_rf,
                                                                              mc=test['mc'])
    
    result['SMoCo']['total'].append(result_smoco_total)
    result['SMoCo']['<=37'].append(result_smoco_below)
    result['SMoCo']['>37'].append(result_smoco_above)
    
    result['MC+Hippocampus']['total'].append(result_rf_total)
    result['MC+Hippocampus']['<=37'].append(result_rf_below)
    result['MC+Hippocampus']['>37'].append(result_rf_above)
    
    # scatter - classification probability plot
    fig, axs = plt.subplots(1, 2, figsize=(12, 5))
    axs = axs.ravel()

    prob = y_test_pred_smoco[:, 1]
    axs[0].scatter(test['mc'][test['y'] == 1], prob[test['y'] == 1], c='red', label='converter', s=12)
    axs[0].scatter(test['mc'][test['y'] == 0], prob[test['y'] == 0], c='blue', label='non-converter', s=12)

    axs[0].hlines(y=result_smoco_total['threshold'], xmin=test['mc'].min(), xmax=test['mc'].max(),
                  color='purple', linestyle='--', label='SMoCo')
    axs[0].vlines(x=37.0, ymin=prob.min(), ymax=prob.max(),
                  color='green', linestyle=':', label='MC = 37')

    axs[0].set_xlabel('MC', fontsize=12)
    axs[0].set_ylabel('Classification Probability', fontsize=12)
    axs[0].legend(loc='lower right')

    prob = y_test_pred_rf[:, 1]
    axs[1].scatter(test['mc'][test['y'] == 1], prob[test['y'] == 1], c='red', label='converter', s=12)
    axs[1].scatter(test['mc'][test['y'] == 0], prob[test['y'] == 0], c='blue', label='non-converter', s=12)

    axs[1].hlines(y=result_rf_total['threshold'], xmin=test['mc'].min(), xmax=test['mc'].max(),
                  color='purple', linestyle='--', label='MC+Hippocampus')
    axs[1].vlines(x=37.0, ymin=prob.min(), ymax=prob.max(),
                  color='green', linestyle=':', label='MC = 37')

    axs[1].set_xlabel('MC', fontsize=12)
    axs[1].set_ylabel('Classification Probability', fontsize=12)
    axs[1].legend(loc='lower right')

    plt.savefig(os.path.join('scatter2', f'{hash[0]}-{hash[1]}.png'), bbox_inches='tight', dpi=300)
    plt.close()

('2022-07-02_08-00-31', '2022-07-03_13-41-32')


100%|███████████████████████████████████████████████████████████████████████████████████| 36/36 [00:37<00:00,  1.03s/it]
100%|█████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:08<00:00,  2.08s/it]


ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.

In [16]:
with open(os.path.join('scatter2', 'result.pkl'), 'wb') as fb:
    pickle.dump(result, fb)

In [17]:
os.path.join('scatter', f'{hash[0]}-{hash[1]}.png')

'scatter/2022-07-02_18-54-27-2022-07-03_13-04-32.png'

In [18]:
hash[0]

'2022-07-02_18-54-27'

In [19]:
result.keys()

dict_keys(['SMoCo', 'MC+Hippocampus'])

In [7]:
with open(os.path.join('scatter', 'result.pkl'), 'rb') as fb:
    result = pickle.load(fb)

In [10]:
result['SMoCo']['<=37']

[{'acc': 0.9615384615384616,
  'auroc': None,
  'sens': 0.0,
  'spec': 0.9615384578402366,
  'prec': 0.0,
  'f1': 0.0,
  'gmean': 0.0,
  'threshold': 0.9234854},
 {'acc': 0.8620689655172413,
  'auroc': 0.5512820512820513,
  'sens': 0.0,
  'spec': 0.9615384578402366,
  'prec': 0.0,
  'f1': 0.0,
  'gmean': 0.0,
  'threshold': 0.62396526},
 {'acc': 0.8695652173913043,
  'auroc': 0.5476190476190477,
  'sens': 0.0,
  'spec': 0.952380947845805,
  'prec': 0.0,
  'f1': 0.0,
  'gmean': 0.0,
  'threshold': 0.3723428},
 {'acc': 0.9333333333333333,
  'auroc': 0.9310344827586207,
  'sens': 0.9999999000000099,
  'spec': 0.9310344795481569,
  'prec': 0.3333333222222226,
  'f1': 0.4999999375000028,
  'gmean': 0.964901231445332,
  'threshold': 0.83461595},
 {'acc': 0.71875,
  'auroc': 0.52,
  'sens': 0.0,
  'spec': 0.91999999632,
  'prec': 0.0,
  'f1': 0.0,
  'gmean': 0.0,
  'threshold': 0.5186912},
 {'acc': 0.9090909090909091,
  'auroc': 0.6744186046511628,
  'sens': 0.0,
  'spec': 0.9302325559762034,

In [27]:
for key in result['SMoCo']['total'][0].keys():    
    print(key, np.mean([res[key] for res in result['SMoCo']['total']]))

acc 0.7862424280775355
auroc 0.8330115043840298
sens 0.7583821866205434
spec 0.7965252866528162
prec 0.5764278872555106
f1 0.650206496735805
gmean 0.7762484074852678
threshold 0.6446511


In [29]:
for key in result['SMoCo']['>37'][0].keys():    
    print(key, np.mean([res[key] for res in result['SMoCo']['>37']]))

acc 0.6721248382472522
auroc 0.7486757621463503
sens 0.8592095259316747
spec 0.4918589706909887
prec 0.6343409846335691
f1 0.7216673037093544
gmean 0.6330995169239462
threshold 0.6446511


In [30]:
for key in result['MC+Hippocampus']['total'][0].keys():    
    print(key, np.mean([res[key] for res in result['MC+Hippocampus']['total']]))

acc 0.7417454107333874
auroc 0.7926486630949039
sens 0.7260089984196675
spec 0.745203995143795
prec 0.5136419551435653
f1 0.5971941545712848
gmean 0.7331727850536153
threshold 0.28099999999999997


In [31]:
for key in result['MC+Hippocampus']['>37'][0].keys():    
    print(key, np.mean([res[key] for res in result['MC+Hippocampus']['>37']]))

acc 0.5810659139495702
auroc 0.6361210605162811
sens 0.8108656887542633
spec 0.3558974333668977
prec 0.5606323070925595
f1 0.6552763584654417
gmean 0.5201073347838319
threshold 0.28099999999999997
