In [1]:
%load_ext autoreload
%autoreload 2

In [None]:
import pytorch_lightning as pl
from fdg_simple_classifier import SimpleDataModule, SimpleModel

classifier = SimpleModel.load_from_checkpoint('./data_and_models/fdg_auc_roc=0.86.ckpt')
dm = SimpleDataModule()
dm.setup()
test_loader = dm.test_dataloader()
external_test_loader = dm.external_test_dataloader()
internal_test_loader = dm.internal_test_dataloader()
trainer = pl.Trainer()


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


AD dataset:
Patients
Total: 633, Train: 506, Val: 63, Test: 64
Scans
Total: 1557, Train: 1221, Val: 163, Test: 173
CN dataset:
Patients
Total: 266, Train: 212, Val: 27, Test: 27
Scans
Total: 632, Train: 498, Val: 70, Test: 64
Patients
Total: 899, Train: 719, Val: 90, Test: 90
Scans
Total: 2189, Train: 1753, Val: 222, Test: 214
OASIS3 AD dataset:
Patients: 7
Scans: 9
OASIS3 CN dataset:
Patients: 88
Scans: 99


In [4]:
internal_summary = trainer.test(classifier, dataloaders=test_loader)
internal_gt = classifier.val_gt
internal_preds = classifier.val_pred_AD_prob

You are using a CUDA device ('NVIDIA GeForce RTX 4090 D') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
d:\programs\Anaconda\envs\neuroimaging\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:420: Consider setting `persistent_workers=True` in 'test_dataloader' to speed up the dataloader worker initialization.


Testing DataLoader 0: 100%|██████████| 27/27 [00:02<00:00, 11.69it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      Val/accuracy           0.827102780342102
       Val/auc_roc          0.9107847213745117
         Val/f1             0.8868501782417297
         Val/fn                    26.0
         Val/fp                    11.0
      Val/precision         0.9294871687889099
       Val/recall            0.847953200340271
         Val/tn                    32.0
         Val/tp                    145.0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────




In [5]:
internal_with_dccc_summary = trainer.test(classifier, dataloaders=internal_test_loader)
internal_with_dccc_gt = classifier.val_gt
internal_with_dccc_preds = classifier.val_pred_AD_prob

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 27/27 [00:05<00:00,  4.86it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      Val/accuracy          0.8644859790802002
       Val/auc_roc          0.9016727805137634
         Val/f1             0.9144542813301086
         Val/fn                    16.0
         Val/fp                    13.0
      Val/precision         0.9226190447807312
       Val/recall           0.9064327478408813
         Val/tn                    30.0
         Val/tp                    155.0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


In [6]:
external_summary = trainer.test(classifier, dataloaders=external_test_loader)
external_gt = classifier.val_gt
external_preds = classifier.val_pred_AD_prob

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
d:\programs\Anaconda\envs\neuroimaging\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:420: Consider setting `persistent_workers=True` in 'test_dataloader' to speed up the dataloader worker initialization.


Testing DataLoader 0: 100%|██████████| 14/14 [00:02<00:00,  6.33it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      Val/accuracy          0.7870370149612427
       Val/auc_roc          0.8092031478881836
         Val/f1             0.34285715222358704
         Val/fn                     3.0
         Val/fp                    20.0
      Val/precision         0.23076923191547394
       Val/recall           0.6666666865348816
         Val/tn                    79.0
         Val/tp                     6.0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


In [7]:
import numpy as np
from matplotlib import pyplot as plt
import scipy.stats as st
from sklearn import metrics

class DelongTest():
    def __init__(self,preds1,preds2,label,threshold=0.05):
        '''
        preds1:the output of model1
        preds2:the output of model2
        label :the actual label
        '''
        self._preds1=preds1
        self._preds2=preds2
        self._label=label
        self.threshold=threshold
        self._show_result()

    def _auc(self,X, Y)->float:
        return 1/(len(X)*len(Y)) * sum([self._kernel(x, y) for x in X for y in Y])

    def _kernel(self,X, Y)->float:
        '''
        Mann-Whitney statistic
        '''
        return .5 if Y==X else int(Y < X)

    def _structural_components(self,X, Y)->list:
        V10 = [1/len(Y) * sum([self._kernel(x, y) for y in Y]) for x in X]
        V01 = [1/len(X) * sum([self._kernel(x, y) for x in X]) for y in Y]
        return V10, V01

    def _get_S_entry(self,V_A, V_B, auc_A, auc_B)->float:
        return 1/(len(V_A)-1) * sum([(a-auc_A)*(b-auc_B) for a,b in zip(V_A, V_B)])
    
    def _z_score(self,var_A, var_B, covar_AB, auc_A, auc_B):
        return (auc_A - auc_B)/((var_A + var_B - 2*covar_AB )**(.5)+ 1e-8)

    def _group_preds_by_label(self,preds, actual)->list:
        X = [p for (p, a) in zip(preds, actual) if a]
        Y = [p for (p, a) in zip(preds, actual) if not a]
        return X, Y

    def _compute_z_p(self):
        X_A, Y_A = self._group_preds_by_label(self._preds1, self._label)
        X_B, Y_B = self._group_preds_by_label(self._preds2, self._label)

        V_A10, V_A01 = self._structural_components(X_A, Y_A)
        V_B10, V_B01 = self._structural_components(X_B, Y_B)

        auc_A = self._auc(X_A, Y_A)
        auc_B = self._auc(X_B, Y_B)

        # Compute entries of covariance matrix S (covar_AB = covar_BA)
        var_A = (self._get_S_entry(V_A10, V_A10, auc_A, auc_A) * 1/len(V_A10)+ self._get_S_entry(V_A01, V_A01, auc_A, auc_A) * 1/len(V_A01))
        var_B = (self._get_S_entry(V_B10, V_B10, auc_B, auc_B) * 1/len(V_B10)+ self._get_S_entry(V_B01, V_B01, auc_B, auc_B) * 1/len(V_B01))
        covar_AB = (self._get_S_entry(V_A10, V_B10, auc_A, auc_B) * 1/len(V_A10)+ self._get_S_entry(V_A01, V_B01, auc_A, auc_B) * 1/len(V_A01))

        # Two tailed test
        z = self._z_score(var_A, var_B, covar_AB, auc_A, auc_B)
        p = st.norm.sf(abs(z))*2

        return z,p

    def _show_result(self):
        z,p=self._compute_z_p()
        print(f"z score = {z:.5f};\np value = {p:.5f};")
        if p < self.threshold :print("There is a significant difference")
        else:        print("There is NO significant difference")


# Model A (random) vs. "good" model B
DelongTest(internal_preds,internal_with_dccc_preds,internal_gt)


# 计算DeLong检验
# auc1, auc2, auc_diff, p_value = delong_roc_test(internal_gt, internal_preds, internal_with_dccc_preds)
# print(f"AUC1: {auc1:.3f}, AUC2: {auc2:.3f}, AUC差异: {auc_diff:.3f}, p值: {p_value:.3f}")

z score = 0.90848;
p value = 0.36363;
There is NO significant difference


  return .5 if Y==X else int(Y < X)


<__main__.DelongTest at 0x207acb47b80>