In [None]:
# this notebook calculates performance metrics on external MSK imaging test data
# (MSK ran the DFCI-student-imaging model on its own data, then shared deidentified outputs for evaluation)

In [1]:
import os

import numpy as np
import pandas as pd
import torch
from torch.utils import data
from torch.utils.data import DataLoader
from transformers import AutoTokenizer

In [28]:
# Deidentified MSK predictions shared for external evaluation.
# The location can be overridden with the MSK_PREDICTIONS_CSV environment
# variable so the notebook is not tied to one machine's Dropbox mount; when the
# variable is unset the original path is used, so behaviour is unchanged.
MSK_PREDICTIONS_CSV = os.environ.get(
    'MSK_PREDICTIONS_CSV',
    '/mnt/d/Dropbox (Partners HealthCare)/dfci_mimic_imaging_bert/msk_imaging_data/msk_predictions_radiology_noPHI.csv',
)
output_dataset = pd.read_csv(MSK_PREDICTIONS_CSV)

In [29]:
# Sanity-check the loaded frame: row count, column names, non-null counts and dtypes.
output_dataset.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24472 entries, 0 to 24471
Data columns (total 25 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   DMP_ID               24472 non-null  object 
 1   cancer_type          24472 non-null  object 
 2   any_cancer_label     24472 non-null  bool   
 3   response_label       24472 non-null  bool   
 4   progression_label    24472 non-null  bool   
 5   Bone                 24472 non-null  int64  
 6   CNS_Brain            24472 non-null  int64  
 7   Intra-Abdominal      24472 non-null  int64  
 8   Liver                24472 non-null  int64  
 9   Lung                 24472 non-null  int64  
 10  Other                24472 non-null  int64  
 11  Pleura               24472 non-null  int64  
 12  Reproductive_Organs  24472 non-null  int64  
 13  adrenal_glands       24472 non-null  int64  
 14  lymph_nodes          24472 non-null  int64  
 15  prob_any_cancer      24472 non-null 

In [30]:
# Harmonize the MSK column names with what the evaluation loops below look up:
# a label column named `<outcome>` and a score column named `prob_<outcome>`.
# Columns not listed here (e.g. Other, Pleura, Reproductive_Organs) keep their names.
output_dataset = output_dataset.rename(
    columns={
        # ground-truth labels: overall outcomes
        'any_cancer_label': 'any_cancer',
        'response_label': 'response',
        'progression_label': 'progression',
        # ground-truth labels: metastatic sites
        'Bone': 'bone_met',
        'CNS_Brain': 'brain_met',
        'Intra-Abdominal': 'peritoneal_met',
        'Liver': 'liver_met',
        'Lung': 'lung_met',
        'adrenal_glands': 'adrenal_met',
        'lymph_nodes': 'node_met',
        # model probabilities: metastatic sites
        'prob_lung': 'prob_lung_met',
        'prob_brain': 'prob_brain_met',
        'prob_bone': 'prob_bone_met',
        'prob_adrenal': 'prob_adrenal_met',
        'prob_liver': 'prob_liver_met',
        'prob_node': 'prob_node_met',
        'prob_peritoneum': 'prob_peritoneal_met',
    }
)

In [1]:
#output_dataset

In [32]:
from utils_102023 import eval_model

In [33]:
# Evaluate each outcome on the full dataset (all cancer types pooled).
for outcome in (
    'any_cancer', 'progression', 'response',
    'brain_met', 'bone_met', 'adrenal_met', 'liver_met',
    'lung_met', 'node_met', 'peritoneal_met',
):
    # Header: cohort name, then the outcome being scored.
    print('all cancers', outcome, sep='\n')
    # Judging by the recorded output, eval_model prints its own metric lines and
    # returns a value (apparently the best-F1 threshold), which is printed after them.
    print(
        eval_model(
            output_dataset['prob_' + outcome],
            output_dataset[outcome],
            graph=False,
        )
    )
    # Blank-line separator between outcomes.
    print("\n")

all cancers
any_cancer
AUC 0.9872538656321208
Outcome probability: 0.7824043805165086
Average precision score: 1.00
Best F1: 0.9721997441854395
Best F1 threshold: 0.2352055013179779
0.2352055013179779


all cancers
progression
AUC 0.9704799735551629
Outcome probability: 0.3877492644655116
Average precision score: 0.96
Best F1: 0.8912105618795156
Best F1 threshold: 0.3986702859401703
0.3986702859401703


all cancers
response
AUC 0.9738039935985858
Outcome probability: 0.12732919254658384
Average precision score: 0.88
Best F1: 0.8054941585096305
Best F1 threshold: 0.4937248826026916
0.4937248826026916


all cancers
brain_met
AUC 0.9892708938752353
Outcome probability: 0.015691402419091206
Average precision score: 0.83
Best F1: 0.7872078720787208
Best F1 threshold: 0.1955713033676147
0.1955713033676147


all cancers
bone_met
AUC 0.9912585432486332
Outcome probability: 0.25257437070938216
Average precision score: 0.98
Best F1: 0.9331122166943062
Best F1 threshold: 0.3544026911258697
0.3544

In [34]:
# Repeat the evaluation separately for every cancer type present in the data.
for cancer in output_dataset['cancer_type'].unique():
    # Rows belonging to the current cancer type only.
    subset = output_dataset.loc[output_dataset['cancer_type'] == cancer]
    for outcome in (
        'any_cancer', 'progression', 'response',
        'brain_met', 'bone_met', 'adrenal_met', 'liver_met',
        'lung_met', 'node_met', 'peritoneal_met',
    ):
        # Header: cancer type, then the outcome being scored.
        print(cancer, outcome, sep='\n')
        # eval_model's return value is printed after whatever it prints itself.
        print(
            eval_model(
                subset['prob_' + outcome],
                subset[outcome],
                graph=False,
            )
        )
        # Blank-line separator between outcomes.
        print("\n")
    

brca
any_cancer
AUC 0.9865254799540846
Outcome probability: 0.8551816958277254
Average precision score: 1.00
Best F1: 0.9768025078369905
Best F1 threshold: 0.1746265441179275
0.1746265441179275


brca
progression
AUC 0.9602610362535804
Outcome probability: 0.41938088829071335
Average precision score: 0.95
Best F1: 0.8807631160572338
Best F1 threshold: 0.4114409685134887
0.4114409685134887


brca
response
AUC 0.960192271178574
Outcome probability: 0.12570659488559893
Average precision score: 0.77
Best F1: 0.7344064386317909
Best F1 threshold: 0.4896712899208069
0.4896712899208069


brca
brain_met
AUC 0.9821990737927094
Outcome probability: 0.03714670255720054
Average precision score: 0.86
Best F1: 0.7999999999999999
Best F1 threshold: 0.1877919435501098
0.1877919435501098


brca
bone_met
AUC 0.9798061968361857
Outcome probability: 0.5580080753701211
Average precision score: 0.98
Best F1: 0.9373232799245993
Best F1 threshold: 0.3166042268276214
0.3166042268276214


brca
adrenal_met
AUC 0