In [1]:
import numpy as np
import pandas as pd
import torch
from torch.utils import data
from transformers import AutoTokenizer
from torch.utils.data import DataLoader

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# pull in your dataset here. It should have a column labeled 'text' containing the full radiology report text (not just impression).
# if you have narrative reports separate from the impressions, would concatenate the impressions at the end of the narratives.
reports = pd.read_csv('/data/clin_notes_outcomes/profile_3-2023/derived_data/labeled_imaging_prissmm.csv')
reports['progression'] = np.where(reports.class_status==3,1,reports.progression)
reports = reports[reports.split=='test']
inference_input = reports
inference_input['text'] = inference_input['text'].str.lower().str.replace("\n", " ")
inference_input.drop(inference_input.filter(regex='Unnamed|outcome').columns, axis=1, inplace=True)


In [3]:
class UnLabeledDataset(data.Dataset):
    def __init__(self, pandas_dataset):
        self.data = pandas_dataset.copy()
        self.indices = self.data.index.unique()
        self.tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased', truncation_side='left')        
        
        
    def __len__(self):
        # how many notes in the dataset
        return len(self.indices)
    
    def __getitem__(self, index):
        # get data for notes corresponding to indices passed
        this_index = self.indices[index]
        pand = self.data.loc[this_index, :]
    
        encoded = self.tokenizer(pand['text'], padding='max_length', truncation=True)

        x_text_tensor = torch.tensor(encoded.input_ids, dtype=torch.long)
        x_attention_mask = torch.tensor(encoded.attention_mask, dtype=torch.long)
        return x_text_tensor, x_attention_mask
        

In [4]:
from transformers import AutoModel
from torch.nn import functional as F
import torch.nn as nn
from torch.nn import Linear, Sequential, ReLU

   
class LabeledModel(nn.Module):

    def __init__(self):
        super(LabeledModel, self).__init__()
        
        self.bert = AutoModel.from_pretrained('bert-base-uncased')
        
        self.any_cancer_head = Sequential(Linear(768, 128), ReLU(), Linear(128,1))
        self.response_head = Sequential(Linear(768, 128), ReLU(), Linear(128,1))
        self.progression_head = Sequential(Linear(768, 128), ReLU(), Linear(128,1))
        self.brain_head = Sequential(Linear(768, 128), ReLU(), Linear(128,1))
        self.bone_head = Sequential(Linear(768, 128), ReLU(), Linear(128,1))
        self.adrenal_head = Sequential(Linear(768, 128), ReLU(), Linear(128,1))
        self.liver_head = Sequential(Linear(768, 128), ReLU(), Linear(128,1))
        self.lung_head = Sequential(Linear(768, 128), ReLU(), Linear(128,1))
        self.node_head = Sequential(Linear(768, 128), ReLU(), Linear(128,1))
        self.peritoneal_head = Sequential(Linear(768, 128), ReLU(), Linear(128,1))

        
    def forward(self, x_text_tensor, x_attention_mask):
        main = self.bert(x_text_tensor, x_attention_mask)
        main = main.last_hidden_state[:,0,:].squeeze(1)

                                          
        any_cancer_out = self.any_cancer_head(main)
        response_out = self.response_head(main)
        progression_out = self.progression_head(main)
        brain_out = self.brain_head(main)
        bone_out = self.bone_head(main)
        adrenal_out = self.adrenal_head(main)
        liver_out = self.liver_head(main)
        lung_out = self.lung_head(main)
        node_out = self.node_head(main)
        peritoneum_out = self.peritoneal_head(main)

        return any_cancer_out, response_out, progression_out, brain_out, bone_out, adrenal_out, liver_out, lung_out, node_out, peritoneum_out
        




In [5]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [7]:
# write out the inference dataset
themodel = LabeledModel()
themodel.load_state_dict(torch.load('./pan_prissmm_full_imaging_bert.pt'))
themodel.to(device)

themodel.eval()

dataset = data.DataLoader(UnLabeledDataset(inference_input), batch_size=16, shuffle=False, num_workers=0)

output_true_lists = [[] for x in range(10)]
output_prediction_lists = [[] for x in range(10)]
for batch in dataset:

    x_text_ids = batch[0].to(device)
    x_attention_mask = batch[1].to(device)
    with torch.no_grad():
        predictions = themodel(x_text_ids, x_attention_mask)
    for j in range(10):
        output_prediction_lists[j].append(predictions[j].detach().cpu().numpy())

output_prediction_lists = [np.concatenate(x) for x in output_prediction_lists]


output_dataset = inference_input.copy()
for x in range(10):
    output_dataset['outcome_' + str(x) + '_logit'] = output_prediction_lists[x]


In [8]:
output_dataset=output_dataset.rename(columns={'outcome_0_logit' : 'any_cancer_logit',
                                              'outcome_1_logit' : 'response_logit',
                                              'outcome_2_logit' : 'progression_logit',
                                              'outcome_3_logit' : 'brain_met_logit',
                                              'outcome_4_logit' : 'bone_met_logit',
                                              'outcome_5_logit' : 'adrenal_met_logit',
                                              'outcome_6_logit' : 'liver_met_logit',
                                              'outcome_7_logit' : 'lung_met_logit',
                                              'outcome_8_logit' : 'node_met_logit',
                                              'outcome_9_logit' : 'peritoneal_met_logit'})

In [9]:

output_dataset.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3460 entries, 103 to 37183
Data columns (total 43 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   dfci_mrn                  3460 non-null   float64
 1   cancer_type               3460 non-null   object 
 2   image_scan_type           3460 non-null   float64
 3   date                      3460 non-null   object 
 4   head_imaged               3460 non-null   float64
 5   neck_imaged               3460 non-null   float64
 6   spine_imaged              3460 non-null   float64
 7   chest_imaged              3460 non-null   float64
 8   abdomen_imaged            3460 non-null   float64
 9   pelvis_imaged             3460 non-null   float64
 10  any_cancer                3460 non-null   int64  
 11  progression               3460 non-null   int64  
 12  response                  3460 non-null   int64  
 13  class_status              3459 non-null   float64
 14  brain_met 

In [10]:
from utils_102023 import eval_model

In [11]:
from sklearn.metrics import roc_auc_score, average_precision_score, precision_recall_curve
from confidenceinterval.bootstrap import bootstrap_ci

def best_f1(y_true, y_score):
    precisions, recalls, thresholds = precision_recall_curve(y_true, y_score)
    f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
    f1_scores = f1_scores[~np.isnan(f1_scores)]
    thresholds = thresholds[:-1]
    best_threshold = thresholds[np.argmax(f1_scores)]
    best_f1 = np.max(f1_scores)
    return(best_f1)

def eval_with_cis(y_true, y_score):
    random_generator = np.random.default_rng()
    print('auroc')
    print(bootstrap_ci(y_true=y_true,
                 y_pred=y_score,
                 metric=roc_auc_score,
                 confidence_level=0.95,
                 n_resamples=1000,
                 method='bootstrap_bca',
                 random_state=random_generator))
    
    print('auprc')
    print(bootstrap_ci(y_true=y_true,
                 y_pred=y_score,
                 metric=average_precision_score,
                 confidence_level=0.95,
                 n_resamples=1000,
                 method='bootstrap_bca',
                 random_state=random_generator))
    
    print('best f1')
    print(bootstrap_ci(y_true=y_true,
                 y_pred=y_score,
                 metric=best_f1,
                 confidence_level=0.95,
                 n_resamples=1000,
                 method='bootstrap_bca',
                 random_state=random_generator))


In [12]:
for outcome in ['any_cancer','progression','response','brain_met','bone_met','adrenal_met','liver_met','lung_met','node_met','peritoneal_met']:
    print('all cancers')
    print(outcome)
    print(eval_model(output_dataset[outcome + '_logit'], output_dataset[outcome], graph=False))
    print(eval_with_cis(output_dataset[outcome], output_dataset[outcome + '_logit']))
    print("\n")

all cancers
any_cancer
AUC 0.9743241398235375
Outcome probability: 0.5531791907514451
Average precision score: 0.98
Best F1: 0.926250644662197
Best F1 threshold: 0.112576514
0.112576514
auroc
(0.9743241398235375, (0.9699528927538046, 0.9787726000841815))
auprc
(0.9777452719133486, (0.9721187797623889, 0.9818232807551027))
best f1
(0.926250644662197, (0.9164233562633809, 0.9327319781168054))
None


all cancers
progression
AUC 0.9555354364786938
Outcome probability: 0.23265895953757226
Average precision score: 0.88
Best F1: 0.7982673267326732
Best F1 threshold: -0.14636283
-0.14636283
auroc
(0.9555354364786938, (0.9478418745021847, 0.962963847424754))
auprc
(0.8790154552560852, (0.8549951859101514, 0.8976836025874901))
best f1
(0.7982673267326732, (0.772102290877267, 0.8156482659152976))
None


all cancers
response
AUC 0.9741017978835665
Outcome probability: 0.06329479768786127
Average precision score: 0.79
Best F1: 0.7310344827586207
Best F1 threshold: -0.12320555
-0.12320555
auroc
(0.9

  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_score

(0.713375796178344, (0.6250000000000001, 0.7814462505639113))
None


all cancers
liver_met
AUC 0.9866442773804871
Outcome probability: 0.11242774566473988
Average precision score: 0.89
Best F1: 0.8480392156862745
Best F1 threshold: -0.37037826
-0.37037826
auroc
(0.9866442773804871, (0.9817398201190178, 0.9898947803851091))
auprc
(0.8892280745192813, (0.850015090016015, 0.919138551679776))
best f1
(0.8480392156862745, (0.817480395689013, 0.8698407358143903))
None


all cancers
lung_met
AUC 0.9792392097045892
Outcome probability: 0.15375722543352602
Average precision score: 0.91
Best F1: 0.8253676470588236
Best F1 threshold: 0.040446997
0.040446997
auroc
(0.9792392097045892, (0.9735766925381767, 0.9834244854236975))
auprc
(0.9053341895456811, (0.8791143757513215, 0.9234522809788154))
best f1
(0.8253676470588236, (0.79935765255392, 0.8477566217746257))
None


all cancers
node_met
AUC 0.974277370846152
Outcome probability: 0.11473988439306358
Average precision score: 0.82
Best F1: 0.779249

In [13]:
output_dataset['cancer_type'] = np.where(output_dataset.cancer_type.str.contains('nsclc'), 'nsclc', output_dataset.cancer_type)

In [14]:
for cancer in output_dataset.cancer_type.unique():
    subset = output_dataset[output_dataset.cancer_type == cancer]
    for outcome in ['any_cancer','progression','response','brain_met','bone_met','adrenal_met','liver_met','lung_met','node_met','peritoneal_met']:
        print(cancer)
        print(outcome)
        print(eval_model(subset[outcome + '_logit'], subset[outcome], graph=False))
        print(eval_with_cis(output_dataset[outcome], output_dataset[outcome + '_logit']))

        print("\n")
    

prostate
any_cancer
AUC 0.9857324449899185
Outcome probability: 0.5661252900232019
Average precision score: 0.99
Best F1: 0.9598393574297189
Best F1 threshold: 0.7858024
0.7858024
auroc
(0.9743241398235375, (0.9687763138003237, 0.9787593213408511))
auprc
(0.9777452719133486, (0.9720127016286741, 0.9817654889887368))
best f1
(0.926250644662197, (0.9166740064154364, 0.9332650998305988))
None


prostate
progression
AUC 0.967201166180758
Outcome probability: 0.20417633410672853
Average precision score: 0.89
Best F1: 0.8323699421965318
Best F1 threshold: 0.3227973
0.3227973
auroc
(0.9555354364786938, (0.9471527592167226, 0.9622133191377741))
auprc
(0.8790154552560852, (0.8548978297994049, 0.8970983672101236))
best f1
(0.7982673267326732, (0.7729756906213995, 0.8175290743687723))
None


prostate
response
AUC 0.9554494828957836
Outcome probability: 0.027842227378190254
Average precision score: 0.56
Best F1: 0.5925925925925926
Best F1 threshold: -1.4151888
-1.4151888
auroc
(0.9741017978835665,

  F1 = 2*((precision*recall)/(precision+recall))


(0.9925895684409616, (0.9873932192345345, 0.9956369778569149))
auprc
(0.9367140616631078, (0.8892230481563621, 0.9588227066942019))
best f1
(0.8997955010224948, (0.8666213085422839, 0.922489977370546))
None


prostate
bone_met
AUC 0.9846976162765636
Outcome probability: 0.48491879350348027
Average precision score: 0.98
Best F1: 0.9626168224299065
Best F1 threshold: 0.8293912
0.8293912
auroc
(0.9894491341991342, (0.9859827082283706, 0.9921590815824011))
auprc
(0.9557770212081992, (0.9392521006733265, 0.9667227982235217))
best f1
(0.9077380952380952, (0.8875984916935533, 0.9197251637537125))
None


prostate
adrenal_met
no outcome variation to calculate metrics
None
auroc
(0.9878546905828924, (0.9664720197952513, 0.9935363249926301))
auprc
(0.7448038481751987, (0.6177107150071952, 0.8260582628588997))
best f1


  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_score

(0.713375796178344, (0.6280338981267793, 0.7822445933596952))
None


prostate
liver_met
AUC 0.9865384615384615
Outcome probability: 0.03480278422273782
Average precision score: 0.76
Best F1: 0.7857142857142856
Best F1 threshold: 0.34859237
0.34859237
auroc
(0.9866442773804871, (0.9818523846269475, 0.9903594169452528))
auprc
(0.8892280745192813, (0.8464243385032029, 0.9160374376146578))
best f1
(0.8480392156862745, (0.816239776819134, 0.8696711327649208))
None


prostate
lung_met
AUC 0.9949763593380614
Outcome probability: 0.018561484918793503
Average precision score: 0.84
Best F1: 0.7692307692307693
Best F1 threshold: 2.2117133
2.2117133
auroc
(0.9792392097045892, (0.973486961051685, 0.9830750102835646))
auprc
(0.9053341895456811, (0.8815403626959575, 0.9257148761468512))
best f1
(0.8253676470588236, (0.7991949900368968, 0.8451879839475096))
None


prostate
node_met
AUC 0.9918370146796542
Outcome probability: 0.08584686774941995
Average precision score: 0.92
Best F1: 0.8767123287671232

  F1 = 2*((precision*recall)/(precision+recall))


(0.9878546905828924, (0.9667611790574231, 0.9933337904656248))
auprc
(0.7448038481751987, (0.6284037286630276, 0.8365762440519904))
best f1


  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_score

(0.713375796178344, (0.6191620035881591, 0.7814762847756401))
None


breast
liver_met
AUC 0.9951395812562313
Outcome probability: 0.1259259259259259
Average precision score: 0.94
Best F1: 0.912751677852349
Best F1 threshold: -1.1708647
-1.1708647
auroc
(0.9866442773804871, (0.9821301692995646, 0.9898142636590314))
auprc
(0.8892280745192813, (0.8459040561965955, 0.918721941667047))
best f1
(0.8480392156862745, (0.8190212960198856, 0.8693880932610475))
None


breast
lung_met
AUC 0.9945372128162411
Outcome probability: 0.09814814814814815
Average precision score: 0.96
Best F1: 0.9056603773584906
Best F1 threshold: 1.0713903
1.0713903
auroc
(0.9792392097045892, (0.9728332998248714, 0.9833941904013217))
auprc
(0.9053341895456811, (0.8808815540164989, 0.9225017647660787))
best f1
(0.8253676470588236, (0.7981815658972105, 0.8480813078740852))
None


breast
node_met
AUC 0.9901960784313726
Outcome probability: 0.05555555555555555
Average precision score: 0.83
Best F1: 0.7936507936507938
Best F1

  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_score

(0.713375796178344, (0.6164983965372219, 0.7842876647359032))
None


crc
liver_met
AUC 0.9805558236032037
Outcome probability: 0.21112929623567922
Average precision score: 0.93
Best F1: 0.9153846153846152
Best F1 threshold: -0.029237304
-0.029237304
auroc
(0.9866442773804871, (0.9815678939170538, 0.9900413486950219))
auprc
(0.8892280745192813, (0.8407101419092603, 0.9174125657843167))
best f1
(0.8480392156862745, (0.8185085173682787, 0.8695288570082759))
None


crc
lung_met
AUC 0.9832336655592471
Outcome probability: 0.1407528641571195
Average precision score: 0.92
Best F1: 0.8343558282208589
Best F1 threshold: 1.1935328
1.1935328
auroc
(0.9792392097045892, (0.9732638491851567, 0.983251775033784))
auprc
(0.9053341895456811, (0.8805932697714625, 0.924086248535104))
best f1
(0.8253676470588236, (0.7967843362305442, 0.846085708053253))
None


crc
node_met
AUC 0.9554072838476508
Outcome probability: 0.10801963993453355
Average precision score: 0.72
Best F1: 0.7194244604316548
Best F1 thres

  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_score

(0.713375796178344, (0.614764405470755, 0.7841909075122424))
None


nsclc
liver_met
AUC 0.9922775057603688
Outcome probability: 0.06471816283924843
Average precision score: 0.90
Best F1: 0.8444444444444444
Best F1 threshold: -0.66498333
-0.66498333
auroc
(0.9866442773804871, (0.9821479240890578, 0.9896092826714206))
auprc
(0.8892280745192813, (0.8457618869680442, 0.919239294629213))
best f1
(0.8480392156862745, (0.8176342490078354, 0.8714114940668686))
None


nsclc
lung_met
AUC 0.964797967379723
Outcome probability: 0.3068893528183716
Average precision score: 0.92
Best F1: 0.8490566037735849
Best F1 threshold: -0.7519004
-0.7519004
auroc
(0.9792392097045892, (0.9727722886241512, 0.9836488401148297))
auprc
(0.9053341895456811, (0.8826915214438802, 0.9253177807422835))
best f1
(0.8253676470588236, (0.7965719943960605, 0.8467037631139958))
None


nsclc
node_met
AUC 0.9771436584168829
Outcome probability: 0.12421711899791232
Average precision score: 0.88
Best F1: 0.8220338983050848
Best F1

  F1 = 2*((precision*recall)/(precision+recall))


(0.9741017978835665, (0.963329692388757, 0.9813793081973076))
auprc
(0.7881117123858585, (0.7234182674537881, 0.8341482279127224))
best f1
(0.7310344827586207, (0.6772213458745198, 0.7684478371501272))
None


rcc_barkouny
brain_met
AUC 0.9982378854625551
Outcome probability: 0.06198347107438017
Average precision score: 0.98
Best F1: 0.9655172413793104
Best F1 threshold: -0.28120264
-0.28120264
auroc
(0.9925895684409616, (0.9876622775560114, 0.9955052407309902))
auprc
(0.9367140616631078, (0.8887061612721061, 0.9579486244057559))
best f1
(0.8997955010224948, (0.8695065917461254, 0.9248421598211896))
None


rcc_barkouny
bone_met
AUC 0.9930161943319838
Outcome probability: 0.21487603305785125
Average precision score: 0.97
Best F1: 0.9454545454545454
Best F1 threshold: -0.9910559
-0.9910559
auroc
(0.9894491341991342, (0.9859503525331544, 0.9918513952212605))
auprc
(0.9557770212081992, (0.939349228213181, 0.9669546786125088))
best f1
(0.9077380952380952, (0.889055756251692, 0.92178435707010

  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_score

(0.713375796178344, (0.6202653676456972, 0.7794440904942211))
None


rcc_barkouny
liver_met
AUC 0.9854388036206219
Outcome probability: 0.045454545454545456
Average precision score: 0.71
Best F1: 0.7586206896551725
Best F1 threshold: -1.226635
-1.226635
auroc
(0.9866442773804871, (0.982121692500675, 0.989701852688377))
auprc
(0.8892280745192813, (0.8456261606263101, 0.9183999926299427))
best f1
(0.8480392156862745, (0.8185870951693556, 0.8715697792463905))
None


rcc_barkouny
lung_met
AUC 0.9754273504273504
Outcome probability: 0.10743801652892562
Average precision score: 0.82
Best F1: 0.7457627118644068
Best F1 threshold: 0.6671173
0.6671173
auroc
(0.9792392097045892, (0.9730894494127955, 0.9830925093875239))
auprc
(0.9053341895456811, (0.8803797882246805, 0.9236225393672688))
best f1
(0.8253676470588236, (0.8009250717673847, 0.8468429300545669))
None


rcc_barkouny
node_met
AUC 0.9909700722394221
Outcome probability: 0.15702479338842976
Average precision score: 0.97
Best F1: 0.918918

  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_score

(0.713375796178344, (0.6153846153846153, 0.7770700636942676))
None


bladder
liver_met
AUC 0.9924636174636174
Outcome probability: 0.042071197411003236
Average precision score: 0.85
Best F1: 0.8148148148148148
Best F1 threshold: 0.7665204
0.7665204
auroc
(0.9866442773804871, (0.9820831796632051, 0.9897467576377136))
auprc
(0.8892280745192813, (0.8509017434334374, 0.9180263839995288))
best f1
(0.8480392156862745, (0.8188906075369268, 0.8708340702037448))
None


bladder
lung_met
AUC 0.9619461810274532
Outcome probability: 0.08414239482200647
Average precision score: 0.75
Best F1: 0.7777777777777779
Best F1 threshold: 0.9045065
0.9045065
auroc
(0.9792392097045892, (0.9735450027720925, 0.9834993765743612))
auprc
(0.9053341895456811, (0.8793433255576998, 0.9238672076376521))
best f1
(0.8253676470588236, (0.7980030730163632, 0.8461985941093076))
None


bladder
node_met
AUC 0.9644769491968134
Outcome probability: 0.20064724919093851
Average precision score: 0.85
Best F1: 0.8243243243243245
Be

  F1 = 2*((precision*recall)/(precision+recall))


(0.9878546905828924, (0.9709391574050804, 0.9933737072770585))
auprc
(0.7448038481751987, (0.613101772314988, 0.8191366121872053))
best f1


  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
  f1_score

(0.713375796178344, (0.618184297841026, 0.776978417266187))
None


pancreas
liver_met
AUC 0.9533955253379713
Outcome probability: 0.24661246612466126
Average precision score: 0.82
Best F1: 0.8190476190476191
Best F1 threshold: -1.5626113
-1.5626113
auroc
(0.9866442773804871, (0.9820591092144572, 0.9898963970331781))
auprc
(0.8892280745192813, (0.8493233815425806, 0.918568143416656))
best f1
(0.8480392156862745, (0.8176871224977871, 0.8733333162766244))
None


pancreas
lung_met
AUC 0.9610722610722611
Outcome probability: 0.10569105691056911
Average precision score: 0.84
Best F1: 0.761904761904762
Best F1 threshold: 2.0154307
2.0154307
auroc
(0.9792392097045892, (0.9731012049746638, 0.9829905255632503))
auprc
(0.9053341895456811, (0.881898743335592, 0.924255440961712))
best f1
(0.8253676470588236, (0.7996218010228828, 0.8462408614115986))
None


pancreas
node_met
AUC 0.9470507544581619
Outcome probability: 0.12195121951219512
Average precision score: 0.67
Best F1: 0.7090909090909091
Best