In [1]:
import numpy as np
import pandas as pd
import torch
from torch.utils import data
from transformers import AutoTokenizer
from torch.utils.data import DataLoader

In [2]:
# Load your dataset here. It must have a column labeled 'text' containing the full
# radiology report text (not just the impression). If narrative reports are stored
# separately from the impressions, concatenate each impression onto the end of its narrative.
reports = pd.read_csv('/mnt/d/Dropbox (Partners HealthCare)/profile_3-2023/derived_data/labeled_imaging_prissmm.csv')
# Rows with class_status == 3 are forced to progression = 1; all other rows keep their label.
reports['progression'] = np.where(reports.class_status==3,1,reports.progression)
# Build the test-split frame as one chain that returns a NEW DataFrame. The previous
# version assigned into (and dropped columns in place from) a boolean-filtered slice,
# which triggers SettingWithCopyWarning and makes the cell non-idempotent on re-run.
reports = (
    reports.loc[reports.split == 'test']
    # lower-case the text and flatten newlines; regex=False because "\n" is a literal
    .assign(text=lambda frame: frame['text'].str.lower().str.replace("\n", " ", regex=False))
    # remove every column whose name matches 'Unnamed' or 'outcome'
    .pipe(lambda frame: frame.drop(columns=frame.filter(regex='Unnamed|outcome').columns))
)
# As before, `inference_input` and `reports` refer to the same cleaned test-split frame.
inference_input = reports


In [3]:
class UnLabeledDataset(data.Dataset):
    """Torch dataset that tokenizes the 'text' column of a DataFrame for inference.

    Each item is a pair ``(input_ids, attention_mask)`` of ``torch.long`` tensors
    built from one row's text. Rows are addressed by position, so item ``i``
    always corresponds to the ``i``-th row of the frame, even when the frame's
    index contains repeated labels.
    """

    def __init__(self, pandas_dataset):
        """Copy the input frame and load the tokenizer.

        Args:
            pandas_dataset: DataFrame with a 'text' column; it is copied, so the
                caller's frame is not modified by this object.
        """
        self.data = pandas_dataset.copy()
        # Kept for backward compatibility with code that inspects it; row lookup
        # no longer goes through index labels (see __getitem__).
        self.indices = self.data.index.unique()
        # truncation_side='left' is passed through to the tokenizer; presumably this keeps
        # the end of over-long reports (where the impression is expected) -- confirm.
        self.tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased', truncation_side='left')

    def __len__(self):
        """Return the number of rows (notes) in the dataset."""
        # Count rows, not unique index labels: with duplicated labels the unique
        # count would be smaller than the number of rows to predict on.
        return len(self.data)

    def __getitem__(self, index):
        """Tokenize the note at positional ``index``.

        Returns:
            tuple: ``(input_ids, attention_mask)``, both ``torch.long`` tensors.
        """
        # Positional access: .loc with a duplicated label would return several rows
        # (a Series of texts) instead of a single string.
        report_text = self.data['text'].iloc[index]

        encoded = self.tokenizer(report_text, padding='max_length', truncation=True)

        x_text_tensor = torch.tensor(encoded.input_ids, dtype=torch.long)
        x_attention_mask = torch.tensor(encoded.attention_mask, dtype=torch.long)
        return x_text_tensor, x_attention_mask
        

In [4]:
from transformers import AutoModel
from torch.nn import functional as F
import torch.nn as nn
from torch.nn import Linear, Sequential, ReLU

   
class LabeledModel(nn.Module):
    """BERT encoder with ten independent single-logit classification heads.

    The first-token hidden state of 'bert-base-uncased' is fed to each head;
    ``forward`` returns the ten head outputs as a tuple in a fixed order.
    """

    # Attribute names of the heads, in the order their outputs are returned.
    # These names are also the state-dict key prefixes, so they must not change.
    _HEAD_ATTRS = (
        'any_cancer_head',
        'response_head',
        'progression_head',
        'brain_head',
        'bone_head',
        'adrenal_head',
        'liver_head',
        'lung_head',
        'node_head',
        'peritoneal_head',
    )

    def __init__(self):
        super().__init__()

        self.bert = AutoModel.from_pretrained('bert-base-uncased')

        # One 768 -> 128 -> 1 MLP per outcome, registered under its own attribute name.
        for head_attr in self._HEAD_ATTRS:
            setattr(self, head_attr, Sequential(Linear(768, 128), ReLU(), Linear(128, 1)))

    def forward(self, x_text_tensor, x_attention_mask):
        """Encode the batch and apply every head to the first-token representation.

        Returns:
            tuple: ten tensors, ordered as any_cancer, response, progression,
            brain, bone, adrenal, liver, lung, node, peritoneum.
        """
        encoder_output = self.bert(x_text_tensor, x_attention_mask)
        # Hidden state at sequence position 0 for every item in the batch.
        pooled = encoder_output.last_hidden_state[:, 0, :].squeeze(1)

        return tuple(getattr(self, head_attr)(pooled) for head_attr in self._HEAD_ATTRS)
        




In [5]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [6]:
# Run the trained model over the inference set and attach one logit column per model output.
themodel = LabeledModel()
# map_location=device lets a checkpoint that was saved from a GPU load on a CPU-only
# machine, matching the cuda/cpu fallback used when `device` was chosen.
# NOTE(review): torch.load unpickles the file -- only load checkpoints from a trusted source.
themodel.load_state_dict(torch.load('./pan_prissmm_full_imaging_bert.pt', map_location=device))
themodel.to(device)

themodel.eval()

# shuffle=False keeps the predictions in the same row order as inference_input,
# which the column assignment below relies on.
dataset = data.DataLoader(UnLabeledDataset(inference_input), batch_size=16, shuffle=False, num_workers=0)

# output_true_lists is created but never filled in this cell (no labels are read here).
output_true_lists = [[] for x in range(10)]
output_prediction_lists = [[] for x in range(10)]
# Inference only: no gradients are needed anywhere inside the loop.
with torch.no_grad():
    for batch in dataset:

        x_text_ids = batch[0].to(device)
        x_attention_mask = batch[1].to(device)
        predictions = themodel(x_text_ids, x_attention_mask)
        # Collect each of the 10 model outputs batch by batch as NumPy arrays on the CPU.
        for j in range(10):
            output_prediction_lists[j].append(predictions[j].detach().cpu().numpy())

# Stitch the per-batch arrays into one array per output.
output_prediction_lists = [np.concatenate(x) for x in output_prediction_lists]


output_dataset = inference_input.copy()
for x in range(10):
    # Each head emits shape (batch, 1); flatten to 1-D so the column assignment
    # does not depend on pandas accepting a 2-D single-column array.
    output_dataset['outcome_' + str(x) + '_logit'] = output_prediction_lists[x].reshape(-1)


In [7]:
# Replace the positional logit column names with descriptive outcome names.
positional_logit_columns = ['outcome_0_logit', 'outcome_1_logit', 'outcome_2_logit',
                            'outcome_3_logit', 'outcome_4_logit', 'outcome_5_logit',
                            'outcome_6_logit', 'outcome_7_logit', 'outcome_8_logit',
                            'outcome_9_logit']
named_logit_columns = ['any_cancer_logit', 'response_logit', 'progression_logit',
                       'brain_met_logit', 'bone_met_logit', 'adrenal_met_logit',
                       'liver_met_logit', 'lung_met_logit', 'node_met_logit',
                       'peritoneal_met_logit']
output_dataset = output_dataset.rename(
    columns=dict(zip(positional_logit_columns, named_logit_columns))
)

In [8]:

# Print a summary of the prediction frame: its index, columns, non-null counts and dtypes.
output_dataset.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3460 entries, 103 to 37183
Data columns (total 43 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   dfci_mrn                  3460 non-null   float64
 1   cancer_type               3460 non-null   object 
 2   image_scan_type           3460 non-null   float64
 3   date                      3460 non-null   object 
 4   head_imaged               3460 non-null   float64
 5   neck_imaged               3460 non-null   float64
 6   spine_imaged              3460 non-null   float64
 7   chest_imaged              3460 non-null   float64
 8   abdomen_imaged            3460 non-null   float64
 9   pelvis_imaged             3460 non-null   float64
 10  any_cancer                3460 non-null   int64  
 11  progression               3460 non-null   int64  
 12  response                  3460 non-null   int64  
 13  class_status              3459 non-null   float64
 14  brain

In [9]:
from utils_102023 import eval_model

In [10]:
# Score every outcome on the whole test set (all cancer types pooled together).
outcome_names = ['any_cancer','progression','response','brain_met','bone_met','adrenal_met','liver_met','lung_met','node_met','peritoneal_met']
for outcome in outcome_names:
    print('all cancers', outcome, sep="\n")
    # Compare the model's logit column with the label column of the same outcome.
    logits = output_dataset[outcome + '_logit']
    labels = output_dataset[outcome]
    evaluation = eval_model(logits, labels, graph=False)
    print(evaluation)
    print("\n")

all cancers
any_cancer
AUC 0.9743241398235375
Outcome probability: 0.5531791907514451
Average precision score: 0.98
Best F1: 0.926250644662197
Best F1 threshold: 0.112576514
0.112576514


all cancers
progression
AUC 0.9555354364786938
Outcome probability: 0.23265895953757226
Average precision score: 0.88
Best F1: 0.7982673267326732
Best F1 threshold: -0.14636283
-0.14636283


all cancers
response
AUC 0.9741017978835665
Outcome probability: 0.06329479768786127
Average precision score: 0.79
Best F1: 0.7310344827586207
Best F1 threshold: -0.12320555
-0.12320555


all cancers
brain_met
AUC 0.9925895684409616
Outcome probability: 0.07138728323699423
Average precision score: 0.94
Best F1: 0.8997955010224948
Best F1 threshold: 0.36863852
0.36863852


all cancers
bone_met
AUC 0.9894491341991342
Outcome probability: 0.1907514450867052
Average precision score: 0.96
Best F1: 0.9077380952380952
Best F1 threshold: 0.39138335
0.39138335


all cancers
adrenal_met
AUC 0.9878546905828924
Outcome probab

In [11]:
# Repeat the per-outcome evaluation separately for each cancer type in the test set.
outcome_names = ['any_cancer','progression','response','brain_met','bone_met','adrenal_met','liver_met','lung_met','node_met','peritoneal_met']
for cancer in output_dataset.cancer_type.unique():
    # Rows of the prediction frame belonging to this cancer type only.
    is_this_cancer = output_dataset.cancer_type == cancer
    subset = output_dataset[is_this_cancer]
    for outcome in outcome_names:
        print(cancer)
        print(outcome)
        logits = subset[outcome + '_logit']
        labels = subset[outcome]
        evaluation = eval_model(logits, labels, graph=False)
        print(evaluation)
        print("\n")
    

prostate
any_cancer
AUC 0.9857324449899185
Outcome probability: 0.5661252900232019
Average precision score: 0.99
Best F1: 0.9598393574297189
Best F1 threshold: 0.7858024
0.7858024


prostate
progression
AUC 0.967201166180758
Outcome probability: 0.20417633410672853
Average precision score: 0.89
Best F1: 0.8323699421965318
Best F1 threshold: 0.3227973
0.3227973


prostate
response
AUC 0.9554494828957836
Outcome probability: 0.027842227378190254
Average precision score: 0.56
Best F1: 0.5925925925925926
Best F1 threshold: -1.4151888
-1.4151888


prostate
brain_met
AUC 0.9291277258566977
Outcome probability: 0.0069605568445475635
Average precision score: 0.23
Best F1: 0.4444444444444444
Best F1 threshold: 1.8895986
1.8895986


prostate
bone_met
AUC 0.9846976162765636
Outcome probability: 0.48491879350348027
Average precision score: 0.98
Best F1: 0.9626168224299065
Best F1 threshold: 0.8293912
0.8293912


prostate
adrenal_met
no outcome variation to calculate metrics
None


prostate
liver_m

  F1 = 2*((precision*recall)/(precision+recall))
  F1 = 2*((precision*recall)/(precision+recall))
  F1 = 2*((precision*recall)/(precision+recall))


AUC 0.9771436584168829
Outcome probability: 0.12421711899791232
Average precision score: 0.88
Best F1: 0.8220338983050848
Best F1 threshold: 0.27703995
0.27703995


nsclc_phase2_existing
peritoneal_met
AUC 0.9957805907172996
Outcome probability: 0.010438413361169102
Average precision score: 0.78
Best F1: 0.761904761904762
Best F1 threshold: -0.6971576
-0.6971576


rcc_barkouny
any_cancer
AUC 0.9882075471698113
Outcome probability: 0.4380165289256198
Average precision score: 0.99
Best F1: 0.9333333333333333
Best F1 threshold: 1.2351178
1.2351178


rcc_barkouny
progression
AUC 0.956677959141772
Outcome probability: 0.21074380165289255
Average precision score: 0.88
Best F1: 0.7777777777777778
Best F1 threshold: 0.8555153
0.8555153


rcc_barkouny
response
AUC 0.9356223175965666
Outcome probability: 0.0371900826446281
Average precision score: 0.38
Best F1: 0.5
Best F1 threshold: 2.6432142
2.6432142


rcc_barkouny
brain_met
AUC 0.9982378854625551
Outcome probability: 0.06198347107438017
Aver

  F1 = 2*((precision*recall)/(precision+recall))
