In [None]:
# This notebook illustrates simple membership inference attacks against (1) an overfit version of the teacher model
# and (2) a version of the student model trained to predict the outputs of that overfit teacher.

In [1]:
import pandas as pd
import numpy as np
from transformers import AutoTokenizer
import torch
import os
# Set the TOKENIZERS_PARALLELISM environment variable to "false" for this kernel process.
# NOTE(review): presumably this is to silence the Hugging Face tokenizers parallelism/fork warning — confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Read the labeled notes CSV into a DataFrame; later cells partition it on its `split` column.
PHI_DATA_CSV = '/data/clin_notes_outcomes/profile_3-2023/derived_data/labeled_medonc_prissmm_mixedisprog.csv'
phi_data = pd.read_csv(PHI_DATA_CSV)


In [3]:
# Keep only the rows whose `split` value is 'train', then summarize the result.
is_train_row = phi_data['split'] == 'train'
training = phi_data[is_train_row]
training.info()

<class 'pandas.core.frame.DataFrame'>
Index: 32173 entries, 0 to 39190
Data columns (total 17 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Unnamed: 0.1              32173 non-null  int64  
 1   dfci_mrn                  32173 non-null  float64
 2   cancer_type               32173 non-null  object 
 3   date                      32173 non-null  object 
 4   any_cancer                32173 non-null  int64  
 5   progression               32173 non-null  int64  
 6   response                  32173 non-null  int64  
 7   Unnamed: 0                32173 non-null  int64  
 8   text                      32173 non-null  object 
 9   PROVIDER_DEPARTMENT       32173 non-null  object 
 10  patient_id                32173 non-null  int64  
 11  hybrid_death_ind          32173 non-null  object 
 12  hybrid_death_dt           19552 non-null  object 
 13  primary_cancer_diagnosis  32173 non-null  object 
 14  genomics_da

In [4]:
# Number of distinct `dfci_mrn` values among the training rows.
training['dfci_mrn'].nunique()

2917

In [5]:
# Keep only the rows whose `split` value is 'validation', then summarize the result.
is_validation_row = phi_data['split'] == 'validation'
validation = phi_data[is_validation_row]
validation.info()


<class 'pandas.core.frame.DataFrame'>
Index: 3636 entries, 362 to 39187
Data columns (total 17 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Unnamed: 0.1              3636 non-null   int64  
 1   dfci_mrn                  3636 non-null   float64
 2   cancer_type               3636 non-null   object 
 3   date                      3636 non-null   object 
 4   any_cancer                3636 non-null   int64  
 5   progression               3636 non-null   int64  
 6   response                  3636 non-null   int64  
 7   Unnamed: 0                3636 non-null   int64  
 8   text                      3636 non-null   object 
 9   PROVIDER_DEPARTMENT       3636 non-null   object 
 10  patient_id                3636 non-null   int64  
 11  hybrid_death_ind          3636 non-null   object 
 12  hybrid_death_dt           2416 non-null   object 
 13  primary_cancer_diagnosis  3636 non-null   object 
 14  genomics_d

In [6]:
# Keep only the rows whose `split` value is 'test', then summarize the result.
is_test_row = phi_data['split'] == 'test'
test = phi_data[is_test_row]
test.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3382 entries, 57 to 39088
Data columns (total 17 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Unnamed: 0.1              3382 non-null   int64  
 1   dfci_mrn                  3382 non-null   float64
 2   cancer_type               3382 non-null   object 
 3   date                      3382 non-null   object 
 4   any_cancer                3382 non-null   int64  
 5   progression               3382 non-null   int64  
 6   response                  3382 non-null   int64  
 7   Unnamed: 0                3382 non-null   int64  
 8   text                      3382 non-null   object 
 9   PROVIDER_DEPARTMENT       3382 non-null   object 
 10  patient_id                3382 non-null   int64  
 11  hybrid_death_ind          3382 non-null   object 
 12  hybrid_death_dt           2160 non-null   object 
 13  primary_cancer_diagnosis  3382 non-null   object 
 14  genomics_da

In [29]:
# Draw 100 notes from each of the train and validation splits with a fixed seed,
# so the same rows are selected on every run.
sample_kwargs = dict(n=100, random_state=42)
sample_train = training.sample(**sample_kwargs)
sample_validation = validation.sample(**sample_kwargs)


In [31]:

from transformers import AutoModel



from torch.nn import functional as F
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.nn import LSTM, Linear, Embedding, Conv1d, MaxPool1d, GRU, LSTMCell, Dropout, Module, Sequential, ReLU

   
class LabeledModel(nn.Module):
    """Clinical-Longformer encoder followed by three single-logit heads.

    The encoder's hidden state at sequence position 0 is fed to three
    independent two-layer MLP heads (any_cancer, response, progression).
    Each head emits one raw logit per input row.

    Args:
        device: Accepted for backward compatibility but not used. Place the
            module with ``.to(...)``; ``forward`` runs on whatever device its
            input tensors live on.
    """

    def __init__(self, device='cuda'):
        super().__init__()

        self.longformer = AutoModel.from_pretrained('yikuan8/Clinical-Longformer')
        self.any_cancer_head = Sequential(Linear(768, 128), ReLU(), Linear(128, 1))
        self.response_head = Sequential(Linear(768, 128), ReLU(), Linear(128, 1))
        self.progression_head = Sequential(Linear(768, 128), ReLU(), Linear(128, 1))

    def forward(self, x_text_tensor, x_attention_mask):
        """Run the encoder and the three heads.

        Args:
            x_text_tensor: Token id tensor, indexed here as (row, position).
            x_attention_mask: Attention mask passed through to the encoder.

        Returns:
            Tuple ``(any_cancer_out, response_out, progression_out)`` of raw
            logits, one head output per element.
        """
        # zeros_like inherits dtype and device from the token ids, so the mask
        # lands on the same device as the inputs instead of a hard-coded 'cuda'.
        global_attention_mask = torch.zeros_like(x_text_tensor)
        # global attention on the first (cls) token only
        global_attention_mask[:, 0] = 1

        encoded = self.longformer(
            input_ids=x_text_tensor,
            attention_mask=x_attention_mask,
            global_attention_mask=global_attention_mask,
        )
        # Hidden state of the first token for every row.
        main = encoded.last_hidden_state[:, 0, :]

        any_cancer_out = self.any_cancer_head(main)
        response_out = self.response_head(main)
        progression_out = self.progression_head(main)

        return any_cancer_out, response_out, progression_out
        




In [32]:
from torch.utils import data

class LabeledDataset(data.Dataset):
    """Map-style dataset yielding tokenized note text plus three float labels.

    Rows are addressed by position, so the dataset has exactly one item per
    row of ``pandas_dataset``, in row order. This holds even when index labels
    repeat (for example after ``pd.concat`` of frames with overlapping indices);
    a label-based lookup would under-count and return multiple rows in that case.

    Args:
        pandas_dataset: DataFrame with ``text``, ``any_cancer``, ``response``
            and ``progression`` columns. It is copied, not modified.
    """

    def __init__(self, pandas_dataset):
        self.data = pandas_dataset.copy()
        # Retained for backward compatibility; item lookup no longer goes through these labels.
        self.indices = self.data.index.unique()
        self.tokenizer = AutoTokenizer.from_pretrained("yikuan8/Clinical-Longformer", truncation_side='left')

    def __len__(self):
        # one item per row of the frame
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(input_ids, attention_mask, y_any_cancer, y_response, y_progression)`` for one row."""
        # Positional lookup always yields a single row, regardless of index labels.
        pand = self.data.iloc[index]

        # Pad to the tokenizer's maximum length and truncate longer notes
        # (the tokenizer is configured above to truncate from the left).
        encoded = self.tokenizer(pand['text'], padding='max_length', truncation=True)

        x_text_tensor = torch.tensor(encoded.input_ids, dtype=torch.long)
        x_attention_mask = torch.tensor(encoded.attention_mask, dtype=torch.long)

        y_any_cancer = torch.tensor(pand['any_cancer'], dtype=torch.float32)
        y_response = torch.tensor(pand['response'], dtype=torch.float32)
        y_progression = torch.tensor(pand['progression'], dtype=torch.float32)

        return x_text_tensor, x_attention_mask, y_any_cancer, y_response, y_progression
        
        

In [33]:
# Standalone tokenizer for the same checkpoint, truncating over-long inputs from the left.
tokenizer = AutoTokenizer.from_pretrained(
    "yikuan8/Clinical-Longformer",
    truncation_side='left',
)


In [34]:
# Stack the sampled training rows on top of the sampled validation rows (original index labels are kept).
attack_frames = [sample_train, sample_validation]
train_validation = pd.concat(attack_frames, axis=0)

In [36]:
# inference on training_validation dataset for teacher model
themodel = LabeledModel()
# Deserialize the checkpoint onto CPU first so loading does not depend on the device
# it was saved from; the module is moved to the GPU right after.
themodel.load_state_dict(torch.load('dfci_phi_note_longformer_overfit_small_train.pt', map_location='cpu'))
themodel.to('cuda')

themodel.eval()

# shuffle=False keeps batches in row order, so predictions can be attached by position below.
no_shuffle_valid_dataset = data.DataLoader(LabeledDataset(train_validation), batch_size=4, shuffle=False, num_workers=0)

# One list per outcome head: index 0 = any_cancer, 1 = response, 2 = progression.
output_true_lists = [[] for _ in range(3)]
output_prediction_lists = [[] for _ in range(3)]
for batch in no_shuffle_valid_dataset:
    x_text_ids = batch[0].to('cuda')
    x_attention_mask = batch[1].to('cuda')
    with torch.no_grad():
        predictions = themodel(x_text_ids, x_attention_mask)
    for j in range(3):
        output_true_lists[j].append(batch[2+j].detach().cpu().numpy())
        output_prediction_lists[j].append(predictions[j].detach().cpu().numpy())

output_true_lists = [np.concatenate(chunks) for chunks in output_true_lists]
# Each head emits one logit per row with a trailing axis of size 1; flatten to a 1-D array per head.
output_prediction_lists = [np.concatenate(chunks).reshape(-1) for chunks in output_prediction_lists]

# Logits are attached positionally, so there must be exactly one prediction per row.
assert all(len(logits) == len(train_validation) for logits in output_prediction_lists), \
    "number of predictions does not match number of rows in train_validation"

output_validation = train_validation.copy()
for x in range(3):
    output_validation['outcome_' + str(x) + '_logit'] = output_prediction_lists[x]



Some weights of LongformerModel were not initialized from the model checkpoint at yikuan8/Clinical-Longformer and are newly initialized: ['longformer.pooler.dense.bias', 'longformer.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [37]:
# Rebind train_validation to an independent (deep) copy of the frame that carries the logit columns.
train_validation = output_validation.copy(deep=True)

In [38]:
# Persist the teacher logits; the row index is written too (pandas default), as the first, unnamed column.
train_validation.to_csv('teacher_attack_dataset_small_train.csv', index=True)

In [39]:
# Rebuild the combined sample from the original rows so it carries no logit columns from the previous model.
attack_frames = [sample_train, sample_validation]
train_validation = pd.concat(attack_frames, axis=0)

In [40]:
# inference on training_validation dataset for student model
themodel = LabeledModel()
# Deserialize the checkpoint onto CPU first so loading does not depend on the device
# it was saved from; the module is moved to the GPU right after.
themodel.load_state_dict(torch.load('dfci_mimic_note_longformer_overfit_small_train.pt', map_location='cpu'))
themodel.to('cuda')

themodel.eval()

# shuffle=False keeps batches in row order, so predictions can be attached by position below.
no_shuffle_valid_dataset = data.DataLoader(LabeledDataset(train_validation), batch_size=4, shuffle=False, num_workers=0)

# One list per outcome head: index 0 = any_cancer, 1 = response, 2 = progression.
output_true_lists = [[] for _ in range(3)]
output_prediction_lists = [[] for _ in range(3)]
for batch in no_shuffle_valid_dataset:
    x_text_ids = batch[0].to('cuda')
    x_attention_mask = batch[1].to('cuda')
    with torch.no_grad():
        predictions = themodel(x_text_ids, x_attention_mask)
    for j in range(3):
        output_true_lists[j].append(batch[2+j].detach().cpu().numpy())
        output_prediction_lists[j].append(predictions[j].detach().cpu().numpy())

output_true_lists = [np.concatenate(chunks) for chunks in output_true_lists]
# Each head emits one logit per row with a trailing axis of size 1; flatten to a 1-D array per head.
output_prediction_lists = [np.concatenate(chunks).reshape(-1) for chunks in output_prediction_lists]

# Logits are attached positionally, so there must be exactly one prediction per row.
assert all(len(logits) == len(train_validation) for logits in output_prediction_lists), \
    "number of predictions does not match number of rows in train_validation"

output_validation = train_validation.copy()
for x in range(3):
    output_validation['outcome_' + str(x) + '_logit'] = output_prediction_lists[x]



Some weights of LongformerModel were not initialized from the model checkpoint at yikuan8/Clinical-Longformer and are newly initialized: ['longformer.pooler.dense.bias', 'longformer.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [41]:
# Rebind train_validation to an independent (deep) copy of the frame that carries the logit columns.
train_validation = output_validation.copy(deep=True)

In [42]:
# Persist the student logits; the row index is written too (pandas default), as the first, unnamed column.
train_validation.to_csv('student_attack_dataset_small_train.csv', index=True)

In [74]:
# Reload the saved teacher logits and label each note: 1.0 if it came from the 'train' split, else 0.0.
train_validation = pd.read_csv('teacher_attack_dataset_small_train.csv')
came_from_train = train_validation['split'] == 'train'
train_validation['in_teacher_training'] = np.where(came_from_train, 1.0, 0.0)

# Partition by MRN (60% of distinct MRNs go to attack training) so that no MRN
# contributes rows to both the attack-training and attack-validation sets.
unique_mrns = pd.Series(train_validation['dfci_mrn'].unique())
attack_train_mrns = unique_mrns.sample(frac=0.6, random_state=42)
chosen_for_train = unique_mrns.isin(attack_train_mrns)
attack_valid_mrns = unique_mrns[~chosen_for_train]

row_in_attack_train = train_validation['dfci_mrn'].isin(attack_train_mrns)
row_in_attack_valid = train_validation['dfci_mrn'].isin(attack_valid_mrns)
attack_train = train_validation[row_in_attack_train]
attack_valid = train_validation[row_in_attack_valid]

In [75]:

# Summarize the attack-training partition (row count, columns, non-null counts, dtypes).
attack_train.info()

<class 'pandas.core.frame.DataFrame'>
Index: 116 entries, 0 to 195
Data columns (total 22 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Unnamed: 0.2              116 non-null    int64  
 1   Unnamed: 0.1              116 non-null    int64  
 2   dfci_mrn                  116 non-null    float64
 3   cancer_type               116 non-null    object 
 4   date                      116 non-null    object 
 5   any_cancer                116 non-null    int64  
 6   progression               116 non-null    int64  
 7   response                  116 non-null    int64  
 8   Unnamed: 0                116 non-null    int64  
 9   text                      116 non-null    object 
 10  PROVIDER_DEPARTMENT       116 non-null    object 
 11  patient_id                116 non-null    int64  
 12  hybrid_death_ind          116 non-null    object 
 13  hybrid_death_dt           82 non-null     object 
 14  primary_cancer_

In [57]:
from sklearn.metrics import roc_auc_score


In [76]:
# train simple logistic regression model to predict if a given note was in teacher training set using outputs and labels
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

# Attack features: the three true labels followed by the three model logits.
attack_feature_columns = ['any_cancer','progression','response','outcome_0_logit','outcome_1_logit','outcome_2_logit']

# Unpenalized logistic regression fitted on the attack-training rows.
logreg = LogisticRegression(random_state=0, penalty=None)
logreg = logreg.fit(attack_train[attack_feature_columns], attack_train['in_teacher_training'])

# Class probabilities for the held-out attack-validation rows.
valid_preds = logreg.predict_proba(attack_valid[attack_feature_columns])

In [79]:
from sklearn.metrics import roc_auc_score, average_precision_score, precision_recall_curve
from confidenceinterval.bootstrap import bootstrap_ci

# Fixed integer seed handed to bootstrap_ci as `random_state`.
random_generator=42

# AUROC of the attack on held-out rows with a 95% BCa bootstrap interval (1000 resamples);
# the second column of valid_preds is used as the score.
auroc_with_ci = bootstrap_ci(
    y_true=attack_valid['in_teacher_training'],
    y_pred=valid_preds[:,1],
    metric=roc_auc_score,
    confidence_level=0.95,
    n_resamples=1000,
    method='bootstrap_bca',
    random_state=random_generator,
)
print('auroc')
print(auroc_with_ci)

auroc
(0.7146908678389109, (0.5933068633011911, 0.8248084069524211))


In [80]:
# now do student
# Reload the saved student logits and label each note: 1.0 if it came from the 'train' split, else 0.0.
train_validation = pd.read_csv('student_attack_dataset_small_train.csv')
came_from_train = train_validation['split'] == 'train'
train_validation['in_teacher_training'] = np.where(came_from_train, 1.0, 0.0)

# Partition by MRN (60% of distinct MRNs go to attack training) so that no MRN
# contributes rows to both the attack-training and attack-validation sets.
unique_mrns = pd.Series(train_validation['dfci_mrn'].unique())
attack_train_mrns = unique_mrns.sample(frac=0.6, random_state=42)
chosen_for_train = unique_mrns.isin(attack_train_mrns)
attack_valid_mrns = unique_mrns[~chosen_for_train]

row_in_attack_train = train_validation['dfci_mrn'].isin(attack_train_mrns)
row_in_attack_valid = train_validation['dfci_mrn'].isin(attack_valid_mrns)
attack_train = train_validation[row_in_attack_train]
attack_valid = train_validation[row_in_attack_valid]

In [81]:
# Summarize the attack-training partition (row count, columns, non-null counts, dtypes).
attack_train.info()

<class 'pandas.core.frame.DataFrame'>
Index: 116 entries, 0 to 195
Data columns (total 22 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Unnamed: 0.2              116 non-null    int64  
 1   Unnamed: 0.1              116 non-null    int64  
 2   dfci_mrn                  116 non-null    float64
 3   cancer_type               116 non-null    object 
 4   date                      116 non-null    object 
 5   any_cancer                116 non-null    int64  
 6   progression               116 non-null    int64  
 7   response                  116 non-null    int64  
 8   Unnamed: 0                116 non-null    int64  
 9   text                      116 non-null    object 
 10  PROVIDER_DEPARTMENT       116 non-null    object 
 11  patient_id                116 non-null    int64  
 12  hybrid_death_ind          116 non-null    object 
 13  hybrid_death_dt           82 non-null     object 
 14  primary_cancer_

In [82]:
# train attack model on student outputs
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

# Attack features: the three true labels followed by the three model logits.
attack_feature_columns = ['any_cancer','progression','response','outcome_0_logit','outcome_1_logit','outcome_2_logit']

# Unpenalized logistic regression fitted on the attack-training rows.
logreg = LogisticRegression(random_state=0, penalty=None)
logreg = logreg.fit(attack_train[attack_feature_columns], attack_train['in_teacher_training'])

# Class probabilities for the held-out attack-validation rows.
valid_preds = logreg.predict_proba(attack_valid[attack_feature_columns])

In [83]:
# evaluate attack model on student outputs
from sklearn.metrics import roc_auc_score, average_precision_score, precision_recall_curve
from confidenceinterval.bootstrap import bootstrap_ci

# Fixed integer seed handed to bootstrap_ci as `random_state`.
random_generator=42

# AUROC of the attack on held-out rows with a 95% BCa bootstrap interval (1000 resamples);
# the second column of valid_preds is used as the score.
auroc_with_ci = bootstrap_ci(
    y_true=attack_valid['in_teacher_training'],
    y_pred=valid_preds[:,1],
    metric=roc_auc_score,
    confidence_level=0.95,
    n_resamples=1000,
    method='bootstrap_bca',
    random_state=random_generator,
)
print('auroc')
print(auroc_with_ci)

auroc
(0.5456608054452637, (0.4305343473356578, 0.6620339549748387))
