In [1]:
import pandas as pd
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import torch
import transformers
import datasets
import shap

# Names of the seven model outputs; handed to shap.Explainer as `output_names`.
labels = classes = ['MOT', 'CLA', 'SOU','SUB', 'MEA', 'ORI', 'REP']


# Choose the compute device: a fixed CUDA card when CUDA is usable, else the CPU.
if torch.cuda.is_available():

    # Tell PyTorch to use the GPU.
    device = torch.device("cuda:4")

    print('There are %d GPU(s) available.' % torch.cuda.device_count())

    # Report the card that was actually selected (the name of index 0 was
    # printed before, which is a different device from `cuda:4`).
    print('We will use the GPU:', torch.cuda.get_device_name(device))

else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

#torch.cuda.set_device(0)
import json
import numpy as np
import os
import random
import re
import pickle
import torch
from tqdm.autonotebook import tqdm
from transformers import AutoTokenizer,AutoModel

# Hugging Face checkpoint name; only its tokenizer is loaded on the line below.
model_name='bert-base-uncased'

# Tokenizer belonging to `model_name`, obtained through AutoTokenizer.
tokenizer=AutoTokenizer.from_pretrained(model_name)

import torch.nn as nn
import torch.nn.functional as F
import numpy as np


import json
import numpy as np
import os
import random
import re
import pickle
import torch
from tqdm.autonotebook import tqdm
import torch
from torch.utils.data import Dataset, DataLoader
import nltk
import torch.nn as nn
import torch.nn.functional as F


# Re-select the compute device for the model code below.
if torch.cuda.is_available():
    device = torch.device("cuda:4")
    # Make the selected card the process default as well. This call used to
    # run unconditionally with index 2, which fails on a host without CUDA
    # and pointed at a different card than `device`.
    torch.cuda.set_device(device)

else:
    device = torch.device("cpu")


class BertModel(nn.Module):
    """Aspect-attention scorer over three sentence-embedded reviews.

    Each review is a ``(batch, sentences, in_features)`` tensor. ``common``
    runs one review through a shared bidirectional LSTM, builds one attention
    distribution over the sentences per output category, and produces the
    per-category scores plus a two-way (sentiment) score per category.
    ``forward`` does this for three reviews, mixes the per-category
    embeddings with ``imp_score`` and maps the concatenation to a single
    decision value per sample.

    The module no longer reads a module-level ``device``: tensors it creates
    live wherever the module / its inputs live.

    Args:
        in_features: Width of each sentence embedding (768 at the call site).
        out_features: Number of output categories (7 at the call site).
    """

    def __init__(self, in_features, out_features):

        super(BertModel, self).__init__()

        self.in_features = in_features
        self.out_features = out_features

        # Width of the BiLSTM output: two directions of 256 units each.
        hidden_dimension = 512

        self.flatten=nn.Flatten()
        self.lstm_1 = nn.LSTM(in_features, hidden_dimension//2, batch_first=True, bidirectional=True)

        # Category head; consumes the flattened (out_features * hidden) embedding.
        self.linear1=nn.Linear(hidden_dimension*out_features,hidden_dimension*2)
        self.linear2=nn.Linear(hidden_dimension*2,256)
        self.linear3=nn.Linear(256,64)

        self.last_dense = nn.Linear(64, self.out_features)
        self.dropout1=nn.Dropout(p=0.4)
        self.dropout2=nn.Dropout(p=0.2)

        self.relu = nn.ReLU()
        self.sigmoid=nn.Sigmoid()

        # Per-sentence category logits; softmaxed over sentences in `common`.
        self.category=nn.Linear(hidden_dimension,out_features)

        # Sentiment part
        self.linear_sen1=nn.Linear(hidden_dimension,256)
        self.linear_sen2=nn.Linear(256,32)
        self.linear_sen3=nn.Linear(32,2)

        # Decision part of the model
        self.linear_des1=nn.Linear(hidden_dimension*3,256)
        self.linear_des2=nn.Linear(256,64)
        self.linear_des3=nn.Linear(64,8)
        self.linear_des4=nn.Linear(8,1)

        # Per-category importance scores. Kept as a NON-persistent buffer so
        # that it follows `.to(...)` with the rest of the module while staying
        # out of the state_dict, exactly as the plain tensor attribute did
        # before (existing checkpoints therefore still load strictly).
        # NOTE(review): this tensor was never a registered parameter, so it is
        # neither trained nor restored from a checkpoint and is re-drawn at
        # random on every construction. If it is meant to be learned it should
        # become an nn.Parameter, which changes the checkpoint keys.
        imp_score = torch.rand(1, out_features)
        nn.init.xavier_normal_(imp_score)
        self.register_buffer("imp_score", imp_score, persistent=False)

    def common(self, review):
        """Encode one review.

        Args:
            review: Tensor of shape ``(batch, sentences, in_features)``.

        Returns:
            Tuple ``(model_output, e, wts, model_output_sen)``: sigmoid
            category scores ``(batch, out_features)``, attended embeddings
            ``(batch, out_features, 512)``, attention weights
            ``(batch, out_features, sentences)`` and sigmoid sentiment scores
            ``(batch, out_features, 2)``.
        """
        # No explicit (h0, c0): nn.LSTM starts from zeros created on the
        # input's own device, which is what the hand-built zeros provided.
        s_e, _ = self.lstm_1(review)                  # (batch, sentences, 512)

        l = self.relu(self.linear_sen1(s_e))
        l = self.dropout1(l)
        l = self.relu(self.linear_sen2(l))
        l = self.dropout1(l)
        l = self.relu(self.linear_sen3(l))            # (batch, sentences, 2)

        comp = self.category(s_e).permute(0,2,1)      # (batch, out, sentences)

        wts = F.softmax(comp, dim=2)                  # attention over sentences
        e = torch.bmm(wts,s_e)                        # (batch, out, 512)

        out_sen = torch.matmul(wts,l)                 # (batch, out, 2)

        l = torch.reshape(e, (s_e.size(0), -1))       # (batch, out*512)

        l = self.relu(self.linear1(l))
        l = self.dropout1(l)
        l = self.relu(self.linear2(l))
        l = self.dropout1(l)
        l = self.relu(self.linear3(l))

        model_output = self.sigmoid(self.last_dense(l))
        model_output_sen = self.sigmoid(out_sen)

        return model_output, e, wts, model_output_sen

    def forward(self, review1,review2,review3):
        """Score three reviews and derive one decision value per sample.

        Returns:
            A 10-tuple: the three category outputs, the three attention-weight
            tensors, the decision output ``(batch, 1)`` and the three
            sentiment outputs, in that order.
        """
        aspect_output1,dse1,imp_s1,out_sen1=self.common(review1)
        aspect_output2,dse2,imp_s2,out_sen2=self.common(review2)
        aspect_output3,dse3,imp_s3,out_sen3=self.common(review3)

        # Normalised importance per category, broadcast over the batch.
        i_p_aspect = F.softmax(self.imp_score,dim=1).to(dse1.device)  # (1, out)
        i_p_aspect = i_p_aspect.unsqueeze(dim=1)                      # (1, 1, out)

        dse1=torch.matmul(i_p_aspect,dse1).squeeze(dim=1)  # (batch, 512)
        dse2=torch.matmul(i_p_aspect,dse2).squeeze(dim=1)  # (batch, 512)
        dse3=torch.matmul(i_p_aspect,dse3).squeeze(dim=1)  # (batch, 512)

        final_des_embed=torch.cat((dse1,dse2,dse3),dim=1)  # (batch, 512*3)

        x=self.relu(self.linear_des1(final_des_embed))  # (batch, 256)
        x=self.relu(self.linear_des2(x))                # (batch, 64)
        x=self.relu(self.linear_des3(x))                # (batch, 8)
        # NOTE(review): the ReLU ahead of the sigmoid confines the decision
        # output to [0.5, 1). Left as is because the stored weights were
        # produced with this exact computation.
        x=self.relu(self.linear_des4(x))                # (batch, 1)

        output=self.sigmoid(x)

        return aspect_output1,aspect_output2,aspect_output3,imp_s1,imp_s2,imp_s3,output,out_sen1,out_sen2,out_sen3
        
        
# Build the three-review model and restore its trained weights.
text_model = BertModel(768,7)
text_model.to(device)
text_model.load_state_dict(torch.load('/home/sandeep_2121cs29/hardik/COLING_2022/ckpt/bert_coling_multi.pt',map_location=device))
# The model is only used for prediction from here on: switch dropout off so
# that repeated calls on the same text return the same value.
text_model.eval()


# Sentence encoder used by `create_embeddings` to embed the review sentences.
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('stsb-roberta-base')
model=model.to(device)

def create_embeddings(r):
    """Embed a text as exactly 40 sentence vectors.

    The text is split with ``nltk.sent_tokenize``; the sentence list is cut
    to the first 40 entries or filled up with empty strings to reach 40, and
    then passed to ``model.encode``.

    Args:
        r: The text to embed.

    Returns:
        Whatever ``model.encode`` returns for the 40 sentences.
    """
    max_length = 40

    kept = nltk.sent_tokenize(r)[:max_length]
    padded = kept + [""] * (max_length - len(kept))

    return model.encode(padded, show_progress_bar=False)


def f(x):
    """Return the decision output of ``text_model`` for three-part texts.

    Every text is expected to hold three reviews separated by the literal
    marker ``[DEPT]``. Each review is embedded with ``create_embeddings`` and
    the three embeddings are passed to ``text_model``.

    Args:
        x: One text, or an iterable of texts (the form in which shap hands
            over a batch).

    Returns:
        NumPy array of shape ``(number_of_texts, 1)`` holding the seventh
        value returned by ``text_model`` (its decision output).
    """
    texts = [x] if isinstance(x, str) else [str(t) for t in x]
    if not texts:
        return np.empty((0, 1), dtype=np.float32)

    # One list of (sentences, features) arrays per review slot.
    slots = ([], [], [])
    for text in texts:
        # A perturbed text can have lost a separator; pad with empty reviews
        # so that three segments always exist. Extra segments are ignored.
        parts = (text.split('[DEPT]') + ["", "", ""])[:3]
        for slot, part in zip(slots, parts):
            slot.append(create_embeddings(part))

    # Stack to (batch, sentences, features) tensors on the model's device; the
    # raw arrays from `create_embeddings` have no batch axis.
    e1, e2, e3 = (
        torch.as_tensor(np.stack(slot), dtype=torch.float, device=device)
        for slot in slots
    )

    with torch.no_grad():
        outputs = text_model(e1, e2, e3)

    return outputs[6].cpu().numpy()


def model_prediction_gpu(x):
    """Prediction wrapper that is handed to shap.Explainer later in this cell.

    Encodes every string in `x` with `tokenizer` to 512 ids (padded or
    truncated), builds the attention mask and segment ids, calls `text_model`
    and returns the logit of its first output as a NumPy array.

    NOTE(review): the BertModel defined in this cell declares
    forward(review1, review2, review3) and returns ten values, while the call
    below passes (t1, 'last') and unpacks four. This looks copied from the
    token-level model and will presumably fail against this cell's model --
    confirm which model this wrapper is meant to drive.
    """
    # The mask two lines down is 1 wherever the id is non-zero, i.e. it
    # assumes the padding id is 0 -- TODO confirm for the tokenizer in use.
    # The segment ids are all zero.
    tv = torch.tensor([tokenizer.encode(v, padding='max_length', 
                                        max_length=512, truncation=True) for v in x]).to(device,dtype = torch.long)
    attention_mask = (tv!=0).type(torch.int64).to(device,dtype = torch.long)
    token_type_ids = torch.zeros_like(attention_mask).to(device,dtype = torch.long)
    t1=(tv,attention_mask,token_type_ids)
    out_test, attn_T,attn_T_S,out_test_senti = text_model(t1,'last')
    # Convert the model outputs to log-odds before copying them to the host.
    val = torch.logit(out_test).detach().cpu().numpy()
    return val


# Selects which of the three set-ups below is executed.
method = "custom tokenizer"

if method == "transformers tokenizer":
    # Hand the transformers tokenizer straight to the explainer.
    explainer = shap.Explainer(f, tokenizer, output_names=labels)

elif method == "default masker":
    # Text masker built from the regular expression r"\W".
    masker = shap.maskers.Text(r"\W")
    explainer = shap.Explainer(f, masker, output_names=labels)

elif method == "custom tokenizer":
    import re

    def custom_tokenizer(s, return_offsets_mapping=True):
        """Split `s` at every single non-word character.

        Returns a dict with ``"input_ids"`` (the pieces between separators,
        empty pieces included) and, when `return_offsets_mapping` is true,
        ``"offset_mapping"`` with the ``(start, end)`` span of every piece.
        A trailing piece is only emitted when it is non-empty.
        """
        spans = []
        cursor = 0
        for separator in re.finditer(r"\W", s):
            spans.append((cursor, separator.start()))
            cursor = separator.end()
        if cursor != len(s):
            spans.append((cursor, len(s)))

        encoded = {"input_ids": [s[begin:stop] for begin, stop in spans]}
        if return_offsets_mapping:
            encoded["offset_mapping"] = spans
        return encoded


x = ["this work studies the predictive uncertainty issue of deep learning models . in particular  this work focuses on the distributional uncertainty which is caused by distributional mismatch between training and test examples . the proposed method is developed based on the existing work called dirichlet prior network  dpn  . it aims to address the issue of dpn that its loss function is complicated and makes the optimization difficult . instead  this paper proposes a new loss function for dpn  which consists of the commonly used crossentropy loss term and a regularization term . two loss functions are respectively defined over indomain training examples and outofdistribution  ood  training examples . the final objective function is a weighted combination of the two loss functions . experimental study is conducted on one synthetic dataset and two image datasets  cifar10 and cifar100  to demonstrate the properties of the proposed method and compare its performance with the relevant ones in the literature . the issue researched in this work is of significance because understanding the predictive uncertainty of a deep learning model has its both theoretical and practical value . the motivation [DEPT] research issues and the proposed method are overall clearly presented . the current recommendation is weak reject because the experimental study is not convincing or comprehensive enough . 1 .although the goal of this work is to deal with the inefficiency issue of the objective function of existing dpn with the newly proposed one  this experimental study does not seem to conduct sufficient experiments to demonstrate the advantages  say  in terms of training efficiency  the capability in making the network scalable for more challenging dataset  of the proposed objective function over the existing one  2 . table 1 compares the proposed method with odin . however  as indicated in this work  odin is trained with indomain examples only . 
is this comparison fair  actually  odin s setting seems to be more practical and more challenging than the setting used by the propose methods . 3 .the evaluation criteria shall be better explained at the beginning of the experiment  especially how they can be collectively used to verify that the proposed method can better distinguish distributional uncertainty from other uncertainty types . 4 .in addition  the experimental study can be clearer on the training and test splits . [DEPT] how many samples from cifar10 and cifar100 are used for training and test purpose  respectively  also  since training examples are from cifar10 and cifar100 and the test examples are also from these two datasets  does this contradict with the motivation of distributional mismatch between training and test examples mentioned in the abstract  5 .the experimental study can have more comparison on challenging datasets with more classes since it is indicated that dpn has difficulty in dealing with a large number of classes . minor  1 . please define the hattheta in eq .also  is the dirac delta estimation a good enough approximation here  2 .the lambda  out   lambda  in  in eq .  11  needs to be better explained . in particular  are the first terms in eq .  10  and eq .  11  comparable in terms of magnitude  otherwise  lambda  out   lambda  in  may not make sense . 3 .the novelty and significance of finetuning the proposed model with noisy ood training images can be better justified ."]

# Build the text masker from the tokenizer object itself and hand that masker
# to the explainer. Before, the masker was created from the string literal
# "transformers tokenizer" (not a tokenizer) and was never used afterwards.
masker = shap.maskers.Text(tokenizer)
explainer = shap.Explainer(model_prediction_gpu, masker, output_names=labels)
shap_values = explainer(x)

shap.plots.text(shap_values)



There are 5 GPU(s) available.
We will use the GPU: A100-PCIE-40GB


AttributeError: module 'shap' has no attribute 'maskers'

In [5]:
!pip install shap



In [11]:
pip install -U transformers

Note: you may need to restart the kernel to use updated packages.


In [9]:
!conda activate hardik


CommandNotFoundError: Your shell has not been properly configured to use 'conda activate'.
To initialize your shell, run

    $ conda init <SHELL_NAME>

Currently supported shells are:
  - bash
  - fish
  - tcsh
  - xonsh
  - zsh
  - powershell

See 'conda init --help' for more information and options.

IMPORTANT: You may need to close and restart your shell after running 'conda init'.




In [2]:
import pandas as pd
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import torch
import transformers
import datasets
import shap

# Names of the seven model outputs; passed to shap.Explainer as `output_names`.
labels = classes = ['MOT', 'CLA', 'SOU','SUB', 'MEA', 'ORI', 'REP']

'''
method = "custom tokenizer"

# build an explainer by passing a transformers tokenizer
if method == "transformers tokenizer":
    explainer = shap.Explainer(f, tokenizer, output_names=labels)

# build an explainer by explicitly creating a masker
elif method == "default masker":
    masker = shap.maskers.Text(r"\W") # this will create a basic whitespace tokenizer
    explainer = shap.Explainer(f, tokenizer, output_names=labels)

# build a fully custom tokenizer
elif method == "custom tokenizer":
    import re
    
    def custom_tokenizer(s, return_offsets_mapping=True):
        """ Custom tokenizers conform to a subset of the transformers API.
        """
        pos = 0
        offset_ranges = []
        input_ids = []
        for m in re.finditer(r"\W", s):
            start, end = m.span(0)
            offset_ranges.append((pos, start))
            input_ids.append(s[pos:start])
            pos = end
        if pos != len(s):
            offset_ranges.append((pos, len(s)))
            input_ids.append(s[pos:])
        out = {}
        out["input_ids"] = input_ids
        if return_offsets_mapping:
            out["offset_mapping"] = offset_ranges
        print(out)
        return out

    masker = shap.maskers.Text(custom_tokenizer)


'''

# Choose the compute device: a fixed CUDA card when CUDA is usable, else the CPU.
if torch.cuda.is_available():

    # Tell PyTorch to use the GPU.
    device = torch.device("cuda:1")

    print('There are %d GPU(s) available.' % torch.cuda.device_count())

    # Report the card that was actually selected (the name of index 0 was
    # printed before, which is a different device from `cuda:1`).
    print('We will use the GPU:', torch.cuda.get_device_name(device))

else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

#torch.cuda.set_device(0)
import json
import numpy as np
import os
import random
import re
import pickle
import torch
from tqdm.autonotebook import tqdm
from transformers import AutoTokenizer,AutoModel

# Hugging Face checkpoint name; used for the tokenizer below and for the
# AutoModel loaded inside BertModel.__init__.
model_name='allenai/scibert_scivocab_uncased'

# Tokenizer belonging to `model_name`, obtained through AutoTokenizer.
tokenizer=AutoTokenizer.from_pretrained(model_name)

'''
from torch.utils.data import Dataset, DataLoader

class Data(Dataset):
    def __init__(self,embed,aspects,sentiments):

        self.labels=aspects
        self.labels_s=sentiments
        self.sentences=embed
        self.max_len=50
        self.size=len(embed)

    @classmethod
    def getReader(cls,low,up,test=None,r=1):
        if(True):
          with open("data/onehot_aspect_multitask.pkl",'rb') as out:
              labels =pickle.load(out)
              labels = labels[low:up]
          
          with open("data/onehot_sentiment_multitask.pkl",'rb') as out:
              labels_s=pickle.load(out)
              labels_s = labels_s[low:up]
          
          with open("data/dataframe_multitask.pkl",'rb') as out:
              data_s=pickle.load(out)
              sents = list(data_s['sentences'])[low:up]
        
        assert len(labels) == len(sents) ==len(labels_s)
        print("Total number of Reviews", len(labels))
        
        return cls(sents, labels,labels_s)

    def __getitem__(self,idx):

        sen=self.sentences[idx]

        inputs = tokenizer(sen,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            return_token_type_ids=True,
            return_length = True,
            truncation=True)
        
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs['token_type_ids']
        
        return {
            'ids_sen': torch.tensor(ids),
            'mask_sen': torch.tensor(mask),
            'token_type_ids_sen': torch.tensor(token_type_ids, dtype=torch.float64),
            'targets':self.labels[idx],
            'targets_senti':self.labels_s[idx]}


    def __len__(self):
        return self.size

def getLoaders (batch_size):

        print('Reading the training Dataset...')
        print()
        train_dataset = Data.getReader(0,100000) #19200 #21216
        
        print()

        print('Reading the validation Dataset...')
        print()
        valid_dataset = Data.getReader(100000, 148000) #23200 #25216

        print('Reading the test Dataset...')
        print()
        test_dataset = Data.getReader(148000, 218000) #23200:25248
        
        trainloader = DataLoader(dataset=train_dataset, batch_size = batch_size, num_workers=8,shuffle=True)
        validloader = DataLoader(dataset=valid_dataset, batch_size = batch_size, num_workers=8,shuffle=True)
        testloader = DataLoader(dataset=test_dataset, batch_size = batch_size, num_workers=8)
        
        return trainloader, validloader, testloader

trainloader, validloader, testloader = getLoaders(batch_size)

print("Length of TrainLoader:",len(trainloader))
print("Length of ValidLoader:",len(validloader))
print("Length of TestLoader:",len(testloader))
'''
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

class BertModel(nn.Module):
    """Token-level category / sentiment model on top of a transformer encoder.

    The encoder named by the module-level ``model_name`` produces hidden
    states for every token; a bidirectional LSTM (200 output units) runs over
    them, one attention distribution over tokens is built per output
    category, and two heads produce the category scores and a two-way
    (sentiment) score per category.

    The module no longer reads a module-level ``device``: tensors it creates
    live wherever the module / its inputs live.

    Args:
        in_features: Width of the encoder states fed to the LSTM (768 at the
            call site).
        out_features: Number of output categories (7 at the call site).
    """

    def __init__(self, in_features, out_features):

        super(BertModel, self).__init__()
        self.model=AutoModel.from_pretrained(model_name,  output_hidden_states=True)

        self.in_features = in_features
        self.out_features = out_features

        self.flatten=nn.Flatten()
        self.lstm_1 = nn.LSTM(in_features, 200//2, batch_first=True, bidirectional=True)

        # Category head; consumes the flattened (out_features * 200) embedding.
        self.linear1=nn.Linear(200*out_features,200*2)
        self.linear2=nn.Linear(200*2,256)
        self.linear3=nn.Linear(256,64)

        # Per-token sentiment head.
        self.linear1_sen=nn.Linear(200,64)
        self.linear2_sen=nn.Linear(64,2)

        self.last_dense = nn.Linear(64, self.out_features)
        self.dropout1=nn.Dropout(p=0.5)
        self.dropout2=nn.Dropout(p=0.2)

        self.relu = nn.ReLU()
        self.sigmoid=nn.Sigmoid()
        self.tanh=nn.Tanh()

        # Projection from LSTM states to per-category attention logits. Kept
        # as a NON-persistent buffer so that it follows `.to(...)` with the
        # module while staying out of the state_dict, exactly as the plain
        # tensor attribute did before.
        # NOTE(review): this tensor was never a registered parameter, so an
        # optimizer built from `named_parameters()` does not update it and it
        # is not saved with the weights. If it is meant to be learned it
        # should become an nn.Parameter, which changes the checkpoint keys.
        category = torch.rand(200, out_features)
        nn.init.xavier_normal_(category)
        self.register_buffer("category", category, persistent=False)

    def forward(self, t1,strategy:str):
        """Run the model on one tokenised batch.

        Args:
            t1: Tuple ``(ids, mask, token_type_ids)`` of encoder inputs.
            strategy: How encoder layers are combined: ``'last'`` (final
                layer), ``'last_4'`` (final four layers concatenated on the
                feature axis, which needs a matching ``in_features``) or
                ``'mean'`` (average over all returned layers).

        Returns:
            Tuple ``(model_output, wts, comp1, model_output_sent)``: sigmoid
            category scores ``(batch, out_features)``, attention weights
            ``(batch, out_features, tokens)``, per-token sentiment logits
            ``(batch, tokens, 2)`` and sigmoid per-category sentiment scores
            ``(batch, out_features, 2)``.

        Raises:
            ValueError: If `strategy` is not one of the three known names.
        """
        ids, mask, token_type_ids = t1
        # Element 2 of the tuple output holds the per-layer hidden states.
        encoded_layers = self.model(ids, attention_mask = mask, token_type_ids = token_type_ids, return_dict=False)[2]

        if strategy == 'last_4':
            scibert_hidden_layers = torch.cat((encoded_layers[-1],
                                               encoded_layers[-2],
                                               encoded_layers[-3],
                                               encoded_layers[-4]), dim=2)
        elif strategy == 'last':
            scibert_hidden_layers = encoded_layers[-1]
        elif strategy == 'mean':
            # `encoded_layers` is a tuple of tensors, so it has to be stacked
            # before it can be averaged across layers.
            scibert_hidden_layers = torch.stack(tuple(encoded_layers), dim=0).mean(dim=0)
        else:
            raise ValueError(
                "strategy must be 'last', 'last_4' or 'mean', got %r" % (strategy,)
            )

        # No explicit (h0, c0): nn.LSTM starts from zeros created on the
        # input's own device, which is what the hand-built zeros provided.
        s_e, _ = self.lstm_1(scibert_hidden_layers)       # (batch, tokens, 200)

        c = self.category.unsqueeze(0)                    # (1, 200, out)
        comp = torch.matmul(s_e, c).permute(0,2,1)        # (batch, out, tokens)

        comp1 = self.relu(self.linear1_sen(s_e))          # (batch, tokens, 64)
        comp1 = self.linear2_sen(comp1)                   # (batch, tokens, 2)

        wts = F.softmax(comp, dim=2)                      # attention over tokens
        wts1 = torch.bmm(wts, comp1)                      # (batch, out, 2)

        e = torch.bmm(wts, s_e)                           # (batch, out, 200)

        l = torch.reshape(e, (ids.size(0), -1))           # (batch, out*200)

        l = self.relu(self.linear1(l))
        l = self.dropout1(l)
        l = self.relu(self.linear2(l))
        l = self.dropout1(l)
        l = self.relu(self.linear3(l))

        model_output = self.sigmoid(self.last_dense(l))
        model_output_sent = self.sigmoid(wts1)

        return model_output, wts, comp1, model_output_sent


# Build the token-level model and move it to the selected device.
text_model = BertModel(768,7)
text_model.to(device)
# The live code of this cell only predicts with the model: switch dropout off
# so that repeated calls on the same text return the same value.
text_model.eval()
# NOTE(review): no trained weights are loaded in the visible part of this
# cell, so the heads on top of the encoder appear to be randomly initialised
# when the explainer runs -- confirm whether a checkpoint should be restored.

'''
criterion1 = nn.BCELoss()
criterion2 = nn.BCELoss()
from transformers import AdamW, get_linear_schedule_with_warmup

no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in text_model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in text_model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.01}
]
optimizer = AdamW(optimizer_grouped_parameters, lr=3e-5)

scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=100, num_training_steps=1000)


text_model.train()
result=[]
EPOCH=10

train_out = []
val_out = []
train_true = []
val_true = []
test_out = []
test_true = []
attn_train = []
attn_val = []
attn_test = []
attn_test_senti=[]
test_out_senti=[]
test_true_senti=[]
loss_log1 = []
loss_log2 = []


for epoch in range(EPOCH):

  final_train_loss=0.0
  final_val_loss=0.0
  l1 = []
  text_model.train()

  for idx,data in tqdm(enumerate(trainloader),desc="Train epoch {}/{}".format(epoch + 1, EPOCH)):

    ids = data['ids_sen'].to(device,dtype = torch.long)
    mask = data['mask_sen'].to(device,dtype = torch.long)
    token_type_ids = data['token_type_ids_sen'].to(device,dtype = torch.long)
    targets = data['targets'].to(device,dtype = torch.float)
    targets_s = data['targets_senti'].to(device,dtype = torch.float)
    
    t1 = (ids,mask,token_type_ids)
    
    optimizer.zero_grad()
    out, attn_t,_,out_sen = text_model(t1,'last')

    if (epoch+1 == EPOCH):
      train_out.append((torch.transpose(out,0,1)).detach().cpu())
      train_true.append((torch.transpose(targets,0,1)).detach().cpu())

    loss = (criterion1(out, targets)+criterion2(out_sen, targets_s))/2
    l1.append(loss.item())
    final_train_loss +=loss.item()
    loss.backward()
    optimizer.step()
    if idx % 100 == 0:
      scheduler.step()

    
  loss_log1.append(np.average(l1))

  text_model.eval()
  l2 = []

  
  for data in tqdm(validloader,desc="Valid epoch {}/{}".format(epoch + 1, EPOCH)):
    ids = data['ids_sen'].to(device,dtype = torch.long)
    mask = data['mask_sen'].to(device,dtype = torch.long)
    token_type_ids = data['token_type_ids_sen'].to(device,dtype = torch.long)
    targets = data['targets'].to(device,dtype = torch.float)
    targets_s = data['targets_senti'].to(device,dtype = torch.float)
    
    t1 = (ids,mask,token_type_ids)
    
    out_val, attn_v ,_,out_val_senti= text_model(t1,'last')

    if (epoch+1 == EPOCH):
      val_out.append((torch.transpose(out_val,0,1)).detach().cpu())
      val_true.append((torch.transpose(targets,0,1)).detach().cpu())

    loss = (criterion1(out_val, targets) + criterion2(out_val_senti, targets_s))/2
    l2.append(loss.item())
    final_val_loss+=loss.item()

  loss_log2.append(np.average(l2))
  curr_lr = optimizer.param_groups[0]['lr']

  print("Epoch {}, loss: {}, val_loss: {}".format(epoch+1, final_train_loss/len(trainloader), final_val_loss/len(validloader)))
  print()
  
'''

def f(x):
    """Return the category scores of ``text_model`` for a batch of texts.

    Args:
        x: One text, or an iterable of texts.

    Returns:
        NumPy array with the first output of ``text_model`` (its sigmoid
        category scores), one row per text.
    """
    texts = [x] if isinstance(x, str) else [str(v) for v in x]

    # Pad to the longest text of the batch so that texts of different length
    # can be stacked into one tensor, and cut at 512 tokens. A single text,
    # or texts of equal length, are encoded exactly as before.
    inputs = tokenizer(
        texts,
        add_special_tokens=False,
        return_token_type_ids=True,
        padding=True,
        truncation=True,
        max_length=512,
        return_tensors="pt",
    )

    ids = inputs['input_ids'].to(device,dtype = torch.long)
    mask = inputs['attention_mask'].to(device,dtype = torch.long)
    token_type_ids = inputs['token_type_ids'].to(device,dtype = torch.long)
    t1=(ids,mask,token_type_ids)

    # Prediction only: no autograd graph is needed.
    with torch.no_grad():
        out_test, attn_T, attn_T_S, out_test_senti = text_model(t1,'last')

    return out_test.cpu().numpy()


def model_prediction_gpu(x):
    """Prediction function for shap: log-odds of the category scores.

    Args:
        x: Iterable of texts.

    Returns:
        NumPy array with the logit of the first output of ``text_model``,
        one row per text.
    """
    # Every text is padded / truncated to exactly 512 tokens. The fixed
    # length is kept on purpose: the layers after the encoder also run over
    # the padded positions, so a different padding length would change the
    # scores.
    encoded = tokenizer(
        [str(v) for v in x],
        padding='max_length',
        max_length=512,
        truncation=True,
        return_token_type_ids=True,
        return_tensors='pt',
    )
    tv = encoded['input_ids'].to(device,dtype = torch.long)
    # Use the mask produced by the tokenizer rather than deriving it from
    # "id != 0", which silently relied on the padding id being 0.
    attention_mask = encoded['attention_mask'].to(device,dtype = torch.long)
    token_type_ids = encoded['token_type_ids'].to(device,dtype = torch.long)
    t1=(tv,attention_mask,token_type_ids)

    # Prediction only: no autograd graph is needed.
    with torch.no_grad():
        out_test, attn_T, attn_T_S, out_test_senti = text_model(t1,'last')

    # `eps` clamps scores of exactly 0 or 1, whose logit would be -inf / +inf.
    val = torch.logit(out_test, eps=1e-6).cpu().numpy()
    return val


# Selects which of the three set-ups below is executed.
method = "custom tokenizer"

if method == "transformers tokenizer":
    # Hand the transformers tokenizer straight to the explainer.
    explainer = shap.Explainer(f, tokenizer, output_names=labels)

elif method == "default masker":
    # Text masker built from the regular expression r"\W".
    masker = shap.maskers.Text(r"\W")
    explainer = shap.Explainer(f, masker, output_names=labels)

elif method == "custom tokenizer":
    import re

    def custom_tokenizer(s, return_offsets_mapping=True):
        """Split `s` at every single non-word character.

        Returns a dict with ``"input_ids"`` (the pieces between separators,
        empty pieces included) and, when `return_offsets_mapping` is true,
        ``"offset_mapping"`` with the ``(start, end)`` span of every piece.
        A trailing piece is only emitted when it is non-empty.
        """
        pieces = []
        ranges = []
        cursor = 0
        for separator in re.finditer(r"\W", s):
            stop = separator.start()
            ranges.append((cursor, stop))
            pieces.append(s[cursor:stop])
            cursor = separator.end()
        if cursor != len(s):
            ranges.append((cursor, len(s)))
            pieces.append(s[cursor:])

        if return_offsets_mapping:
            return {"input_ids": pieces, "offset_mapping": ranges}
        return {"input_ids": pieces}


# shap_values = explainer(list(data['text'][:3]))
# Two short sample texts to explain.
x = ["hi","hello"]

# Build the text masker from the tokenizer object itself and hand that masker
# to the explainer. Before, the masker was created from the string literal
# "transformers tokenizer" (not a tokenizer) and was never used afterwards.
masker = shap.maskers.Text(tokenizer)
explainer = shap.Explainer(model_prediction_gpu, masker, output_names=labels)
# Smoke test: score a single mask token before running the explainer.
print(f(['[MASK]']))
shap_values = explainer(x)

shap.plots.text(shap_values)

'''
with torch.no_grad():
   for data in testloader:
     ids = data['ids_sen'].to(device,dtype = torch.long)
     mask = data['mask_sen'].to(device,dtype = torch.long)
     token_type_ids = data['token_type_ids_sen'].to(device,dtype = torch.long)
     targets = data['targets'].to(device,dtype = torch.float)
     targets_s = data['targets_senti'].to(device,dtype = torch.float)

     t1=(ids,mask,token_type_ids)
  
     out_test, attn_T,attn_T_S,out_test_senti = text_model(t1,'last')

     test_out.append((torch.transpose(out_test,0,1)).detach().cpu())
     test_true.append((torch.transpose(targets,0,1)).detach().cpu())
     test_true_senti.append((torch.transpose(targets_s,0,1)).detach().cpu())
     attn_test.append((torch.tensor(attn_T)).detach().cpu())
     attn_test_senti.append((torch.tensor(attn_T_S)).detach().cpu())
     test_out_senti.append((torch.transpose(out_test_senti,0,1)).detach().cpu())
     

plt.plot(range(len(loss_log1)), loss_log1)
plt.plot(range(len(loss_log2)), loss_log2)
plt.savefig('graphs/loss_bert_multi.png')

torch.save(text_model.state_dict(), "ckpt/stsb-robert7_10_multi_full_data.pt")

train_out = torch.cat(train_out, 1)
val_out = torch.cat(val_out, 1)
train_true = torch.cat(train_true, 1)
val_true = torch.cat(val_true, 1)
test_out = torch.cat(test_out, 1)
test_out_senti = torch.cat(test_out_senti, 1)
test_true = torch.cat(test_true, 1)
attn_test = torch.cat(attn_test, 0)
attn_test_senti = torch.cat(attn_test_senti, 0)
test_true_senti=torch.cat(test_true_senti, 1)

#train_out, val_out, train_true, val_true = train_out.cpu(), val_out.cpu(), train_true.cpu(), val_true.cpu()
#test_out, test_true = test_out.cpu(), test_true.cpu()
#attn_test = attn_test.cpu()

attnfile = open('outputs/attn_noaspect_multi.pkl', 'wb')
pickle.dump(attn_test, attnfile)

attnfile_ss = open('outputs/attn_sentiment_multi.pkl', 'wb')
pickle.dump(attn_test_senti, attnfile_ss)

test_out_ = (test_out, test_true)
test_out_senti_=(test_out_senti,test_true_senti)

test_outs = open('outputs/main_bert7_test_out_noaspect_multi.pkl', 'wb')
pickle.dump(test_out_, test_outs)

test_outs_sentiment = open('outputs/main_bert7_test_out_sentiments_multi_good.pkl', 'wb')
pickle.dump(test_out_senti_, test_outs_sentiment)



f=open("results/"+"stsb-robert_7_multi"+".txt",'w')
f.close()

def labelwise_metrics(pred, true, split):
  f=open("results/"+"stsb-robert_7_multi"+".txt",'a')
  f.write('-'*25 + split + '-'*25 + '\n\n')
   
  pred = (pred>0.425)

  #batch_size = len(pred)
  batch_size = 16

  pred = pred.to(torch.int)
  true = true.to(torch.int)

  from sklearn.metrics import accuracy_score
  from sklearn.metrics import confusion_matrix

  for i in range(batch_size):
    acc=accuracy_score(true[i],pred[i])

    epsilon = 1e-7
    confusion_vector = pred[i]/true[i]

    true_positives = torch.sum(confusion_vector == 1).item()
    false_positives = torch.sum(confusion_vector == float('inf')).item()
    true_negatives = torch.sum(torch.isnan(confusion_vector)).item()
    false_negatives = torch.sum(confusion_vector == 0).item()

    precision = true_positives/(true_positives+false_positives+epsilon)
    recall = true_positives/(true_positives+false_negatives+epsilon)
    f1 = 2*precision*recall/(precision+recall+epsilon)

    print("Label: {}, acc: {:.3f}, f1: {:.3f}".format(i+1, acc, f1))
    f.write("Label: {}, acc: {:.3f}, f1: {:.3f}\n".format(i+1, acc, f1))
    f.write(str(confusion_matrix(true[i], pred[i])))
    f.write('\n')

  return 0

f1=open("results/"+"stsb-robert_7_multi_senti"+".txt",'w')
f1.close()

def labelwise_metrics_senti(pred, true, split):

  classes = ['MOT +', 'MOT -', 'CLA +', 'CLA -', 'SOU +', 'SOU -', 'SUB +', 'SUB -', 'MEA +', 'MEA -', 'ORI +', 'ORI -', 'REP +', 'REP -']

  f=open("results/"+"stsb-robert_7_multi_senti"+".txt",'a')
  f.write('-'*25 + split + '-'*25 + '\n\n')
   
  pred = (pred>0.425)

  pred = pred.to(torch.int)
  true = true.to(torch.int)
  pred = pred.reshape(7, 2, -1)
  true = true.reshape(7, 2, -1)
  pred = pred.reshape(14, -1)
  true = true.reshape(14, -1)

  batch_size = len(pred)

  from sklearn.metrics import accuracy_score
  from sklearn.metrics import confusion_matrix

  for i in range(batch_size):
    acc=accuracy_score(true[i],pred[i])

    epsilon = 1e-7
    confusion_vector = pred[i]/true[i]

    true_positives = torch.sum(confusion_vector == 1).item()
    false_positives = torch.sum(confusion_vector == float('inf')).item()
    true_negatives = torch.sum(torch.isnan(confusion_vector)).item()
    false_negatives = torch.sum(confusion_vector == 0).item()

    precision = true_positives/(true_positives+false_positives+epsilon)
    recall = true_positives/(true_positives+false_negatives+epsilon)
    f1 = 2*precision*recall/(precision+recall+epsilon)

    #print("Class: {}, acc: {:.3f}, f1: {:.3f}".format(classes[i], acc, f1))
    f.write("Label: {}, acc: {:.3f}, f1: {:.3f}\n".format(classes[i], acc, f1))
    f.write(str(confusion_matrix(true[i], pred[i])))
    f.write('\n')

  return 0


print('Training...')
labelwise_metrics(train_out, train_true, 'TRAINING')
print()
print('Validation...')
labelwise_metrics(val_out, val_true, 'VALIDATION')
print()
print('Test...')
labelwise_metrics(test_out, test_true, 'TESTING')
print('Training...')
labelwise_metrics_senti(train_out, train_true, 'TRAINING')
print()
print('Validation...')
labelwise_metrics_senti(val_out, val_true, 'VALIDATION')
print()
print('Test...')
labelwise_metrics_senti(test_out_senti, test_true_senti, 'TESTING')

'''

There are 5 GPU(s) available.
We will use the GPU: A100-PCIE-40GB


AttributeError: module 'shap' has no attribute 'maskers'