In [None]:
import os
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from gensim.utils import simple_preprocess
from sklearn.feature_extraction.text import TfidfVectorizer
from imblearn.over_sampling import SMOTE

from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_recall_fscore_support

import collections

In [None]:
import torch
from farm.modeling.tokenization import Tokenizer
from farm.data_handler.processor import TextClassificationProcessor
from farm.data_handler.data_silo import DataSilo
from farm.modeling.language_model import LanguageModel
from farm.modeling.prediction_head import TextClassificationHead
from farm.modeling.adaptive_model import AdaptiveModel
from farm.modeling.optimization import initialize_optimizer
from farm.train import Trainer
from farm.utils import MLFlowLogger

In [3]:
# Pick the GPU when PyTorch reports one as available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Devices available: {}".format(device))

Devices available: cpu


In [4]:
# Load the tokenizer for the "bert-base-uncased" checkpoint through FARM.
# (gensim's simple_preprocess was considered as an alternative tokenizer.)
tokenizer = Tokenizer.load(
    pretrained_model_name_or_path="bert-base-uncased",
)

05/05/2021 17:13:06 - INFO - farm.modeling.tokenization -   Loading tokenizer of type 'BertTokenizer'


In [5]:
# The two class labels, stored as strings.
LABEL_LIST = ["0", "1"]

# FARM text-classification processor built on the tokenizer loaded earlier.
# The data directory, file names, metric and label column are empty strings here.
processor = TextClassificationProcessor(
    tokenizer=tokenizer,
    max_seq_len=128,
    label_list=LABEL_LIST,
    data_dir="",
    train_filename="",
    test_filename="",
    metric="",
    label_column_name="",
)

In [6]:
# Directory the FARM model and tokenizer are loaded from later in the notebook.
# Left as an empty string here; set it to the saved-model directory before running.
save_dir = ""

In [7]:
import sys

# Extend the module search path with one extra entry (an empty string here),
# presumably so the BERT_explainability package imported next can be resolved.
sys.path.extend([""])

In [8]:
import transformers
from transformers import BertTokenizer
from BERT_explainability.modules.BERT.ExplanationGenerator import Generator
from BERT_explainability.modules.BERT.BertForSequenceClassification import BertForSequenceClassification
from BERT_explainability.modules.BERT.BERT import BertModel
from transformers import BertTokenizer
from BERT_explainability.modules.BERT.ExplanationGenerator import Generator
from transformers import AutoTokenizer

from captum.attr import (
    visualization
)
import torch
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [9]:
#from BERT_explainability.modules.layers_lrp import RelProp
from farm.conversion.transformers import Converter

In [None]:
# Restore the tokenizer and the FARM AdaptiveModel saved under `save_dir`.
# The model is loaded on the CPU. Note that this rebinds the notebook-level
# `tokenizer` name to the tokenizer stored alongside the model.
tokenizer = Tokenizer.load(save_dir)
farm_model = AdaptiveModel.load(save_dir, device="cpu")

# Convert the FARM model to the Hugging Face transformers format and keep
# the first element of the conversion result.
model = Converter.convert_to_transformers(farm_model)[0]

In [None]:
# Write the converted model and then its tokenizer to disk via their
# save_pretrained methods. The target path is an empty string here.
model.save_pretrained("")
tokenizer.save_pretrained("")

In [20]:
# Class ids indexed by the argmax position of the model output; the
# explanation cells use this list to turn a prediction into a class name.
classifications = [0, 1]

# Load the classifier through the BERT_explainability model class and wrap it
# in the explanation generator used by the cells below.
model = BertForSequenceClassification.from_pretrained("")
explanations = Generator(model)

In [14]:
#model.encoder

In [21]:
#adp_model = AdaptiveModel.load(save_dir, device=device)

In [24]:
# Explain the model's prediction for one sentence and render the per-token
# relevance with Captum's text visualisation.

# Sentence to explain and its ground-truth label (class 1).
text_batch = ["I hate you, you should shut up"]
true_class = 1

# Tokenize as a batch of one. The tensors are used exactly as returned by the
# tokenizer; add .to(device) here if the model is ever moved off the CPU.
encoding = tokenizer(text_batch, return_tensors='pt')
input_ids = encoding['input_ids']
attention_mask = encoding['attention_mask']

# Per-token relevance scores for the single sentence in the batch.
expl = explanations.generate_LRP(input_ids=input_ids, attention_mask=attention_mask, start_layer=0)[0].detach()

# Min-max normalise the scores to [0, 1]. After subtracting the minimum, the
# maximum equals the original (max - min) range. If that range is zero (all
# tokens scored equally) the scores are left at zero instead of dividing by
# zero, which would fill the tensor with NaNs.
expl = expl - expl.min()
if expl.max() > 0:
    expl = expl / expl.max()

# Class probabilities from the model; the logits are detached from the graph.
output = torch.nn.functional.softmax(
    model(input_ids=input_ids, attention_mask=attention_mask)[0].detach(), dim=-1
)
print(output)

# Predicted class index and the matching entry of `classifications`.
classification = output.argmax(dim=-1).item()
class_name = classifications[classification]

# For a class-1 prediction the scores are negated, so that higher relevance
# becomes a more negative attribution in the visualisation.
if class_name == 1:
    expl *= (-1)

tokens = tokenizer.convert_ids_to_tokens(input_ids.flatten())
print([(token, score.item()) for token, score in zip(tokens, expl)])

# Positional arguments follow Captum's VisualizationDataRecord order:
# word attributions, predicted probability, predicted class, true class,
# attribution class, attribution score, raw tokens, convergence score.
# NOTE(review): the attribution class is filled with `true_class`; confirm
# whether the class actually explained by generate_LRP should be shown instead.
vis_data_records = [visualization.VisualizationDataRecord(
    expl,
    output[0][classification],
    classification,
    true_class,
    true_class,
    1,
    tokens,
    1,
)]

# The trailing semicolon stops the notebook from echoing the value returned by
# visualize_text, which otherwise renders the same table a second time.
visualization.visualize_text(vis_data_records);

tensor([[0.0019, 0.9981]])
[('[CLS]', -0.0), ('i', -0.15808674693107605), ('hate', -0.7073373198509216), ('you', -0.3964426815509796), (',', -0.19151563942432404), ('you', -0.2967347800731659), ('should', -0.2446955144405365), ('shut', -1.0), ('up', -0.24097494781017303), ('[SEP]', -0.010956291109323502)]


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (1.00),1.0,1.0,"[CLS] i hate you , you should shut up [SEP]"
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (1.00),1.0,1.0,"[CLS] i hate you , you should shut up [SEP]"
,,,,


In [25]:
# Explain the model's prediction for one sentence and render the per-token
# relevance with Captum's text visualisation.

# Sentence to explain and its ground-truth label (class 0 for this example).
text_batch = ["He is great. I love working with him!"]
true_class = 0

# Tokenize as a batch of one. The tensors are used exactly as returned by the
# tokenizer; add .to(device) here if the model is ever moved off the CPU.
encoding = tokenizer(text_batch, return_tensors='pt')
input_ids = encoding['input_ids']
attention_mask = encoding['attention_mask']

# Per-token relevance scores for the single sentence in the batch.
expl = explanations.generate_LRP(input_ids=input_ids, attention_mask=attention_mask, start_layer=0)[0].detach()

# Min-max normalise the scores to [0, 1]. After subtracting the minimum, the
# maximum equals the original (max - min) range. If that range is zero (all
# tokens scored equally) the scores are left at zero instead of dividing by
# zero, which would fill the tensor with NaNs.
expl = expl - expl.min()
if expl.max() > 0:
    expl = expl / expl.max()

# Class probabilities from the model; the logits are detached from the graph.
output = torch.nn.functional.softmax(
    model(input_ids=input_ids, attention_mask=attention_mask)[0].detach(), dim=-1
)
print(output)

# Predicted class index and the matching entry of `classifications`.
classification = output.argmax(dim=-1).item()
class_name = classifications[classification]

# For a class-1 prediction the scores are negated, so that higher relevance
# becomes a more negative attribution in the visualisation.
if class_name == 1:
    expl *= (-1)

tokens = tokenizer.convert_ids_to_tokens(input_ids.flatten())
print([(token, score.item()) for token, score in zip(tokens, expl)])

# Positional arguments follow Captum's VisualizationDataRecord order:
# word attributions, predicted probability, predicted class, true class,
# attribution class, attribution score, raw tokens, convergence score.
# NOTE(review): the attribution class is filled with `true_class`; confirm
# whether the class actually explained by generate_LRP should be shown instead.
vis_data_records = [visualization.VisualizationDataRecord(
    expl,
    output[0][classification],
    classification,
    true_class,
    true_class,
    1,
    tokens,
    1,
)]

# The trailing semicolon stops the notebook from echoing the value returned by
# visualize_text, which otherwise renders the same table a second time.
visualization.visualize_text(vis_data_records);

tensor([[9.9969e-01, 3.0844e-04]])
[('[CLS]', 0.0), ('he', 0.3804439902305603), ('is', 0.37687814235687256), ('great', 1.0), ('.', 0.44820573925971985), ('i', 0.7494731545448303), ('love', 0.8934091329574585), ('working', 0.662636935710907), ('with', 0.2590741217136383), ('him', 0.557521402835846), ('!', 0.7986675500869751), ('[SEP]', 0.10657887905836105)]


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,0 (1.00),0.0,1.0,[CLS] he is great . i love working with him ! [SEP]
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,0 (1.00),0.0,1.0,[CLS] he is great . i love working with him ! [SEP]
,,,,


In [26]:
# Explain the model's prediction for one sentence and render the per-token
# relevance with Captum's text visualisation.

# Sentence to explain and its ground-truth label (class 1).
# Longer comment texts were also tried here as alternative inputs.
text_batch = ["I hate you, i'm going to kill you."]
true_class = 1

# Tokenize as a batch of one. The tensors are used exactly as returned by the
# tokenizer; add .to(device) here if the model is ever moved off the CPU.
encoding = tokenizer(text_batch, return_tensors='pt')
input_ids = encoding['input_ids']
attention_mask = encoding['attention_mask']

# Per-token relevance scores for the single sentence in the batch.
expl = explanations.generate_LRP(input_ids=input_ids, attention_mask=attention_mask, start_layer=0)[0].detach()

# Min-max normalise the scores to [0, 1]. After subtracting the minimum, the
# maximum equals the original (max - min) range. If that range is zero (all
# tokens scored equally) the scores are left at zero instead of dividing by
# zero, which would fill the tensor with NaNs.
expl = expl - expl.min()
if expl.max() > 0:
    expl = expl / expl.max()

# Class probabilities from the model; the logits are detached from the graph.
output = torch.nn.functional.softmax(
    model(input_ids=input_ids, attention_mask=attention_mask)[0].detach(), dim=-1
)
print(output)

# Predicted class index and the matching entry of `classifications`.
classification = output.argmax(dim=-1).item()
class_name = classifications[classification]

# For a class-1 prediction the scores are negated, so that higher relevance
# becomes a more negative attribution in the visualisation.
if class_name == 1:
    expl *= (-1)

tokens = tokenizer.convert_ids_to_tokens(input_ids.flatten())
print([(token, score.item()) for token, score in zip(tokens, expl)])

# Positional arguments follow Captum's VisualizationDataRecord order:
# word attributions, predicted probability, predicted class, true class,
# attribution class, attribution score, raw tokens, convergence score.
# NOTE(review): the attribution class is filled with `true_class`; confirm
# whether the class actually explained by generate_LRP should be shown instead.
vis_data_records = [visualization.VisualizationDataRecord(
    expl,
    output[0][classification],
    classification,
    true_class,
    true_class,
    1,
    tokens,
    1,
)]

# The trailing semicolon stops the notebook from echoing the value returned by
# visualize_text, which otherwise renders the same table a second time.
visualization.visualize_text(vis_data_records);

tensor([[0.0013, 0.9987]])
[('[CLS]', -0.0), ('i', -0.4822540879249573), ('hate', -0.9698119163513184), ('you', -0.7999723553657532), (',', -0.47392964363098145), ('i', -0.4963516294956207), ("'", -0.027186112478375435), ('m', -0.5629469752311707), ('going', -0.4849805533885956), ('to', -0.4473573565483093), ('kill', -1.0), ('you', -0.8695250749588013), ('.', -0.672978937625885), ('[SEP]', -0.1686214655637741)]


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (1.00),1.0,1.0,"[CLS] i hate you , i ' m going to kill you . [SEP]"
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (1.00),1.0,1.0,"[CLS] i hate you , i ' m going to kill you . [SEP]"
,,,,
