**[Configuration] Mount Google Drive**

In [None]:
# # Mounting Google Drive to Colab (Optional)
# from google.colab import drive
# drive.mount('/content/drive')

**[Configuration] Install Required Python Packages**

In [None]:
# !pip install datasets
# !pip install transformers
# !pip install shap

**[Configuration] Import Required Python Packages**

In [None]:
import pandas as pd
import numpy as np
import scipy as sp
import datasets
import torch
import shap
from transformers import BertTokenizer, BertForSequenceClassification
from sklearn.metrics import f1_score,  recall_score, precision_score, multilabel_confusion_matrix

**[Model 1] Adding BIO Tag to Training Data**

In [None]:
# Load the training data from the CSV file in the working directory.
tsd_train = pd.read_csv('tsd_train.csv')
# Cell output: the dtype pandas inferred for each column.
tsd_train.dtypes

To run on a subset of the data only, uncomment the ***tsd_train = tsd_train[...]*** slicing line in the cell below; leave it commented out to process the whole dataset.

In [None]:
# A 'spans' string longer than 2 characters contains more than its two
# enclosing characters (presumably "[]"), i.e. at least one toxic offset.
toxic_flag = (tsd_train['spans'].map(len) > 2).astype('int64')
tsd_train['toxic'] = toxic_flag
tsd_train['non-toxic'] = 1 - toxic_flag
# Per-row two-element list in the order [toxic, non-toxic].
tsd_train['labels'] = tsd_train[['toxic', 'non-toxic']].values.tolist()
# The frame must not carry a helper 'label' column past this cell.
tsd_train = tsd_train.drop(columns=['label'], errors='ignore')
#tsd_train = tsd_train[2501:7935]
tsd_train.head(10)

In [None]:
# Wrap the labelled frame in a `datasets.Dataset` for tokenization below.
tsd_train_dataset = datasets.Dataset.from_pandas(tsd_train)
# Cell output: a single example (row 3) as a sanity check.
tsd_train_dataset[3]

**[Model 2] Classifying Toxic Comment with BERT**

In [None]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')


def _encode_batch(batch):
    """Tokenize a batch of texts, padded/truncated to exactly 256 tokens."""
    return tokenizer(
        batch['text'],
        max_length=256,
        padding='max_length',
        truncation=True,
    )


# batched=True hands `_encode_batch` many rows at once instead of one by one.
tsd_train_dataset = tsd_train_dataset.map(_encode_batch, batched=True)

In [None]:
# Cell output: dataset summary after tokenization.
tsd_train_dataset

In [None]:
# Drop the columns that are not needed as model inputs; the remaining columns
# (including 'labels' and the tokenizer outputs) stay on the dataset.
tsd_train_dataset = tsd_train_dataset.remove_columns(['text', 'spans', 'toxic', 'non-toxic'])
# Return torch tensors when the dataset is indexed, for every column.
tsd_train_dataset.set_format(type='torch', output_all_columns=True)

In [None]:
# Cell output: dataset summary after column removal / torch formatting.
tsd_train_dataset

In [None]:
batch_size = 16
# NOTE(review): `loader` is not referenced again in the visible cells.
loader = torch.utils.data.DataLoader(tsd_train_dataset,batch_size=batch_size)
# Directory holding the fine-tuned classifier checkpoint.
PATH = './model_save3'
# output_hidden_states=True makes the model also return per-layer hidden
# states, which the token-embedding cell further down reads.
model = BertForSequenceClassification.from_pretrained(PATH,output_hidden_states = True,)

In [None]:
# NOTE(review): these four names are not read anywhere else in the visible
# notebook; they are kept so that nothing relying on them breaks.
sigmoid = torch.nn.Sigmoid()

eval_preds, eval_labels = [], []
eval_accuracy = 0

input_ids = tsd_train_dataset['input_ids']
attention_mask = tsd_train_dataset['attention_mask']
labels = tsd_train_dataset['labels']

del tsd_train_dataset

# Run inference in mini-batches instead of one forward pass over the whole
# dataset. The model was loaded with output_hidden_states=True, so a single
# full-dataset call would hold every layer's activations for every row in
# memory at once. Only the logits of each batch are retained here.
model.eval()  # make inference mode explicit (dropout disabled)
batch_logits = []
with torch.no_grad():
    for ids_chunk, mask_chunk in zip(
        torch.split(input_ids, batch_size),
        torch.split(attention_mask, batch_size),
    ):
        batch_logits.append(
            model(input_ids=ids_chunk, attention_mask=mask_chunk).logits
        )

# Independent per-class probabilities (sigmoid, not softmax).
probability = torch.cat(batch_logits).sigmoid()

In [None]:
# Binarise the per-class probabilities at 0.5 and attach them to the frame,
# one [class0, class1] list per row.
sequence_pred = np.array(probability)
at_or_above = sequence_pred >= 0.5
below = sequence_pred < 0.5
sequence_pred[at_or_above] = 1
sequence_pred[below] = 0
seq_pred_list = sequence_pred.tolist()
tsd_train['seq_pred'] = seq_pred_list

In [None]:
# Cell output: first row, now including the 'seq_pred' column.
tsd_train.head(1)

In [None]:
# Characters that are always emitted as a token of their own.
_PUNCTUATION = (',', '.', '"', '?')


def _emit(rows, sent, token, tag, row):
    """Append one (sentence, token, 0, tag, labels, seq_pred) tuple to `rows`.

    Surrounding whitespace is stripped from `token`; blank tokens are skipped.
    """
    token = token.strip()
    if token != '':
        rows.append((sent, token, 0, tag, row['labels'], row['seq_pred']))


# Split every text into whitespace/punctuation-delimited tokens and tag each
# token 'T' (built from characters inside the toxic span offsets) or 'O'.
t = []
for index, row in tsd_train.iterrows():
    sent = index + 1  # sentence number derived from the frame index
    word = ""       # pending non-toxic characters
    span_word = ""  # pending toxic characters
    # 'spans' is a string such as "[3, 4, 5]"; strip the enclosing characters
    # and parse the character offsets. A set gives O(1) membership tests.
    spans_body = row['spans'][1:-1]
    spans_pos = set(map(int, spans_body.split(","))) if len(spans_body) != 0 else set()
    text = row['text'].replace("\n", " ")
    for pos, char in enumerate(text):
        if pos in spans_pos:
            if word != "":
                # FIX: flush the pending non-toxic word. The previous guard
                # tested `span_word`, which is always empty here, so a word
                # directly followed by a toxic span was silently dropped.
                _emit(t, sent, word, 'O', row)
                word = ""
            if char == " " and span_word.strip() != "":
                _emit(t, sent, span_word, 'T', row)
                span_word = ""
                continue
            if char in _PUNCTUATION:
                _emit(t, sent, span_word, 'T', row)
                _emit(t, sent, char, 'T', row)
                span_word = ""
                continue
            span_word += char
        else:
            if span_word != "":
                _emit(t, sent, span_word, 'T', row)
                span_word = ""
            if char == " " and word.strip() != "":
                _emit(t, sent, word, 'O', row)
                word = ""
                continue
            if char in _PUNCTUATION:
                _emit(t, sent, word, 'O', row)
                _emit(t, sent, char, 'O', row)
                word = ""
                continue
            word += char

    # Flush whatever is still pending at the end of the text.
    _emit(t, sent, span_word, 'T', row)
    _emit(t, sent, word, 'O', row)

In [None]:
# One row per tagged token; 'POS' is the constant placeholder written by the
# tagging loop.
tagged_columns = ['Sentence_No', 'Word', 'POS', 'Tag', 'Class', 'Prediction']
tsd_train_tagged = pd.DataFrame(t)
del t
tsd_train_tagged.columns = tagged_columns
tsd_train_tagged.head(10)

In [None]:
# Regroup the token-level rows into one list per sentence.
#
# FIX: the previous loop started from a hard-coded `current_num = 1`. When the
# first sentence number is not 1 (e.g. the frame was sliced, so the index does
# not start at 0), an empty sentence was prepended and every list was shifted
# by one relative to the texts. Grouping on the actual sentence numbers avoids
# that. sort=False keeps sentences in order of first appearance.
sentences = []
sentence_tags = []
sentence_class = []
sentence_pred = []

for _, sentence_rows in tsd_train_tagged.groupby('Sentence_No', sort=False):
    sentences.append(sentence_rows['Word'].tolist())
    sentence_tags.append(sentence_rows['Tag'].tolist())
    sentence_class.append(sentence_rows['Class'].tolist())
    sentence_pred.append(sentence_rows['Prediction'].tolist())

del tsd_train_tagged

In [None]:
# print(len(sentences))
# print(sentences[4])
# print(sentence_tags[4])
# print(sentence_class[4])
# print(sentence_pred[4])

**[Model 3] Explaining Toxic Comment with SHAP Value**

In [None]:
# Keep only the raw text column; the rest of the frame is no longer needed.
list_text = tsd_train['text']
del tsd_train
list_text.head(10)

In [None]:
def tokenize_and_preserve_labels(sentences, sentence_tags, sentence_classes, seq_predictions, tokenizer):
    """
    Tokenize word by word and replicate each word's annotations per subword.

    The four input collections are parallel: for every sentence they hold one
    entry per word (the word itself, its tag, its class and its prediction).
    Each word is passed to ``tokenizer.tokenize`` on its own; the resulting
    subwords are concatenated per sentence, and the word's tag, class and
    prediction are repeated once for every subword it produced.

    Returns a 4-tuple ``(tokenized_sentences, tags, classes, predictions)``,
    each a list with one inner list per input sentence.
    """
    tokenized_sentences, tags, classes, predictions = [], [], [], []

    per_sentence = zip(sentences, sentence_tags, sentence_classes, seq_predictions)
    for words, word_tags, word_classes, word_preds in per_sentence:
        pieces, piece_tags, piece_classes, piece_preds = [], [], [], []

        for word, tag, cla, word_pred in zip(words, word_tags, word_classes, word_preds):
            subwords = tokenizer.tokenize(word)
            repeat = len(subwords)

            pieces.extend(subwords)
            # Every subword inherits the annotations of the word it came from.
            piece_tags.extend([tag] * repeat)
            piece_classes.extend([cla] * repeat)
            piece_preds.extend([word_pred] * repeat)

        tokenized_sentences.append(pieces)
        tags.append(piece_tags)
        classes.append(piece_classes)
        predictions.append(piece_preds)

    return tokenized_sentences, tags, classes, predictions

In [None]:
# Re-tokenize the word-level sentences into BERT subwords, carrying each
# word's tag, class and prediction along to its subwords.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
tokenized_sentences, tags, classes, predictions = tokenize_and_preserve_labels(
    sentences, sentence_tags, sentence_class, sentence_pred, tokenizer
)


In [None]:
# The word-level lists have been superseded by the subword-level ones.
del sentences, sentence_tags, sentence_class, sentence_pred

In [None]:
# print(tokenized_sentences[4])
# print(tags[4])
# print(classes[4])
# print(predictions[4])

In [None]:
# Prefer the first CUDA GPU when one is available, otherwise stay on the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model.to(device)

In [None]:
# define a prediction function
def f(x):
    """Score a batch of texts for the SHAP explainer.

    Parameters
    ----------
    x : iterable of str
        Texts to score; each is encoded to exactly 256 token ids.

    Returns
    -------
    numpy.ndarray
        One value per text: the logit (log-odds) of the softmax probability
        of class index 1.

    NOTE(review): the training labels are built in the order
    [toxic, non-toxic], so index 1 appears to be the *non-toxic* class —
    confirm this is the class that should be explained. Also note the
    classification cell applies a sigmoid to the logits while this function
    applies a softmax.
    """
    tv = torch.tensor(
        [tokenizer.encode(v, padding='max_length', max_length=256, truncation=True) for v in x]
    ).to(device)
    # The explainer only needs forward passes; skip autograd bookkeeping so no
    # graph is built for each of the many evaluations it performs.
    with torch.no_grad():
        outputs = model(tv)[0].cpu().numpy()
    # Softmax with the row maximum subtracted first: mathematically identical,
    # but np.exp cannot overflow for large logits.
    exp_shifted = np.exp(outputs - outputs.max(axis=-1, keepdims=True))
    scores = exp_shifted / exp_shifted.sum(axis=-1, keepdims=True)
    val = sp.special.logit(scores[:,1]) # use one vs rest logit units
    return val

In [None]:
# Build a SHAP explainer around the prediction function `f`, passing the
# BERT tokenizer as the second argument.
explainer = shap.Explainer(f, tokenizer)
# Explain every text in `list_text`.
shap_values = explainer(list_text, fixed_context=1)

In [None]:
# Cell output: the SHAP explanation object for example 3.
shap_values[3]

In [None]:
# Render the SHAP text plot for example 3.
shap.plots.text(shap_values[3])

In [None]:
# import gc
# #del model
# gc.collect()
# torch.cuda.empty_cache()

In [None]:
# !nvidia-smi

In [None]:
# batch_size = 16
# PATH = './model_save3'
# # tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# model = BertForSequenceClassification.from_pretrained(PATH,output_hidden_states = True,)

In [None]:
# device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
# model.to(device)

In [None]:
# model.to(torch.device('cpu'))

In [None]:
def token_embed(x, batch_size=16):
    """Return the last-layer hidden state of every token of every text in `x`.

    Parameters
    ----------
    x : iterable of str
        Texts to embed; each is encoded to exactly 256 token ids.
    batch_size : int, optional
        Number of texts per forward pass (default 16).

    Returns
    -------
    list
        One entry per text; each entry is a list with one numpy vector per
        token position.

    Fixes over the previous version:
    * runs under ``torch.no_grad()`` — calling ``.numpy()`` on a tensor that
      requires grad raises a RuntimeError;
    * moves the inputs to the device the model currently lives on (the model
      is moved with ``model.to(device)`` in an earlier cell);
    * processes the texts in mini-batches instead of one forward pass over
      the whole corpus.
    """
    texts = list(x)
    model_device = next(model.parameters()).device
    batch_token = []
    with torch.no_grad():
        for start in range(0, len(texts), batch_size):
            chunk = texts[start:start + batch_size]
            tv = torch.tensor(
                [tokenizer.encode(v, padding='max_length', max_length=256, truncation=True) for v in chunk]
            ).to(model_device)
            # Index 1 of the model output holds the per-layer hidden states
            # (the model is loaded with output_hidden_states=True); [-1] is
            # the last layer.
            last_layer = model(tv)[1][-1].cpu().numpy()
            batch_token.extend(list(sentence) for sentence in last_layer)
    return batch_token
# NOTE: this rebinds the name `token_embed` from the function to its result;
# the cells below read `token_embed` as the embeddings.
token_embed = token_embed(list_text)

In [None]:
# Cell output: overall shape of the collected token embeddings.
np.asarray(token_embed).shape

In [None]:
# Cell output: shape of the embeddings for example 3.
np.asarray(token_embed[3]).shape

**Training Data Output**

In [None]:
# output = []

# for (ts, tg, cla, pred, sv, te) in zip(tokenized_sentences, tags, classes, predictions, shap_values, token_embed):
#     base_array = [sv.base_values] * len(sv.values)
#     te_ls = [list(t.flatten()) for t in te]
#     temp = list(zip(cla, pred, ts, tg, sv.values[1:-1], base_array[1:-1], te_ls[1:-1]))
#     output.append(temp)

# del tokenized_sentences, tags, classes, predictions, shap_values, token_embed

In [None]:
def _sentence_rows(ts, tg, cla, pred, sv):
    """Zip one sentence's subword annotations with its SHAP values.

    The first and last SHAP entries are dropped (presumably the special
    start/end tokens — TODO confirm); the base value is repeated alongside.
    `zip` stops at the shortest input.
    """
    inner_shap = sv.values[1:-1]
    inner_base = ([sv.base_values] * len(sv.values))[1:-1]
    return list(zip(cla, pred, ts, tg, inner_shap, inner_base))


output = [
    _sentence_rows(ts, tg, cla, pred, sv)
    for ts, tg, cla, pred, sv in zip(tokenized_sentences, tags, classes, predictions, shap_values)
]

del tokenized_sentences, tags, classes, predictions, shap_values

In [None]:
# Flatten the per-sentence lists into a single list of token-level rows.
final = []
for sentence in output:
    final.extend(sentence)
del output

In [None]:
# final_df = pd.DataFrame(final)
# del final
# final_df.columns = ['Sentence_Class','Predictions', 'Token','Tag','SHAP','SHAP_Base','Token_Embed']
# final_df[185:195]

In [None]:
# One row per subword token with its class, prediction, tag and SHAP values.
final_columns = ['Sentence_Class', 'Predictions', 'Token', 'Tag', 'SHAP', 'SHAP_Base']
final_df = pd.DataFrame(final)
del final
final_df.columns = final_columns
final_df[185:195]

In [None]:
# The token text itself is not written to the output file.
final_df = final_df.drop(columns=['Token'])

In [None]:
# Write the frame to an HDF5 file under key 'final_df'; mode='w' overwrites
# any existing file of that name.
final_df.to_hdf('classifier_training_data_shap.h5', key='final_df', mode='w')

In [None]:
#reread = pd.read_hdf('./test.h5')

In [None]:
#reread