In [None]:
!wget https://github.com/t-davidson/hate-speech-and-offensive-language/raw/master/data/labeled_data.p

!unzip ../input/jigsaw-toxic-comment-classification-challenge/train.csv.zip
!unzip ../input/jigsaw-toxic-comment-classification-challenge/test.csv.zip
!unzip ../input/jigsaw-toxic-comment-classification-challenge/test_labels.csv.zip

!pip install pandarallel

In [None]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
from pandarallel import pandarallel

In [None]:
# Set up pandarallel (with progress bars) before the parallel_apply calls used further down.
pandarallel.initialize(progress_bar=True)

In [None]:
# Label columns of the Jigsaw toxic-comment CSVs.
label_cols = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

train_df = pd.read_csv('./train.csv')

# Join the test comments with their labels on the shared 'id' column.
test_df = pd.read_csv('./test.csv').merge(pd.read_csv('./test_labels.csv'), on='id')

# Remove test rows whose smallest label value is -1.
# presumably -1 marks rows without usable labels — confirm against the dataset description
test_df.drop(test_df.index[test_df[label_cols].min(axis=1) == -1], inplace=True)

In [None]:
# Stack the labelled train and test comments into a single frame with a fresh index.
df = pd.concat([train_df, test_df], ignore_index=True)

# A comment is toxic when at least one of the label columns is non-zero.
# Computed column-wise instead of with one Python call per row, and stored as
# explicit 0/1 integers so the dtype matches the integer 'toxic?' labels of the
# tweet data that is concatenated onto this frame later.
df['toxic?'] = df[label_cols].any(axis=1).astype(int)

In [None]:
# Reduce the frame to the comment text and the binary target:
# drop the individual label columns and the id, then rename the text column.
df = df.drop(columns=label_cols + ['id']).rename(columns={'comment_text': 'text'})

In [None]:
# Display the Jigsaw frame after trimming it to the 'text' and 'toxic?' columns.
df

In [None]:
# NOTE(security): unpickling can execute arbitrary code. labeled_data.p is
# downloaded from GitHub by the setup cell, so only load it if that source is trusted.
df2 = pd.read_pickle('labeled_data.p').rename(columns={'tweet': 'text'})

# Every tweet whose class is not 2 is marked toxic (1), the rest 0.
# presumably class 2 is the non-offensive category — confirm against the dataset README
# Done as one vectorised comparison rather than a Python call per row.
df2['toxic?'] = (df2['class'] != 2).astype(int)

# Keep the same two columns as the Jigsaw frame so both can be concatenated.
df2 = df2[['text', 'toxic?']]

In [None]:
# Display the tweet frame, now reduced to the 'text' and 'toxic?' columns.
df2

In [None]:
# Append the tweet rows to the Jigsaw rows and rebuild the index from 0.
# NOTE: df is reassigned from itself, so re-running this cell appends df2 again.
df = pd.concat([df, df2], ignore_index=True)

In [None]:
# Persist the combined frame to df.pkl in the working directory.
df.to_pickle('df.pkl')

In [None]:
# Row count of the combined (Jigsaw + tweets) frame.
len(df)

In [None]:
# Row count of the Jigsaw training CSV.
len(train_df)

In [None]:
# Row count of the Jigsaw test set after the rows with a -1 label were dropped.
len(test_df)

In [None]:
# First rows of the combined frame.
df.head()

In [None]:
# Summary statistics of the binary target column.
df['toxic?'].describe()

In [None]:
# Class balance: number of rows per target value.
df['toxic?'].value_counts()

In [None]:
!pip install interpret-text

In [None]:
import torch
import torch.nn as nn

# Master switch: a quick run trains and evaluates on a 10% sample of each split.
QUICK_RUN = True

# Fraction of each split that is kept (1 means the whole split).
TRAIN_DATA_FRACTION = 0.1 if QUICK_RUN else 1
TEST_DATA_FRACTION = 0.1 if QUICK_RUN else 1

# Both modes currently train for a single epoch; kept as a separate knob per mode.
NUM_EPOCHS = 1 if QUICK_RUN else 1

# Training batch size depends on whether torch can see a CUDA device.
BATCH_SIZE = 1 if torch.cuda.is_available() else 9

In [None]:
import numpy

In [None]:
# Re-check the first rows of the combined frame before splitting it.
df.head()

In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import numpy as np

# One seed for the split AND the subsampling. Without a seed on .sample() the
# quick-run subset (and the shuffle order for frac=1) changed on every run,
# even though the split itself was seeded.
RANDOM_STATE = 0

# split data: 60% of the rows for training, the rest for testing
df_train, df_test = train_test_split(df, train_size=0.6, random_state=RANDOM_STATE)

# Keep the configured fraction of each part, then renumber the rows from 0 so
# that label-based lookups such as df_test["text"][idx] line up with the
# positions in labels_test and preds.
df_train = df_train.sample(frac=TRAIN_DATA_FRACTION, random_state=RANDOM_STATE).reset_index(drop=True)
df_test = df_test.sample(frac=TEST_DATA_FRACTION, random_state=RANDOM_STATE).reset_index(drop=True)

# encode labels: the encoder is fitted on the training targets only and reused for the test targets
label_encoder = LabelEncoder()
labels_train = label_encoder.fit_transform(df_train["toxic?"])
labels_test = label_encoder.transform(df_test["toxic?"])
num_labels = len(np.unique(labels_train))
print("Number of unique labels: {}".format(num_labels))
print("Number of training examples: {}".format(df_train.shape[0]))
print("Number of testing examples: {}".format(df_test.shape[0]))

In [None]:
# Encoded test targets as produced by label_encoder.transform.
labels_test

In [None]:
# Last rows of the combined frame (the tweet rows were appended at the end).
df.tail()

In [None]:
from interpret_text.experimental.common.utils_bert import Language, Tokenizer

# Directory and language handed to the tokenizer (and reused when the classifier is built).
BERT_CACHE_DIR = "./temp"
LANGUAGE = Language.ENGLISH

# Lower-casing tokenizer from interpret-text's BERT utilities.
tokenizer = Tokenizer(LANGUAGE, to_lower=True, cache_dir=BERT_CACHE_DIR)

# Tokenize the raw text of both splits; each call receives a plain Python list.
tokens_train = tokenizer.tokenize(df_train["text"].tolist())
tokens_test = tokenizer.tokenize(df_test["text"].tolist())

In [None]:
# Length limit passed to the tokenizer's preprocessing step.
MAX_LEN = 150
# Turn the token lists into model inputs plus input masks; the third return value is discarded.
# NOTE: tokens_train / tokens_test are overwritten with the preprocessed result, so this
# cell must not be re-run without re-running the tokenization cell first.
tokens_train, mask_train, _ = tokenizer.preprocess_classification_tokens(tokens_train, MAX_LEN)
tokens_test, mask_test, _ = tokenizer.preprocess_classification_tokens(tokens_test, MAX_LEN)

In [None]:
from interpret_text.experimental.common.utils_bert import BERTSequenceClassifier

# Fixed number of output classes; this literal replaces the num_labels value
# that was computed from the training labels earlier.
num_labels = 2

classifier = BERTSequenceClassifier(
    language=LANGUAGE,
    num_labels=num_labels,
    cache_dir=BERT_CACHE_DIR,
)

In [None]:
from interpret_text.experimental.common.timer import Timer

# Train the classifier on the preprocessed training inputs and time the whole call.
with Timer() as t:
    classifier.fit(
        token_ids=tokens_train,
        input_mask=mask_train,
        labels=labels_train,
        num_epochs=NUM_EPOCHS,
        batch_size=BATCH_SIZE,
        verbose=True,
    )

# The measured interval is divided by 3600 so the message is in hours.
print("[Training time: {:.3f} hrs]".format(t.interval / 3600))


In [None]:
# Predict a class for every preprocessed test example.
# NOTE(review): the prediction batch size (512) is independent of BATCH_SIZE used for
# training — confirm it fits in memory on the target device.
preds = classifier.predict(token_ids=tokens_test, 
                           input_mask=mask_test, 
                           batch_size=512)

In [None]:
from sklearn.metrics import classification_report, accuracy_score
import json

# Display names for the encoded classes, as plain strings. Passing the raw
# class values made them keys of the report dict next to string keys such as
# 'accuracy', which breaks json.dumps(..., sort_keys=True) below.
class_names = [str(c) for c in label_encoder.classes_]

# LabelEncoder maps classes to 0..n-1, so pin that label set explicitly; the
# report then keeps one row per class even if a class is absent from this sample.
# presumably preds holds encoded class ids in the same 0..n-1 space — confirm
report = classification_report(
    labels_test,
    preds,
    labels=list(range(len(class_names))),
    target_names=class_names,
    output_dict=True,
)
accuracy = accuracy_score(labels_test, preds)
print("accuracy: {}".format(accuracy))
# Uncomment to dump the full per-class report:
#print(json.dumps(report, indent=4, sort_keys=True))

In [None]:
# Use CUDA when torch can see a device, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
classifier.model.to(device)

# Turn off gradient tracking for every parameter before the explanation cells.
for param in classifier.model.parameters():
    param.requires_grad = False

# Switch to evaluation mode; as the last expression this also displays the model.
classifier.model.eval()

In [None]:
from interpret_text.experimental.unified_information import UnifiedInformationExplainer
# Build the explainer around the frozen model, handing it the raw training texts
# and the label encoder's class values.
# NOTE(review): target_layer=14 is a hard-coded layer index — confirm it is valid for this model.
interpreter_unified = UnifiedInformationExplainer(model=classifier.model, 
                                 train_dataset=list(df_train["text"]), 
                                 device=device, 
                                 target_layer=14, 
                                 classes=label_encoder.classes_)

In [None]:
# Explain a single test example: print its text, stored label and predicted
# class, then request a local explanation for it.
idx = 10
text = df_test.loc[idx, "text"]
true_label = df_test.loc[idx, "toxic?"]

# inverse_transform maps the encoded prediction back to the original label value
# (returned as a one-element array).
predicted_label = label_encoder.inverse_transform([preds[idx]])
print(text, true_label, predicted_label)

explanation_unified = interpreter_unified.explain_local(text, true_label, predicted_label)

In [None]:
# Same walkthrough for another test row; explanation_unified is replaced by
# the explanation of this example.
idx = 9
text = df_test.at[idx, "text"]
true_label = df_test.at[idx, "toxic?"]

# Decode the encoded prediction back to the original label value (one-element array).
predicted_label = label_encoder.inverse_transform([preds[idx]])
print(text, true_label, predicted_label)

explanation_unified = interpreter_unified.explain_local(text, true_label, predicted_label)

In [None]:
# Display the most recently computed local explanation object.
explanation_unified

In [None]:
# Render figures inline, then open interpret-text's dashboard widget for the
# most recently computed explanation.
%matplotlib inline
from interpret_text.experimental.widget import ExplanationDashboard
ExplanationDashboard(explanation_unified)

In [None]:
# Ranked feature names and their importance values for the most recently
# computed explanation, as returned by the explanation's two ranking accessors.
sorted_local_importance_names = explanation_unified.get_ranked_local_names()
sorted_local_importance_values = explanation_unified.get_ranked_local_values()

In [None]:
# Ranked feature names of the current explanation.
sorted_local_importance_names 

In [None]:
# Importance values in the same ranked order as the names.
sorted_local_importance_values

In [None]:
# Number of leading test rows to scan for toxic examples.
N_ROWS_TO_SCAN = 99

# Cap the scan at the size of the test sample: with a small TEST_DATA_FRACTION
# the frame can hold fewer rows than the scan limit, and indexing past the end
# raises a KeyError.
for i in range(min(N_ROWS_TO_SCAN, len(df_test))):
    # Only rows whose stored label is truthy (toxic) are explained.
    if df_test["toxic?"][i]:
        print(df_test["text"][i])
        explanation_unified = interpreter_unified.explain_local(df_test["text"][i], df_test["toxic?"][i], label_encoder.inverse_transform([preds[i]]))
        sorted_local_importance_names = explanation_unified.get_ranked_local_names()
        sorted_local_importance_values = explanation_unified.get_ranked_local_values()
        # Show the four top-ranked features with their importance values.
        print(sorted_local_importance_names[:4],sorted_local_importance_values[:4])


In [None]:
# Number of rows in the (possibly subsampled) test split.
len(df_test)