In [1]:
%load_ext autoreload
%autoreload 2
import os
import os.path
import re
import string

import numpy as np
import sklearn
import sklearn.model_selection
import sklearn.linear_model
import sklearn.ensemble
import spacy
import sys
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

!pip install anchor_exp
from anchor import anchor_text
import time



In [2]:
def cleanText(var):
    # replace punctuation with spaces
    var = re.sub('[{}]'.format(string.punctuation), " ", var)
    # remove double spaces
    var = re.sub(r'\s+', " ", var)
    # put in lower case
    var = var.lower().split()
    # remove words that are smaller than 3 characters
    var = [w for w in var if len(w) >= 3]
    var = " ".join(var)
    return var

# Removes 'rt' from all input data
def my_clean(text):
    text = text.lower().split()
    text = [w for w in text]
    text = " ".join(text)
    text = re.sub(r"rt", "", text)
    return text

# Removes 'rt' from all input data
# Removes emojis from all input data
def YOUTUBE_my_clean(text):
    text = text.lower().split()
    text = [w for w in text]
    text = " ".join(text)
    text = re.sub(r"rt", "", text)
    emoji_pattern = re.compile("["
                           u"\U0001F600-\U0001F64F"  # emoticons
                           u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                           u"\U0001F680-\U0001F6FF"  # transport & map symbols
                           u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                           u"\U00002702-\U000027B0"
                           u"\U000024C2-\U0001F251"
                           "]+", flags=re.UNICODE)
    text = re.sub(emoji_pattern, '', text)
    return text


def strip_links(text):
    link_regex = re.compile('((https?):((//)|(\\\\))+([\w\d:#@%/;$()~_?\+-=\\\.&](#!)?)*)', re.DOTALL)
    links = re.findall(link_regex, text)
    for link in links:
        text = text.replace(link[0], ', ')
    return text


def strip_all_entities(text):
    entity_prefixes = ['@', '#']
    for separator in string.punctuation:
        if separator not in entity_prefixes:
            text = text.replace(separator, ' ')
    words = []
    for word in text.split():
        word = word.strip()
        if word:
            if word[0] not in entity_prefixes:
                words.append(word)
    return ' '.join(words)


def preProcessing(strings):
    clean_tweet_texts = []
    for string in strings:
        clean_tweet_texts.append(my_clean(strip_all_entities(strip_links(string))))
        # clean_tweet_texts.append(my_clean(string))
    return clean_tweet_texts

def YOUTUBE_preProcessing(strings):
    clean_tweet_texts = []
    for string in strings:
        clean_tweet_texts.append(YOUTUBE_my_clean(strip_all_entities(strip_links(string))))
        # clean_tweet_texts.append(my_clean(string))
    return clean_tweet_texts

def load_dataset(dataset):
    if dataset == "polarity":
        df = pd.read_csv('https://raw.githubusercontent.com/lstate/X-SPELLS-V2/main/data/' + dataset_name + '_tweets.csv', encoding='utf-8')
        X = df['tweet'].values
        y = df['class'].values

    elif dataset == "hate":
        df = pd.read_csv('https://raw.githubusercontent.com/lstate/X-SPELLS-V2/main/data/' + dataset_name + '_tweets.csv', encoding='utf-8')
        # Removing the offensive comments, keeping only neutral and hatespeech,
        # and convert the class value from 2 to 1 for simplification purposes
        df = df[df['class'] != 1]
        X = df['tweet'].values
        y = df['class'].apply(lambda x: 1 if x == 2 else 0).values

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y, test_size=0.25)

    X_test = preProcessing(X_test)

    return X_test, y_test

def YOUTUBE_get_text_data():
    df = pd.read_csv('https://raw.githubusercontent.com/lstate/X-SPELLS-V2/main/data/YouTube-Spam-Collection-v1/youtube.csv', encoding='utf-8')

    X = df["CONTENT"].values
    y = df["CLASS"].values
        
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y, test_size=0.25)
    
    X_test = YOUTUBE_preProcessing(X_test)
    
    # delete x/y where there is no more content after preprocessing or we have more than 140 characters (e.g. comment was only an url)
    
    indx = []
    for i in range(len(X_test)):
        if len(X_test[i]) == 0:
            indx.append(i)
        elif len(X_test[i]) > 140:
            indx.append(i)     
    X_test = np.delete(X_test, indx, 0)
    y_test = np.delete(y_test, indx, 0)
    
    return X_test, y_test

In [3]:
!pip install spacy && python -m spacy download en_core_web_sm
!pip install torch transformers spacy && python -m spacy download en_core_web_sm

nlp = spacy.load('en_core_web_sm')

Collecting en_core_web_sm==2.2.5
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.5/en_core_web_sm-2.2.5.tar.gz (12.0 MB)
[K     |████████████████████████████████| 12.0 MB 5.1 MB/s 
[38;5;2m✔ Download and installation successful[0m
You can now load the model via spacy.load('en_core_web_sm')
Collecting en_core_web_sm==2.2.5
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.5/en_core_web_sm-2.2.5.tar.gz (12.0 MB)
[K     |████████████████████████████████| 12.0 MB 5.1 MB/s 
[38;5;2m✔ Download and installation successful[0m
You can now load the model via spacy.load('en_core_web_sm')


# Hate Speech

In [4]:
dataset_name = 'hate'
data, labels = load_dataset(dataset_name)
train, test, train_labels, test_labels = sklearn.model_selection.train_test_split(data, labels, test_size=0.25, random_state=42)
train_labels = np.array(train_labels)
test_labels = np.array(test_labels)

vectorizer = TfidfVectorizer()
vectorizer.fit(train)
train_vectors = vectorizer.transform(train)
test_vectors = vectorizer.transform(test)

c = sklearn.ensemble.RandomForestClassifier()
c.fit(train_vectors, train_labels)

preds = c.predict(test_vectors)

print(classification_report(test_labels, preds))
print("The accuracy score is {:.2%}".format(accuracy_score(test_labels, preds)))

def predict_lr(texts):
    return c.predict(vectorizer.transform(texts))

              precision    recall  f1-score   support

           0       0.91      0.67      0.77        87
           1       0.90      0.98      0.94       263

    accuracy                           0.90       350
   macro avg       0.90      0.82      0.85       350
weighted avg       0.90      0.90      0.89       350

The accuracy score is 90.00%


## Without using BERT

In [5]:
explainer = anchor_text.AnchorText(nlp, ['hate-speech', 'neutral'], use_unk_distribution=True)

np.random.seed(1)
text = 'america is white trash and so are all americans'
pred = explainer.class_names[predict_lr([text])[0]]
alternative =  explainer.class_names[1 - predict_lr([text])[0]]
exp = explainer.explain_instance(text, predict_lr, threshold=0.95)

print('Text: %s' % text)
print('Prediction: %s' % pred)
print()
print('Anchor: %s' % (' AND '.join(exp.names())))
print('Precision: %.2f' % exp.precision())
print()
print('Examples where anchor applies and model predicts %s:' % pred)
print()
print('\n'.join([x[0] for x in exp.examples(only_same_prediction=True)]))
print()
print('Examples where anchor applies and model predicts %s:' % alternative)
print()
print('\n'.join([x[0] for x in exp.examples(partial_index=0, only_different_prediction=True)]))

Text: america is white trash and so are all americans
Prediction: hate-speech

Anchor: white
Precision: 1.00

Examples where anchor applies and model predicts hate-speech:

UNK UNK white trash UNK UNK UNK UNK UNK
UNK UNK white UNK and so are all americans
UNK is white trash and so are all americans
UNK UNK white UNK UNK so are all americans
UNK UNK white UNK UNK UNK UNK UNK americans
america UNK white UNK UNK UNK are UNK americans
america is white trash UNK so UNK UNK americans
UNK is white trash UNK so UNK all americans
america is white trash UNK so are UNK americans
UNK is white trash UNK so are UNK UNK

Examples where anchor applies and model predicts neutral:




## Using BERT

In [6]:
explainer = anchor_text.AnchorText(nlp, ['hate-speech', 'neutral'], use_unk_distribution=False)

np.random.seed(1)
text = 'america is white trash and so are all americans'
pred = explainer.class_names[predict_lr([text])[0]]
alternative =  explainer.class_names[1 - predict_lr([text])[0]]
b = time.time()
exp = explainer.explain_instance(text, predict_lr, threshold=0.95, verbose=False)
print('Time: %s' % (time.time() - b))

print('Text: %s' % text)
print('Prediction: %s' % pred)
print()
print('Anchor: %s' % (' AND '.join(exp.names())))
print('Precision: %.2f' % exp.precision())
print()
print('Examples where anchor applies and model predicts %s:' % pred)
print()
print('\n'.join([x[0] for x in exp.examples(only_same_prediction=True)]))
print()
print('Examples where anchor applies and model predicts %s:' % alternative)
print()
print('\n'.join([x[0] for x in exp.examples(only_different_prediction=True)]))

Time: 1666.1465530395508
Text: america is white trash and so are all americans
Prediction: hate-speech

Anchor: white AND america AND trash AND is AND and
Precision: 0.97

Examples where anchor applies and model predicts hate-speech:

america is white trash and residents are white trash
america is white trash and therefore promotes humanity .
america is white trash and a ##gro ##cer ##rado
america is white trash and blacks are racist .
america is white trash and blacks are blue trash
america is white trash and trout are prohibited .
america is white trash and operates these places :
america is white trash and offers tuition courses .
america is white trash and grass ##roots # #
america is white trash and whites are black .

Examples where anchor applies and model predicts neutral:

america is white trash and poles are dry trash
america is white trash and others are green trash
america is white trash and sees what wants us
america is white trash and we are nothing tomorrow
america is wh

# Polarity

In [7]:
dataset_name = 'polarity'
data, labels = load_dataset(dataset_name)
train, test, train_labels, test_labels = sklearn.model_selection.train_test_split(data, labels, test_size=0.25, random_state=42)
train_labels = np.array(train_labels)
test_labels = np.array(test_labels)

vectorizer = TfidfVectorizer()
vectorizer.fit(train)
train_vectors = vectorizer.transform(train)
test_vectors = vectorizer.transform(test)

c = sklearn.ensemble.RandomForestClassifier()
c.fit(train_vectors, train_labels)

preds = c.predict(test_vectors)

print(classification_report(test_labels, preds))
print("The accuracy score is {:.2%}".format(accuracy_score(test_labels, preds)))

def predict_lr(texts):
    return c.predict(vectorizer.transform(texts))

              precision    recall  f1-score   support

           0       0.65      0.70      0.67       339
           1       0.66      0.61      0.63       328

    accuracy                           0.65       667
   macro avg       0.65      0.65      0.65       667
weighted avg       0.65      0.65      0.65       667

The accuracy score is 65.37%


## Without using BERT

In [8]:
explainer = anchor_text.AnchorText(nlp, ['negative', 'positive'], use_unk_distribution=True)

np.random.seed(1)
text = 'eccentric enough to stave off doldrums caruso s self conscious debut is also eminently forgettable'
pred = explainer.class_names[predict_lr([text])[0]]
alternative =  explainer.class_names[1 - predict_lr([text])[0]]
exp = explainer.explain_instance(text, predict_lr, threshold=0.95)

print('Text: %s' % text)
print('Prediction: %s' % pred)
print()
print('Anchor: %s' % (' AND '.join(exp.names())))
print('Precision: %.2f' % exp.precision())
print()
print('Examples where anchor applies and model predicts %s:' % pred)
print()
print('\n'.join([x[0] for x in exp.examples(only_same_prediction=True)]))
print()
print('Examples where anchor applies and model predicts %s:' % alternative)
print()
print('\n'.join([x[0] for x in exp.examples(partial_index=0, only_different_prediction=True)]))

Text: eccentric enough to stave off doldrums caruso s self conscious debut is also eminently forgettable
Prediction: negative

Anchor: forgettable
Precision: 1.00

Examples where anchor applies and model predicts negative:

eccentric UNK UNK UNK off UNK caruso s self conscious UNK is also UNK forgettable
eccentric enough UNK UNK off doldrums caruso s self UNK UNK is also UNK forgettable
eccentric enough to UNK UNK doldrums caruso UNK UNK UNK debut UNK UNK eminently forgett
eccentric UNK to stave UNK doldrums UNK UNK UNK UNK debut UNK also UNK forgettable
eccentric enough to UNK UNK doldrums UNK UNK self conscious UNK is UNK eminently forget
eccentric UNK UNK UNK UNK UNK UNK UNK self UNK debut UNK also eminently forgettable
eccentric enough to stave off doldrums caruso s UNK UNK debut UNK UNK UNK forgettable
eccentric UNK to stave off UNK caruso s UNK UNK UNK is UNK UNK forgettable
UNK UNK UNK UNK off UNK UNK UNK self UNK UNK is UNK UNK forgettable
eccentric UNK UNK stave off UNK caruso

## Using BERT

In [9]:
explainer = anchor_text.AnchorText(nlp, ['negative', 'positive'], use_unk_distribution=False)

np.random.seed(1)
text = 'eccentric enough to stave off doldrums caruso s self conscious debut is also eminently forgettable'
pred = explainer.class_names[predict_lr([text])[0]]
alternative =  explainer.class_names[1 - predict_lr([text])[0]]
b = time.time()
exp = explainer.explain_instance(text, predict_lr, threshold=0.95, verbose=False)
print('Time: %s' % (time.time() - b))

print('Text: %s' % text)
print('Prediction: %s' % pred)
print()
print('Anchor: %s' % (' AND '.join(exp.names())))
print('Precision: %.2f' % exp.precision())
print()
print('Examples where anchor applies and model predicts %s:' % pred)
print()
print('\n'.join([x[0] for x in exp.examples(only_same_prediction=True)]))
print()
print('Examples where anchor applies and model predicts %s:' % alternative)
print()
print('\n'.join([x[0] for x in exp.examples(only_different_prediction=True)]))

Time: 1075.618047952652
Text: eccentric enough to stave off doldrums caruso s self conscious debut is also eminently forgettable
Prediction: negative

Anchor: eccentric
Precision: 1.00

Examples where anchor applies and model predicts negative:

eccentric ##о ##в п ##л ##ю ##ь л @ л ##л ##ь ##ь л л
eccentric ρ ##ρ _ ρ # ρ ² ##δ W ##ᵐ ρ * ρ ##²
eccentric ##φ = \ off \ φ \ \ φ \ { φ } \
eccentric search to perform fusion energy to predicted orbital angular velocity causes orbital disturbance .
eccentric ##υ ##ε ##λ ##ε # λ # λ # λ # κ ; radius
eccentric ##म ##च ##ा ##ा # # # # # # # # # forgettable
eccentric - to ##re ##versing ^ { bracket } var { segment } } {
eccentric formula to obtain angular coordinates using spin space metric with formula _ 67 ⟩
eccentric analogy to ∞ ##∞ # ∞ ∞ ##♦ ##∞ # # # # forgettable
eccentric response to torque ##back ##rank # # # # # is ##abe ##aa #

Examples where anchor applies and model predicts positive:




# Youtube

In [10]:
dataset_name = 'youtube'
data, labels = YOUTUBE_get_text_data()
train, test, train_labels, test_labels = sklearn.model_selection.train_test_split(data, labels, test_size=0.25, random_state=42)
train_labels = np.array(train_labels)
test_labels = np.array(test_labels)

vectorizer = TfidfVectorizer()
vectorizer.fit(train)
train_vectors = vectorizer.transform(train)
test_vectors = vectorizer.transform(test)

c = sklearn.ensemble.RandomForestClassifier()
c.fit(train_vectors, train_labels)

preds = c.predict(test_vectors)

print(classification_report(test_labels, preds))
print("The accuracy score is {:.2%}".format(accuracy_score(test_labels, preds)))

def predict_lr(texts):
    return c.predict(vectorizer.transform(texts))

              precision    recall  f1-score   support

           0       0.87      0.96      0.91        55
           1       0.95      0.82      0.88        45

    accuracy                           0.90       100
   macro avg       0.91      0.89      0.90       100
weighted avg       0.90      0.90      0.90       100

The accuracy score is 90.00%


## Without using BERT

In [11]:
explainer = anchor_text.AnchorText(nlp, ['no spam', 'spam'], use_unk_distribution=True)

np.random.seed(1)
text = 'check out this video on youtube'
pred = explainer.class_names[predict_lr([text])[0]]
alternative =  explainer.class_names[1 - predict_lr([text])[0]]
exp = explainer.explain_instance(text, predict_lr, threshold=0.95)

print('Text: %s' % text)
print('Prediction: %s' % pred)
print()
print('Anchor: %s' % (' AND '.join(exp.names())))
print('Precision: %.2f' % exp.precision())
print()
print('Examples where anchor applies and model predicts %s:' % pred)
print()
print('\n'.join([x[0] for x in exp.examples(only_same_prediction=True)]))
print()
print('Examples where anchor applies and model predicts %s:' % alternative)
print()
print('\n'.join([x[0] for x in exp.examples(partial_index=0, only_different_prediction=True)]))

Text: check out this video on youtube
Prediction: spam

Anchor: out
Precision: 0.98

Examples where anchor applies and model predicts spam:

UNK out this video on UNK
check out UNK UNK UNK youtube
UNK out this video UNK UNK
UNK out this video UNK youtube
check out UNK video on youtube
UNK out this video UNK youtube
UNK out this UNK on UNK
check out UNK UNK on youtube
UNK out this video on UNK
UNK out this video UNK youtube

Examples where anchor applies and model predicts no spam:

UNK out UNK UNK UNK UNK
UNK out UNK UNK UNK UNK
UNK out UNK UNK UNK UNK


## Using BERT

In [12]:
explainer = anchor_text.AnchorText(nlp, ['no spam', 'spam'], use_unk_distribution=False)

np.random.seed(1)
text = 'check out this video on youtube'
pred = explainer.class_names[predict_lr([text])[0]]
alternative =  explainer.class_names[1 - predict_lr([text])[0]]
b = time.time()
exp = explainer.explain_instance(text, predict_lr, threshold=0.95, verbose=False)
print('Time: %s' % (time.time() - b))

print('Text: %s' % text)
print('Prediction: %s' % pred)
print()
print('Anchor: %s' % (' AND '.join(exp.names())))
print('Precision: %.2f' % exp.precision())
print()
print('Examples where anchor applies and model predicts %s:' % pred)
print()
print('\n'.join([x[0] for x in exp.examples(only_same_prediction=True)]))
print()
print('Examples where anchor applies and model predicts %s:' % alternative)
print()
print('\n'.join([x[0] for x in exp.examples(only_different_prediction=True)]))

Time: 196.63774037361145
Text: check out this video on youtube
Prediction: spam

Anchor: out AND youtube
Precision: 1.00

Examples where anchor applies and model predicts spam:

cut out the video on youtube
check out this page on youtube
check out movie trailers on youtube
file out play ##list on youtube
get out loud online on youtube
check out this name on youtube
check out from track manager youtube
check out words live on youtube
lock out - mobile animated youtube
burning out the sun " youtube

Examples where anchor applies and model predicts no spam:


