# Building a classifier for personal attacks with the Wikipedia Database

In [41]:
import pandas as pd
import numpy as np
import string
import urllib
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GridSearchCV
import re

In [2]:
# where the annotated comments and the annotations are downloaded from

ANNOTATED_COMMENTS_URL = "https://ndownloader.figshare.com/files/7554634"
ANNOTATIONS_URL = "https://ndownloader.figshare.com/files/7554637"


def download_file(url, fname):
    """Download ``url`` and store the response in the local file ``fname``.

    Parameters
    ----------
    url : str
        Address of the resource to fetch.
    fname : str
        Path of the file to write.
    """
    # A bare `import urllib` does not load the `request` submodule; import it
    # explicitly so this does not depend on another package having done so.
    import urllib.request

    urllib.request.urlretrieve(url, fname)

# fetch both TSV files, saving them under relative file names
for source_url, target_name in (
    (ANNOTATED_COMMENTS_URL, 'attack_annotated_comments.tsv'),
    (ANNOTATIONS_URL, 'attack_annotations.tsv'),
):
    download_file(source_url, target_name)

In [3]:
# load both TSV files into DataFrames; the comments frame uses its first column as index
comments = pd.read_csv(
    'attack_annotated_comments.tsv',
    index_col=0,
    sep='\t',
)
annotations = pd.read_csv(
    'attack_annotations.tsv',
    sep='\t',
)

In [4]:
# The classifier needs a discrete target, so take a majority vote per revision:
# a comment counts as an attack when more than half of its annotations say so.
attack_share = annotations.groupby('rev_id')['attack'].mean()
labels = attack_share > 0.5

In [5]:
# join labels and comments
# `labels` is keyed by rev_id (from the groupby) and `comments` is indexed by
# rev_id, so this assignment aligns on rev_id rather than on row position.
# NOTE(review): a comment without any annotation would get NaN here — confirm none exist.
comments['attack'] = labels

In [6]:
# replace the NEWLINE_TOKEN / TAB_TOKEN placeholders with a single space each
comments['comment'] = comments['comment'].apply(
    lambda text: text.replace("NEWLINE_TOKEN", " ").replace("TAB_TOKEN", " ")
)

In [7]:
# peek at the first few comments labelled as attacks
comments.loc[comments['attack'], 'comment'].head()

rev_id
801279             Iraq is not good  ===  ===  USA is bad   
2702703      ____ fuck off you little asshole. If you wan...
4632658         i have a dick, its bigger than yours! hahaha
6545332      == renault ==  you sad little bpy for drivin...
6545351      == renault ==  you sad little bo for driving...
Name: comment, dtype: object

In [8]:
# fit a Log Reg on TF-IDF features of word n-grams of length 1 to 5 (takes a while)

# note that the data already has train/test labels
train_comments = comments.query("split=='train'")
test_comments = comments.query("split=='test'")

clf = Pipeline([
    # keep only the 10000 highest-ranked n-gram features
    ('tfidf', TfidfVectorizer(max_features = 10000, ngram_range = (1,5), analyzer='word', norm='l2')),
    # The solver is named explicitly because the L1 penalty is not supported by
    # lbfgs, the default solver since scikit-learn 0.22; liblinear supports it.
    ('clf', LogisticRegression(C=1, penalty='l1', solver='liblinear')),
])
clf = clf.fit(train_comments['comment'], train_comments['attack'])

# score the held-out split using the predicted probability of the positive class
auc = roc_auc_score(test_comments['attack'], clf.predict_proba(test_comments['comment'])[:, 1])
print('Test ROC AUC: %.3f' %auc)

Test ROC AUC: 0.953


In [9]:
# a friendly comment should not be flagged as an attack
nice_comment = 'Thanks for you contribution, you did a great job!'
clf.predict([nice_comment])

array([False], dtype=bool)

In [10]:
# an abusive comment with obfuscated profanity should still be flagged
nasty_comment = 'You are a f** cu*t!'
clf.predict([nasty_comment])

array([ True], dtype=bool)

The classifier still flags the comment as an attack even though the profanity is obfuscated.

# Classify Tweets

In [38]:
# try the Wikipedia-trained classifier on a few tweet-like strings
tweet_samples = [
    'f*k you you lame',
    'i hate you',
    'Diane Abbott is a champion for rape victims',
    "",
]
clf.predict(tweet_samples)

array([ True,  True, False, False], dtype=bool)

In [39]:
# load the aggregated tweets from the interim data folder
# NOTE(review): unpickling can execute arbitrary code — only load this file if it was produced locally.
aggregated_path = '../data/interim/aggregated.pkl'
df = pd.read_pickle(aggregated_path)

In [42]:
# clean contents
def remove_handles(text):
    """Return ``text`` with every ``@handle`` mention deleted.

    A handle is an ``@`` followed by one or more non-whitespace characters.
    Whitespace around the removed handle is left in place.
    """
    # Raw string: '\s' inside a plain literal is an invalid escape sequence.
    return re.sub(r'@[^\s]+', '', text)

def remove_hashtags(text):
    """Return ``text`` with every ``#hashtag`` deleted.

    A hashtag is a ``#`` followed by one or more non-whitespace characters.
    Whitespace around the removed hashtag is left in place.
    """
    # Raw string: '\s' inside a plain literal is an invalid escape sequence.
    return re.sub(r'#[^\s]+', string=text, repl='')

def remove_RT(text):
    """Drop a leading ``'RT '`` marker from ``text``; anything else is returned as is."""
    retweet_prefix = '^RT '
    return re.sub(retweet_prefix, '', text)

def remove_url(text):
    """Return ``text`` with every URL-like token deleted.

    A token is removed from the literal ``http`` up to the next whitespace
    character, so both ``http://`` and ``https://`` links are covered.
    """
    # Raw string: '\s' inside a plain literal is an invalid escape sequence.
    return re.sub(r'http[^\s]+', string=text, repl='')

def process_text(text):
    """Strip handles, hashtags, a leading 'RT ' and URLs (in that order), then trim whitespace."""
    cleaned = remove_handles(text)
    cleaned = remove_hashtags(cleaned)
    cleaned = remove_RT(cleaned)
    cleaned = remove_url(cleaned)
    return cleaned.strip()

def process_text_ht(text):
    """Like ``process_text`` but keeps hashtags: strip handles, a leading 'RT ' and URLs, then trim."""
    cleaned = remove_handles(text)
    cleaned = remove_RT(cleaned)
    cleaned = remove_url(cleaned)
    return cleaned.strip()

# cleaned copy of every tweet, with hashtags kept
df['Clean Contents'] = df['Contents'].apply(process_text_ht)

In [None]:
# classify
# Predict the whole column in one call instead of once per row: each call runs
# the full vectoriser + model pipeline, so batching gives the same labels far
# more cheaply.
clean_tweets = df['Clean Contents']
if len(clean_tweets):
    df['abusive'] = clf.predict(clean_tweets)
else:
    # the model cannot predict on zero samples; still create the (empty) column
    df['abusive'] = pd.Series(index=df.index, dtype=bool)

In [None]:
# number of tweets that ended up without a prediction
df['abusive'].isnull().sum()

In [None]:
# first 20 cleaned tweets that were predicted as abusive
df.loc[df['abusive'], 'Clean Contents'].head(20)