In [1]:
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
import model_bias_analysis

In [2]:
# Train / test splits. Both frames are expected to carry a 'comment' text
# column and a 'binary_tox' label column, which the modelling cell below reads.
train_comments = pd.read_csv('train_comments.csv')
test_comments = pd.read_csv('test_comments.csv')

# Additional toxicity table; it is not referenced by the cells visible here.
joined_tox = pd.read_csv('joined_tox.csv')

# Terms loaded by the project helper from 'adjectives_people.txt'
# (presumably the identity terms used for the bias analysis; confirm in
# model_bias_analysis.read_identity_terms).
madlibs_terms = model_bias_analysis.read_identity_terms('adjectives_people.txt')

In [3]:

# Text classifier: unigram + bigram counts (vocabulary capped at 10,000 terms),
# re-weighted with TF-IDF (L2-normalised rows), fed into a logistic regression
# with scikit-learn's default settings.
clf = Pipeline(steps=[
    ('vect', CountVectorizer(ngram_range=(1, 2), max_features=10000)),
    ('tfidf', TfidfTransformer(norm='l2')),
    ('clf', LogisticRegression()),
])

# Fit on the training split. Pipeline.fit returns the fitted pipeline itself,
# so `clf` keeps referring to the same (now trained) object.
clf.fit(train_comments['comment'], train_comments['binary_tox'])

# Column 1 of predict_proba is the probability of the second class in
# clf.classes_ (label 1, given that binary_tox is a 0/1 label).
toxic_proba_test = clf.predict_proba(test_comments['comment'])[:, 1]
auc = roc_auc_score(test_comments['binary_tox'], toxic_proba_test)
print('Test ROC AUC: %.3f' % auc)

Test ROC AUC: 0.951


In [4]:
# Hard class labels from the fitted pipeline for every test comment, stored
# on the test frame as the 'predicted' column (the frame is modified in place).
predicted_labels = clf.predict(test_comments['comment'])
test_comments['predicted'] = predicted_labels

In [5]:
# Show the column labels of the test frame, which now include 'predicted'.
test_comments.columns

Index(['Unnamed: 0', 'rev_idrev_id', 'toxicity', 'rev_idrev_id.1', 'comment',
       'year', 'logged_in', 'ns', 'sample', 'split', 'binary_tox', 'predicted',
       'lesbian', 'gay', 'bisexual', 'transgender', 'trans', 'queer', 'lgbt',
       'lgbtq', 'homosexual', 'straight', 'heterosexual', 'male', 'female',
       'nonbinary', 'african', 'african american', 'black', 'white',
       'european', 'hispanic', 'latino', 'latina', 'latinx', 'mexican',
       'canadian', 'american', 'asian', 'indian', 'middle eastern', 'chinese',
       'japanese', 'christian', 'muslim', 'jewish', 'buddhist', 'catholic',
       'protestant', 'sikh', 'taoist', 'old', 'older', 'young', 'younger',
       'teenage', 'millenial', 'middle aged', 'elderly', 'blind', 'deaf',
       'paralyzed'],
      dtype='object')

In [6]:
# An example of a comment predicted to be toxic (uncomment the next line to display it).
# test_comments.loc[test_comments['predicted'] == 1, 'comment'].loc[1649]

In [7]:
# Preview the first rows of the test comments that the model labelled as
# toxic (predicted == 1), now that predictions are stored on the frame.
predicted_toxic_mask = test_comments['predicted'] == 1
test_comments.loc[predicted_toxic_mask].head()

Unnamed: 0.1,Unnamed: 0,rev_idrev_id,toxicity,rev_idrev_id.1,comment,year,logged_in,ns,sample,split,...,older,young,younger,teenage,millenial,middle aged,elderly,blind,deaf,paralyzed
149,829,5815399.0,0.7,5815399.0,NEWLINE_TOKENNEWLINE_TOKENIt was anonymous on ...,2004,True,article,random,test,...,False,False,False,False,False,False,False,False,False,False
232,1227,8357736.0,0.3,8357736.0,NEWLINE_TOKENNEWLINE_TOKENYou are a child.,2004,True,user,blocked,test,...,False,False,False,False,False,False,False,False,False,False
233,1228,8359431.0,0.5,8359431.0,`NEWLINE_TOKENNEWLINE_TOKEN::You are not worth...,2004,True,user,blocked,test,...,False,False,False,False,False,False,False,False,False,False
258,1348,8845700.0,1.0,8845700.0,NEWLINE_TOKENNEWLINE_TOKENi am using the sandb...,2004,False,user,blocked,test,...,False,False,False,False,False,False,False,False,False,False
288,1521,9664203.0,0.8,9664203.0,NEWLINE_TOKENNEWLINE_TOKEN== whoa ==NEWLINE_TO...,2005,False,user,blocked,test,...,False,False,False,False,False,False,False,False,False,False


In [8]:
# Spot-check the classifier on three hand-written comments; each one is
# predicted separately and printed on its own line.
spot_checks = [
    # Model labels this comment as non-toxic.
    'Thanks for you contribution, you did a great job!',
    # Model labels this comment as toxic.
    'People as stupid as you should not edit Wikipedia!',
    # Model also labels this comment as toxic. Uh-oh! We don't think this
    # comment should be toxic!
    'I am a proud gay',
]
for sample_text in spot_checks:
    print(clf.predict([sample_text]))

[0]
[1]
[1]
