In [1]:
import pandas as pd
import html
import re
# from sklearn.feature_extraction.text import CountVectorizer
import nltk
import random
# import numpy as np
from nltk.tokenize import TweetTokenizer
nltk.download('stopwords')
# from sklearn.model_selection import train_test_split
# from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_curve

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


# Functions & Data

### def classify(feature_sets):

In [39]:
def _safe_ratio(numerator, denominator):
  """Return numerator/denominator, or 0.0 when the denominator is zero."""
  return numerator/denominator if denominator else 0.0


def classify(feature_sets, tweets=None, k_folds=10, n_top=20, n_errors=20, random_state=None):
  """Cross-validate a Multinomial Naive Bayes model and print a diagnostic report.

  Printed, in order: the confusion matrix of out-of-fold predictions, accuracy
  and precision/recall/F1 for the second class of the confusion matrix
  (labelled "(1)"), the features whose per-class log-probabilities differ the
  most, and a random sample of misclassified tweets.

  Parameters
  ----------
  feature_sets : sequence of (dict, label)
      One (feature dict, class label) pair per example. The report assumes a
      binary problem in which both classes occur.
  tweets : sequence of str, optional
      Tweet text aligned by position with ``feature_sets``; only used for the
      error listing. When omitted, the notebook-level ``df['tweet']`` is used.
  k_folds : int, default 10
      Number of cross-validation folds.
  n_top : int, default 20
      Maximum number of features listed per indicator group.
  n_errors : int, default 20
      Maximum number of misclassified tweets printed.
  random_state : int, optional
      Seed for the error sample; ``None`` draws a different sample each call.

  Returns
  -------
  None
      Everything is reported via ``print``.

  Raises
  ------
  ValueError
      If ``feature_sets`` is empty.
  """
  if len(feature_sets) == 0:
    raise ValueError('feature_sets must contain at least one example')

  feature_frame = pd.DataFrame([example[0] for example in feature_sets])
  X = feature_frame.values
  y = [example[1] for example in feature_sets]

  print('Train MNB model on {} examples with {}-fold cross-validation'.format(len(y), k_folds))
  classifier = MultinomialNB()
  # The fit on all data is only used to inspect feature_log_prob_ further down;
  # the reported metrics come from the out-of-fold predictions.
  classifier.fit(X, y)
  y_pred = cross_val_predict(classifier, X, y, cv=k_folds)
  cnf = confusion_matrix(y, y_pred)
  print('Confusion Matrix:\n',cnf)

  # Rows are actual classes, columns are predicted classes.
  tn, fp = cnf[0][0], cnf[0][1]
  fn, tp = cnf[1][0], cnf[1][1]
  n_obs = cnf.sum()
  acc = (tn+tp)/n_obs
  # Guard the ratios: with no predicted (or no actual) positives they would be 0/0.
  p_1 = _safe_ratio(tp, tp+fp)
  r_1 = _safe_ratio(tp, tp+fn)
  f1_1 = _safe_ratio(2*p_1*r_1, p_1+r_1)
  print('Accuracy:',round(acc,4))
  print('Precision (1):',round(p_1,4))
  print('Recall (1):',round(r_1,4))
  print('F1 (1):',round(f1_1,4))

  # Rank features by log P(feature | first class) - log P(feature | second class):
  # the most negative values are the features most characteristic of the second class.
  # Names are taken from the matrix columns so they always line up with the model.
  features = list(feature_frame.columns)
  log_prob_diffs = classifier.feature_log_prob_[0] - classifier.feature_log_prob_[1]
  feature_ranks = sorted(zip(log_prob_diffs, features))
  n_shown = max(0, min(n_top, len(feature_ranks)))

  print('\nPositive Indicators')
  for ranked in feature_ranks[:n_shown]:
    print(ranked)
  print('\nNegative Indicators')
  for ranked in feature_ranks[::-1][:n_shown]:
    print(ranked)

  if tweets is None:
    tweets = df['tweet']  # fall back to the notebook-level DataFrame
  pred = pd.DataFrame({'actual': y, 'pred': y_pred})
  pred = pred.join(pd.Series(list(tweets), name='tweet'), how='inner')
  errors = pred[pred['actual'] != pred['pred']]

  # Never ask for more rows than there are misclassifications.
  n_sampled = max(0, min(n_errors, len(errors)))
  if n_sampled:
    sample_errors = errors.sample(n_sampled, random_state=random_state).reset_index(drop=True)
  else:
    sample_errors = errors.iloc[:0]
  print('\nErrors:')
  for actual, tweet in zip(sample_errors['actual'], sample_errors['tweet']):
    print(actual,'\t',tweet)

### Import & Prep Data

In [33]:
# Load the labelled tweets from the t-davidson/hate-speech-and-offensive-language
# repository and keep only the label and the text.
df = pd.read_csv('https://raw.githubusercontent.com/t-davidson/hate-speech-and-offensive-language/master/data/labeled_data.csv')
df = df[['class','tweet']]

# Usernames are alpha-numeric + underscores and not longer than 15 characters
# (https://help.twitter.com/en/managing-your-account/twitter-username-rules).
# The range is spelled A-Za-z on purpose: A-z would also match [ \ ] ^ _ and `.
USERNAME_PATTERN = re.compile(r'@[A-Za-z0-9_]{1,15}')
# Same A-Za-z fix, plus '%' so that percent-encoded URLs are matched in full.
URL_PATTERN = re.compile(r'http(s)?://[A-Za-z0-9\-._~:/?#\[\]@!$&\'()*+,;=%]+')

# translate class from {0: 'hate speech', 1: 'offensive language', 2: 'neither'} to {0: 'not hate speech', 1: 'hate speech'}
df['class'] = df['class'].eq(0).astype('int64')
df['tweet'] = df['tweet'].apply(html.unescape) # convert HTML character references (e.g. emoji entities) to text
# Lower-case before inserting placeholders so the upper-case placeholders stay distinguishable.
df['tweet'] = df['tweet'].str.lower()
df['tweet'] = df['tweet'].apply(lambda text: USERNAME_PATTERN.sub('USERNAME_MENTION', text)) # replace username mentions with generic placeholder
df['tweet'] = df['tweet'].apply(lambda text: text.replace('"USERNAME_MENTION:', '"RETWEET_SOURCE:')) # replace retweet sources with generic placeholder; USERNAME_MENTION preceded by " and followed by :
df['tweet'] = df['tweet'].apply(lambda text: URL_PATTERN.sub('URL_LINK', text)) # replace URLs with generic placeholder

# replace common abbreviated words (disabled)
# df['tweet'] = df['tweet'].apply(lambda x: re.sub(r'\bbout\b', 'about', x))
# df['tweet'] = df['tweet'].apply(lambda x: re.sub(r'\br\b', 'are', x))
# df['tweet'] = df['tweet'].apply(lambda x: re.sub(r'\bb\b', 'be', x))
# df['tweet'] = df['tweet'].apply(lambda x: re.sub(r'\bhes\b', 'he\'s', x))
# df['tweet'] = df['tweet'].apply(lambda x: re.sub(r'\bdat\b', 'that', x))
# df['tweet'] = df['tweet'].apply(lambda x: re.sub(r'\bu\b', 'you', x))
# df['tweet'] = df['tweet'].apply(lambda x: re.sub(r'\bur\b', 'your', x))

# Shuffle with a fixed seed so the row order is reproducible.
df = df.sample(frac=1, random_state=3).reset_index(drop=True)
tokenize = TweetTokenizer()
df_tokens = df.copy(deep=True)
df_tokens['tweet'] = df_tokens['tweet'].apply(tokenize.tokenize)
# One (class, token list) record per tweet.
docs = df_tokens.to_records(index=False)

print(docs[0:10])

[(0, list(['versace', 'belts', 'and', 'lv', 'wallets', 'just', "won't", 'cut', 'it', '.', 'need', 'to', 'really', 'show', 'his', 'ass', 'up', '..', 'little', 'bitch']))
 (0, list(['rt', 'USERNAME_MENTION', ':', 'hello', 'USERNAME_MENTION', 'you', 'are', 'now', 'official', 'big', 'time', 'pussy']))
 (1, list(['USERNAME_MENTION', 'just', 'fuckin', 'roasted', 'this', 'faggot']))
 (0, list(['rt', 'USERNAME_MENTION', ':', 'the', 'only', 'thing', 'more', 'irritating', 'than', 'donald', 'sterling', 'being', 'a', 'cunt', 'is', 'adam', 'silver', 'continually', 'apologizing', 'for', 'him', '.']))
 (0, list(['USERNAME_MENTION', "i've", 'seen', 'a', 'number', 'of', 'bitches', 'on', 'here', 'say', "there's", 'no', 'such', 'thing', 'as', 'loose', 'pussy', 'lol', '.', 'ok']))
 (0, list(['USERNAME_MENTION', 'USERNAME_MENTION', 'i', 'was', '200', 'bitch']))
 (0, list(['USERNAME_MENTION', 'pudding', 'it', 'right', 'in', 'her', 'pussy']))
 (0, list(['rt', 'USERNAME_MENTION', ':', 'bitches', 'tweeting', '

# Feature Extraction & Experiments

In [4]:
# Accuracy of always predicting class 0: the share of rows whose class is 0.
baseline_pct = round((1 - df['class'].mean()) * 100, 2)
print(f'Baseline Accuracy: {baseline_pct}%')

Baseline Accuracy: 94.23%


## Experiment 1
Top 2000 most frequent words (stopwords included) as features

### Prep

In [5]:
# Frequency distribution over every token in the corpus; nothing is filtered out here.
all_words_list = [token for (_label, tokens) in docs for token in tokens]
all_words = nltk.FreqDist(all_words_list)
print(len(all_words))

# English stopwords extended with a few punctuation tokens and the 'rt' marker.
# The list is built but not applied in this cell.
stopwords = nltk.corpus.stopwords.words('english')
stopwords.extend(['.',',','rt','-',':'])

# The 2000 most frequent tokens become the feature vocabulary.
common_word_items = all_words.most_common(2000)
common_words = [item[0] for item in common_word_items]
print(common_words)

22735
['USERNAME_MENTION', ':', '.', 'a', 'bitch', 'i', 'rt', 'the', 'you', ',', 'to', '"', 'and', '!', 'my', 'that', 'bitches', 'in', 'URL_LINK', 'is', '😂', 'like', 'me', 'of', '?', 'on', 'hoes', 'be', 'this', 'pussy', 'for', '...', 'it', 'hoe', 'with', '“', '”', "i'm", 'ass', 'your', 'all', 'up', 'if', 'but', 'just', "don't", 'get', 'fuck', 'so', 'they', 'no', 'when', 'these', 'u', 'got', 'shit', '…', 'nigga', 'not', 'she', 'was', 'are', 'her', 'trash', 'at', 'lol', 'out', '..', 'have', '&', 'about', 'he', "ain't", 'some', 'what', 'do', 'know', 'can', 'niggas', 'we', 'them', 'one', 'how', 'love', "it's", 'or', 'who', 'as', 'go', 'fucking', 'RETWEET_SOURCE', 'yo', "'", 'now', 'want', 'from', 'why', 'bad', '-', "can't", 'then', '😭', 'his', 'good', 'man', "you're", 'too', 'say', 'off', "that's", 'look', 'still', 'make', 'ya', 'hate', 'see', 'an', 'im', '*', 'only', 'back', 'think', 'need', 'never', 'will', 'time', 'really', 'faggot', 'girl', 'people', "y'all", 'real', 'right', 'being', 

In [6]:
def word_features(document, common_words):
    """Map each vocabulary word to a presence flag for one document.

    Returns a dict with one key 'V_<word>' per entry of ``common_words`` (in
    that order); the value is True when the word occurs in ``document``.
    """
    present = set(document)  # set gives constant-time membership tests
    return {f'V_{word}': word in present for word in common_words}

In [7]:
# One (feature dict, label) pair per tokenised tweet; show the first pair as a sanity check.
feature_sets = [
    (word_features(tokens, common_words), label)
    for label, tokens in docs
]
print(feature_sets[0])

({'V_USERNAME_MENTION': False, 'V_:': False, 'V_.': True, 'V_a': False, 'V_bitch': True, 'V_i': False, 'V_rt': False, 'V_the': False, 'V_you': False, 'V_,': False, 'V_to': True, 'V_"': False, 'V_and': True, 'V_!': False, 'V_my': False, 'V_that': False, 'V_bitches': False, 'V_in': False, 'V_URL_LINK': False, 'V_is': False, 'V_😂': False, 'V_like': False, 'V_me': False, 'V_of': False, 'V_?': False, 'V_on': False, 'V_hoes': False, 'V_be': False, 'V_this': False, 'V_pussy': False, 'V_for': False, 'V_...': False, 'V_it': True, 'V_hoe': False, 'V_with': False, 'V_“': False, 'V_”': False, "V_i'm": False, 'V_ass': True, 'V_your': False, 'V_all': False, 'V_up': True, 'V_if': False, 'V_but': False, 'V_just': True, "V_don't": False, 'V_get': False, 'V_fuck': False, 'V_so': False, 'V_they': False, 'V_no': False, 'V_when': False, 'V_these': False, 'V_u': False, 'V_got': False, 'V_shit': False, 'V_…': False, 'V_nigga': False, 'V_not': False, 'V_she': False, 'V_was': False, 'V_are': False, 'V_her': Fa

### Results

In [45]:
# Cross-validate on the feature sets built above and print the metrics,
# indicator features and a sample of misclassified tweets.
classify(feature_sets)

Train MNB model on 24783 examples with 10-fold cross-validation
Confusion Matrix:
 [[22847   506]
 [ 1006   424]]
Accuracy: 0.939
Precision (1): 0.4559
Recall (1): 0.2965
F1 (1): 0.3593

Positive Indicators
(-4.813004895923639, 'V_spic')
(-3.214536014860113, 'V_niggers')
(-2.893906004318982, 'V_faggots')
(-2.867094746868325, 'V_whitey')
(-2.8513463899001863, 'V_chink')
(-2.8205747312334317, 'V_beaner')
(-2.7335633542438025, 'V_beaners')
(-2.704575817370551, 'V_fags')
(-2.5848180929834523, 'V_nigger')
(-2.539407339802846, 'V_racist')
(-2.494333665177968, 'V_faggot')
(-2.415109623125268, 'V_gook')
(-2.3852566599755853, 'V_coons')
(-2.3280982461356388, 'V_coon')
(-2.328098246135637, 'V_americans')
(-2.215620262708949, 'V_dyke')
(-2.1972586451768263, 'V_queer')
(-2.128608885719318, 'V_fag')
(-2.1274275506734863, 'V_mention')
(-2.040416173683857, 'V_border')

Negative Indicators
(3.160839480021048, 'V_bird')
(2.9118835433994352, 'V_charlie')
(2.094750383058499, 'V_sex')
(1.9392654802181033,

## Experiment 2
Top 2000 most frequent words after stopwords are removed

### Prep

In [46]:
# Flatten every tokenised tweet into one corpus-wide token list.
# Rebuilding it from docs keeps this cell idempotent when re-run.
all_words_list = [word for (cat,tweet) in docs for word in tweet]

# English stopwords extended with a few punctuation tokens and the 'rt' marker.
stopwords = nltk.corpus.stopwords.words('english')
stopwords.extend(['.',',','rt','-',':'])
# A set gives constant-time membership; the list would be scanned once per token.
stopword_set = set(stopwords)

# Token counts before and after stopword removal.
print(len(all_words_list))
all_words_list = [word for word in all_words_list if word not in stopword_set]
print(len(all_words_list))

all_words = nltk.FreqDist(all_words_list)

# The 2000 most frequent remaining tokens become the feature vocabulary.
common_word_items = all_words.most_common(2000)
common_words = [word for (word,count) in common_word_items]
print(common_words)

404746
244945
['USERNAME_MENTION', 'bitch', '"', '!', 'bitches', 'URL_LINK', '😂', 'like', '?', 'hoes', 'pussy', '...', 'hoe', '“', '”', "i'm", 'ass', 'get', 'fuck', 'u', 'got', 'shit', '…', 'nigga', 'trash', 'lol', '..', '&', "ain't", 'know', 'niggas', 'one', 'love', 'go', 'fucking', 'RETWEET_SOURCE', 'yo', "'", 'want', 'bad', "can't", '😭', 'good', 'man', 'say', "that's", 'look', 'still', 'make', 'ya', 'hate', 'see', 'im', '*', 'back', 'think', 'need', 'never', 'time', 'really', 'faggot', 'girl', 'people', "y'all", 'real', 'right', 'even', 'lmao', 'said', 'white', 'would', 'bird', '😩', 'let', 'wit', 'dick', 'wanna', 'day', '/', 'bout', 'stop', 'damn', 'little', 'tell', 'call', 'talk', 'gotta', 'come', 'da', 'cause', 'dont', 'life', 'take', 'new', 'gonna', 'dat', 'every', 'charlie', 'always', 'n', '’', 'money', 'niggah', 'better', 'going', 'girls', 'lil', '️', 'eat', '2', 'ever', 'ghetto', 'give', 'dumb', 'fuckin', 'retarded', 'aint', 'yellow', 'twitter', 'talking', 'ugly', '😒', 'cunt',

In [47]:
# Re-encode every tweet against the stopword-filtered vocabulary; print the first pair as a check.
feature_sets = [
    (word_features(tokens, common_words), label)
    for label, tokens in docs
]
print(feature_sets[0])

({'V_USERNAME_MENTION': False, 'V_bitch': True, 'V_"': False, 'V_!': False, 'V_bitches': False, 'V_URL_LINK': False, 'V_😂': False, 'V_like': False, 'V_?': False, 'V_hoes': False, 'V_pussy': False, 'V_...': False, 'V_hoe': False, 'V_“': False, 'V_”': False, "V_i'm": False, 'V_ass': True, 'V_get': False, 'V_fuck': False, 'V_u': False, 'V_got': False, 'V_shit': False, 'V_…': False, 'V_nigga': False, 'V_trash': False, 'V_lol': False, 'V_..': True, 'V_&': False, "V_ain't": False, 'V_know': False, 'V_niggas': False, 'V_one': False, 'V_love': False, 'V_go': False, 'V_fucking': False, 'V_RETWEET_SOURCE': False, 'V_yo': False, "V_'": False, 'V_want': False, 'V_bad': False, "V_can't": False, 'V_😭': False, 'V_good': False, 'V_man': False, 'V_say': False, "V_that's": False, 'V_look': False, 'V_still': False, 'V_make': False, 'V_ya': False, 'V_hate': False, 'V_see': False, 'V_im': False, 'V_*': False, 'V_back': False, 'V_think': False, 'V_need': True, 'V_never': False, 'V_time': False, 'V_really': 

### Results

In [48]:
# Cross-validate on the feature sets built above and print the metrics,
# indicator features and a sample of misclassified tweets.
classify(feature_sets)

Train MNB model on 24783 examples with 10-fold cross-validation
Confusion Matrix:
 [[22913   440]
 [ 1048   382]]
Accuracy: 0.94
Precision (1): 0.4647
Recall (1): 0.2671
F1 (1): 0.3393

Positive Indicators
(-4.730887563747643, 'V_spic')
(-3.132418682684116, 'V_niggers')
(-3.0569111301759726, 'V_#faggots')
(-2.8117886721429866, 'V_faggots')
(-2.78497741469233, 'V_whitey')
(-2.769229057724191, 'V_chink')
(-2.7384573990574363, 'V_beaner')
(-2.651446022067807, 'V_beaners')
(-2.6224584851945556, 'V_fags')
(-2.5179146294432844, 'V_trailer')
(-2.502700760807457, 'V_nigger')
(-2.457290007626849, 'V_racist')
(-2.4122163330019726, 'V_faggot')
(-2.3329922909492726, 'V_gook')
(-2.3031393277995917, 'V_coons')
(-2.2459809139596434, 'V_coon')
(-2.2459809139596416, 'V_americans')
(-2.133502930532952, 'V_dyke')
(-2.115141313000831, 'V_queer')
(-2.046491553543323, 'V_fag')

Negative Indicators
(3.2429568121970433, 'V_bird')
(2.9940008755754306, 'V_charlie')
(2.1768677152344944, 'V_sex')
(2.0213828123940