# Imports

How does annotation bias in training datasets affect the accuracy of a given hate speech classifier?

Baseline: Binary Classifier (Hate / Not Hate) Support Vector Machine

Experiment with different annotated hate speech datasets

Report on ethics of defining hate speech
Importance on increasing accuracy, burat what cost does this increase in accuracy come at?
Does the computer recognize that some people are more sensitive than others?
The computer is analyzing patterns in a given dataset to conclude its own definition of hate speech



In [1]:
import numpy as np
import pandas as pd
from nltk.corpus import stopwords
import re
import os
import codecs
import logging
import warnings; warnings.filterwarnings('ignore')

# Sklearn
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn import svm
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score
from nltk.tokenize import word_tokenize

import gensim

# Load in Data

In [2]:
df=pd.read_csv("dataset.csv")
df.drop_duplicates()
hate_speech = df.query('Class=="hate"').copy().reset_index()
test_hate = hate_speech[:162].copy()
data_hate = hate_speech[162:3009].copy()

non_hate_speech = df.query('Class=="none"').copy().reset_index()
test_non = non_hate_speech[:162].copy()
data_none = non_hate_speech[162:3009].copy()

test_data = pd.merge(test_hate, test_non, how='outer').sample(frac=1).reset_index()
data = pd.merge(data_hate, data_none, how='outer').sample(frac=1).reset_index()
df = pd.merge(data, test_data, how='outer').copy()

df.isnull().values.any()

False

# Pre-Processing

In [3]:
def tweet_processing(raw_tweet):
    letters_only=re.sub("[^a-zA-Z]"," ",raw_tweet)
    words=letters_only.lower().split()
    stops=set(stopwords.words("english"))
    m_w=[w for w in words if not w in stops]
    return (" ".join(m_w))

In [4]:
num_tweets=data["Tweets"].size
clean_tweet=[]
for i in range(0,num_tweets):
    clean_tweet.append(tweet_processing(data["Tweets"][i]))
data["Tweets"]=clean_tweet 


num_tweets_test=test_data["Tweets"].size
clean_tweet_test=[]
for i in range(0,num_tweets_test):
    clean_tweet_test.append(tweet_processing(test_data["Tweets"][i]))
test_data["Tweets"]=clean_tweet_test

# SVM

In [5]:
X_train, X_test_svm, Y_train, Y_test_svm = train_test_split(df.Tweets, df.Class, test_size=0.2)

train_dict = {}
for tweet_class in Y_train:
    if tweet_class in train_dict:
        train_dict[tweet_class] += 1
    else:
        train_dict[tweet_class] = 1
        
test_dict = {}  
for tweet_class in Y_test_svm:
    if tweet_class in test_dict:
        test_dict[tweet_class] += 1
    else:
        test_dict[tweet_class] = 1

data_dict = {"Train Data Split" : {"Hate Speech" : (1 - train_dict['none'] / len(Y_train)) * 100, "Non-Hate Speech" : (train_dict['none'] / len(Y_train) * 100)},
            "Test Data Split" : {"Hate Speech" : (1 - test_dict['none'] / len(Y_test_svm)) * 100, "Non-Hate Speech" : (test_dict['none'] / len(Y_test_svm) *100)}}

ds = pd.DataFrame(data_dict)
print(data_dict)

In [6]:
vectorizer=CountVectorizer(analyzer = "word",tokenizer = None,preprocessor = None,stop_words = None,max_features = 5000)

train_data_features=vectorizer.fit_transform(X_train)
train_data_features=train_data_features.toarray()

test_data_features=vectorizer.transform(X_test_svm)
test_data_features=test_data_features.toarray()

#SVM with linear kernel
clf=svm.SVC(kernel='linear',C=1.0)
print ("Training")
clf.fit(train_data_features,Y_train)

print ("Testing")
predicted=clf.predict(test_data_features)
accuracy=np.mean(predicted==Y_test_svm)
print ("Accuracy: ",accuracy)
score_svm=precision_recall_fscore_support(Y_test_svm, predicted, average='weighted')
print("Precision:", score_svm[0], "\nRecall:", score_svm[1], "\nF1 Score:", score_svm[2])

Training
Testing
Accuracy:  0.8637873754152824
Precision: 0.8652263156781006 
Recall: 0.8637873754152824 
F1 Score: 0.8637332422311453


## SVM Confusion Matrix

In [20]:
newDict = {"Tweets" : X_test_svm, "Actual" : Y_test_svm, "Predicted" : predicted}

df2 = pd.DataFrame(newDict).reset_index()

true_pos = df2.query('Actual == "hate" and Actual == Predicted').copy().reset_index()
true_neg = df2.query('Actual == "none" and Actual == Predicted').copy().reset_index()
false_neg = df2.query('Actual == "hate" and Actual != Predicted').copy().reset_index()
false_pos = df2.query('Actual == "none" and Actual != Predicted').copy().reset_index()

print("TP =", len(true_pos) / len(df2) * 100, "FN =", len(false_neg) / len(df2) * 100, "\nTN  =", len(true_neg) / len(df2) * 100, "FP =", len(false_pos) / len(df2) * 100)

TP = 41.52823920265781 FN = 7.392026578073089 
TN  = 45.51495016611295 FP = 5.564784053156146


# Randomized Re-Annotation Algorithm Implementation and Application

Algorithm:

params: Size of Dataset, Percentage of False Negatives from Orig Model Run

{   
    Compute number of Tweets to re-annotate by using the percentage of FN on Size of DataSet
    num = Dataset * FN-Percentage
    randomly sample num not hate from original dataset 
    re-annotate from not hate to hate
}

Is hate predicts its not

the rules for what is not over prioritize 

In [35]:
n = int( len(false_neg) / len(df2) * len(false_neg))

print("Number of Re-Annotations: " + str(n) + " : " + str(len(df)))

false_tweets = false_neg.sample(n=n).Tweets
print(false_tweets)
t = []
for x in false_tweets:
    t.append(x)
t = pd.Series(t)
len(t)

Number of Re-Annotations: 6 : 6018
65    @TicklishQuill It's actually only one face.  I...
85    RT @TwoThug4U: @YesYoureSexist but honestly st...
76    @dgbattaglia Saw this this morning... http://t...
41    @GRIMACHU @AliRadicali @avacadosoup  Sealionin...
46    @jkronenwetter18 I'm sure the commentators you...
77    @AJKauffman @CocaCola Yeah, if there's one thi...
Name: Tweets, dtype: object


6

In [31]:
copy_df = df.copy()

copy_df.query('Tweets in @t').replace('hate', 'none')

fileName = "random_reannotated_" + ".csv"
copy_df.to_csv(fileName)
    
# Re-Annotate Data
r_df = pd.read_csv(fileName)

In [34]:
df.head()

Unnamed: 0.1,level_0,index,Unnamed: 0,Tweet Id,Tweets,User Id,Screen Name,Class
0,779,1399,1399,507035960325177000,RT @tylerxclark: I'm not sexist but have you e...,930620467,YesYoureSexist,hate
1,2891,908,908,469139702549729000,"@Mike_Antoniou15 ""I feel like"" = ""I have no ev...",930620467,YesYoureSexist,none
2,678,1165,1165,489515039141810000,RT @Justin_DvG: @UberFacts this is why DC &gt;...,930620467,YesYoureSexist,hate
3,981,1917,1917,546362209127759000,RT @RykerDomz When a woman gets in a wreck I'm...,930620467,YesYoureSexist,hate
4,835,1543,1543,517430515616731000,I have a hint for you in my usename @et_tweet_...,930620467,YesYoureSexist,hate


In [9]:


num_tweets=data["Tweets"].size
clean_tweet=[]
for i in range(0,num_tweets):
    clean_tweet.append(tweet_processing(data["Tweets"][i]))
data["Tweets"]=clean_tweet 


num_tweets_test=test_data["Tweets"].size
clean_tweet_test=[]
for i in range(0,num_tweets_test):
    clean_tweet_test.append(tweet_processing(test_data["Tweets"][i]))
test_data["Tweets"]=clean_tweet_test


In [10]:
X_train, X_test_svm, Y_train, Y_test_svm = train_test_split(df.Tweets, df.Class, test_size=0.2)

train_dict = {}
for tweet_class in Y_train:
    if tweet_class in train_dict:
        train_dict[tweet_class] += 1
    else:
        train_dict[tweet_class] = 1
        
test_dict = {}  
for tweet_class in Y_test_svm:
    if tweet_class in test_dict:
        test_dict[tweet_class] += 1
    else:
        test_dict[tweet_class] = 1

data_dict = {"Train Data Split" : {"Hate Speech" : (1 - train_dict['none'] / len(Y_train)) * 100, "Non-Hate Speech" : (train_dict['none'] / len(Y_train) * 100)},
            "Test Data Split" : {"Hate Speech" : (1 - test_dict['none'] / len(Y_test_svm)) * 100, "Non-Hate Speech" : (test_dict['none'] / len(Y_test_svm) *100)}}

ds = pd.DataFrame(data_dict)

print(data_dict)

{'Train Data Split': {'Hate Speech': 50.27004570004154, 'Non-Hate Speech': 49.72995429995846}, 'Test Data Split': {'Hate Speech': 48.9202657807309, 'Non-Hate Speech': 51.0797342192691}}


In [11]:
vectorizer=CountVectorizer(analyzer = "word",tokenizer = None,preprocessor = None,stop_words = None,max_features = 5000)

train_data_features=vectorizer.fit_transform(X_train)
train_data_features=train_data_features.toarray()

test_data_features=vectorizer.transform(X_test_svm)
test_data_features=test_data_features.toarray()

#SVM with linear kernel
clf=svm.SVC(kernel='linear',C=1.0)
print ("Training")
clf.fit(train_data_features,Y_train)

print ("Testing")
predicted=clf.predict(test_data_features)
accuracy=np.mean(predicted==Y_test_svm)
print ("Accuracy: ",accuracy)
score_svm=precision_recall_fscore_support(Y_test_svm, predicted, average='weighted')
print("Precision:", score_svm[0], "\nRecall:", score_svm[1], "\nF1 Score:", score_svm[2])

Training
Testing
Accuracy:  0.8704318936877077
Precision: 0.8708256944295546 
Recall: 0.8704318936877077 
F1 Score: 0.8703373571121347


In [19]:
newDict = {"Tweets" : X_test_svm, "Actual" : Y_test_svm, "Predicted" : predicted}

df2 = pd.DataFrame(newDict).reset_index()

true_pos = df2.query('Actual == "hate" and Actual == Predicted').copy().reset_index()
true_neg = df2.query('Actual == "none" and Actual == Predicted').copy().reset_index()
false_neg = df2.query('Actual == "hate" and Actual != Predicted').copy().reset_index()
false_pos = df2.query('Actual == "none" and Actual != Predicted').copy().reset_index()

print("TP =", len(true_pos) / len(df2) * 100, "FN =", len(false_neg) / len(df2) * 100, "\nTN  =", len(true_neg) / len(df2) * 100, "FP =", len(false_pos) / len(df2) * 100)

len(df2) * 

TP = 41.52823920265781 FN = 7.392026578073089 
TN  = 45.51495016611295 FP = 5.564784053156146


1204

In [13]:
d = {"H" : "ELL", "J" : "RII"}

print("Hello " + str(d))

Hello {'H': 'ELL', 'J': 'RII'}


In [18]:
f = pd.DataFrame()
if f.empty:
    f = pd.DataFrame(d)
else
    f.merge()


SyntaxError: expected ':' (32614955.py, line 4)

In [1]:
import pandas as pd
from sklearn import svm
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support

'''


'''

# Loading Data
df = pd.read_csv("dataset.csv").drop_duplicates()

# Equalizing Data Count
hate_df = df.query('Class == "hate"').sample(n=3000).copy().reset_index()
none_df = df.query('Class == "none"').sample(n=3000).copy().reset_index()

df = pd.merge(hate_df, none_df, how="outer").copy()

# DataFrame Stats
hate_per = len(df.query('Class == "hate"'))/ len(df)
none_per = len(df.query('Class == "none"')) / len(df)

print("\nLength of DataFrame:", len(df),"\nPercentage of Hate Tweets:", hate_per, "\nPercentage of non-Hate Tweets:",none_per)

# Split Data for SVM
X_train, X_test, Y_train, Y_test = train_test_split(df.Tweets, df.Class, test_size=0.2)

# Split Data Stats
train_hate_q = Y_train.value_counts()['hate']
train_none_q = Y_train.value_counts()['none']
train_len = len(Y_train)

print("\nLength of Train Split:", train_len, "\nPercentage of Hate Tweets:", train_hate_q / train_len, "\nPercentage of non-Hate Tweets:", train_none_q / train_len)

test_hate_q = Y_test.value_counts()['hate']
test_none_q = Y_test.value_counts()['none']
test_len = len(Y_test)

print("\nLength of Test Split:", test_len, "\nPercentage of Hate Tweets:", test_hate_q / test_len, "\nPercentage of non-Hate Tweets:", test_none_q / test_len)

# Fitting Data to SVM
vectorizer=CountVectorizer(analyzer = "word",tokenizer = None,preprocessor = None,stop_words = None,max_features = 5000)

train_data_features=vectorizer.fit_transform(X_train)
train_data_features=train_data_features.toarray()

test_data_features=vectorizer.transform(X_test)
test_data_features=test_data_features.toarray()

# SVM with linear kernel
classifier = svm.SVC(kernel='linear', C=1.0)

# Training Classifier
print ("\nTraining SVM...")
classifier.fit(train_data_features,Y_train)

# Testing Classifier
print ("Testing SVM...")
predicted = classifier.predict(test_data_features)

# Show scores
score_svm=precision_recall_fscore_support(Y_test, predicted, average='weighted')
print("\nPrecision:", score_svm[0], "\nRecall:", score_svm[1], "\nF1 Score:", score_svm[2])

# Re-Annotate Y_test Data

test_dict = {"Actual" : Y_test, "Predicted" : predicted}

# print(str(test_dict[0]))

df2 = pd.DataFrame(test_dict)

fn_q = len(df2.query('Actual == "hate" and Actual != Predicted'))
fp_q = len(df2.query('Actual == "none" and Actual != Predicted'))

fn_p = fn_q / len(df2)
fp_p = fp_q / len(df2)

print("\nWill randomly re-annotate: " + str((fn_p * 100)) + "% of hate speech\nWill randomly re-annotate: " +  str((fp_p * 100)) + "% of non-hate speech")


Length of DataFrame: 6000 
Percentage of Hate Tweets: 0.5 
Percentage of non-Hate Tweets: 0.5

Length of Train Split: 4800 
Percentage of Hate Tweets: 0.5 
Percentage of non-Hate Tweets: 0.5

Length of Test Split: 1200 
Percentage of Hate Tweets: 0.5 
Percentage of non-Hate Tweets: 0.5

Training SVM...
Testing SVM...

Precision: 0.8066519236071847 
Recall: 0.8058333333333333 
F1 Score: 0.8057036675170026

Will randomly re-annotate: 11.0% of hate speech
Will randomly re-annotate: 8.416666666666666% of non-hate speech


In [53]:
l = {"Tweets" : X_test, "Class" : Y_test}
test = pd.DataFrame(l)

print(test.value_counts('Class'))
print(test)




Class
hate    600
none    600
dtype: int64
                                                 Tweets Class
1996  RT @RyBen3 I'm not sexist: but I do not like t...  hate
4384  RT @Glinner: Fuck you forever, gamergate http:...  none
3617  Pro tip: Any time a man starts to give an opin...  none
3334    HAHAAHAHAHHAHAHAHAH EAT SHIT KAT AND ANDRE #mkr  none
4512  After tonight's elimination on #mkr, there's n...  none
...                                                 ...   ...
1513  OH SHIT HE WAS THE ONE @King0me You can call m...  hate
2463  .@brenbarber Chemaly is a special kind of twit...  hate
5757  RT @thetrudz: + in the fact that she is queer ...  none
20                                 @g56yu What is that?  hate
4652  I DONT know how I feel this way...\r\r\n\r\r\n...  none

[1200 rows x 2 columns]


In [61]:

# test.query('Class == "hate"').sample(frac=fn_p).replace("hate", "none").head()

s = pd.Series(test.query('Class == "hate"').sample(frac = fn_p).Tweets)
h = test.query('Class == "none"').sample(frac=fp_p)
# s_t = s.Tweets.array
h_t = h.Tweets


# test.loc[test["Tweets"] in s_t, "Class"] = "none"

test.query('Tweets in @s').replace('hate', 'none')
# test.query('Tweets in @h_t').Class.replace('none', 'hate')

t = test.Class
print(t.value_counts())
print(t)


# h.head()
# test.query("Class in 'hate'").head()


# test = 


# Y_test = pd.Series(test['Class'])

# test_hate_q = Y_test.value_counts()["hate"]
# test_none_q = Y_test.value_counts()["none"]
# test_len = len(Y_test)

# print("\nLength of Test Split:", test_len, "\nPercentage of Hate Tweets:", test_hate_q / test_len, "\nPercentage of non-Hate Tweets:", test_none_q / test_len)

# # Re-Testing Classifier
# print ("Re-Testing SVM...")
# r_predicted = classifier.predict(test_data_features)

# # Show scores
# score_svm=precision_recall_fscore_support(Y_test, r_predicted, average='weighted')
# print("\nPrecision:", score_svm[0], "\nRecall:", score_svm[1], "\nF1 Score:", score_svm[2])

hate    600
none    600
Name: Class, dtype: int64
1996    hate
4384    none
3617    none
3334    none
4512    none
        ... 
1513    hate
2463    hate
5757    none
20      hate
4652    none
Name: Class, Length: 1200, dtype: object
