In [1]:
from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import TfidfVectorizer

import re

import pandas as pd

import nltk

import matplotlib.pyplot as plt

## Get and preprocess the data

In [2]:
df = pd.read_feather("../data/twitter_mutlimodal_hate_speech/data.feather")

In [3]:
labels = df["labels_str"].apply(pd.Series)

display(labels.head(5))
df.loc[16833, "tweet_text"]

# df[labels.columns] =  labels


display(pd.concat([labels[col] for col in labels.columns]).value_counts())


# One-hot encoded categories
# pd.get_dummies(labels)
oh_labels = pd.concat([
    pd.get_dummies(labels[li])
    for li in labels.columns
]).groupby(level=0).max().astype(bool)

df[oh_labels.columns] = oh_labels

display(df.head())

Unnamed: 0,0,1,2,3,4
0,Religion,Racist,Homophobe,,
1,OtherHate,OtherHate,OtherHate,,
2,NotHate,NotHate,NotHate,,
3,Racist,NotHate,NotHate,,
4,Racist,NotHate,Racist,,


NotHate      312039
Racist        63543
OtherHate     31548
Sexist        22805
Homophobe     16932
Religion       2607
dtype: int64

Unnamed: 0,img_url,labels,tweet_url,tweet_text,labels_str,tweet_id,img_text,Homophobe,NotHate,OtherHate,Racist,Religion,Sexist
0,http://pbs.twimg.com/tweet_video_thumb/D3gi9MH...,"[4, 1, 3]",https://twitter.com/user/status/11146793537140...,@FriskDontMiss Nigga https://t.co/cAsaLWEpue,"[Religion, Racist, Homophobe]",1114679353714016256,#YOUNGERU SAVE IT,True,False,False,True,True,False
1,http://pbs.twimg.com/ext_tw_video_thumb/106301...,"[5, 5, 5]",https://twitter.com/user/status/10630200488166...,My horses are retarded https://t.co/HYhqc6d5WN,"[OtherHate, OtherHate, OtherHate]",1063020048816660480,,False,False,True,False,False,False
2,http://pbs.twimg.com/media/D2OzhzHUwAADQjd.jpg,"[0, 0, 0]",https://twitter.com/user/status/11089273680753...,‚ÄúNIGGA ON MA MOMMA YOUNGBOY BE SPITTING REAL S...,"[NotHate, NotHate, NotHate]",1108927368075374593,,False,True,False,False,False,False
3,http://pbs.twimg.com/ext_tw_video_thumb/111401...,"[1, 0, 0]",https://twitter.com/user/status/11145585346356...,RT xxSuGVNGxx: I ran into this HOLY NIGGA TODA...,"[Racist, NotHate, NotHate]",1114558534635618305,,False,True,False,True,False,False
4,http://pbs.twimg.com/media/Dl30pGIU8AAVGxO.jpg,"[1, 0, 1]",https://twitter.com/user/status/10352524802155...,‚ÄúEVERYbody calling you Nigger now!‚Äù https://t....,"[Racist, NotHate, Racist]",1035252480215592966,,False,True,False,True,False,False


In [6]:
# Homophobe = df.loc[df["Homophobe"], "tweet_text"]
# NotHate = df.loc[df["NotHate"], "tweet_text"]
# OtherHate = df.loc[df["OtherHate"], "tweet_text"]
# Racist = df.loc[df["Racist"], "tweet_text"]
# Religion = df.loc[df["Religion"], "tweet_text"]
# Sexist = df.loc[df["Sexist"], "tweet_text"]

### Cleaning text

In [4]:
def clean_tweets(vTEXT):
    """
    Removes URLs, tags and more from tweets. Extend it if needed.
    
    short words: \b\w{1,2}\b
    tags starting with @: @\w*
    URL: (https|http)?:\/\/(\w|\.|\/|\?|\=|\&|\%)*\b
    """
    vTEXT = re.sub(r'\b\w{1,2}\b|@\w*|(https|http)?:\/\/(\w|\.|\/|\?|\=|\&|\%)*\b', '', vTEXT, flags=re.MULTILINE)
    return(vTEXT)

# Racist = Racist.apply(remove_urls)

In [5]:
data = df.copy()

data["text"] = data["tweet_text"].apply(clean_tweets)
data = data[["text", "Homophobe", "NotHate", "OtherHate", "Racist", "Religion", "Sexist"]]

# data = data[["text", "Homophobe", "NotHate", "OtherHate", "Racist", "Religion", "Sexist"]].melt(
# #     id_vars="tweet_text"
#     id_vars=['text'], value_vars=[
#         "Homophobe",
#         "NotHate",
#         "OtherHate",
#         "Racist",
#         "Religion",
#         "Sexist"
#     ],
#     var_name="category"
# )

# data = data[data["value"] & (data["category"] != "NotHate")].drop(columns=("value"))

In [6]:
data

Unnamed: 0,text,Homophobe,NotHate,OtherHate,Racist,Religion,Sexist
0,Nigga,True,False,False,True,True,False
1,horses are retarded,False,False,True,False,False,False
2,‚ÄúNIGGA MOMMA YOUNGBOY SPITTING REAL SHIT NI...,False,True,False,False,False,False
3,xxSuGVNGxx: ran into this HOLY NIGGA TODAY üò≠...,False,True,False,True,False,False
4,‚ÄúEVERYbody calling you Nigger now!‚Äù,False,True,False,True,False,False
...,...,...,...,...,...,...,...
149818,would just say hes Donny the retard,False,True,True,False,False,True
149819,congrats nigga keep grinding,False,True,False,False,False,False
149820,nigga big shitty,False,True,False,True,False,False
149821,did she just say ‚Äú nigga‚Äù Rich? &amp; she sai...,False,True,False,True,False,False


In [10]:
# Here we only export the dataset to be able to use it in other notebooks (not needed to do again, as we have already exported 
# our dataset)
# data.to_excel('../data/twitter_mutlimodal_hate_speech/twitter_mutlimodal_hate_speech.xlsx')

### Stemming

In [7]:
# Load SnowballStemmer
from nltk.stem.snowball import SnowballStemmer
stemmer = SnowballStemmer("english")

def tokenize_and_stem(text):
    # First tokenize by sentence, then by word to ensure that punctuation is caught as it's own token.
    tokens = [word for sent in nltk.sent_tokenize(text) for word in nltk.word_tokenize(sent)]
    filtered_tokens = []
    # Filter out any tokens not containing letters (e.g., numeric tokens, raw punctuation).
    for token in tokens:
        if re.search('[a-zA-Z]', token):
            filtered_tokens.append(token)
    stems = [stemmer.stem(t) for t in filtered_tokens]
    return stems


def tokenize_only(text):
    # First tokenize by sentence, then by word to ensure that punctuation is caught as it's own token.
    tokens = [word.lower() for sent in nltk.sent_tokenize(text) for word in nltk.word_tokenize(sent)]
    filtered_tokens = []
    # Filter out any tokens not containing letters (e.g., numeric tokens, raw punctuation).
    for token in tokens:
        if re.search('[a-zA-Z]', token):
            filtered_tokens.append(token)
    return filtered_tokens

In [8]:
tweets_text = data["text"].tolist()

In [9]:
totalvocab_stemmed = []
totalvocab_tokenized = []
for i in tweets_text:
    allwords_stemmed = tokenize_and_stem(i)
    totalvocab_stemmed.extend(allwords_stemmed)
    
    allwords_tokenized = tokenize_only(i)
    totalvocab_tokenized.extend(allwords_tokenized)

# Create a pandas DataFrame
vocab_frame = pd.DataFrame({'words': totalvocab_tokenized}, index = totalvocab_stemmed)

print("There are '{}' items in our data frame.".format(str(vocab_frame.shape[0])))
print("Data frame contents: \n{}".format(vocab_frame.head(10)))

There are '1146606' items in our data frame.
Data frame contents: 
             words
nigga        nigga
hors        horses
are            are
retard    retarded
nigga        nigga
momma        momma
youngboy  youngboy
spit      spitting
real          real
shit          shit


## TF-IDF Encoding

In [10]:
# Define vectorizer parameters
tfidf_vectorizer = TfidfVectorizer(
                        max_df=0.8, 
                        max_features=200000,
#                         min_df=0.2, 
                        min_df=0.0015,
#                         min_df=0.002,
                        stop_words='english', 
                        use_idf=True, 
                        tokenizer=tokenize_and_stem, 
                        ngram_range=(1,3))

# Fit the vectorizer to synopses texts
tfidf_matrix = tfidf_vectorizer.fit_transform(tweets_text) 

print("TF-IDF matrix shape: {}".format(tfidf_matrix.shape))

  'stop_words.' % sorted(inconsistent))


TF-IDF matrix shape: (149823, 589)


In [11]:
print("TF-IDF vectors (each column is a document):\n{}\nRows:\n{}".format(tfidf_matrix.T.A, tfidf_vectorizer.get_feature_names()))

TF-IDF vectors (each column is a document):
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
Rows:
['absolut', 'account', 'act', 'actual', 'ago', 'ain', 'aint', 'alreadi', 'alway', 'america', 'amp', 'ani', 'anim', 'anoth', 'anoth nigga', 'anyon', 'anyth', 'asf', 'asian', 'ask', 'ass', 'ass bitch', 'ass bitch fuck', 'ass nigga', 'away', 'aye', 'babe', 'babi', 'bad', 'bag', 'ball', 'bar', 'beat', 'becaus', 'befor', 'believ', 'best', 'bet', 'better', 'big', 'big dyke', 'big dyke energi', 'bint', 'birthday', 'birthday nigga', 'bit', 'bitch', 'bitch ass', 'bitch ass nigga', 'bitch fuck', 'bitch fuck bout', 'bitch nigga', 'black', 'bless', 'block', 'blond', 'bodi', 'book', 'border', 'bout', 'bout nigga', 'boy', 'break', 'bring', 'bro', 'broke', 'broke nigga', 'brother', 'bruh', 'buildthewal', 'buildthewal maga', 'busi', 'buy', 'came', 'car', 'card', 'care', 'cat', 'catch', 'caus', 'chan

In [12]:
terms = tfidf_vectorizer.get_feature_names()

# from sklearn.metrics.pairwise import cosine_similarity
# dist = 1 - cosine_similarity(tfidf_matrix)

## Clustering

In [13]:
num_clusters = 10
km = KMeans(n_clusters=num_clusters)

# Perform clustering
km.fit(tfidf_matrix)

clusters = km.labels_.tolist()
print("Clusters: {}".format(clusters))

Clusters: [4, 0, 1, 1, 1, 1, 2, 6, 3, 4, 1, 1, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 4, 8, 7, 7, 7, 1, 5, 1, 6, 3, 1, 1, 3, 1, 5, 7, 1, 1, 6, 1, 7, 1, 1, 1, 1, 1, 1, 0, 7, 1, 1, 1, 1, 1, 5, 5, 0, 1, 7, 7, 1, 5, 5, 1, 0, 1, 8, 6, 7, 1, 1, 1, 1, 0, 8, 6, 3, 7, 1, 1, 1, 1, 1, 1, 4, 1, 6, 7, 1, 0, 6, 1, 1, 7, 1, 7, 4, 1, 1, 5, 1, 1, 3, 1, 1, 3, 1, 0, 1, 1, 1, 4, 8, 8, 1, 1, 6, 7, 1, 1, 4, 4, 1, 0, 1, 1, 1, 0, 5, 1, 1, 7, 0, 5, 4, 1, 1, 6, 0, 6, 1, 6, 8, 9, 5, 1, 1, 1, 1, 1, 5, 1, 4, 5, 1, 8, 1, 1, 7, 1, 1, 1, 1, 4, 1, 3, 4, 5, 8, 7, 5, 5, 1, 1, 1, 1, 1, 0, 1, 0, 5, 1, 2, 1, 1, 6, 1, 1, 8, 1, 1, 0, 7, 5, 6, 1, 6, 3, 1, 1, 1, 0, 1, 0, 6, 1, 1, 9, 1, 1, 9, 1, 1, 4, 4, 3, 1, 0, 1, 1, 1, 4, 6, 1, 1, 1, 1, 1, 1, 3, 1, 8, 1, 1, 1, 1, 8, 4, 1, 1, 1, 6, 1, 5, 3, 6, 1, 8, 4, 1, 1, 1, 0, 5, 1, 1, 1, 9, 6, 1, 1, 5, 1, 1, 1, 6, 1, 1, 1, 1, 1, 7, 4, 1, 9, 4, 0, 1, 6, 8, 1, 6, 8, 1, 1, 1, 1, 0, 1, 0, 1, 7, 1, 6, 1, 1, 1, 1, 1, 7, 0, 4, 1, 7, 1, 0, 7, 4, 1, 1, 7, 1, 3, 1, 4, 1, 1, 7, 6, 0, 1, 1, 6, 1, 8, 1, 4, 1,

In [17]:
# # Elbow method
# distortions = []
# for i in range(1, 20):
#     km = KMeans(
#         n_clusters=i, init='random',
#         n_init=10, max_iter=300,
#         tol=1e-04, random_state=0
#     )
#     km.fit(tfidf_matrix)
#     distortions.append(km.inertia_)

# # plot
# plt.plot(range(1, 20), distortions, marker='o')
# plt.xlabel('Number of clusters')
# plt.ylabel('Distortion')
# plt.show()

In [18]:
# import joblib

# Uncomment the below line to save your model 
#joblib.dump(km,  'doc_cluster.pkl')

# Uncomment the below line to load your saved model 
#km = joblib.load('doc_cluster.pkl')
#clusters = km.labels_.tolist()
# print("Clusters: {}".format(clusters))

In [14]:
data["cluster"] = clusters

print("Number of movies per cluster: \n{}".format(data["cluster"].value_counts()))

Number of movies per cluster: 
1    82932
6    13276
4    11693
5     9462
0     8415
7     7362
8     6409
3     5677
9     2339
2     2258
Name: cluster, dtype: int64


In [20]:
# data.groupby("cluster")["category"].value_counts()

## Researching clusters

In [15]:
print("Top terms per cluster:\n")

# Sort cluster centers by proximity to centroid.
order_centroids = km.cluster_centers_.argsort()[:, ::-1] 

# Helper function
def getClusterWords(cluster, n=15):
    words = []
    for ind in order_centroids[cluster, :n]: # Print 6 words per cluster
        print(terms[ind])
        words.append(vocab_frame.loc[terms[ind].split(' '),].values.tolist()[0][0])
    return ", ".join(words)

for i in range(num_clusters):
    print("-"*50)
    print(f"Cluster {i}")
    
    print(getClusterWords(i))
    
    t = data[data["cluster"] == i]
    display(t)
    # Note that this does NOT add to 1, as one tweet has multiple categories
    display((t.drop(columns=["text", "cluster"]).sum() / len(t)).round(3))
    
#     break

    print("\n")

Top terms per cluster:

--------------------------------------------------
Cluster 0
retard
fuck retard
fuck
think
just
look
mental
peopl
went
shit
whi
say
trump
like
actual
retarded, fuck, fuck, thinking, just, look, mentally, people, went, shit, why, says, trump, like, actually


Unnamed: 0,text,Homophobe,NotHate,OtherHate,Racist,Religion,Sexist,cluster
1,horses are retarded,False,False,True,False,False,False,0
49,"Jeffery really said ""' take card from retard""",False,True,True,False,False,False,0
58,"Broke monitor because ‚Äô fucking retard, now...",False,True,False,False,False,False,0
66,Ohhhh you retarded RETARDED,False,False,True,False,False,False,0
75,Battlefield arrivera finalement retard¬†!,False,True,True,False,False,False,0
...,...,...,...,...,...,...,...,...
149796,"#PUBGMOBILE You went full retard man, never f...",False,True,True,False,False,False,0
149802,When she rolled her sleeves knew this interv...,False,False,True,True,False,False,0
149814,Ann shut you mentally retarded lunatic!,False,True,True,False,False,False,0
149817,"Thanks ""‚Äî retarded"" For Following !!!",False,True,False,False,False,True,0


Homophobe    0.033
NotHate      0.857
OtherHate    0.780
Racist       0.102
Religion     0.022
Sexist       0.049
dtype: float64



--------------------------------------------------
Cluster 1
nigga
nigger
got
redneck
fuck
ass
surrend
just
said
shit
real
hillbilli
say
don
sjw
nigga, nigger, got, redneck, fuck, ass, surrender, just, said, shit, real, hillbilly, says, don, sjw


Unnamed: 0,text,Homophobe,NotHate,OtherHate,Racist,Religion,Sexist,cluster
2,‚ÄúNIGGA MOMMA YOUNGBOY SPITTING REAL SHIT NI...,False,True,False,False,False,False,1
3,xxSuGVNGxx: ran into this HOLY NIGGA TODAY üò≠...,False,True,False,True,False,False,1
4,‚ÄúEVERYbody calling you Nigger now!‚Äù,False,True,False,True,False,False,1
5,‚Äú real ass bitch give fuck boutta nigga‚Äù,False,True,False,False,False,False,1
10,.... you turn twitter for instead beating ...,False,True,False,False,False,False,1
...,...,...,...,...,...,...,...,...
149815,know ‚Äô too much real nigga ever danger ...,False,True,False,False,False,False,1
149816,Good luck nigga ü§òüèæ,False,True,False,True,False,False,1
149820,nigga big shitty,False,True,False,True,False,False,1
149821,did she just say ‚Äú nigga‚Äù Rich? &amp; she sai...,False,True,False,True,False,False,1


Homophobe    0.028
NotHate      0.962
OtherHate    0.100
Racist       0.413
Religion     0.016
Sexist       0.069
dtype: float64



--------------------------------------------------
Cluster 2
white
trash
white trash
hous
peopl
trump
racist
piec
like
just
nigga
look
trailer
men
fuck
white, trash, white, house, people, trump, racist, pieces, like, just, nigga, look, trailer, men, fuck


Unnamed: 0,text,Homophobe,NotHate,OtherHate,Racist,Religion,Sexist,cluster
6,Fuck ice. White supremacist trash. All you ...,False,False,True,True,False,False,2
184,White Trash Women Webcam¬†Masturbation,True,True,False,False,False,True,2
441,"Don' know what mean. ' called ""white trash""....",False,True,True,False,False,False,2
540,SKIP REALLY CALLED WHITE TRASH ASBDKBDKWHEOFN...,False,False,True,True,False,False,2
583,George. THE POTUS WHITE TRASH WEASEL DO...,False,True,False,True,False,False,2
...,...,...,...,...,...,...,...,...
149429,Nigga poles filling white holes !!! #BlacksOnC...,True,True,False,True,False,False,2
149608,‚ÄúPimps and hoes‚Äù White trash will suffice,False,True,True,False,False,False,2
149612,"You' think, but there' lot inbred white tr...",False,False,True,True,False,False,2
149644,"Come see the ladies, white trash party tonight...",False,True,False,False,True,False,2


Homophobe    0.026
NotHate      0.803
OtherHate    0.358
Racist       0.705
Religion     0.018
Sexist       0.061
dtype: float64



--------------------------------------------------
Cluster 3
faggot
sissi
fuck
sissi faggot
say
gay
cock
expos
ass
look
just
straight
use
like
love
faggot, sissy, fuck, sissy, says, gay, cock, exposed, ass, look, just, straight, used, like, love


Unnamed: 0,text,Homophobe,NotHate,OtherHate,Racist,Religion,Sexist,cluster
8,#sissy faggot,True,True,False,False,False,False,3
31,and are the Jack and Ennis from Brokeback (f...,True,False,False,True,False,False,3
34,"fucked , response was too faggot like, need...",True,True,False,False,False,False,3
78,proud #serve the #superior #alphamale pro...,False,True,False,False,False,False,3
104,Faggot,True,False,False,False,False,False,3
...,...,...,...,...,...,...,...,...
149685,when see faggot floor,True,True,True,False,False,False,3
149716,The nerve this Faggot make this statement!,True,True,True,False,False,False,3
149749,Relapsing Faggot..... #findom # piggy #paypig,True,True,False,False,False,False,3
149769,faggot need attention,True,False,True,False,False,False,3


Homophobe    0.775
NotHate      0.768
OtherHate    0.160
Racist       0.104
Religion     0.014
Sexist       0.150
dtype: float64



--------------------------------------------------
Cluster 4
nigga
don
real
shit
know
say
got
alway
nigga nigga
ain
hey
nigga love
good
said
love
nigga, don, real, shit, know, says, got, always, nigga, ain, hey, nigga, good, said, love


Unnamed: 0,text,Homophobe,NotHate,OtherHate,Racist,Religion,Sexist,cluster
0,Nigga,True,False,False,True,True,False,4
9,Nigga what?,True,True,False,False,True,False,4
12,‚Äô üíØ behind you nigga thug brotherüñ§,False,True,False,False,False,False,4
13,Who dafuqq this nigga,False,True,False,True,False,False,4
22,NIGGA. Wya?,False,True,False,True,False,False,4
...,...,...,...,...,...,...,...,...
149717,Ribbit ribbit nigga,False,True,False,False,True,False,4
149728,Alright...well that‚Äô enough banter...bye nigga,False,True,False,True,False,False,4
149742,Nigga üò≠,False,True,False,False,False,False,4
149806,nigga' first Chick Fil,False,True,False,False,True,False,4


Homophobe    0.017
NotHate      0.983
OtherHate    0.032
Racist       0.421
Religion     0.012
Sexist       0.040
dtype: float64



--------------------------------------------------
Cluster 5
like
nigga
look
look like
nigga like
like nigga
nigga look
nigga look like
just
got
don
shit
feel like
whi
feel
like, nigga, look, look, nigga, like, nigga, nigga, just, got, don, shit, feel, why, feel


Unnamed: 0,text,Homophobe,NotHate,OtherHate,Racist,Religion,Sexist,cluster
28,Pamilerin like after seeing this: muscular...,False,True,False,True,False,False,5
36,The ice cream truck nigga was like üöêüí®üí®üí®üí®üí®,False,True,False,False,False,False,5
56,Facts like beat nigga,False,True,False,False,False,False,5
57,Bro you look like Human Sherk and Kurt Coba...,False,True,True,False,False,False,5
63,Bumping ‚Äú you love ‚Äù like üó£NIGGA!,False,True,False,True,False,False,5
...,...,...,...,...,...,...,...,...
149770,Looking like this nigga gift and curse lol,False,True,False,False,False,False,5
149779,Miami what‚Äô popping.. seeing where the freak n...,True,True,False,False,False,False,5
149787,They like family first naw bitch loyalty firs...,False,True,False,False,False,False,5
149788,"Bruh, Ron Baker looks like the third Paul brot...",True,True,True,False,False,False,5


Homophobe    0.051
NotHate      0.962
OtherHate    0.105
Racist       0.376
Religion     0.013
Sexist       0.092
dtype: float64



--------------------------------------------------
Cluster 6
cunt
fuck
fuck cunt
milf
babe
big
just
look
littl
fat
love
tit
watch
littl cunt
galleri
cunt, fuck, fuck, milf, babe, big, just, look, little, fat, love, tits, watching, little, gallery


Unnamed: 0,text,Homophobe,NotHate,OtherHate,Racist,Religion,Sexist,cluster
7,Day‚Äô cunt,False,True,False,False,False,False,6
30,Lydia just called this man cunt.,False,True,False,False,False,True,6
40,"Fare play , little cunt wont doing that again",False,True,False,False,False,False,6
69,Git bottle bucky intae brit cunt you kn...,False,True,True,True,False,False,6
77,the dude that called fat cunt before 9AM......,False,True,False,False,False,False,6
...,...,...,...,...,...,...,...,...
149750,"Git eet doooone!! : Just being pushy cunt, m...",False,True,False,False,False,True,6
149758,Indian desi newport bathing suits for women t...,True,True,False,False,False,False,6
149790,She came baring goodies ü•∫ü•∫ü§óü§ó‚ù§Ô∏è missed this C...,False,True,False,False,False,True,6
149792,out for sneaky beer after work and this cunt ...,False,True,False,False,False,False,6


Homophobe    0.070
NotHate      0.933
OtherHate    0.211
Racist       0.129
Religion     0.015
Sexist       0.498
dtype: float64



--------------------------------------------------
Cluster 7
twat
fuck
just
day
littl
look
watch
make
think
like
love
don
spread
right
say
twat, fuck, just, day, little, look, watching, make, thinking, like, love, don, spreading, right, says


Unnamed: 0,text,Homophobe,NotHate,OtherHate,Racist,Religion,Sexist,cluster
24,motivation from favorite twat ‚ù§Ô∏èüçûüèÉüèª‚Äç‚ôÇÔ∏è,False,True,False,False,False,True,7
25,"Mike was twat, cocky son that always dele...",False,True,True,False,False,False,7
26,And said his not cos his but this twat sti...,False,True,False,False,False,False,7
37,Fuck England little twat!,False,True,False,False,False,True,7
42,When You' Just Had Enough That Cocky Twat!,False,True,False,False,False,False,7
...,...,...,...,...,...,...,...,...
149722,Watching Mourinho act twat #CHEMUN,False,True,True,False,False,False,7
149735,Looking for some this for the local twat th...,False,True,False,True,False,False,7
149739,ProTip; Don' tag creators your one-sentence ...,False,True,False,False,False,False,7
149752,LDG when hook you for throat twat üò≠,False,True,False,False,False,True,7


Homophobe    0.046
NotHate      0.946
OtherHate    0.301
Racist       0.119
Religion     0.019
Sexist       0.408
dtype: float64



--------------------------------------------------
Cluster 8
dyke
van dyke
van
dick van dyke
dick van
dick
dyke energi
energi
jason van dyke
jason van
jason
big dyke
trial
big
big dyke energi
dyke, van, van, dick, dick, dick, dyke, energy, jason, jason, jason, big, trial, big, big


Unnamed: 0,text,Homophobe,NotHate,OtherHate,Racist,Religion,Sexist,cluster
23,carol really said fuck yall ‚Äô dyke and ‚Äô here...,False,True,True,False,False,False,8
68,BIG DYKE ENERGY RIGHT THERE,True,True,False,False,False,False,8
76,cos theres strange brooding women dyke ener...,False,True,False,False,False,True,8
114,Jason Van Dyke' partner testifies murder trial,False,True,False,True,False,False,8
115,yes they can say dyke what gonna about huh,False,True,False,False,False,False,8
...,...,...,...,...,...,...,...,...
149681,Arlin Horton actually living life Dick Van D...,False,True,True,False,False,False,8
149751,Dyke New-Harford Pella Christian. Game . . #B...,False,True,False,False,False,False,8
149767,Protestors stopped from walking sidewalk outsi...,False,True,True,False,False,False,8
149780,‚Äô gay and dyke and love food and this bodysu...,True,True,False,False,False,False,8


Homophobe    0.480
NotHate      0.915
OtherHate    0.147
Racist       0.102
Religion     0.017
Sexist       0.120
dtype: float64



--------------------------------------------------
Cluster 9
buildthewal
maga
border
wall
buildthewal maga
trump
presid
america
thank
amp
vote
stop
come
need
day
buildthewall, maga, border, wall, buildthewall, trump, president, america, thank, amp, vote, stop, coming, need, day


Unnamed: 0,text,Homophobe,NotHate,OtherHate,Racist,Religion,Sexist,cluster
145,This truth! #MAGA #KAG #BenghaziNeverForget #...,False,True,True,False,False,False,9
209,#BuildTheWall - Not President - Not Resident,False,True,False,True,False,False,9
212,"you #BuildTheWall, would good start you ...",False,True,False,False,False,False,9
259,More fake news!!!!!! Liar and enemy the peo...,False,True,True,True,False,False,9
276,THIS NOT GAME THE DIFFERENCE #MAGA #WalkA...,False,True,True,False,False,False,9
...,...,...,...,...,...,...,...,...
149490,"love you, . President! #MAGA #BuildTheWall #...",False,True,True,False,False,False,9
149647,#VoteRepublican STOP INVASION and #BuildTheW...,False,True,False,True,False,False,9
149690,#BuildTheWall #-VERIFY #PunishEmployersOfIlle...,True,True,False,True,False,False,9
149713,‚Äô everywhere #RedWave #JobsNotMobs #Build...,False,True,False,False,False,False,9


Homophobe    0.022
NotHate      0.964
OtherHate    0.329
Racist       0.377
Religion     0.030
Sexist       0.021
dtype: float64



