# Part 4: SBERT and GCN 

## Imports

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns 

# settings
pd.options.display.max_colwidth = 500

## Read in data 

In [6]:
df = pd.read_csv("../data/cleaned_tweets.csv")
df.shape

(47692, 3)

In [5]:
df_sample = pd.read_csv("../data/cleaned_tweets_sample.csv")
df_sample.shape

(4800, 3)

## Neural net sentence transformer and cosine similarities

### SBERT 

Transorm text with SBERT (SentenceTransformer) and compute cosine similarities for each tweet

#### Citation

Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks (2019)


@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "http://arxiv.org/abs/1908.10084",
}


### Example from SBERT documentation

Install: pip install -U sentence-transformers

In [82]:
# example code from https://www.sbert.net/docs/usage/semantic_textual_similarity.html 

from sentence_transformers import SentenceTransformer, util

In [155]:
model = SentenceTransformer('all-MiniLM-L6-v2')

sentences = ['A man is eating food.',
          'A man is eating a piece of bread.',
          'The girl is carrying a baby.',
          'A man is riding a horse.',
          'A woman is playing violin.',
          'Two men pushed carts through the woods.',
          'A man is riding a white horse on an enclosed ground.',
          'A monkey is playing drums.',
          'Someone in a gorilla costume is playing a set of drums.'
          ]

#Encode all sentences
embeddings = model.encode(sentences)

#Compute cosine similarity between all pairs
cos_sim = util.cos_sim(embeddings, embeddings)

#Add all pairs to a list with their cosine similarity score
all_sentence_combinations = []
for i in range(len(cos_sim)-1):
    for j in range(i+1, len(cos_sim)):
        all_sentence_combinations.append([cos_sim[i][j], i, j])

#Sort list by the highest cosine similarity score
all_sentence_combinations = sorted(all_sentence_combinations, key=lambda x: x[0], reverse=True)

print("Top-5 most similar pairs:")
for score, i, j in all_sentence_combinations[0:5]:
    print("{} \t {} \t {:.4f}".format(sentences[i], sentences[j], cos_sim[i][j]))

Top-5 most similar pairs:
A man is eating food. 	 A man is eating a piece of bread. 	 0.7553
A man is riding a horse. 	 A man is riding a white horse on an enclosed ground. 	 0.7369
A monkey is playing drums. 	 Someone in a gorilla costume is playing a set of drums. 	 0.6433
A woman is playing violin. 	 Someone in a gorilla costume is playing a set of drums. 	 0.2564
A man is eating food. 	 A man is riding a horse. 	 0.2474


In [156]:
# get each score formatted 

# for score, i, j in all_sentence_combinations[0:5]:
#     print(score)
#     print(i)
#     print(j) 
#     print(cos_sim[i][j]) 
#     print("{:.4f}".format(cos_sim[i][j]))

In [102]:
cos_sim

tensor([[ 1.0000,  0.7553, -0.1050,  0.2474, -0.0704, -0.0333,  0.1707,  0.0476,
          0.0630],
        [ 0.7553,  1.0000, -0.0610,  0.1442, -0.0809, -0.0216,  0.1157,  0.0362,
          0.0216],
        [-0.1050, -0.0610,  1.0000, -0.1088,  0.0217, -0.0413, -0.0928,  0.0231,
          0.0247],
        [ 0.2474,  0.1442, -0.1088,  1.0000, -0.0348,  0.0362,  0.7369,  0.0821,
          0.1389],
        [-0.0704, -0.0809,  0.0217, -0.0348,  1.0000, -0.1654, -0.0592,  0.1961,
          0.2564],
        [-0.0333, -0.0216, -0.0413,  0.0362, -0.1654,  1.0000,  0.0769, -0.0380,
         -0.0895],
        [ 0.1707,  0.1157, -0.0928,  0.7369, -0.0592,  0.0769,  1.0000,  0.0495,
          0.1191],
        [ 0.0476,  0.0362,  0.0231,  0.0821,  0.1961, -0.0380,  0.0495,  1.0000,
          0.6433],
        [ 0.0630,  0.0216,  0.0247,  0.1389,  0.2564, -0.0895,  0.1191,  0.6433,
          1.0000]])

In [127]:
type(cos_sim)

torch.Tensor

In [136]:
cos_sim[0][0]

tensor(1.0000)

In [148]:
# [[each for each in first] for first in cos_sim]

In [146]:
# [[["{:.4f}".format(score) for score in row] for row in tensor_array] for tensor_array in cos_sim]

# doesn't work. error warning: warnings.warn('Iterating over a tensor might cause the trace to be incorrect. 

In [149]:
# change from tensor to array 

cos_sim.numpy()

array([[ 0.99999976,  0.7553371 , -0.10495853,  0.24740164, -0.07038559,
        -0.03333382,  0.1707491 ,  0.04760282,  0.06300681],
       [ 0.7553371 ,  1.0000002 , -0.06101734,  0.14418386, -0.08090694,
        -0.02164983,  0.11570778,  0.03623926,  0.02156696],
       [-0.10495853, -0.06101734,  1.0000004 , -0.10883534,  0.02174375,
        -0.04125946, -0.09275975,  0.02312525,  0.02465787],
       [ 0.24740164,  0.14418386, -0.10883534,  0.9999999 , -0.03483596,
         0.03618856,  0.73688185,  0.08207119,  0.13887264],
       [-0.07038559, -0.08090694,  0.02174375, -0.03483596,  1.0000001 ,
        -0.16540885, -0.05921238,  0.19612251,  0.2564156 ],
       [-0.03333382, -0.02164983, -0.04125946,  0.03618856, -0.16540885,
         0.9999995 ,  0.07685373, -0.03803788, -0.08950324],
       [ 0.1707491 ,  0.11570777, -0.09275974,  0.7368821 , -0.05921238,
         0.07685371,  1.        ,  0.04953562,  0.11909154],
       [ 0.04760282,  0.03623927,  0.02312524,  0.08207119,  0

### Compare tweets labeled 'gender' with each other 
Test sbert with 4 sentences from corpus, all vs all 

In [123]:
gender_df = df_sample.loc[df['cyberbullying_type'] == 'gender']
gender_list = [tweet for tweet in gender_df['no_links_text']]
gender_list = gender_list[:20]
len(gender_list)

20

In [152]:
model = SentenceTransformer('all-MiniLM-L6-v2')

sentences = gender_list

#Encode all sentences
embeddings = model.encode(sentences)

#Compute cosine similarity between all pairs
cos_sim = util.cos_sim(embeddings, embeddings)

#Add all pairs to a list with their cosine similarity score
all_sentence_combinations = []
for i in range(len(cos_sim)-1):
    for j in range(i+1, len(cos_sim)):
        all_sentence_combinations.append([cos_sim[i][j], i, j])

#Sort list by the highest cosine similarity score
all_sentence_combinations = sorted(all_sentence_combinations, key=lambda x: x[0], reverse=True)

print("Top-5 most similar pairs:")
for score, i, j in all_sentence_combinations[0:5]:
    print("{} \t {} \t {:.4f}".format(sentences[i], sentences[j], cos_sim[i][j]))


Top-5 most similar pairs:
men when they think saying any slur making racist jokes “retard” “gay” any rape joke think they so tough. also men when we say they are not funny 	 TW rape Ray Badrans joke was gay ppl can make jokes about being gay, black ppl can make jokes about being black, I can make jokes about rape 	 0.5958
zzzz  Call me sexist but I'm yet to see a funny female comedian 	 RT  I'm not sexist but I haven't subscribed to a single female YouTuber 	 0.5943
Long ago, I asked some white gay men on our lunch break, advice on where they thought the safest city for me to relocate and live would be. They said to me, "West Hollywood girlfriend. No one wants to rape you there." This is not a joke. They meant it. And it makes perfect sense. 	  Just a joke. I thought gay rape was still cool around here. 	 0.5812
RT  Sucks to have the smile wiped off your own face huh Kat? She in a glass house should not throw stones. #mkr  #mkr2015 	 Oh Kat,KARMA bit you in the ASS! #MKR 	 0.5630
 Just

Make data frame from matrix of cosine similarity scores

In [153]:
gender_df = pd.DataFrame(cos_sim.numpy(), 
                        columns=gender_list, 
                        index=gender_list)

gender_df

Unnamed: 0,RT Sucks to have the smile wiped off your own face huh Kat? She in a glass house should not throw stones. #mkr #mkr2015,things that AREN'T jokes - rape - sexism - racism - suicide - self harm - Mariah Carey hate - gay as an insult - mental illness,"Haha did you watch big brother?, “Zankie” was disgusting and literally a straight guy and gay guy “playing it up”. Also frankie made rape jokes and literally said being lesbian is a choice unlike being gay cuz he’s a mysoginistic ass I am like ???",RT that a female referee? Not sexist but... They are only for camps right? This will not be happening in the regu…,men when they think saying any slur making racist jokes “retard” “gay” any rape joke think they so tough. also men when we say they are not funny,&#128514;&#128514;&#128514;&#128514;&#128514;&#128514;&#128514;&#128514;&#128514;&#128514;&#128514; Teanna Trump probably cleaner than most of these twitter hoes but.........,"Long ago, I asked some white gay men on our lunch break, advice on where they thought the safest city for me to relocate and live would be. They said to me, ""West Hollywood girlfriend. No one wants to rape you there."" This is not a joke. They meant it. And it makes perfect sense.",Just a joke. I thought gay rape was still cool around here.,"The missus loves #MKR if I have to watch five more minutes of these stuck up, idiotic, 2 bit hookers I'll neck myself. #goingtobed",#BlameOneNotAll is not because I want to be congratulated by not being a rapist. It's because I don't wanna be categorized with stupid men.,"I read the Quran, bimbo follower, it's a violent, barbaric, murdering, sexist, stupid, genocidal pile of steaming shit.","TW rape Ray Badrans joke was gay ppl can make jokes about being gay, black ppl can make jokes about being black, I can make jokes about rape","I see a lot of jokes, fan fics and fan art I find objectionable. From depictions of rape to erasure of gay characters. It fucking sucks, yes. But the accurate response when this is not part of a larger media effect is to unfollow or block the creator and move on.",RT These seems extremely unlikely. 16% of the entire female population are rape victims?,"actually said was death threats, misgendering, and making fun of her appearance/making dysphoric comments, and even threatening rape, that response is the exact reason why she is allowed to joke about something like that. if gay men didn’t go out of their way to ruin trans -","Oh Kat,KARMA bit you in the ASS! #MKR",zzzz Call me sexist but I'm yet to see a funny female comedian,And here we are getting the raw end of the whole “gay men are predatory and turn straight men” stereotype all three of them threw out. Now straight guys think they have a free pass to make “James Charles tried to rape me” jokes. If it’s a stunt all are cancelled for real,RT I'm not sexist but I haven't subscribed to a single female YouTuber,how does milo have any good points? when he denies rape culture? when he bashes gay people? when he makes racist jokes?
RT Sucks to have the smile wiped off your own face huh Kat? She in a glass house should not throw stones. #mkr #mkr2015,0.999999,0.205691,0.140987,0.151168,0.141982,0.361904,0.129284,0.145812,0.370774,0.226263,-0.005983,0.25603,0.125964,0.153446,0.220679,0.562976,0.140332,0.124457,0.190745,0.196664
things that AREN'T jokes - rape - sexism - racism - suicide - self harm - Mariah Carey hate - gay as an insult - mental illness,0.205691,1.0,0.243075,0.01603,0.477464,0.272746,0.221139,0.292196,0.144978,0.338244,0.129823,0.488913,0.452543,0.149282,0.431386,0.115483,0.397525,0.322244,0.23283,0.390926
"Haha did you watch big brother?, “Zankie” was disgusting and literally a straight guy and gay guy “playing it up”. Also frankie made rape jokes and literally said being lesbian is a choice unlike being gay cuz he’s a mysoginistic ass I am like ???",0.140987,0.243075,1.0,0.175584,0.353945,0.158215,0.230977,0.399045,0.173544,0.208137,0.219628,0.421573,0.429749,0.093756,0.414873,0.110244,0.374538,0.503427,0.301626,0.323523
RT that a female referee? Not sexist but... They are only for camps right? This will not be happening in the regu…,0.151168,0.01603,0.175584,1.0,0.248039,0.151605,0.11387,0.175907,0.216007,0.112597,-0.018049,0.17324,0.164181,0.233163,0.315517,0.077266,0.337976,0.262868,0.377471,0.098082
men when they think saying any slur making racist jokes “retard” “gay” any rape joke think they so tough. also men when we say they are not funny,0.141982,0.477464,0.353945,0.248039,1.0,0.128646,0.357578,0.38581,0.163256,0.327622,0.112135,0.595768,0.345945,0.183496,0.460283,0.115305,0.41531,0.410155,0.299528,0.411821
&#128514;&#128514;&#128514;&#128514;&#128514;&#128514;&#128514;&#128514;&#128514;&#128514;&#128514; Teanna Trump probably cleaner than most of these twitter hoes but.........,0.361904,0.272746,0.158215,0.151605,0.128646,1.0,0.135206,0.146282,0.27884,0.283124,0.16592,0.209693,0.212253,0.234111,0.247568,0.179166,0.178268,0.256978,0.240491,0.265575
"Long ago, I asked some white gay men on our lunch break, advice on where they thought the safest city for me to relocate and live would be. They said to me, ""West Hollywood girlfriend. No one wants to rape you there."" This is not a joke. They meant it. And it makes perfect sense.",0.129284,0.221139,0.230977,0.11387,0.357578,0.135206,1.0,0.581154,0.131145,0.257066,0.095401,0.440945,0.282768,0.333254,0.38731,0.122667,0.253639,0.389399,0.189946,0.318189
Just a joke. I thought gay rape was still cool around here.,0.145812,0.292196,0.399045,0.175907,0.38581,0.146282,0.581154,1.0,0.140015,0.313697,0.201362,0.55806,0.388298,0.397074,0.401886,0.179132,0.246896,0.393419,0.232868,0.333177
"The missus loves #MKR if I have to watch five more minutes of these stuck up, idiotic, 2 bit hookers I'll neck myself. #goingtobed",0.370774,0.144978,0.173544,0.216007,0.163256,0.27884,0.131145,0.140015,1.0,0.284166,0.076877,0.212802,0.142149,0.17689,0.107822,0.437425,0.251534,0.332202,0.341865,0.165546
#BlameOneNotAll is not because I want to be congratulated by not being a rapist. It's because I don't wanna be categorized with stupid men.,0.226263,0.338244,0.208137,0.112597,0.327622,0.283124,0.257066,0.313697,0.284166,1.0,0.133369,0.325399,0.210674,0.411565,0.215061,0.242996,0.281104,0.300934,0.293976,0.305688


### SBERT and cosine similarities scaled up 