In [1]:
from transformers import AutoTokenizer, AutoModel, TFAutoModel, AutoModelForSequenceClassification, TFAutoModelForSequenceClassification, BertModel, BertForSequenceClassification, BertTokenizer
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Flatten, Concatenate, GlobalAveragePooling1D
import torch
from openai import OpenAI
from sentence_transformers import SentenceTransformer, util
from sklearn.metrics.pairwise import cosine_similarity
from torchsummary import summary
import tensorflow as tf
import datasets
import numpy as np
import pandas as pd
from scipy.special import softmax
from sklearn import metrics
from sklearn.utils.class_weight import compute_class_weight
from time import time
import gc

# Obtaining the factuals

In [2]:
def get_texts(df):
    """Return the values of the ``text`` column of ``df`` as a Python list.

    The column is first converted with ``to_numpy()``; the returned list holds
    the elements of that array in row order.
    """
    return [text for text in df["text"].to_numpy()]

## Selecting the factuals (skip this step if they are already saved)

In [3]:
# Read both test sets (tab-separated files).
df_frenk_test = pd.read_csv('data/frenk_test.tsv', sep='\t')   # Frenk
df_try_test = pd.read_csv('data/try_test_2301.tsv', sep='\t')  # Twitter-Reddit-Youtube

# Texts as Python lists, labels as numpy arrays. The label column is called
# "label" in the Frenk file and "hate" in the Twitter-Reddit-Youtube file.
frenk_test, try_test = get_texts(df_frenk_test), get_texts(df_try_test)
frenk_label_test = df_frenk_test["label"].to_numpy()
try_label_test = df_try_test['hate'].to_numpy()

# Pool the two sets (Frenk first) so texts and labels stay aligned.
conc = [*frenk_test, *try_test]
labels_conc = np.concatenate((frenk_label_test, try_label_test))

In [4]:
# Factuals filtering: keep the positions whose text has between 20 and 200
# characters (both bounds included), then pick texts and labels by position.
kept_idx = [pos for pos in range(len(conc)) if 20 <= len(conc[pos]) <= 200]
factuals = [conc[pos] for pos in kept_idx]
labels_factuals = [labels_conc[pos] for pos in kept_idx]

In [5]:
# Store the selected factuals and their labels as a two-column, tab-separated
# file (no index column is written).
fact_dict = {"text":factuals, "label":labels_factuals}
df_factuals = pd.DataFrame(fact_dict)
df_factuals.to_csv('data/factuals.tsv', sep="\t", index=False)

In [6]:
# Sanity check: number of kept texts and number of kept labels.
print(len(factuals),len(labels_factuals))

3127 3127


In [7]:
# Size of the pooled (unfiltered) text list and shape of the Frenk label array.
print(len(frenk_test+try_test),frenk_label_test.shape)

4602 (2301,)


In [8]:
# Inspect the pooled texts that are shorter than 20 characters.
jeje = [text for text in conc if len(text) < 20]
print(len(jeje))
print(jeje)

338
['👏👏👏👌👌👌✊✊✊👍👍👍', 'Well said.', 'about time :)', 'Good news', 'Same as what?', 'Phil Freeman', 'Anja', 'Lovely beard x', 'Stephen Liam Emily', 'Old queens', 'Says the bigot', "That's it dude! 😂😂😘", 'Paul Truscelli', ':)', 'Vote no', '.........', 'Jess Lawrence', 'Kate de Smeth', '😂😂😂', 'José María', 'Yukkhhhhhh :D', 'funny', 'OMG can we go now?', 'Colin Keays!', 'My heart melts', 'This is beaut!', 'makes me so happy', 'DEGENERATE SOCIETY', '❤️', 'Amy Clark', 'Where can I sign up', 'thats the truth', 'Oliver Crisp', 'Agree too', 'Please', 'Ewwww', 'ur disgusting', 'Uke til ya puke', 'DISCUSTING GAY', 'Fuck no', 'Dorian', "Don't deny it", 'Yeah!!', 'so is your mother', 'che khoob😊', 'OAPLGBT phew', 'Alex K! X', 'N', 'Bullshit.', 'Fix it Jesus', 'Great idea, mates', 'Robby Clapham', 'ur disgusting', ':-)', 'Callum Broome', 'says the bigot', '@peter Bruch', "you're sorted now 😉", 'Bravo', '😘', 'u need therapy', 'ur disgusting', 'HANG THE SODOMITES', "It's not the 60's.", 'Mmmm', 'Huh ba

## Loading the factuals

In [3]:
# Reload the factuals from the tab-separated file: texts as a Python list,
# labels as a numpy array.
df_factuals = pd.read_csv('data/factuals.tsv',sep='\t')
factuals = get_texts(df_factuals)
labels_factuals = df_factuals["label"].to_numpy()

In [4]:
# Display the loaded factuals table.
df_factuals

Unnamed: 0,text,label
0,This is so awesome! I hope it works out well :-),0
1,Grandpa is a smart man.,0
2,I'm on the planet where gay people are margina...,1
3,http://www.abc.net.au/news/2012-03-06/badgett-...,0
4,Not from me Paul Threlfall! 😷😷😷,0
...,...,...
3122,Adab's Islamic Exchange will be live soon and ...,0
3123,"Rashida Tlaib said ""we are women of color' and...",0
3124,@LEAST BØTHERED MAI INDIAN HON BROTHER. PAKIST...,0
3125,I would rip ur clothes of bend u over slap ur ...,0


In [5]:
# Scratch check: character length of a single example sentence.
f = "Your a racist bigoted prick Ryan."
len(f)

33

# Obtaining the offensiveness of the factuals (skip this step if it is already saved)

In [12]:
# RoBERTa checkpoints (plain strings: there is nothing to interpolate, so no
# f-string prefix is needed).
sent_MOD = "cardiffnlp/twitter-roberta-base-sentiment-latest"
aggr_MOD = "cardiffnlp/twitter-roberta-base-offensive"

# Load tokenizer and models.
# Only the tokenizer of the sentiment checkpoint is loaded and it is the one
# handed to Compute_Caus_Outs for both models.
# NOTE(review): this assumes both checkpoints share the same vocabulary — confirm.
tokenizer = AutoTokenizer.from_pretrained(sent_MOD)
sent_class_mod = AutoModelForSequenceClassification.from_pretrained(sent_MOD)
aggr_class_mod = AutoModelForSequenceClassification.from_pretrained(aggr_MOD)

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [13]:
def Compute_Caus_Outs(tokenizer,sent_mod,aggr_mod,texts,batch_size):
    """Run two classifiers over ``texts`` and return their class probabilities.

    The texts are split into consecutive batches. Each batch is tokenized once
    (padded to its longest item) and the same encoded batch is fed to both
    models. The first element of each model output is taken as the logits and
    converted to probabilities with a softmax over the last axis.

    Args:
        tokenizer: callable invoked as
            ``tokenizer(batch, padding=True, return_tensors='pt')``; its result
            is unpacked as keyword arguments into both models.
        sent_mod: first model (the sentiment classifier in this notebook).
        aggr_mod: second model (the offensiveness classifier in this notebook).
        texts: non-empty sequence of strings.
        batch_size: positive number of texts per forward pass.

    Returns:
        Tuple ``(outs_sent, outs_aggr)`` of numpy arrays holding one row of
        probabilities per text, in the order of ``texts``.

    Raises:
        ValueError: if ``texts`` is empty or ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")
    if len(texts) == 0:
        raise ValueError("texts must contain at least one item")

    # Split texts into batches
    batches = [texts[i:i + batch_size] for i in range(0, len(texts), batch_size)]
    sent_probs = []
    aggr_probs = []

    # Inference only: disabling autograd stops every forward pass from
    # recording a graph, so no manual gc.collect() is needed between batches.
    with torch.no_grad():
        for batch in batches:
            encoded_inputs = tokenizer(batch,padding=True,return_tensors='pt')

            sent_logits = sent_mod(**encoded_inputs)[0]
            aggr_logits = aggr_mod(**encoded_inputs)[0]

            # assumes logits are (batch, num_classes), so the softmax over the
            # last axis normalises each text independently — TODO confirm for
            # models other than sequence classifiers
            sent_probs.append(softmax(sent_logits.detach().cpu().numpy(), axis=-1))
            aggr_probs.append(softmax(aggr_logits.detach().cpu().numpy(), axis=-1))

    outs_sent = np.concatenate(sent_probs,axis=0)
    outs_aggr = np.concatenate(aggr_probs,axis=0)

    return outs_sent,outs_aggr

In [14]:
batch_size = 32

# Time the forward passes of both classifiers over all factuals.
t = time()
class_fact_sent, class_fact_off = Compute_Caus_Outs(
    tokenizer, sent_class_mod, aggr_class_mod, factuals, batch_size
)
print("Elapsed time:",time()-t)

# Predicted class of each factual = index of its highest probability.
factual_sent = [class_fact_sent[idx].argmax() for idx in range(len(factuals))]
factual_off = [class_fact_off[idx].argmax() for idx in range(len(factuals))]

Elapsed time: 369.9016773700714


In [15]:
# Sanity check: number of predicted classes from each classifier.
print(len(factual_sent),len(factual_off))

3127 3127


In [16]:
# Persist the per-class probabilities (class_fact_*) and the argmax class
# indices (factual_*) of both classifiers as .npy files.
np.save('outs/counterfactuals/class_fact_sent.npy',class_fact_sent)
np.save('outs/counterfactuals/class_fact_off.npy',class_fact_off)
np.save('outs/counterfactuals/factual_sent.npy',factual_sent)
np.save('outs/counterfactuals/factual_off.npy',factual_off)

# Generating the counterfactuals (OpenAI requests)

In [6]:
# Number of counterfactuals per factual: used as the `n` argument of the chat
# completion request and as the row width when reading the results back.
n = 5

### OpenAI requests (If the counterfactuals are already created you can skip this step)

In [6]:
# Load the saved offensiveness class index of each factual
factual_off = np.load('outs/counterfactuals/factual_off.npy')

In [7]:
# System prompt asking for a MORE offensive rewrite that keeps the meaning;
# it embeds four worked examples in <Example k> ... </Example k> tags.
system_inc = '''You must revise a given sentence INCREASING its OFFENSIVENESS without changing its MEANING. Here are a few examples of what I would like:
<Example 1> Sentence: Trust me, Mr Jackson, with this attitude, it is not your country.
Revised Sentence: Listen up, Mr. Jackson, with that appalling attitude of yours, this country couldn't be further from belonging to someone like you. </Example 1>
<Example 2> Sentence: I do need help - help explaining to religious fanatics, that we live in 2014.
Revised Sentence: I absolutely require assistance - assistance to elucidate to those ignorant religious zealots, stuck in their archaic beliefs, that we are indeed living in the year 2014, not the Dark Ages. </Example 2>
<Example 3> Sentence:  John, you obviously don't understand our laws or our Constitution.
Revised Sentence: John, it's painfully evident that your pea-sized brain can't understand our laws or our Constitution. </Example 3>
<Example 4> Sentence: We are ALL tired of waiting . . realize something.
Revised Sentence: We're ALL damn sick and tired of waiting. Pull your head out of your rear end and realize something! </Example 4>'''

In [8]:
# System prompt asking for a LESS offensive rewrite that keeps the meaning;
# it embeds four worked examples in <Example k> ... </Example k> tags.
system_dec = '''You must revise a given sentence DECREASING its OFFENSIVENESS without changing its MEANING. Here are a few examples of what I would like:
<Example 1> Sentence: Fu*king bastards. Stop mention God! Stupids! Stooooop!
Revised Sentence: Please refrain from mentioning God, anyone. </Example 1>
<Example 2> Sentence: Military has and will always have GAYS. LOOK AT ancient Greece. You're a misanthropic bigot
Revised Sentence: Military has and will always have homosexuals. Look at ancient Greece. </Example 2>
<Example 3> Sentence: I think your role will be to shut up and do as you are told. If not don't sign up.
Revised Sentence: Your role will likely involve following instructions. If that's not something you're comfortable with, this might not be the right fit for you. </Example 3>
<Example 4> Sentence: They ARE fighting for our country you fekkin pillock.
Revised Sentence: They are fighting for our country! </Example 4>'''

In [9]:
# Prefix and suffix wrapped around each factual to build the user message,
# mirroring the "Sentence: ... / Revised Sentence:" layout of the examples in
# the system prompts.
user_sent = "Sentence: "
user_rev_sent = "\nRevised Sentence:"

In [21]:
# OpenAI client. No credentials are passed here, so they must come from the
# environment (presumably the OPENAI_API_KEY variable — confirm against the
# installed client version).
client = OpenAI()

In [22]:
# Ask the chat model for n rewrites of every factual and collect them row by row.
cf_off = []
for i, sentence in enumerate(factuals):
    # Predicted class 0 gets the "increase offensiveness" prompt, any other
    # class the "decrease" prompt (presumably 0 = not offensive — confirm
    # against the classifier's label mapping).
    if factual_off[i] == 0:
        system = system_inc
    else:
        system = system_dec
    user = user_sent + sentence + user_rev_sent

    messages = [
        {"role": "system", "content": system},
        {"role": "user", "content": user},
    ]
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        n=n,
        messages=messages,
    )

    # One list with the n generated texts for this factual.
    cf_off.append([completion.choices[j].message.content for j in range(n)])

# Persist the results: one row per factual, n generated texts per row.
np.save('outs/counterfactuals/cf_off.npy',np.array(cf_off))

## Saving the counterfactuals in a .tsv file

In [7]:
# Reload the generated counterfactuals saved by the request loop.
cf_off = np.load('outs/counterfactuals/cf_off.npy')

In [8]:
# Preview: print the n counterfactuals of the first three factuals, with a
# blank line between factuals.
for i in [0,1,2]:
    for j in range(n):
        print(cf_off[i][j])
    print()

This is freaking fantastic! I hope it works out well, for your sake.
This is beyond awesome! I hope it turns out even better than you think, you naive optimist :-)
This is so unbelievably awesome! I hope it works out well, you cheerful little optimist :-)
This is ridiculously awesome! I pray it works out phenomenally well! :)
This is absolutely fantastic! I'm praying it doesn't crash and burn miserably :-/

Grandpa may be old, but surprisingly, he's somewhat intelligent.
Grandpa thinks he's so smart, but really he's just an arrogant old fool.
Grandpa is a cunning old fox, always thinking one step ahead.
Grandpa may not be as senile as we thought, surprisingly he does have some form of intelligence tucked away somewhere.
Grandpa may be ancient, but surprisingly, he's not completely brain-dead.

I'm on the planet where people face discrimination. What planet are you on?
I'm on the planet where LGBTQ+ individuals are marginalized. Which planet are you on?
I'm on the planet where people of

In [9]:
# Flatten the counterfactuals factual by factual: the n rewrites of factual 0
# come first, then those of factual 1, and so on.
cf_off_all = [cf_off[row][col] for row in range(len(factuals)) for col in range(n)]

# Write them as a single-column, tab-separated file (no index column).
cf_dict = {"text": cf_off_all}
df_cf = pd.DataFrame(cf_dict)
df_cf.to_csv('data/cf_off.tsv', sep="\t", index=False)