In [1]:
from transformers import AutoTokenizer, AutoModel, TFAutoModel, AutoModelForSequenceClassification, TFAutoModelForSequenceClassification, BertModel, BertForSequenceClassification, BertTokenizer
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Flatten, Concatenate, GlobalAveragePooling1D
import torch
from sentence_transformers import SentenceTransformer, util
from sklearn.metrics.pairwise import cosine_similarity
from torchsummary import summary
import tensorflow as tf
import datasets
import numpy as np
import pandas as pd
from scipy.special import softmax
from sklearn import metrics
from sklearn.utils.class_weight import compute_class_weight
from time import time
import gc

In [2]:
class Sample:
    """Attribute bag for one example.

    The notebook attaches a `factual` and a `cf_off` attribute (both
    `Factual_info` objects) to each instance after construction.
    """

class Factual_info:
    """Scores collected for a single text.

    Every field except `text` starts as a zero placeholder and is meant to be
    overwritten once the corresponding model output is available.
    """

    def __init__(self,text):
        self.text = text
        # Three sentiment scores, printed as negative / neutral / positive.
        self.sent = [0,0,0]
        # Sentiment prediction, offense score and offense prediction.
        self.sent_pred = self.off = self.off_pred = 0
        # Hate score and prediction for each classifier (PEACE, general, HateBERT).
        self.hate_peace = self.peace_pred = 0
        self.hate_gen = self.gen_pred = 0
        self.hate_hateBERT = self.hateBERT_pred = 0
        # Semantic embedding of the text.
        self.sem_emb = 0

    def print_info(self):
        """Print the text, then its sentiment, offense and hate scores."""
        print("Text:")
        print(self.text)
        print()
        print("Sentiment")
        for idx, label in enumerate(("Negative:", "Neutral:", "Positive:")):
            print(label, self.sent[idx])
        # Each remaining score is preceded by a blank line.
        for label, value in (
            ("Offense:", self.off),
            ("Hate of PEACE:", self.hate_peace),
            ("Hate of the general module:", self.hate_gen),
            ("Hate of HateBERT:", self.hate_hateBERT),
        ):
            print()
            print(label, value)
        
class Metrics:
    """Evaluation metrics for one factual/counterfactual pair (all start at 0)."""

    def __init__(self):
        # Distances between factual and counterfactual scores.
        self.dist_sent = self.dist_off = 0
        # 0/1 indicators.
        self.same_sent = self.distinct_off = self.off_improvement = 0
        # Semantic similarity between the two texts.
        self.sem_sim = 0

    def print_metrics(self):
        """Print every metric, grouped by blank lines."""
        print("Metrics")
        groups = (
            (("Sentiment distance:", self.dist_sent), ("Offense distance:", self.dist_off)),
            (
                ("Same Sentiment:", self.same_sent),
                ("Distinct Offense:", self.distinct_off),
                ("Offense improvement:", self.off_improvement),
            ),
            (("Sentence similarity:", self.sem_sim),),
        )
        for position, group in enumerate(groups):
            if position:
                print()
            for label, value in group:
                print(label, value)
        

# Causal modules

In [3]:
#roBERTa models
# Checkpoint names of the sentiment and offensive-language classifiers.
sent_MOD = "cardiffnlp/twitter-roberta-base-sentiment-latest"
aggr_MOD = "cardiffnlp/twitter-roberta-base-offensive"

# The tokenizer is taken from the sentiment checkpoint and reused for both models.
tokenizer = AutoTokenizer.from_pretrained(sent_MOD)
sent_class_mod, aggr_class_mod = (
    AutoModelForSequenceClassification.from_pretrained(checkpoint)
    for checkpoint in (sent_MOD, aggr_MOD)
)

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [4]:
def Compute_Caus_Outs(tokenizer,sent_mod,aggr_mod,texts,batch_size):
  """Run the sentiment and offense classifiers over `texts` in batches.

  Args:
      tokenizer: callable invoked as `tokenizer(batch, padding=True, return_tensors='pt')`;
          its result is unpacked as keyword arguments into both models.
      sent_mod: sentiment classifier; only the first element of its output is used.
      aggr_mod: offense classifier; only the first element of its output is used.
      texts: sequence of input strings (any iterable; it is copied into a list).
      batch_size: number of texts sent through the models at once.

  Returns:
      `(outs_sent, outs_aggr)`: two numpy arrays with one row per text, each row
      being the softmax of that text's model output.
      # assumes the first model output is a (batch, n_labels) logits tensor - TODO confirm

  Raises:
      ValueError: if `texts` is empty.
  """
  texts = list(texts)
  if not texts:
      raise ValueError("Compute_Caus_Outs needs at least one text to classify")

  sent_scores = []
  aggr_scores = []
  # Inference only: without no_grad every forward pass would also record an
  # autograd graph that is never used.
  with torch.no_grad():
      for start in range(0, len(texts), batch_size):
          batch = texts[start:start + batch_size]
          encoded_inputs = tokenizer(batch,padding=True,return_tensors='pt')

          # Softmax over the label axis turns each row of logits into probabilities.
          sent_logits = sent_mod(**encoded_inputs)[0]
          sent_scores.append(softmax(sent_logits.detach().numpy(), axis=-1))

          aggr_logits = aggr_mod(**encoded_inputs)[0]
          aggr_scores.append(softmax(aggr_logits.detach().numpy(), axis=-1))

          # Drop the per-batch tensors before the next batch is tokenized.
          del encoded_inputs, sent_logits, aggr_logits
          gc.collect()

  outs_sent = np.concatenate(sent_scores,axis=0)
  outs_aggr = np.concatenate(aggr_scores,axis=0)

  return outs_sent,outs_aggr

## Loading the factuals

In [5]:
def get_texts(df):
    """Return the values of the "text" column of `df` as a plain Python list."""
    return list(df["text"].to_numpy())

In [6]:
# Read the factual texts and their labels from a tab-separated file.
df_factuals = pd.read_csv('data/factuals.tsv',sep='\t')
# Plain list of the "text" column; the labels stay a numpy array.
factuals = get_texts(df_factuals)
labels_factuals = df_factuals["label"].to_numpy()

In [7]:
# One Sample per factual text, each carrying a fresh Factual_info for that text.
samples = [Sample() for _ in factuals]
for i, text in enumerate(factuals):
    samples[i].factual = Factual_info(text)

## Loading the sentiment and offense of the factuals

In [8]:
# Previously saved classifier outputs for the factuals.
# class_* arrays are indexed per sample below (class_fact_off[i][1] is read as
# the offense score); factual_* arrays hold one prediction per sample.
class_fact_sent = np.load('outs/counterfactuals/class_fact_sent.npy')
class_fact_off = np.load('outs/counterfactuals/class_fact_off.npy')
factual_sent = np.load('outs/counterfactuals/factual_sent.npy')
factual_off = np.load('outs/counterfactuals/factual_off.npy')

In [9]:
# Copy the loaded sentiment/offense outputs onto each sample's factual record.
for i, sample in enumerate(samples):
    fact = sample.factual
    fact.sent = class_fact_sent[i]
    fact.sent_pred = factual_sent[i]
    # Index 1 of the offense output is stored as the offense score.
    fact.off = class_fact_off[i][1]
    fact.off_pred = factual_off[i]

## Loading the counterfactuals

In [10]:
cf_off_all = np.load('outs/counterfactuals/cf_off.npy')
# Keep only the first entry of each row, one per sample.
# presumably each row lists several candidate counterfactuals - verify against the file
cf_off = [cf_off_all[i][0] for i in range(len(samples))]

In [11]:
# Attach the offense counterfactual to every sample and prepare one empty
# Metrics record per sample.
for sample, cf_text in zip(samples, cf_off):
    sample.cf_off = Factual_info(cf_text)
metrics_cf_off = [Metrics() for _ in samples]

## Computing the counterfactual outputs (skip this section if they are already saved)

In [12]:
def print_causal_result(text,sent,aggr):
    """Print one text with its three sentiment scores and two aggression scores."""
    print("Text:")
    print(text)
    print()
    print("Sentiment")
    for idx, label in enumerate(("Negative:", "Neutral:", "Positive:")):
        print(label, sent[idx])
    print()
    print("Aggression")
    for idx, label in enumerate(("Not offensive:", "Offensive:")):
        print(label, aggr[idx])
    
def print_causal_results(texts,sents,aggrs):
    """Print every text with its scores, leaving two blank lines after each."""
    for idx in range(len(texts)):
        print_causal_result(texts[idx],sents[idx],aggrs[idx])
        print(end="\n\n")

In [13]:
batch_size = 32

# Class probabilities of the counterfactuals from both classifiers.
class_cf_sent,class_cf_off = Compute_Caus_Outs(tokenizer,sent_class_mod,aggr_class_mod,cf_off,batch_size)

# Predicted class of a sample = index of its largest probability.
cf_pred_sent = [class_cf_sent[i].argmax() for i in range(len(samples))]
cf_pred_off = [class_cf_off[i].argmax() for i in range(len(samples))]

In [14]:
# Persist the counterfactual probabilities and predicted classes so that the
# "Loading the counterfactuals output" section can be used on later runs.
np.save('outs/counterfactuals/class_cf_sent.npy',class_cf_sent)
np.save('outs/counterfactuals/class_cf_off.npy',class_cf_off)
np.save('outs/counterfactuals/cf_pred_sent.npy',cf_pred_sent)
np.save('outs/counterfactuals/cf_pred_off.npy',cf_pred_off)

## Loading the counterfactuals output

In [12]:
# Reload the counterfactual probabilities and predicted classes from disk.
class_cf_sent = np.load('outs/counterfactuals/class_cf_sent.npy')
class_cf_off = np.load('outs/counterfactuals/class_cf_off.npy')
cf_pred_sent = np.load('outs/counterfactuals/cf_pred_sent.npy')
cf_pred_off = np.load('outs/counterfactuals/cf_pred_off.npy')

In [13]:
# Copy the counterfactual sentiment/offense outputs onto each sample.
for i, sample in enumerate(samples):
    counterfactual = sample.cf_off
    counterfactual.sent = class_cf_sent[i]
    counterfactual.sent_pred = cf_pred_sent[i]
    # Index 1 of the offense output is stored as the offense score.
    counterfactual.off = class_cf_off[i][1]
    counterfactual.off_pred = cf_pred_off[i]

# Hate models

## Load train data labels

In [14]:
#FRENK dataset
# FRENK training split (tab-separated); only its "label" column is used here.
df_train = pd.read_csv('data/frenk_train.tsv',sep='\t')
label_train = df_train["label"].to_numpy()

In [15]:
# def get_hate_labels(labels):
#     hate_labels = np.zeros(len(labels))
#     for i in range(len(labels)):
#         if labels[i].any():
#             hate_labels[i] = 1
#     return hate_labels

In [16]:
# #Gab dataset
# df_train = pd.read_csv('data/ghc_train.tsv',sep='\t')
# labels = df_train[["hd","cv","vo"]].to_numpy()
# ghc_label_train = get_hate_labels(labels)
# ghc_label_train = ghc_label_train[:8404]

In [17]:
# #Twi-Red-You dataset
# df_train = pd.read_csv('data/try_train_8404.tsv',sep='\t')
# try_label_train = df_train['hate'].to_numpy()

In [18]:
# label_train = np.concatenate([frenk_label_train, ghc_label_train, try_label_train])

In [19]:
# "balanced" weights, one per distinct training label, re-keyed by class id
# (0 and 1) in the dict form passed to `fit(class_weight=...)` below.
c_weights = compute_class_weight(class_weight='balanced', classes=np.unique(label_train), y=label_train)
c_weights = {label: c_weights[label] for label in (0, 1)}

## Load train data embeddings

In [20]:
# Saved training-set outputs of the sentiment, aggression and general modules.
sent_train = np.load('outs/frenk/sent_train_outs.npy')
aggr_train = np.load('outs/frenk/aggr_train_outs.npy')
gen_train = np.load('outs/frenk/gen_train_outs.npy')

In [21]:
# ghc_sent_train = np.load('outs/ghc/sent_train_outs.npy')[:8404]
# ghc_aggr_train = np.load('outs/ghc/aggr_train_outs.npy')[:8404]
# ghc_gen_train = np.load('outs/ghc/gen_train_outs.npy')[:8404]

In [22]:
# try_sent_train = np.load('outs/try/sent_train_outs.npy')
# try_aggr_train = np.load('outs/try/aggr_train_outs.npy')
# try_gen_train = np.load('outs/try/gen_train_outs.npy')

In [23]:
# sent_train = np.concatenate([frenk_sent_train, ghc_sent_train, try_sent_train],axis=0)
# aggr_train = np.concatenate([frenk_aggr_train, ghc_aggr_train, try_aggr_train],axis=0)
# gen_train = np.concatenate([frenk_gen_train, ghc_gen_train, try_gen_train],axis=0)

In [24]:
# Join the three module outputs along axis 1 into one array (the PEACE training input).
conc_train = np.concatenate([sent_train,aggr_train,gen_train],axis=1)

## Peace modules

In [25]:
#roBERTa models
# Checkpoint names of the three RoBERTa encoders.
sent_MOD = "cardiffnlp/twitter-roberta-base-sentiment-latest"
aggr_MOD = "cardiffnlp/twitter-roberta-base-offensive"
gen_MOD = "roberta-base"

# The tokenizer comes from the sentiment checkpoint and is shared by all three
# TensorFlow encoders.
tokenizer = AutoTokenizer.from_pretrained(sent_MOD)
sent_mod, aggr_mod, gen_mod = (
    TFAutoModel.from_pretrained(checkpoint)
    for checkpoint in (sent_MOD, aggr_MOD, gen_MOD)
)

batch_size = 32

All model checkpoint layers were used when initializing TFRobertaModel.

All the layers of TFRobertaModel were initialized from the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaModel for predictions without further training.
Some layers from the model checkpoint at cardiffnlp/twitter-roberta-base-offensive were not used when initializing TFRobertaModel: ['classifier']
- This IS expected if you are initializing TFRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFRobertaModel

In [26]:
def Compute_Embeddings(tokenizer,sent_mod,aggr_mod,gen_mod,texts,batch_size,mask_padding=False):
  """Embed `texts` with three encoders and mean-pool each over the token axis.

  Args:
      tokenizer: callable invoked as `tokenizer(batch, padding=True, return_tensors='tf')`.
      sent_mod, aggr_mod, gen_mod: encoders called on the tokenizer output; only
          the first element of each output is pooled.
          # presumably the last hidden state, (batch, tokens, hidden) - TODO confirm
      texts: sequence of input strings (any iterable; it is copied into a list).
      batch_size: number of texts encoded at once.
      mask_padding: when False (default, the original behaviour) the average runs
          over every position of the padded batch, so a text's embedding depends
          on how much padding its batch needed. When True the tokenizer's
          "attention_mask" is passed to the pooling layer so padded positions are
          excluded. Keep one setting for all embeddings that are compared or
          fed to the same classifier.

  Returns:
      `(outs_1, outs_2, outs_3)`: numpy arrays with one pooled row per text for
      the sentiment, aggression and general encoder respectively.

  Raises:
      ValueError: if `texts` is empty.
  """
  texts = list(texts)
  if not texts:
      raise ValueError("Compute_Embeddings needs at least one text to embed")

  # The pooling layer holds no weights, so one instance serves every batch and encoder.
  pool = GlobalAveragePooling1D()
  modules = (sent_mod, aggr_mod, gen_mod)
  pooled = [[] for _ in modules]

  for start in range(0, len(texts), batch_size):
      batch = texts[start:start + batch_size]
      encoded_inputs = tokenizer(batch,padding=True,return_tensors='tf')

      pool_kwargs = {}
      if mask_padding:
          pool_kwargs["mask"] = tf.cast(encoded_inputs["attention_mask"], tf.bool)

      # Same order as the returned tuple: sentiment, aggression, general.
      for outs, mod in zip(pooled, modules):
          emb = mod(encoded_inputs)[0]
          outs.append(pool(emb, **pool_kwargs).numpy())

      # Release the per-batch tensors before tokenizing the next batch.
      encoded_inputs = None
      emb = None
      gc.collect()

  outs_1, outs_2, outs_3 = (np.concatenate(outs, axis=0) for outs in pooled)

  return outs_1,outs_2,outs_3

## Computing the embeddings of the factuals and counterfactuals (skip this section if they are already saved)

In [30]:
###Factuals
# Embed the factual texts with the three encoders and save each output array.
out_sent,out_aggr,out_gen = Compute_Embeddings(tokenizer,sent_mod,aggr_mod,gen_mod,factuals,batch_size)

np.save('outs/counterfactuals/sent_fact_outs.npy',out_sent)
np.save('outs/counterfactuals/aggr_fact_outs.npy',out_aggr)
np.save('outs/counterfactuals/gen_fact_outs.npy',out_gen)

In [31]:
###Counterfactuals
# Same as for the factuals, but for the offense counterfactuals.
# Note: out_sent/out_aggr/out_gen are overwritten here.
out_sent,out_aggr,out_gen = Compute_Embeddings(tokenizer,sent_mod,aggr_mod,gen_mod,cf_off,batch_size)

np.save('outs/counterfactuals/sent_cf_off_outs.npy',out_sent)
np.save('outs/counterfactuals/aggr_cf_off_outs.npy',out_aggr)
np.save('outs/counterfactuals/gen_cf_off_outs.npy',out_gen)

## Loading the embeddings of the factuals and counterfactuals

In [27]:
###Factuals
# Saved encoder outputs for the factual texts.
sent_fact_outs = np.load('outs/counterfactuals/sent_fact_outs.npy')
aggr_fact_outs = np.load('outs/counterfactuals/aggr_fact_outs.npy')
gen_fact_outs = np.load('outs/counterfactuals/gen_fact_outs.npy')

In [28]:
###Counterfactuals
# Saved encoder outputs for the offense counterfactuals.
sent_cf_off_outs = np.load('outs/counterfactuals/sent_cf_off_outs.npy')
aggr_cf_off_outs = np.load('outs/counterfactuals/aggr_cf_off_outs.npy')
gen_cf_off_outs = np.load('outs/counterfactuals/gen_cf_off_outs.npy')

## Getting the output of the Hate models

In [29]:
def print_hate_result(text,hate):
    """Print a text followed by the first element of `hate` as its hate probability."""
    print("Text:", text, sep="\n")
    print("Hate prob.:", hate[0])
    
def print_hate_results(texts,hates):
    """Print every text with its hate probability, leaving two blank lines after each."""
    for idx in range(len(texts)):
        print_hate_result(texts[idx],hates[idx])
        print(end="\n\n")

In [30]:
# Join the three encoder outputs along axis 1, in the same order used for
# conc_train (sentiment, aggression, general).
conc_fact = np.concatenate([sent_fact_outs,aggr_fact_outs,gen_fact_outs],axis=1)
conc_cf_off = np.concatenate([sent_cf_off_outs,aggr_cf_off_outs,gen_cf_off_outs],axis=1)

In [31]:
# Number of independently trained classifiers whose predictions are averaged
# in each of the training cells below.
n_avg = 1

# The classifiers are initialised and trained stochastically; without a fixed
# seed every run of the notebook reports different hate scores.
# set_random_seed seeds Python's `random`, NumPy and TensorFlow in one call.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

### PEACE model

In [32]:
def evaluate_texts_PEACE(texts,model,tokenizer,sent_mod,aggr_mod,gen_mod,batch_size):
    """Embed `texts` with the three encoders, join the embeddings along axis 1
    and return `model.predict` on the result."""
    sent_emb, aggr_emb, gen_emb = Compute_Embeddings(tokenizer,sent_mod,aggr_mod,gen_mod,texts,batch_size)
    features = np.concatenate([sent_emb, aggr_emb, gen_emb],axis=1)
    return model.predict(features)

In [33]:
# Train `n_avg` classifiers on the concatenated embeddings and average their
# predictions for the factuals and the offense counterfactuals.
f_hate = []
cf_off_hate = []
for i in range(n_avg):
    hate_model = Sequential()
    hate_model.add(Flatten())
    hate_model.add(Dense(128, activation='relu'))
    hate_model.add(Dense(128, activation='relu'))
    hate_model.add(Dense(1, activation='sigmoid'))
    hate_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    hate_model.fit(conc_train, label_train, epochs=5, batch_size=32, class_weight=c_weights)
    f_hate.append(hate_model.predict(conc_fact))
    cf_off_hate.append(hate_model.predict(conc_cf_off))
# Average over the runs (axis 0 indexes the run).
f_hate = np.mean(f_hate,axis=0)
cf_off_hate = np.mean(cf_off_hate,axis=0)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [34]:
# Quick look at the averaged PEACE predictions for the factuals.
print(f_hate)

[[2.8862929e-04]
 [4.8870242e-01]
 [4.1833842e-01]
 ...
 [1.7222290e-01]
 [7.7106154e-01]
 [4.4625197e-02]]


In [35]:
# Store the PEACE score of each text and a 0/1 prediction (1 when score > 0.5).
for i, sample in enumerate(samples):
    fact_prob = f_hate[i][0]
    cf_prob = cf_off_hate[i][0]
    sample.factual.hate_peace = fact_prob
    sample.factual.peace_pred = 1 if fact_prob > 0.5 else 0
    sample.cf_off.hate_peace = cf_prob
    sample.cf_off.peace_pred = 1 if cf_prob > 0.5 else 0
#print_hate_results(factuals,f_hate)

### General model

In [36]:
def evaluate_texts_gen(texts,model,tokenizer,sent_mod,aggr_mod,gen_mod,batch_size):
    """Embed `texts` and return `model.predict` on the general-encoder embeddings only."""
    _sent_emb, _aggr_emb, gen_emb = Compute_Embeddings(tokenizer,sent_mod,aggr_mod,gen_mod,texts,batch_size)
    return model.predict(gen_emb)

In [37]:
# Train `n_avg` classifiers on the general-encoder embeddings only and average
# their predictions. Overwrites f_hate / cf_off_hate / hate_model.
f_hate = []
cf_off_hate = []
for i in range(n_avg):
    hate_model = Sequential()
    hate_model.add(Flatten())
    hate_model.add(Dense(128, activation='relu'))
    hate_model.add(Dense(128, activation='relu'))
    hate_model.add(Dense(1, activation='sigmoid'))
    hate_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    hate_model.fit(gen_train, label_train, epochs=5, batch_size=32, class_weight=c_weights)
    f_hate.append(hate_model.predict(gen_fact_outs))
    cf_off_hate.append(hate_model.predict(gen_cf_off_outs))
# Average over the runs (axis 0 indexes the run).
f_hate = np.mean(f_hate,axis=0)
cf_off_hate = np.mean(cf_off_hate,axis=0)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [38]:
# Store the general-model score of each text and a 0/1 prediction (1 when score > 0.5).
for i, sample in enumerate(samples):
    fact_prob = f_hate[i][0]
    cf_prob = cf_off_hate[i][0]
    sample.factual.hate_gen = fact_prob
    sample.factual.gen_pred = 1 if fact_prob > 0.5 else 0
    sample.cf_off.hate_gen = cf_prob
    sample.cf_off.gen_pred = 1 if cf_prob > 0.5 else 0

### HateBERT

In [39]:
# Saved HateBERT embeddings of the FRENK training split.
train_emb = np.load('outs/frenk/HateBERT_train_emb.npy')
# Disabled: combining FRENK with the GHC and Twi-Red-You training embeddings.
#ghc_train_emb = np.load('outs/ghc/HateBERT_train_emb.npy')[:8404]
#try_train_emb = np.load('outs/try/HateBERT_train_emb.npy')
#train_emb = np.concatenate([frenk_train_emb,ghc_train_emb,try_train_emb],axis=0)

In [40]:
# Local directory holding the HateBERT weights and tokenizer files.
model_dir = "HateBERT_hateval"
HateBERT = BertModel.from_pretrained(
    model_dir # Use pre-trained model from its directory, change this to use a pre-trained model from bert
)
# Switch to evaluation mode; the model is only used for inference here.
HateBERT.eval()
HateBERT_tok = BertTokenizer.from_pretrained(model_dir)

In [41]:
def Comp_Embed_HateBERT(tokenizer,mod,texts,batch_size,mask_padding=False):
  """Embed `texts` with `mod` and mean-pool the result over the token axis.

  Args:
      tokenizer: callable invoked as `tokenizer(batch, padding=True, return_tensors='pt')`;
          its result is unpacked as keyword arguments into `mod`.
      mod: PyTorch model; only the first element of its output is pooled.
          # presumably the last hidden state, (batch, tokens, hidden) - TODO confirm
      texts: sequence of input strings (any iterable; it is copied into a list).
      batch_size: number of texts encoded at once.
      mask_padding: when False (default, the original behaviour) the average runs
          over every position of the padded batch, padding included. When True
          the tokenizer's "attention_mask" is used to average real tokens only.
          Keep one setting for all embeddings fed to the same classifier.

  Returns:
      numpy array with one pooled row per text.

  Raises:
      ValueError: if `texts` is empty.
  """
  texts = list(texts)
  if not texts:
      raise ValueError("Comp_Embed_HateBERT needs at least one text to embed")

  batch_out = []
  with torch.no_grad():
      for start in range(0, len(texts), batch_size):
          batch = texts[start:start + batch_size]
          encoded_inputs = tokenizer(batch,padding=True,return_tensors='pt')
          emb = mod(**encoded_inputs)[0]

          # Pool directly in torch; a plain mean over axis 1 is what an unmasked
          # Keras GlobalAveragePooling1D computes, without the TensorFlow round trip.
          if mask_padding:
              weights = encoded_inputs["attention_mask"].unsqueeze(-1).to(emb.dtype)
              # clamp guards against a division by zero for an all-padding row.
              pool = (emb * weights).sum(dim=1) / weights.sum(dim=1).clamp(min=1)
          else:
              pool = emb.mean(dim=1)
          batch_out.append(pool.numpy())

          # Release the per-batch tensors before tokenizing the next batch.
          del encoded_inputs, emb, pool
          gc.collect()

  outs = np.concatenate(batch_out,axis=0)
  return outs

#### Computing the embeddings of the factuals and counterfactuals (skip this section if they are already saved)

In [42]:
# Embed the factual texts with HateBERT and save the result.
fact_HateBERT = Comp_Embed_HateBERT(HateBERT_tok,HateBERT,factuals,batch_size)
np.save('outs/counterfactuals/fact_HateBERT.npy',fact_HateBERT)

In [43]:
# Embed the offense counterfactuals with HateBERT and save the result.
cf_off_HateBERT = Comp_Embed_HateBERT(HateBERT_tok,HateBERT,cf_off,batch_size)
np.save('outs/counterfactuals/cf_off_HateBERT.npy',cf_off_HateBERT)

#### Loading the embeddings of the factuals and counterfactuals

In [42]:
# Reload the saved HateBERT embeddings of the factuals and counterfactuals.
fact_HateBERT = np.load('outs/counterfactuals/fact_HateBERT.npy')
cf_off_HateBERT = np.load('outs/counterfactuals/cf_off_HateBERT.npy')

#### Getting the output of the HateBERT model

In [43]:
def evaluate_texts_HateBERT(texts,model,tokenizer,HateBERT,batch_size):
    """Embed `texts` with HateBERT and return `model.predict` on the embeddings."""
    embeddings = Comp_Embed_HateBERT(tokenizer,HateBERT,texts,batch_size)
    return model.predict(embeddings)

In [44]:
# Train `n_avg` classifiers on the HateBERT embeddings and average their
# predictions. Overwrites f_hate / cf_off_hate.
f_hate = []
cf_off_hate = []
for i in range(n_avg):
    classifier_HateBERT = Sequential()
    classifier_HateBERT.add(Flatten())
    classifier_HateBERT.add(Dense(128, activation='relu'))
    classifier_HateBERT.add(Dense(128, activation='relu'))
    classifier_HateBERT.add(Dense(1, activation='sigmoid'))
    classifier_HateBERT.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    classifier_HateBERT.fit(train_emb, label_train, epochs=5, batch_size=32, class_weight=c_weights)
    f_hate.append(classifier_HateBERT.predict(fact_HateBERT))
    cf_off_hate.append(classifier_HateBERT.predict(cf_off_HateBERT))
# Average over the runs (axis 0 indexes the run).
f_hate = np.mean(f_hate,axis=0)
cf_off_hate = np.mean(cf_off_hate,axis=0)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [45]:
# Store the HateBERT score of each text and a 0/1 prediction (1 when score > 0.5).
for i, sample in enumerate(samples):
    fact_prob = f_hate[i][0]
    cf_prob = cf_off_hate[i][0]
    sample.factual.hate_hateBERT = fact_prob
    sample.factual.hateBERT_pred = 1 if fact_prob > 0.5 else 0
    sample.cf_off.hate_hateBERT = cf_prob
    sample.cf_off.hateBERT_pred = 1 if cf_prob > 0.5 else 0

# Semantic embeddings

In [46]:
# Sentence encoder used for the semantic-similarity metric.
sem_trans = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

## Factuals

In [47]:
# Sentence embedding of every factual text, stored on its sample.
sem_emb = sem_trans.encode(factuals)
for sample, embedding in zip(samples, sem_emb):
    sample.factual.sem_emb = embedding

## Offense counterfactuals

In [48]:
# Sentence embedding of every offense counterfactual, stored on its sample.
sem_emb = sem_trans.encode(cf_off)
for sample, embedding in zip(samples, sem_emb):
    sample.cf_off.sem_emb = embedding

# Evaluation

In [49]:
def cf_off_dist_sent(sample):
    """Euclidean distance between the factual and counterfactual sentiment scores.

    Both `sample.factual.sent` and `sample.cf_off.sent` are coerced to arrays
    first, so the plain-list default `[0,0,0]` of `Factual_info` works as well
    as numpy arrays (subtracting two lists would raise TypeError).
    """
    factual_sent = np.asarray(sample.factual.sent)
    counterfactual_sent = np.asarray(sample.cf_off.sent)
    return np.linalg.norm(factual_sent - counterfactual_sent)

In [50]:
def cf_off_dist_off(sample,factual_off):
    """Signed change of the offense score in the intended direction.

    For a truthy `factual_off` the result is factual minus counterfactual
    score; otherwise it is counterfactual minus factual score.
    """
    before = sample.factual.off
    after = sample.cf_off.off
    return (before - after) if factual_off else (after - before)

In [51]:
def cf_off_same_sent(sample):
    """Return 1 when factual and counterfactual share the sentiment prediction, else 0."""
    if sample.factual.sent_pred == sample.cf_off.sent_pred:
        return 1
    return 0

In [52]:
def cf_off_distinct_off(sample,factual_off):
    """Return 1 when the counterfactual's offense prediction is the opposite class.

    The expected class is 0 for a truthy `factual_off` and 1 otherwise.
    """
    expected = 0 if factual_off else 1
    if sample.cf_off.off_pred == expected:
        return 1
    return 0

In [53]:
def cf_off_off_improvement(sample,factual_off):
    """Return 1 when the offense score moved strictly in the intended direction.

    For a truthy `factual_off` the counterfactual score must be lower than the
    factual one; otherwise it must be higher.
    """
    before = sample.factual.off
    after = sample.cf_off.off
    if factual_off:
        improved = before > after
    else:
        improved = after > before
    return 1 if improved else 0

In [54]:
def sem_similarity(emb1,emb2):
    """Cosine similarity between two embedding vectors, returned as a scalar."""
    # cosine_similarity works on matrices, so each vector is wrapped as a
    # one-row batch and the single entry of the result is extracted.
    similarity_matrix = cosine_similarity([emb1],[emb2])
    return similarity_matrix[0][0]

In [55]:
# Fill the Metrics record of every sample.
for i, (sample, sample_metrics) in enumerate(zip(samples, metrics_cf_off)):
    was_offensive = factual_off[i]
    sample_metrics.dist_sent = cf_off_dist_sent(sample)
    sample_metrics.dist_off = cf_off_dist_off(sample,was_offensive)
    sample_metrics.same_sent = cf_off_same_sent(sample)
    sample_metrics.distinct_off = cf_off_distinct_off(sample,was_offensive)
    sample_metrics.off_improvement = cf_off_off_improvement(sample,was_offensive)
    sample_metrics.sem_sim = sem_similarity(sample.factual.sem_emb,sample.cf_off.sem_emb)

In [56]:
# for i in [0,2]:
#     print("Factual")
#     samples[i].factual.print_info()
#     print()
#     print()
#     print("Offense Counterfactual")
#     samples[i].cf_off.print_info()
#     print()
#     metrics_cf_off[i].print_metrics()
#     print()
#     print()
#     print()

In [57]:
# Gather each metric across all samples into its own list.
dists_off = [metrics_cf_off[i].dist_off for i in range(len(samples))]
dists_sent = [metrics_cf_off[i].dist_sent for i in range(len(samples))]
same_sents = [metrics_cf_off[i].same_sent for i in range(len(samples))]
distinct_offs = [metrics_cf_off[i].distinct_off for i in range(len(samples))]
off_imprs = [metrics_cf_off[i].off_improvement for i in range(len(samples))]
sem_sims = [metrics_cf_off[i].sem_sim for i in range(len(samples))]

In [58]:
# Summary: means for the continuous metrics, fraction of samples for the 0/1 ones.
n_samples = len(samples)
summary = (
    ("Average of offense distances:", np.mean(dists_off)),
    ("Average of sentiment distances:", np.mean(dists_sent)),
    ("Accuracy of same sentiment:", np.sum(same_sents)/n_samples),
    ("Accuracy of distinct offense:", np.sum(distinct_offs)/n_samples),
    ("Accuracy of offense improvement:", np.sum(off_imprs)/n_samples),
    ("Average of semantic similarities:", np.mean(sem_sims)),
)
for label, value in summary:
    print(label, value)

Average of offense distances: 0.38017428
Average of sentiment distances: 0.39860657
Accuracy of same sentiment: 0.6491845219059802
Accuracy of distinct offense: 0.6929964822513591
Accuracy of offense improvement: 0.9414774544291653
Average of semantic similarities: 0.7124349
