In [1]:
from transformers import pipeline

# Zero-shot classification pipeline backed by the BART-large MNLI checkpoint,
# placed on the 'mps' device.
classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
    device='mps',
)

# Hypothesis templates passed to the classifier as `hypothesis_template`:
# `hypothesis_nne` is the "acknowledges" wording, `hypothesis_ne` the
# "downplays, denies, or avoids" wording.
# NOTE(review): the zero-shot pipeline substitutes each candidate label
# (here "Yes"/"No"), not the input sentence, into the `{}` slot — confirm the
# templates read as intended.
hypothesis_nne = "The statement '{}' acknowledges"
hypothesis_ne = "The statement '{}' downplays, denies, or avoids"

# Candidate labels; targets[0] ("Yes") is the label whose score is read later.
targets = ["Yes", "No"]

#EZ-STANCE: A Large Dataset for English Zero-Shot Stance Detection

Device set to use mps


In [2]:
import pandas as pd
import ast
import numpy as np
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the per-sentence table. The columns used below are `comment`,
# `sentence` and `predictions`.
df = pd.read_csv("./data/UK_context0_llama.csv")

# Index labels of the rows whose (non-missing) `comment` contains "negative".
ddf = df.dropna(subset=["comment"])
negative_instances = ddf[ddf.comment.apply(lambda x: "negative" in x)].index.tolist()

idx = df.index.tolist()
sentences = df.sentence.tolist()

# Parse every `predictions` cell with ast.literal_eval (presumably a
# stringified list of per-label scores — TODO confirm). A missing cell (float
# NaN) falls back to 11 zeros, the same length as `labels_name`.
# The NaN test uses pd.isna instead of math.isnan: `math` is only imported in
# a later cell, so on a fresh kernel the first NaN row raised NameError here.
pred = [
    [0 for _ in range(11)]
    if isinstance(raw, float) and pd.isna(raw)
    else ast.literal_eval(str(raw))
    for raw in df.predictions.tolist()
]

In [3]:

def convert_predictions(predictions, threshold=0.5):
    """Binarise a sequence of scores.

    Args:
        predictions: iterable of values comparable with ``threshold``.
        threshold: cut-off; only values strictly greater than it map to 1.

    Returns:
        A list with one 0/1 entry per input value, in input order.
    """
    binary = []
    for value in predictions:
        if value > threshold:
            binary.append(1)
        else:
            binary.append(0)
    return binary
# One binarised 0/1 vector per entry of `pred`, using the default threshold.
labels = [convert_predictions(row_scores) for row_scores in pred]

In [4]:
# Identifier columns, later written to the results CSV: the `statement_id`
# column and whatever the first column of the input file is.
statement_id_list = df["statement_id"].tolist()
sentence_id_list = df.iloc[:, 0].tolist()


In [5]:
import math
# Build the ground-truth targets: for every row, emit one entry per active
# label (value 1) found from position 3 of its label vector onwards. The entry
# is 1 when the row's index is listed in `negative_instances`, otherwise 0.
# `st` collects the matching sentence for each emitted entry.
negative_index_set = set(negative_instances)  # O(1) lookups instead of a list scan per hit
task_future_labels = []
st = []
for sentence, data, i in zip(sentences, np.array(labels), idx):
    # Membership depends only on the row, so test it once per row.
    is_negative = 1 if i in negative_index_set else 0
    for lbs in data[3:]:
        if lbs == 1:
            task_future_labels.append(is_negative)
            st.append(sentence)

In [6]:
# Number of rows whose `comment` contains "negative", relative to the number
# of label vectors (one per row of the table).
n_negative, n_total = len(negative_instances), len(labels)
n_negative / n_total

0.006615598885793872

In [7]:
# Human-readable label names. The list has 11 entries, presumably one per
# position of a prediction vector (the NaN fallback used when parsing
# predictions also has 11 entries) — TODO confirm the ordering matches.
labels_name = [
    "approval",                # 0
    "signature",               # 1
    "c1 (reporting entity)",   # 2
    "c2 (structure)",          # 3
    "c2 (operations)",         # 4
    "c2 (supply chains)",      # 5
    "c3 (risk description)",   # 6
    "c4 (risk mitigation)",    # 7
    "c4 (remediation)",        # 8
    "c5 (effectiveness)",      # 9
    "c6 (consultation)",       # 10
]

In [8]:
import numpy as np
import tqdm

# Zero-shot scoring pass.
#
# For every row, each active label (value 1) from position LABEL_OFFSET of the
# row's label vector onwards produces one output record: the "Yes" score under
# each hypothesis template, the thresholded prediction, a normalised
# confidence, and the row's sentence / ids.

# First position of the label vector that is inspected.
LABEL_OFFSET = 3
# Minimum lead of the `hypothesis_ne` "Yes" score over the `hypothesis_nne`
# one for a record to be predicted as 1.
DECISION_MARGIN = 0.15

results = {key: [] for key in labels_name[LABEL_OFFSET:]}
predicted_results = []
neg_scores = []
pos_scores = []
# Per-label counters / sentence buckets (same 9 keys as before: entries 2..10).
ct = {name: 0 for name in labels_name[2:11]}
st_per_l = {name: [] for name in labels_name[2:11]}
score = []
st_list = []
new_prediction_list = []
sen_id_list = []
stat_id_list = []


def _yes_score(result):
    """Return the score the classifier output assigns to targets[0] ("Yes")."""
    if result['labels'][0] == targets[0]:
        return result['scores'][0]
    return result['scores'][1]


for sentence, data, i, si in zip(sentences, np.array(labels), sentence_id_list, statement_id_list):
    # The inner index is named `label_pos`, not `idx`: `idx` is a module-level
    # list built when the data is loaded, and reusing the name as a loop
    # variable overwrote it, which broke any later re-run of the cell that
    # zips over it.
    active_positions = [label_pos for label_pos, lbs in enumerate(data[LABEL_OFFSET:]) if lbs == 1]
    if not active_positions:
        continue

    # The classifier input does not depend on which label is active, so run
    # the two inferences once per sentence instead of once per active label.
    pos = _yes_score(classifier(sentence, targets, hypothesis_template=hypothesis_ne))
    neg = _yes_score(classifier(sentence, targets, hypothesis_template=hypothesis_nne))
    is_flagged = pos - neg > DECISION_MARGIN

    for label_pos in active_positions:
        # enumerate() restarts at 0 on the slice data[LABEL_OFFSET:], so the
        # name of the active label is at label_pos + LABEL_OFFSET. The former
        # `+ 2` attributed every hit to the preceding label name.
        # Assumes label vectors follow the order of `labels_name` — TODO confirm.
        label = labels_name[label_pos + LABEL_OFFSET]

        pos_scores.append(pos)
        neg_scores.append(neg)
        st_list.append(sentence)
        new_prediction_list.append(data)
        if is_flagged:
            predicted_results.append(1)
            ct[label] += 1
            st_per_l[label].append(sentence)
            score.append(pos / (pos + neg))
        else:
            predicted_results.append(0)
            score.append(neg / (pos + neg))
        sen_id_list.append(i)
        stat_id_list.append(si)

In [9]:
# Overall accuracy: number of positions where target and prediction agree,
# divided by the number of targets.
correct = sum(1 for t, p in zip(task_future_labels, predicted_results) if t == p)
print("accuracy is ", correct / len(task_future_labels))

accuracy is  0.9663191659983962


In [10]:
# Accuracy restricted to the records whose target is 0.
truth_arr = np.array(task_future_labels)
guess_arr = np.array(predicted_results)
pos = truth_arr == 0
correct = sum(truth_arr[pos] == guess_arr[pos])
print("Accuracy on not negative evident: ", correct / len(truth_arr[pos]))

Accuracy on not negative evident:  0.9700647249190939


In [11]:
# Accuracy restricted to the records whose target is 1.
truth_arr = np.array(task_future_labels)
guess_arr = np.array(predicted_results)
pos = truth_arr == 1
correct = sum(truth_arr[pos] == guess_arr[pos])
print("Accuracy on negative evident: ", correct / len(truth_arr[pos]))

Accuracy on negative evident:  0.5454545454545454


In [12]:
import numpy as np
from sklearn.metrics import f1_score

# F1 of predictions against targets under three averaging modes, printed in
# the order micro, weighted, macro.
for averaging in ('micro', 'weighted', 'macro'):
    print(f1_score(task_future_labels, predicted_results, average=averaging))

0.9663191659983962
0.9760778144413623
0.6025045537340619


In [13]:
# Write one row per scored record (ids, sentence, prediction, target and
# confidence) to CSV, without the DataFrame index.
output_frame = pd.DataFrame(
    {
        "sentence_id": sen_id_list,
        "statement_id": stat_id_list,
        "sentence": st_list,
        "prediction": predicted_results,
        "targets": task_future_labels,
        "confidence": score,
    }
)
output_frame.to_csv("./future_negative_evidence_results/negative_evidence_uk.csv", index=False)