# Zero-Shot Stance Detection

- Model: https://huggingface.co/MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli

In [1]:
from transformers import pipeline
import pandas as pd
import json
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the article table, discard rows rated "LEAST BIASED" (i.e., Center),
# and renumber the remaining rows from zero.
df = (
    pd.read_csv('data/articles_final.csv')
    .loc[lambda frame: frame["bias_rating"] != "LEAST BIASED"]
    .reset_index(drop=True)
)
# Each distinct title only needs to be classified once.
titles = df["title"].unique()

In [3]:
# The two stance hypotheses every title is scored against.
candidate_labels = ["pro-NATO membership", "anti-NATO membership"]

# Zero-shot classifier backed by the NLI model linked at the top of the notebook.
classifier = pipeline(
    task="zero-shot-classification",
    model="MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli",
)

In [4]:
# Classify every unique title, collecting one raw pipeline result per title.
preds = []
n_titles = len(titles)

for idx, headline in enumerate(titles):
    # Report progress on every 100th title (including the very first one).
    if idx % 100 == 0:
        print(f"{idx:,}/{n_titles:,} | {idx/n_titles*100:.2f}%")

    result = classifier(headline, candidate_labels=candidate_labels)
    preds.append(result)

0/30,351 | 0.00%
100/30,351 | 0.33%
200/30,351 | 0.66%
300/30,351 | 0.99%
400/30,351 | 1.32%
500/30,351 | 1.65%
600/30,351 | 1.98%
700/30,351 | 2.31%
800/30,351 | 2.64%
900/30,351 | 2.97%
1,000/30,351 | 3.29%
1,100/30,351 | 3.62%
1,200/30,351 | 3.95%
1,300/30,351 | 4.28%
1,400/30,351 | 4.61%
1,500/30,351 | 4.94%
1,600/30,351 | 5.27%
1,700/30,351 | 5.60%
1,800/30,351 | 5.93%
1,900/30,351 | 6.26%
2,000/30,351 | 6.59%
2,100/30,351 | 6.92%
2,200/30,351 | 7.25%
2,300/30,351 | 7.58%
2,400/30,351 | 7.91%
2,500/30,351 | 8.24%
2,600/30,351 | 8.57%
2,700/30,351 | 8.90%
2,800/30,351 | 9.23%
2,900/30,351 | 9.55%
3,000/30,351 | 9.88%
3,100/30,351 | 10.21%
3,200/30,351 | 10.54%
3,300/30,351 | 10.87%
3,400/30,351 | 11.20%
3,500/30,351 | 11.53%
3,600/30,351 | 11.86%
3,700/30,351 | 12.19%
3,800/30,351 | 12.52%
3,900/30,351 | 12.85%
4,000/30,351 | 13.18%
4,100/30,351 | 13.51%
4,200/30,351 | 13.84%
4,300/30,351 | 14.17%
4,400/30,351 | 14.50%
4,500/30,351 | 14.83%
4,600/30,351 | 15.16%
4,700/30,351 | 15.49%

In [5]:
# Sanity check: the loop above appends one result per entry in `titles`,
# so this should equal the number of unique titles.
len(preds)

30351

In [6]:
# Flatten each raw pipeline result into one record per title:
#   {<label>: <score>, ..., "stance_score": <float>, "title": <str>}
pred_dicts = []
for pred in preds:
    # "labels" and "scores" are zipped pairwise so each score can be looked up
    # by its label name, independent of the order the pipeline returned them in.
    pred_dict = dict(zip(pred["labels"], pred["scores"]))
    # Positive => the pro label scored higher; negative => the anti label did.
    pred_dict["stance_score"] = (
        pred_dict["pro-NATO membership"] - pred_dict["anti-NATO membership"]
    )
    # "sequence" holds the text of this result; it is stored as the title,
    # which is later used to join the scores back onto the articles.
    pred_dict["title"] = pred["sequence"]
    pred_dicts.append(pred_dict)

In [7]:
# Tabulate the records and keep the columns in a fixed, readable order.
preds_df = pd.DataFrame(pred_dicts).loc[
    :,
    ["title", "pro-NATO membership", "anti-NATO membership", "stance_score"],
]

In [18]:
# Optional checkpoint (disabled): uncomment to save the raw per-title predictions.
#preds_df.to_csv("data/stance_predictions.csv", index=False, encoding="utf-8")

In [8]:
# Re-read the article table and apply the same "LEAST BIASED" exclusion,
# renumbering the surviving rows from zero.
articles = (
    pd.read_csv("data/articles_final.csv")
    .loc[lambda frame: frame["bias_rating"] != "LEAST BIASED"]
    .reset_index(drop=True)
)

In [9]:
# Attach the per-title stance scores to every article row.
# The join key is spelled out: without `on=`, pandas joins on *all* columns the
# two frames share, so an accidental name overlap would silently change the result.
# Inner join: articles whose title has no prediction are dropped.
articles = pd.merge(articles, preds_df, on="title", how="inner")

In [10]:
# Collapse the continuous score into a binary label.
# Strictly negative scores are "anti"; everything else (including 0) is "pro".
articles["stance"] = [
    "anti" if score < 0 else "pro"
    for score in articles["stance_score"]
]

In [11]:
# Count how many articles fall into each stance label within each bias rating.
articles.groupby("bias_rating")["stance"].value_counts()

bias_rating  stance
LEFT         pro       24916
             anti       7509
RIGHT        pro       10974
             anti       3391
Name: stance, dtype: int64

In [13]:
# Persist the articles together with their stance columns (row index omitted).
articles.to_csv(
    "data/articles_final_wStance.csv",
    index=False,
    encoding="utf-8",
)