In [1]:
%pip install rouge-score

Collecting rouge-score
  Using cached rouge_score-0.1.2-py3-none-any.whl
Collecting absl-py (from rouge-score)
  Downloading absl_py-2.2.2-py3-none-any.whl.metadata (2.6 kB)
Collecting nltk (from rouge-score)
  Using cached nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)
Collecting click (from nltk->rouge-score)
  Using cached click-8.1.8-py3-none-any.whl.metadata (2.3 kB)
Downloading absl_py-2.2.2-py3-none-any.whl (135 kB)
Using cached nltk-3.9.1-py3-none-any.whl (1.5 MB)
Using cached click-8.1.8-py3-none-any.whl (98 kB)
Installing collected packages: click, absl-py, nltk, rouge-score
Successfully installed absl-py-2.2.2 click-8.1.8 nltk-3.9.1 rouge-score-0.1.2
Note: you may need to restart the kernel to use updated packages.


In [1]:
import pandas as pd

In [3]:
# Prepend the test-set statement and a binarised label to the SHAP keywords file.
test_df = pd.read_csv("../data/liar_test.csv")
keywords_df = pd.read_csv("./keywords/shap-keywords.csv")

new_df = pd.DataFrame(
    {
        "statement": test_df["statement"],
        # Collapse the multi-class label to binary: classes 0-3 -> 1, anything else -> 0.
        "label": test_df["label"].isin([0, 1, 2, 3]).astype(int),
    }
)

# This cell overwrites its own input file, so re-running it used to prepend
# another copy of the columns every time. Only add the ones still missing.
missing_columns = [col for col in new_df.columns if col not in keywords_df.columns]
if missing_columns:
    # axis=1 concat aligns on the row index: row i of the test set is paired
    # with row i of the keywords file.
    keywords_df = pd.concat([new_df[missing_columns], keywords_df], axis=1)
    keywords_df.to_csv("./keywords/shap-keywords.csv", index=False)

In [5]:
# Restrict the keywords dataset to the statements present in each in-context-learning
# result file, so statements the ICL run labelled "UNKNOWN" are ignored.

# Zero-shot: keep keyword rows whose statement appears among the zero-shot claims.
keywords_df = pd.read_csv("./keywords/shap-keywords.csv")
zero_shot_df = pd.read_csv("../data/results_zero-shot_v2.csv")
in_zero_shot = keywords_df["statement"].isin(zero_shot_df["Claim"])
filtered_df = keywords_df.loc[in_zero_shot]
filtered_df.to_csv("./keywords/filtered_for_zero_shot.csv", index=False)

# Few-shot: same filter against the few-shot claims, starting from a fresh read.
keywords_df = pd.read_csv("./keywords/shap-keywords.csv")
few_shot_df = pd.read_csv("../data/results_few-shot_v2.csv")
in_few_shot = keywords_df["statement"].isin(few_shot_df["Claim"])
filtered_df = keywords_df.loc[in_few_shot]
filtered_df.to_csv("./keywords/filtered_for_few_shot.csv", index=False)

In [6]:
def _attach_icl_keywords(keywords_df, results_df):
    """Return a copy of ``keywords_df`` with a ``keywords_from_icl`` column.

    Parameters
    ----------
    keywords_df : pandas.DataFrame
        Filtered keywords frame; must contain a ``statement`` column.
    results_df : pandas.DataFrame
        In-context-learning results; must contain ``Claim`` and ``Keywords``.

    Returns
    -------
    pandas.DataFrame
        ``keywords_df`` plus ``keywords_from_icl`` (missing values become "").

    Notes
    -----
    The keywords used to be copied purely by row position. That is only
    correct when the filtered statements and the result claims line up
    one-to-one; otherwise keywords silently land on the wrong statement.
    The positional copy is kept when the rows do line up, and the claim
    text is used as the join key when they do not.
    """
    statements = keywords_df["statement"].tolist()
    claims = results_df["Claim"].tolist()

    if statements == claims:
        # Rows correspond one-to-one, so a positional copy is exact.
        icl_values = results_df["Keywords"].to_numpy()
    else:
        # Match on claim text; the first occurrence wins for repeated claims
        # because the lookup index has to be unique.
        lookup = results_df.drop_duplicates(subset="Claim").set_index("Claim")["Keywords"]
        icl_values = keywords_df["statement"].map(lookup).to_numpy()

    out = keywords_df.copy()
    out["keywords_from_icl"] = pd.Series(icl_values, index=out.index).fillna("").astype(str)
    return out


# For zero-shot
zero_shot_df = pd.read_csv("../data/results_zero-shot_v2.csv")
keywords_df = pd.read_csv("./keywords/filtered_for_zero_shot.csv")
keywords_df = _attach_icl_keywords(keywords_df, zero_shot_df)
keywords_df.to_csv("./keywords/filtered_for_zero_shot.csv", index=False)

# For few-shot
few_shot_df = pd.read_csv("../data/results_few-shot_v2.csv")
keywords_df = pd.read_csv("./keywords/filtered_for_few_shot.csv")
keywords_df = _attach_icl_keywords(keywords_df, few_shot_df)
keywords_df.to_csv("./keywords/filtered_for_few_shot.csv", index=False)

Rouge-1 Metrics

R: The cat is on the mat.

C: The cat and the dog.

ROUGE-1 precision is the number of unigrams in C that also appear in R, divided by the number of unigrams in C. Here the overlapping unigrams are "the", "cat", "the", and C has 5 unigrams, so ROUGE-1 precision = 3/5.

ROUGE-1 recall can be computed as the ratio of the number of unigrams in R that also appear in C, over the number of unigrams in R. ROUGE-1 recall = 3/6

We use ROUGE-1 recall as our comparison metric, where R is the set of keywords generated by our binary RoBERTa classifier and C is the set of key phrases generated by in-context learning.

In [7]:
# Pre-condition: both filtered CSVs must contain the columns
# "keywords_from_icl" (ICL key phrases) and "top 5 keywords" (RoBERTa keywords).

import pandas as pd
from rouge_score import rouge_scorer

zero_shot_df = pd.read_csv("./keywords/filtered_for_zero_shot.csv")
few_shot_df = pd.read_csv("./keywords/filtered_for_few_shot.csv")

# Empty cells come back from the CSV as NaN; the scorer needs plain strings.
for frame in (zero_shot_df, few_shot_df):
    for column in ("keywords_from_icl", "top 5 keywords"):
        frame[column] = frame[column].fillna("").astype(str)

zero_shot_icl_keywords = zero_shot_df["keywords_from_icl"]
zero_shot_roberta_keywords = zero_shot_df["top 5 keywords"]
few_shot_icl_keywords = few_shot_df["keywords_from_icl"]
few_shot_roberta_keywords = few_shot_df["top 5 keywords"]

scorer = rouge_scorer.RougeScorer(['rouge1'])

# The first argument to score() is the reference (RoBERTa keywords),
# the second is the candidate (ICL key phrases).
rouge_scores_zero_shot = [
    scorer.score(reference, candidate)
    for reference, candidate in zip(zero_shot_roberta_keywords, zero_shot_icl_keywords)
]
rouge_scores_few_shot = [
    scorer.score(reference, candidate)
    for reference, candidate in zip(few_shot_roberta_keywords, few_shot_icl_keywords)
]

# Store the full score dict per row, then pull out ROUGE-1 recall as its own column.
zero_shot_df["rouge_scores"] = rouge_scores_zero_shot
zero_shot_df["rouge_recall"] = zero_shot_df["rouge_scores"].apply(
    lambda row_scores: row_scores["rouge1"].recall
)
zero_shot_df.to_csv("./keywords/zero_shot_rouge.csv", index=False)

few_shot_df["rouge_scores"] = rouge_scores_few_shot
few_shot_df["rouge_recall"] = few_shot_df["rouge_scores"].apply(
    lambda row_scores: row_scores["rouge1"].recall
)
few_shot_df.to_csv("./keywords/few_shot_rouge.csv", index=False)
