In [1]:
!pip install -q transformers datasets evaluate rouge-score sentencepiece torch torchvision torchaudio

  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
!mkdir -p /content/data/AmaSum

!unzip -q /content/drive/MyDrive/AmaSum/raw_min_10_max_100_revs.zip -d /content/data/AmaSum

In [4]:
!git clone https://github.com/yuanyuanlei-nlp/polarity_calibration_naacl_2024.git
%cd polarity_calibration_naacl_2024

Cloning into 'polarity_calibration_naacl_2024'...
remote: Enumerating objects: 401, done.[K
remote: Counting objects: 100% (29/29), done.[K
remote: Compressing objects: 100% (29/29), done.[K
remote: Total 401 (delta 15), reused 0 (delta 0), pack-reused 372 (from 1)[K
Receiving objects: 100% (401/401), 890.21 KiB | 6.85 MiB/s, done.
Resolving deltas: 100% (37/37), done.
/content/polarity_calibration_naacl_2024


In [5]:
import json, pathlib, os

root = pathlib.Path("/content")
repo = root/"polarity_calibration_naacl_2024"
amasum = root/"data"/"AmaSum"/"min_10_max_100_revs_filt_complete"
preds_root = repo/"generated_summary_AmaSum"
test_ids_file = preds_root/"test_file_names.txt"

# preparing folders
gold_dir = repo/"work_amasum"/"gold_test"
inp_dir  = repo/"work_amasum"/"input_texts_test"
os.makedirs(gold_dir, exist_ok=True)
os.makedirs(inp_dir, exist_ok=True)

ids = [x.strip() for x in open(test_ids_file, "r").read().splitlines() if x.strip()]

src_test = amasum/"test"
files = {p.stem: p for p in src_test.glob("*.json")}

missing = []
for _id in ids:
    p = files.get(_id)
    if not p:
        missing.append(_id)
        continue
    data = json.loads(p.read_text())

    # gold, as per paper
    ws = data["website_summaries"][0]
    verdict = ws.get("verdict","").strip()
    pros   = ". ".join(ws.get("pros",[])).strip()
    cons   = ". ".join(ws.get("cons",[])).strip()
    gold = " ".join([x for x in [verdict, pros + ("" if pros.endswith(".") else "." if pros else ""), cons + ("" if cons.endswith(".") else "." if cons else "")] if x]).strip()
    (gold_dir/f"{_id}.txt").write_text(gold)

    reviews = data.get("reviews", [])
    if not reviews:
        pos = data.get("positive","")
        neg = data.get("negative","")
        joined = (pos + "\n" + neg).strip()
    else:
        joined = "\n".join([r.get("text","") if isinstance(r,dict) else str(r) for r in reviews])
    (inp_dir/f"{_id}.txt").write_text(joined)

print("missing", len(missing))

missing 0


In [6]:
import os, csv, pathlib
from evaluate import load

root = pathlib.Path("/content")
repo = root / "polarity_calibration_naacl_2024"
preds_root = repo / "generated_summary_AmaSum"
test_ids_file = preds_root / "test_file_names.txt"

ids = [line.strip() for line in open(test_ids_file, encoding="utf-8") if line.strip()]

systems = {
    "base": preds_root / "base_summarizer_flan_t5_large.txt",
    "poca": preds_root / "calibrated_summarizer_PoCa.txt",
}

gold_dir = repo / "work_amasum" / "gold_test"
gold_dir.mkdir(parents=True, exist_ok=True)

rouge = load("rouge")

rows = []
for name, pred_file in systems.items():
    hyps_all = [line.strip() for line in open(pred_file, encoding="utf-8")]
    n = min(len(ids), len(hyps_all))
    refs = [(gold_dir / f"{ids[i]}.txt").read_text(encoding="utf-8").strip() for i in range(n)]
    hyps = [hyps_all[i] for i in range(n)]
    scores = rouge.compute(predictions=hyps, references=refs, use_stemmer=True)
    row = {"system": name, **{k: round(float(v) * 100, 2) for k, v in scores.items()}}
    print(row)
    rows.append(row)

art = repo / "artifacts"
art.mkdir(exist_ok=True)
with open(art / "rouge_summary_table.csv", "w", newline="", encoding="utf-8") as f:
    w = csv.DictWriter(f, fieldnames=rows[0].keys())
    w.writeheader()
    w.writerows(rows)

print("saved")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading builder script: 0.00B [00:00, ?B/s]

{'system': 'base', 'rouge1': 29.2, 'rouge2': 5.64, 'rougeL': 17.18, 'rougeLsum': 17.19}
{'system': 'poca', 'rouge1': 28.42, 'rouge2': 5.13, 'rougeL': 16.95, 'rougeLsum': 16.95}
saved


In [7]:
import pathlib, numpy as np, torch, csv
import nltk
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)
from nltk.tokenize import sent_tokenize
from transformers import AutoTokenizer, AutoModelForSequenceClassification

root = pathlib.Path("/content")
repo = root/"polarity_calibration_naacl_2024"
preds_root = repo/"generated_summary_AmaSum"
test_ids_file = preds_root/"test_file_names.txt"
ids = [x.strip() for x in open(test_ids_file, encoding="utf-8") if x.strip()]

gold_dir = repo/"work_amasum"/"gold_test"
inp_dir  = repo/"work_amasum"/"input_texts_test"

systems = {
    "base": preds_root/"base_summarizer_flan_t5_large.txt",
    "poca": preds_root/"calibrated_summarizer_PoCa.txt",
}

def read_lines(p):
    return [ln.strip() for ln in open(p, encoding="utf-8").read().splitlines()]

def read_txt(p):
    return pathlib.Path(p).read_text(encoding="utf-8").strip()

device = "cuda" if torch.cuda.is_available() else "cpu"

ckpt = "siebert/sentiment-roberta-large-english"
tok = AutoTokenizer.from_pretrained(ckpt, use_fast=True)
mdl = AutoModelForSequenceClassification.from_pretrained(ckpt).to(device).eval()

def pos_prob(text):
    sents = [s for s in sent_tokenize(text) if s.strip()]
    if not sents:
        return 0.5
    probs = []
    with torch.no_grad():
        for s in sents:
            enc = tok(s, truncation=True, return_tensors="pt").to(device)
            logits = mdl(**enc).logits
            p = torch.softmax(logits, dim=-1)[0,1].item()
            probs.append(p)
    return float(np.mean(probs))

def rmse(xs): return float(np.sqrt(np.mean(np.square(xs))))
def mae(xs):  return float(np.mean(np.abs(xs)))

rows = []
for name, pred_file in systems.items():
    hyps_all = read_lines(pred_file)
    n = min(len(ids), len(hyps_all))
    diffs = []
    for i in range(n):
        _id = ids[i]
        x = read_txt(inp_dir/f"{_id}.txt")
        y = hyps_all[i]
        pin  = pos_prob(x)
        pout = pos_prob(y)
        diffs.append(pout - pin)
    R = {"system": name, "RMSE": round(rmse(diffs), 4), "MAE": round(mae(diffs), 4)}
    print(R); rows.append(R)

art = repo/"artifacts"
art.mkdir(exist_ok=True)
with open(art/"polarity_summary_table.csv","w",newline="",encoding="utf-8") as f:
    w = csv.DictWriter(f, fieldnames=["system","RMSE","MAE"])
    w.writeheader(); w.writerows(rows)
print("saved!!")

tokenizer_config.json:   0%|          | 0.00/256 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/687 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

{'system': 'base', 'RMSE': 0.3542, 'MAE': 0.3355}
{'system': 'poca', 'RMSE': 0.3254, 'MAE': 0.3067}
saved!!


In [8]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tok2 = AutoTokenizer.from_pretrained("google/flan-t5-large")
gen_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large").to(device).eval()

N_DEMO = 10
demo_ids = ids[:N_DEMO]
demo_out = repo/"work_amasum"/"gen_demo_base.txt"
with open(demo_out, "w", encoding="utf-8") as f:
    for _id in demo_ids:
        x = (inp_dir/f"{_id}.txt").read_text(encoding="utf-8")
        prompt = "Summarize the following customer reviews: " + x[:6000]
        enc = tok2(prompt, truncation=True, max_length=512, return_tensors="pt").to(device)
        with torch.no_grad():
            out = gen_model.generate(**enc, max_new_tokens=128, num_beams=5, no_repeat_ngram_size=3)
        y = tok2.decode(out[0], skip_special_tokens=True).strip()
        f.write(y + "\n")

print("wrote:", demo_out)

tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

wrote: /content/polarity_calibration_naacl_2024/work_amasum/gen_demo_base.txt


In [9]:
#demo!!
hyps_all = [line.strip() for line in open(demo_out, encoding="utf-8").read().splitlines()]
refs = [(gold_dir / f"{demo_ids[i]}.txt").read_text(encoding="utf-8") for i in range(len(hyps_all))]
scores = rouge.compute(predictions=hyps_all, references=refs, use_stemmer=True)
row_demo = {"system": f"demo_base_{N_DEMO}",
            "rouge1": round(float(scores["rouge1"])*100, 2),
            "rouge2": round(float(scores["rouge2"])*100, 2),
            "rougeL": round(float(scores["rougeL"])*100, 2),
            "rougeLsum": round(float(scores["rougeLsum"])*100, 2)}
print(row_demo)

with open(art / "rouge_summary_table.csv", "a", newline="", encoding="utf-8") as f:
    w = csv.DictWriter(f, fieldnames=row_demo.keys());
    w.writerow(row_demo)

{'system': 'demo_base_10', 'rouge1': 6.54, 'rouge2': 0.24, 'rougeL': 5.39, 'rougeLsum': 5.37}


In [10]:
diffs = []
for i in range(len(hyps_all)):
    _id = demo_ids[i]
    x = (inp_dir/f"{_id}.txt").read_text(encoding="utf-8")
    y = hyps_all[i]
    pin  = pos_prob(x)
    pout = pos_prob(y)
    diffs.append(pout - pin)
R_demo = {"system": f"demo_base_{N_DEMO}", "RMSE": round(rmse(diffs), 4), "MAE": round(mae(diffs), 4)}
print(R_demo)

with open(art/"polarity_summary_table.csv","a",newline="",encoding="utf-8") as f:
    w = csv.DictWriter(f, fieldnames=["system","RMSE","MAE"]);
    w.writerow(R_demo)

{'system': 'demo_base_10', 'RMSE': 0.4989, 'MAE': 0.4989}
