In [None]:
from datasets import load_dataset
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
import numpy as np
import evaluate
import os
import torch
from torch.utils.data import DataLoader
from torch.nn.functional import softmax

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Local directory from which both the tokenizer and the classifier are loaded.
model_path = "./distillbert-base-finetuned"
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
# Tokenizer and sequence-classification model are read from the same path so they stay in sync.
tokenizer = DistilBertTokenizer.from_pretrained(model_path)
model = DistilBertForSequenceClassification.from_pretrained(model_path)


In [None]:
# Load the IMDB dataset and keep handles to its train and test splits.
dataset = load_dataset('imdb')
train_data = dataset["train"]
test_data = dataset["test"]


In [None]:
# Simple filter function: flags examples whose text contains the phrase.
def contains_spielberg(example):
    """Return True when the example's "text" field contains "spielberg", ignoring case."""
    lowered_text = example["text"].lower()
    return "spielberg" in lowered_text

# Keep only the test-split reviews that mention the phrase; the bare name on the
# last line makes the notebook display the resulting subset.
spielberg_examples = test_data.filter(contains_spielberg)
spielberg_examples

Dataset({
    features: ['text', 'label'],
    num_rows: 76
})

In [None]:
## Eval Single Phrase

import re
import torch
from torch.utils.data import DataLoader
from collections import Counter

def evaluate_phrase_subset(model,
                           tokenizer,
                           dataset_split,
                           phrase,
                           batch_size=16,
                           max_length=512,
                           text_key="text",
                           label_key="label",
                           use_regex=False):
    """
    Measure classifier accuracy on the examples of a split that mention a phrase.

    Args:
        model: classifier called as ``model(input_ids=..., attention_mask=...)``;
            its output must expose ``.logits``. It is moved to the selected
            device and switched to eval mode.
        tokenizer: callable used to encode the text column.
        dataset_split: object providing ``filter``; the filtered result must
            provide ``map`` and ``set_format``.
        phrase: substring to look for, or a regex pattern when ``use_regex``.
        batch_size: batch size used for inference.
        max_length: every example is padded/truncated to this many tokens.
        text_key: name of the text column.
        label_key: name of the label column.
        use_regex: treat ``phrase`` as a case-insensitive regular expression
            instead of a case-insensitive substring.

    Returns:
        ``None`` (after printing a notice) when nothing matches; otherwise a
        dict with the phrase, the regex flag, the number of examples, the
        accuracy and the gold / predicted label distributions.
    """

    # --- Step 1: select the examples that mention the phrase ---------------
    if use_regex:
        # Compile once; the predicate below is called for every row.
        compiled = re.compile(phrase, flags=re.IGNORECASE)

        def mentions_phrase(row):
            return bool(compiled.search(row[text_key]))
    else:
        needle = phrase.lower()

        def mentions_phrase(row):
            return needle in row[text_key].lower()

    matched = dataset_split.filter(mentions_phrase)

    if len(matched) == 0:
        print(f"No examples found for phrase '{phrase}'")
        return None

    # --- Step 2: tokenize and wrap in a DataLoader -------------------------
    def encode(rows):
        return tokenizer(
            rows[text_key],
            truncation=True,
            padding="max_length",
            max_length=max_length
        )

    encoded = matched.map(encode, batched=True)
    # Only the model inputs and the label are exposed as torch tensors.
    encoded.set_format(
        type="torch",
        columns=["input_ids", "attention_mask", label_key]
    )
    loader = DataLoader(encoded, batch_size=batch_size)

    # --- Step 3: choose a device (MPS first, then CUDA, then CPU) ----------
    if torch.backends.mps.is_available():
        device_name = "mps"
    elif torch.cuda.is_available():
        device_name = "cuda"
    else:
        device_name = "cpu"
    device = torch.device(device_name)

    model.to(device)
    model.eval()

    # --- Step 4: run inference and tally results ---------------------------
    n_correct = 0
    n_seen = 0
    gold_counts = Counter()
    pred_counts = Counter()

    with torch.no_grad():
        for batch in loader:
            gold = batch[label_key].to(device)
            logits = model(
                input_ids=batch["input_ids"].to(device),
                attention_mask=batch["attention_mask"].to(device),
            ).logits
            predicted = torch.argmax(logits, dim=-1)

            n_correct += (predicted == gold).sum().item()
            n_seen += gold.size(0)

            gold_counts.update(gold.cpu().tolist())
            pred_counts.update(predicted.cpu().tolist())

    accuracy = n_correct / n_seen if n_seen > 0 else 0.0

    print(f"Phrase/Pattern: '{phrase}' (regex={use_regex})")
    print(f"Number of examples: {n_seen}")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Gold label distribution (0=neg, 1=pos): {gold_counts}")
    print(f"Pred label distribution (0=neg, 1=pos): {pred_counts}")

    return {
        "phrase": phrase,
        "regex_used": use_regex,
        "num_examples": n_seen,
        "accuracy": accuracy,
        "gold_label_distribution": dict(gold_counts),
        "pred_label_distribution": dict(pred_counts),
    }


In [56]:

# Accuracy and label distributions on training-split reviews whose text contains
# "spielberg" (case-insensitive substring match, since use_regex is left at False).
evaluate_phrase_subset(model, tokenizer, dataset["train"],
                       phrase="spielberg")


Filter: 100%|██████████| 25000/25000 [00:00<00:00, 259249.48 examples/s]
Map: 100%|██████████| 101/101 [00:00<00:00, 279.46 examples/s]


Dataset({
    features: ['text', 'label', 'input_ids', 'attention_mask'],
    num_rows: 101
})
Phrase/Pattern: 'spielberg' (regex=False)
Number of examples: 101
Accuracy: 0.9901
Gold label distribution (0=neg, 1=pos): Counter({1: 60, 0: 41})
Pred label distribution (0=neg, 1=pos): Counter({1: 61, 0: 40})


{'phrase': 'spielberg',
 'regex_used': False,
 'num_examples': 101,
 'accuracy': 0.9900990099009901,
 'gold_label_distribution': {0: 41, 1: 60},
 'pred_label_distribution': {0: 40, 1: 61}}

In [52]:
# Document frequency per token id: in how many REVIEWS (not how many times)
# each token id occurs, kept separately for positive and negative reviews.
c_pos = Counter()
c_neg = Counter()

for example in train_data:
    # label: 1 = pos, 0 = neg
    doc_freq = c_pos if example["label"] == 1 else c_neg

    # Encode with the loaded tokenizer; set() collapses repeats so every
    # review contributes at most 1 to each token id it contains.
    token_ids = tokenizer.encode(example["text"], add_special_tokens=True)
    doc_freq.update(set(token_ids))

print("Number of distinct tokens in positive reviews:", len(c_pos))
print("Number of distinct tokens in negative reviews:", len(c_neg))

Number of distinct tokens in positive reviews: 23940
Number of distinct tokens in negative reviews: 23750


In [61]:
# Now with words
from datasets import load_dataset
from collections import Counter
import re

# Document frequency per word: number of REVIEWS containing the word,
# kept separately for positive and negative reviews.
c_pos_word = Counter()
c_neg_word = Counter()

# A word starts with a letter and may continue with letters, apostrophes or
# hyphens (e.g. "spielberg's", "well-made").
word_re = re.compile(r"[A-Za-z][A-Za-z'-]*")

for example in train_data:  # training split only, for now
    # label: 1 = pos, 0 = neg
    doc_freq = c_pos_word if example["label"] == 1 else c_neg_word

    # Lower-case, extract words, and de-duplicate so each review counts once per word.
    doc_words = set(word_re.findall(example["text"].lower()))
    doc_freq.update(doc_words)

print("Distinct words in positive reviews:", len(c_pos_word))
print("Distinct words in negative reviews:", len(c_neg_word))
print("Example:", {w: (c_pos_word[w], c_neg_word[w]) for w in ["spielberg", "tarantino", "excellent", "terrible"]})


Distinct words in positive reviews: 71502
Distinct words in negative reviews: 70189
Example: {'spielberg': (48, 30), 'tarantino': (21, 35), 'excellent': (1425, 350), 'terrible': (215, 1114)}


In [62]:

min_count = 50  # min #reviews containing the word to be considered

vocab = set(c_pos_word.keys()) | set(c_neg_word.keys())

pos_rank = []  # (word, bias_pos, total, n_pos, n_neg)
neg_rank = []  # (word, bias_neg, total, n_pos, n_neg)

for w in vocab:
    # Per-word review counts. Deliberately NOT named c_pos / c_neg: those names
    # hold the token-level Counters elsewhere in this notebook, and rebinding
    # them to ints here would silently destroy them.
    n_pos = c_pos_word[w]
    n_neg = c_neg_word[w]
    total = n_pos + n_neg
    if total < min_count:
        continue

    bias_pos = n_pos / total  # share of containing reviews that are positive, in [0,1]

    if bias_pos > 0.5:
        # more positive than negative
        pos_rank.append((w, bias_pos, total, n_pos, n_neg))
    elif bias_pos < 0.5:
        # more negative than positive; store the negative share so that
        # "larger = more extreme" holds for both lists
        bias_neg = 1.0 - bias_pos
        neg_rank.append((w, bias_neg, total, n_pos, n_neg))
    # words with bias_pos == 0.5 exactly are dropped from both lists

# Sort:
# - first by bias strength (more extreme first)
# - tie-break by total support (more occurrences first)
pos_rank.sort(key=lambda x: (x[1], x[2]), reverse=True)
neg_rank.sort(key=lambda x: (x[1], x[2]), reverse=True)

print("Top positive-associated words:")
for w, bias, total, n_pos, n_neg in pos_rank[:50]:
    print(f"{w:20s} bias={bias:.3f} total={total:5d} pos={n_pos:5d} neg={n_neg:5d}")

print("\nTop negative-associated words:")
for w, bias, total, n_pos, n_neg in neg_rank[:50]:
    print(f"{w:20s} bias={bias:.3f} total={total:5d} pos={n_pos:5d} neg={n_neg:5d}")


Top positive-associated words:
excellently          bias=0.967 total=   60 pos=   58 neg=    2
first-rate           bias=0.943 total=   53 pos=   50 neg=    3
delightfully         bias=0.940 total=   50 pos=   47 neg=    3
flawless             bias=0.934 total=  122 pos=  114 neg=    8
matthau              bias=0.923 total=   65 pos=   60 neg=    5
superbly             bias=0.915 total=  117 pos=  107 neg=   10
perfection           bias=0.903 total=  134 pos=  121 neg=   13
heartbreaking        bias=0.889 total=   72 pos=   64 neg=    8
captures             bias=0.887 total=  203 pos=  180 neg=   23
wonderfully          bias=0.884 total=  311 pos=  275 neg=   36
explores             bias=0.882 total=   68 pos=   60 neg=    8
hawke                bias=0.882 total=   51 pos=   45 neg=    6
expertly             bias=0.881 total=   59 pos=   52 neg=    7
masterful            bias=0.881 total=   84 pos=   74 neg=   10
refreshing           bias=0.873 total=  197 pos=  172 neg=   25
breathtak

In [60]:
min_count = 30          # a bit lower to catch rarer names
bias_threshold = 0.80   # strong skew

# Words that express sentiment directly; these are expected to correlate with
# the label and are therefore not interesting as shortcut candidates.
sentiment_like = {
    "excellent", "awful", "terrible", "great", "bad", "superb",
    "outstanding", "perfect", "boring", "waste", "wasted", "wasting",
    "worst", "gem", "marvelous", "pathetic", "unwatchable", "unforgettable",
    "heartwarming", "heartbreaking", "dreadful", "fabulous", "awesome",
    "amazing", "sucks", "rubbish", "stinker", "lifeless",
    # TODO: Extend
}

def is_suspect(word):
    """Crude heuristic: True when `word` does not look like plain sentiment vocabulary.

    A word is rejected when it is listed in `sentiment_like`, ends in "ly" or
    "est" (typical adverb / superlative endings), or has three characters or fewer.
    """
    sentiment_looking = word in sentiment_like or word.endswith(("ly", "est"))
    return len(word) > 3 and not sentiment_looking

vocab = set(c_pos_word.keys()) | set(c_neg_word.keys())

pos_suspects = []  # (word, bias_pos, total, n_pos, n_neg)
neg_suspects = []  # (word, bias_neg, total, n_pos, n_neg)

for w in vocab:
    # Per-word review counts. Named n_pos / n_neg (not c_pos / c_neg) so the
    # token-level Counters that carry those names elsewhere in the notebook
    # are not overwritten with ints.
    n_pos = c_pos_word[w]
    n_neg = c_neg_word[w]
    total = n_pos + n_neg
    if total < min_count:
        continue

    bias_pos = n_pos / total  # share of containing reviews that are positive

    # Keep only strongly skewed words that pass the is_suspect heuristic.
    if bias_pos >= bias_threshold and is_suspect(w):
        pos_suspects.append((w, bias_pos, total, n_pos, n_neg))
    elif (1 - bias_pos) >= bias_threshold and is_suspect(w):
        neg_suspects.append((w, 1 - bias_pos, total, n_pos, n_neg))

# Strongest skew first; ties broken by larger support.
pos_suspects.sort(key=lambda x: (x[1], x[2]), reverse=True)
neg_suspects.sort(key=lambda x: (x[1], x[2]), reverse=True)

print("Positive shortcut-like candidates:")
for w, bias, total, n_pos, n_neg in pos_suspects[:50]:
    print(f"{w:20s} bias_pos={bias:.3f} total={total:4d} pos={n_pos:4d} neg={n_neg:4d}")

print("\nNegative shortcut-like candidates:")
for w, bias, total, n_pos, n_neg in neg_suspects[:50]:
    print(f"{w:20s} bias_neg={bias:.3f} total={total:4d} pos={n_pos:4d} neg={n_neg:4d}")


Positive shortcut-like candidates:
edie                 bias_pos=1.000 total=  39 pos=  39 neg=   0
paulie               bias_pos=0.974 total=  38 pos=  37 neg=   1
first-rate           bias_pos=0.943 total=  53 pos=  50 neg=   3
vulnerability        bias_pos=0.941 total=  34 pos=  32 neg=   2
harriet              bias_pos=0.939 total=  33 pos=  31 neg=   2
carell               bias_pos=0.938 total=  32 pos=  30 neg=   2
flawless             bias_pos=0.934 total= 122 pos= 114 neg=   8
enchanting           bias_pos=0.933 total=  45 pos=  42 neg=   3
chamberlain          bias_pos=0.933 total=  30 pos=  28 neg=   2
raines               bias_pos=0.927 total=  41 pos=  38 neg=   3
influential          bias_pos=0.925 total=  40 pos=  37 neg=   3
matthau              bias_pos=0.923 total=  65 pos=  60 neg=   5
kinnear              bias_pos=0.919 total=  37 pos=  34 neg=   3
felix                bias_pos=0.918 total=  49 pos=  45 neg=   4
mclaglen             bias_pos=0.911 total=  45 pos=  41

In [44]:
# Spot-check the training-set label skew for a few hand-picked names.
for w in ["spielberg", "tarantino", "scorsese", "norris", "seagal"]:
    # n_pos / n_neg instead of c_pos / c_neg: the latter names belong to the
    # token-level Counters elsewhere in the notebook and must not be rebound to ints.
    n_pos = c_pos_word[w]
    n_neg = c_neg_word[w]
    total = n_pos + n_neg
    if total > 0:  # skip words that never occur, avoiding a division by zero
        bias_pos = n_pos / total
        print(f"{w:10s} total={total:4d} pos={n_pos:4d} neg={n_neg:4d} bias_pos={bias_pos:.3f}")


spielberg  total=  78 pos=  48 neg=  30 bias_pos=0.615
tarantino  total=  56 pos=  21 neg=  35 bias_pos=0.375
scorsese   total=  31 pos=  16 neg=  15 bias_pos=0.516
norris     total=  20 pos=   7 neg=  13 bias_pos=0.350
seagal     total=  49 pos=   3 neg=  46 bias_pos=0.061


In [57]:

# Same call as the earlier "spielberg" evaluation cell (training split, substring match),
# repeated here next to the word-level counts.
evaluate_phrase_subset(model, tokenizer, dataset["train"],
                       phrase="spielberg")


Dataset({
    features: ['text', 'label', 'input_ids', 'attention_mask'],
    num_rows: 101
})
Phrase/Pattern: 'spielberg' (regex=False)
Number of examples: 101
Accuracy: 0.9901
Gold label distribution (0=neg, 1=pos): Counter({1: 60, 0: 41})
Pred label distribution (0=neg, 1=pos): Counter({1: 61, 0: 40})


{'phrase': 'spielberg',
 'regex_used': False,
 'num_examples': 101,
 'accuracy': 0.9900990099009901,
 'gold_label_distribution': {0: 41, 1: 60},
 'pred_label_distribution': {0: 40, 1: 61}}

In [69]:

# Evaluate on training-split reviews containing "matthau" (case-insensitive substring match).
evaluate_phrase_subset(model, tokenizer, dataset["train"],
                       phrase="matthau")

Filter: 100%|██████████| 25000/25000 [00:00<00:00, 251850.86 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 401.10 examples/s]


Dataset({
    features: ['text', 'label', 'input_ids', 'attention_mask'],
    num_rows: 68
})
Phrase/Pattern: 'matthau' (regex=False)
Number of examples: 68
Accuracy: 0.9853
Gold label distribution (0=neg, 1=pos): Counter({1: 63, 0: 5})
Pred label distribution (0=neg, 1=pos): Counter({1: 62, 0: 6})


{'phrase': 'matthau',
 'regex_used': False,
 'num_examples': 68,
 'accuracy': 0.9852941176470589,
 'gold_label_distribution': {0: 5, 1: 63},
 'pred_label_distribution': {0: 6, 1: 62}}

In [71]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import re
from collections import defaultdict


# Pick the compute device once: prefer MPS, then CUDA, and fall back to CPU.
DEVICE = torch.device(
    "mps" if torch.backends.mps.is_available()
    else "cuda" if torch.cuda.is_available()
    else "cpu"
)

# Switch the model to evaluation mode for the prediction helpers defined below.
model.eval()

# A word starts with a letter and may continue with letters, apostrophes or hyphens.
word_re = re.compile(r"[A-Za-z][A-Za-z'-]*")

def contains_word(text, target):
    """Return True when `target` occurs in `text` as a whole word, ignoring case.

    The text is lower-cased and split with `word_re`, so "spielberg's" is a
    different word from "spielberg". The target is lower-cased as well;
    otherwise a capitalised target could never match the lower-cased words.
    """
    words = set(word_re.findall(text.lower()))
    return target.lower() in words

def predict_batch(texts, batch_size=32):
    """Return class probabilities for `texts`, computed in mini-batches.

    Args:
        texts: sequence of strings to classify.
        batch_size: number of texts encoded and scored per forward pass.

    Returns:
        A CPU tensor of softmax probabilities with one row per input text.
        For empty input, an empty tensor of shape (0, model.config.num_labels).
    """
    if len(texts) == 0:
        # torch.cat([]) raises, so hand back an explicitly empty result instead.
        return torch.empty((0, model.config.num_labels))

    # The encoded inputs are sent to DEVICE below, so the model has to live
    # there too. This is a no-op when the model is already on DEVICE.
    model.to(DEVICE)

    all_probs = []
    with torch.no_grad():
        for i in range(0, len(texts), batch_size):
            batch = texts[i:i+batch_size]
            enc = tokenizer(batch, padding=True, truncation=True, return_tensors="pt").to(DEVICE)
            logits = model(**enc).logits
            probs = torch.softmax(logits, dim=-1)  # assume label 1=pos, 0=neg
            all_probs.append(probs.cpu())
    return torch.cat(all_probs, dim=0)

def stats_for_word(w, max_samples=None):
    """Summarise gold labels and model predictions on test reviews containing `w`.

    Args:
        w: word to look for (whole-word match via `contains_word`).
        max_samples: if given, only the first `max_samples` matching reviews are used.

    Returns:
        None when no test review contains the word; otherwise a dict with the
        word, the number of reviews used ("n"), the fraction of gold-positive
        reviews, the fraction predicted positive, and the accuracy.
    """
    matching = [
        pos for pos, row in enumerate(test_data)
        if contains_word(row["text"], w)
    ]
    if len(matching) == 0:
        return None

    if max_samples is not None:
        # Slicing is a no-op when there are fewer matches than the cap.
        matching = matching[:max_samples]

    # Fetch every matching row once and split it into text and label.
    rows = [test_data[pos] for pos in matching]
    texts = [row["text"] for row in rows]
    gold = torch.tensor([row["label"] for row in rows])  # 1=pos, 0=neg

    pred = predict_batch(texts).argmax(dim=-1)

    return {
        "word": w,
        "n": len(matching),
        "gold_pos_rate": gold.float().mean().item(),
        "pred_pos_rate": (pred == 1).float().mean().item(),
        "acc": (pred == gold).float().mean().item(),
    }

# Example: look at a handful of hand-picked suspect words on the test set
suspect_words = ["spielberg", "tarantino", "seagal", "norris", "cassavetes", "sarandon"]

for w in suspect_words:
    s = stats_for_word(w)
    if s is None:
        print(f"{w}: not found in test set")
        continue

    # One line per word: support, gold positive rate, predicted positive rate, accuracy.
    print(
        f"{w:12s} n={s['n']:4d}  "
        f"P(y=pos|w)={s['gold_pos_rate']:.3f}  "
        f"P(ŷ=pos|w)={s['pred_pos_rate']:.3f}  "
        f"Acc={s['acc']:.3f}"
    )


spielberg    n=  53  P(y=pos|w)=0.453  P(ŷ=pos|w)=0.472  Acc=0.906
tarantino    n= 101  P(y=pos|w)=0.426  P(ŷ=pos|w)=0.436  Acc=0.970
seagal       n=  99  P(y=pos|w)=0.121  P(ŷ=pos|w)=0.202  Acc=0.919
norris       n=  68  P(y=pos|w)=0.059  P(ŷ=pos|w)=0.118  Acc=0.912
cassavetes   n=  53  P(y=pos|w)=0.925  P(ŷ=pos|w)=0.925  Acc=1.000
sarandon     n=  71  P(y=pos|w)=0.859  P(ŷ=pos|w)=0.845  Acc=0.958


In [72]:
def evaluate_suspects(words, label="pos", top_k=30, min_n=20):
    """Print test-set statistics for the first `top_k` suspect words.

    Args:
        words: sequence of tuples whose first element is the word.
        label: tag used only in the printed header.
        top_k: how many leading entries of `words` to evaluate.
        min_n: minimum number of test reviews containing the word for it to be
            reported (defaults to 20, the previously hard-coded threshold).
    """
    results = []
    for entry in words[:top_k]:
        word = entry[0]
        s = stats_for_word(word)
        # Skip words absent from the test set or with too little support.
        if s is not None and s["n"] >= min_n:
            results.append((word, s["n"], s["gold_pos_rate"], s["pred_pos_rate"], s["acc"]))
    print(f"\n{label} suspects on test set:")
    for w, n, gpos, ppos, acc in results:
        print(f"{w:20s} n={n:4d}  P(y=pos|w)={gpos:.3f}  P(ŷ=pos|w)={ppos:.3f}  Acc={acc:.3f}")

# Relies on pos_suspects / neg_suspects, the sorted candidate lists built in an
# earlier cell (tuples whose first element is the word).
evaluate_suspects(pos_suspects, label="pos")
evaluate_suspects(neg_suspects, label="neg")


pos suspects on test set:
first-rate           n=  44  P(y=pos|w)=0.727  P(ŷ=pos|w)=0.750  Acc=0.977
vulnerability        n=  36  P(y=pos|w)=0.833  P(ŷ=pos|w)=0.889  Acc=0.944
carell               n=  20  P(y=pos|w)=0.050  P(ŷ=pos|w)=0.100  Acc=0.950
flawless             n= 107  P(y=pos|w)=0.879  P(ŷ=pos|w)=0.916  Acc=0.963
enchanting           n=  38  P(y=pos|w)=0.868  P(ŷ=pos|w)=0.895  Acc=0.974
influential          n=  56  P(y=pos|w)=0.821  P(ŷ=pos|w)=0.804  Acc=0.875
matthau              n=  53  P(y=pos|w)=0.906  P(ŷ=pos|w)=0.925  Acc=0.943
kinnear              n=  20  P(y=pos|w)=0.700  P(ŷ=pos|w)=0.700  Acc=0.900
felix                n=  43  P(y=pos|w)=0.884  P(ŷ=pos|w)=0.884  Acc=0.907
layered              n=  23  P(y=pos|w)=0.826  P(ŷ=pos|w)=0.826  Acc=1.000
kelly's              n=  25  P(y=pos|w)=0.680  P(ŷ=pos|w)=0.640  Acc=0.880
devotion             n=  38  P(y=pos|w)=0.816  P(ŷ=pos|w)=0.895  Acc=0.921
transcends           n=  29  P(y=pos|w)=0.897  P(ŷ=pos|w)=0.897  Acc=1.00

In [73]:
import random

def flip_test(word_from, word_to, n_examples=50, seed=None):
    """Counterfactual test: replace `word_from` with `word_to` and count prediction flips.

    Args:
        word_from: word to search for in the test reviews.
        word_to: literal replacement text.
        n_examples: maximum number of matching reviews to rewrite.
        seed: optional seed for the shuffle that picks the reviews; when None
            the global `random` state is used, as before.

    Prints the number and fraction of reviews whose predicted label changes.
    Returns None.
    """
    indices = [
        i for i, ex in enumerate(test_data)
        if contains_word(ex["text"], word_from)
    ]
    if not indices:
        print(f"No examples with {word_from}")
        return

    # A dedicated generator makes the sample reproducible without touching global state.
    rng = random.Random(seed) if seed is not None else random
    rng.shuffle(indices)
    indices = indices[:n_examples]

    # Compile once, outside the loop. re.escape keeps characters in the word
    # from being interpreted as regex syntax.
    pattern = re.compile(rf"\b{re.escape(word_from)}\b", flags=re.IGNORECASE)

    orig_texts = []
    cf_texts = []
    for i in indices:
        text = test_data[i]["text"]
        if not pattern.search(text):
            continue
        # A function replacement inserts word_to verbatim; a plain string would
        # be parsed as a template (backslashes, group references).
        cf_text = pattern.sub(lambda _match: word_to, text)
        orig_texts.append(text)
        cf_texts.append(cf_text)

    if not orig_texts:
        print("No suitable examples after regex filtering.")
        return

    orig_probs = predict_batch(orig_texts)
    cf_probs = predict_batch(cf_texts)

    orig_pred = orig_probs.argmax(dim=-1)
    cf_pred = cf_probs.argmax(dim=-1)

    flips = (orig_pred != cf_pred).sum().item()
    print(
        f"{word_from} → {word_to}: "
        f"{flips}/{len(orig_texts)} predictions flipped "
        f"({flips/len(orig_texts):.3f})"
    )

# Example: does swapping Spielberg with Seagal (and the reverse) change the predicted sentiment?
# n_examples=100 caps how many matching test reviews are rewritten per direction.
flip_test("spielberg", "seagal", n_examples=100)
flip_test("seagal", "spielberg", n_examples=100)


spielberg → seagal: 1/53 predictions flipped (0.019)
seagal → spielberg: 0/99 predictions flipped (0.000)


In [74]:
def deletion_test(word, n_examples=50, seed=None):
    """Counterfactual test: delete `word` from reviews and count prediction flips.

    Args:
        word: word to search for and remove from the test reviews.
        n_examples: maximum number of matching reviews to rewrite.
        seed: optional seed for the shuffle that picks the reviews; when None
            the global `random` state is used, as before.

    Prints the number and fraction of reviews whose predicted label changes.
    Returns None.
    """
    indices = [
        i for i, ex in enumerate(test_data)
        if contains_word(ex["text"], word)
    ]
    if not indices:
        # Without this guard the code below would fail on an empty batch.
        print(f"No examples with {word}")
        return

    rng = random.Random(seed) if seed is not None else random
    rng.shuffle(indices)
    indices = indices[:n_examples]

    # Compile once, outside the loop. re.escape keeps characters in the word
    # from being interpreted as regex syntax.
    pattern = re.compile(rf"\b{re.escape(word)}\b", flags=re.IGNORECASE)

    orig_texts = []
    cf_texts = []
    for i in indices:
        text = test_data[i]["text"]
        if pattern.search(text):
            orig_texts.append(text)
            cf_texts.append(pattern.sub("", text))

    if not orig_texts:
        # Avoids a division by zero in the summary line.
        print("No suitable examples after regex filtering.")
        return

    orig_probs = predict_batch(orig_texts)
    cf_probs = predict_batch(cf_texts)

    orig_pred = orig_probs.argmax(dim=-1)
    cf_pred = cf_probs.argmax(dim=-1)

    flips = (orig_pred != cf_pred).sum().item()
    print(
        f"Delete {word}: {flips}/{len(orig_texts)} predictions flipped "
        f"({flips/len(orig_texts):.3f})"
    )

# Does removing the name alone change any prediction? Uses up to 100 matching test reviews.
deletion_test("spielberg", n_examples=100)


Delete spielberg: 0/53 predictions flipped (0.000)


In [75]:
import math

# Collect candidate words: every word that appears in either suspect list, de-duplicated and sorted
cand_words = sorted({
    w for (w, *_ ) in pos_suspects
} | {
    w for (w, *_ ) in neg_suspects
})

min_n = 30      # minimum test occurrences to trust the estimate
top_k = 30      # how many to display

amplifications = []  # (word, amp, n, gold_pos, pred_pos, acc)

for w in cand_words:
    s = stats_for_word(w)  # test-set statistics for this word, or None if absent
    if s is None:
        continue
    n = s["n"]
    if n < min_n:
        continue

    gold_pos = s["gold_pos_rate"]
    pred_pos = s["pred_pos_rate"]
    amp = pred_pos - gold_pos  # >0: model more positive than data; <0: more negative

    amplifications.append((w, amp, n, gold_pos, pred_pos, s["acc"]))

# Sort by strongest over-positivization
over_pos = sorted(amplifications, key=lambda x: x[1], reverse=True)

print("Top words where model is MORE positive than the data (potential positive shortcuts):")
for w, amp, n, g, p, acc in over_pos[:top_k]:
    # The "+" sign flag prints the real sign. A hard-coded "+" prefix would
    # render a negative value as "+-0.050" when the list runs past the
    # positively amplified words.
    print(f"{w:20s} n={n:4d}  amp={amp:+.3f}  P(y=pos|w)={g:.3f}  P(ŷ=pos|w)={p:.3f}  Acc={acc:.3f}")

# Sort by strongest over-negativization
over_neg = sorted(amplifications, key=lambda x: x[1])

print("\nTop words where model is MORE negative than the data (potential negative shortcuts):")
for w, amp, n, g, p, acc in over_neg[:top_k]:
    print(f"{w:20s} n={n:4d}  amp={amp:.3f}  P(y=pos|w)={g:.3f}  P(ŷ=pos|w)={p:.3f}  Acc={acc:.3f}")


Top words where model is MORE positive than the data (potential positive shortcuts):
montana              n=  34  amp=+0.118  P(y=pos|w)=0.647  P(ŷ=pos|w)=0.765  Acc=0.882
formidable           n=  30  amp=+0.100  P(y=pos|w)=0.633  P(ŷ=pos|w)=0.733  Acc=0.900
fast-forward         n=  30  amp=+0.100  P(y=pos|w)=0.200  P(ŷ=pos|w)=0.300  Acc=0.900
gentle               n=  99  amp=+0.091  P(y=pos|w)=0.707  P(ŷ=pos|w)=0.798  Acc=0.909
damme                n=  56  amp=+0.089  P(y=pos|w)=0.286  P(ŷ=pos|w)=0.375  Acc=0.839
maturity             n=  34  amp=+0.088  P(y=pos|w)=0.824  P(ŷ=pos|w)=0.912  Acc=0.853
scarlett             n=  46  amp=+0.087  P(y=pos|w)=0.543  P(ŷ=pos|w)=0.630  Acc=0.870
heartfelt            n=  58  amp=+0.086  P(y=pos|w)=0.690  P(ŷ=pos|w)=0.776  Acc=0.879
semi                 n=  35  amp=+0.086  P(y=pos|w)=0.343  P(ŷ=pos|w)=0.429  Acc=0.914
sweetheart           n=  37  amp=+0.081  P(y=pos|w)=0.703  P(ŷ=pos|w)=0.784  Acc=0.919
atrocity             n=  37  amp=+0.081  P(y=