# B3 — Verbalizer (Semantic to Sentence)

Goal: Convert structured semantics (slots or tokens) into **human-readable, grounded sentences**.
- Deterministic templates: every sentence is assembled from the input slots, so nothing can be hallucinated.
- Easy to compare ground truth (GT) against A2/B2a predictions.
- Produces "caption-like" outputs for demos and qualitative analysis.


In [None]:
# Interaction-context codes -> labels. A label's position in the tuple is its
# numeric code; keys are stored as strings ("0" .. "12").
CONTEXT_MAP = {
    str(code): label
    for code, label in enumerate(
        (
            "Unknown",
            "Separation",
            "Biting",
            "Feeding",
            "Fighting",
            "Grooming",
            "Isolation",
            "Kissing",
            "Landing",
            "Mating_protest",
            "Threat_like",
            "General",
            "Sleeping",
        )
    )
}

# Pre-call action codes -> labels. A label's position in the tuple is its
# numeric code; keys are stored as strings.
PRE_ACTION_MAP = {
    str(code): label
    for code, label in enumerate(("Unknown", "Fly_in", "Present", "Crawl_in"))
}

# Post-call action codes -> labels. A label's position in the tuple is its
# numeric code; keys are stored as strings.
POST_ACTION_MAP = {
    str(code): label
    for code, label in enumerate(
        ("Unknown", "Cower", "Fly_away", "Stay", "Crawl_away")
    )
}

# Slot names in canonical order: the two bat IDs, the context, then each
# bat's action before ("pre") and after ("post") the call.
SLOTS = ["emitter", "addressee", "context"] + [
    f"{role}_{phase}"
    for phase in ("pre", "post")
    for role in ("emitter", "addressee")
]

def _s(x) -> str:
    """Return ``x`` converted to ``str`` with surrounding whitespace removed."""
    text = str(x)
    return text.strip()

def normalize_slot_value(slot: str, raw) -> str:
    """
    Turn a raw slot value into a label string.

    ``raw`` may be a numeric code (e.g. ``'3'`` or ``3``) or an already
    textual label (e.g. ``'Fighting'``, ``'Fly_away'``):

      - ``None`` or blank           -> ``"unknown"``
      - non-numeric text            -> returned stripped, otherwise unchanged
      - numeric, context slot       -> looked up in ``CONTEXT_MAP``
      - numeric, ``*_pre`` slot     -> looked up in ``PRE_ACTION_MAP``
      - numeric, ``*_post`` slot    -> looked up in ``POST_ACTION_MAP``
      - numeric, any other slot     -> returned as-is (emitter/addressee IDs)

    A numeric code missing from its table yields ``"unknown"``.
    """
    text = "" if raw is None else str(raw).strip()
    if not text:
        return "unknown"

    if text.isdigit():
        # Pick the code table that belongs to this slot, if it has one.
        if slot == "context":
            table = CONTEXT_MAP
        elif slot in ("emitter_pre", "addressee_pre"):
            table = PRE_ACTION_MAP
        elif slot in ("emitter_post", "addressee_post"):
            table = POST_ACTION_MAP
        else:
            # emitter / addressee IDs are numeric but have no table.
            return text
        return table.get(text, "unknown")

    # Already a textual label.
    return text

In [None]:
def _humanize_label(x: str) -> str:
    """
    Make a label readable: underscores become spaces and the text is
    lowercased, e.g. "Threat_like" -> "threat like",
    "Mating_protest" -> "mating protest".

    A falsy or blank label (including one made only of underscores and
    spaces) yields "unknown".
    """
    words = " ".join((x or "").split("_")).strip()
    return words.lower() if words else "unknown"

def action_phrase(actor: str, action: str, when: str) -> str:
    """
    Build one sentence describing what ``actor`` did around the call.

    actor:  'emitter' or 'addressee'
    action: action label, e.g. 'Fly_away'
    when:   'before' selects the "Before the call" template; any other
            value selects the "After the call" template.

    If the action humanizes to "unknown", a fixed "behavior ... is unknown"
    sentence is returned instead, with ``when`` interpolated verbatim.
    """
    label = _humanize_label(action)
    if label != "unknown":
        if when == "before":
            return f"Before the call, the {actor} was {label}."
        return f"After the call, the {actor} did: {label}."
    return f"The {actor}'s behavior {when} the call is unknown."

def context_sentence(context: str) -> str:
    """Return a one-sentence description of the interaction context."""
    label = _humanize_label(context)
    return (
        "The interaction context is unknown."
        if label == "unknown"
        else f"The interaction context is {label}."
    )

In [None]:
# Past-tense forms that the regular "-d" / "-ied" / "-ed" rules in past_tense()
# cannot produce. Keys are lowercase with spaces instead of underscores.
IRREGULAR_VERBS = {
    "fly": "flew",
    "fly in": "flew in",
    "fly away": "flew away",
    "crawl in": "crawled in",
    "crawl away": "crawled away",
    "stay": "stayed",
    "present": "was present",
    # "unknown" is not a verb; give it a phrase that fits after a subject.
    "unknown": "showed unknown behavior",
}

def past_tense(phrase: str) -> str:
    """
    Return a past-tense verb phrase for an action label.

    The label is lowercased, underscores become spaces and surrounding or
    repeated whitespace is collapsed. The result is meant to follow a
    subject, e.g. "the emitter " + past_tense("Fly_away").

    Resolution order:
      1. empty label                 -> the "unknown" phrase
      2. entry in IRREGULAR_VERBS    -> that entry
      3. already starts with "was "  -> returned unchanged
      4. ends in "e"                 -> append "d"
      5. ends in consonant + "y"     -> replace "y" with "ied"
      6. otherwise                   -> append "ed"
    """
    p = " ".join(phrase.lower().replace("_", " ").split())
    if not p:
        # Without this guard an empty label would render as the bare suffix "ed".
        return IRREGULAR_VERBS["unknown"]
    if p in IRREGULAR_VERBS:
        return IRREGULAR_VERBS[p]
    if p.startswith("was "):
        return p
    if p.endswith("e"):
        return p + "d"
    if len(p) > 1 and p.endswith("y") and p[-2] not in "aeiou":
        return p[:-1] + "ied"
    return p + "ed"

def humanize(x: str) -> str:
    """Lowercase ``x`` and turn underscores into spaces."""
    lowered = x.lower()
    return lowered.replace("_", " ")


In [None]:
def slots_to_sentence(slots: dict, style: str = "narrative") -> str:
    """
    Render a slot dict as a three-sentence English caption.

    slots: mapping that may contain the keys 'emitter', 'addressee',
           'context', 'emitter_pre', 'addressee_pre', 'emitter_post' and
           'addressee_post'. Missing keys are treated as unknown. Values may
           be numeric codes or labels (see normalize_slot_value).
    style: accepted for caller compatibility; only the narrative rendering
           is implemented, so the value is currently ignored.

    Returns the interaction sentence, the "before" sentence and the "after"
    sentence joined by single spaces.
    """
    def _label(slot: str) -> str:
        # Normalized, humanized label for one action/context slot.
        return humanize(normalize_slot_value(slot, slots.get(slot)))

    def _did(action: str) -> str:
        # "unknown" is not a verb, so it must not go through the regular
        # past-tense rules (which would yield "unknowned").
        if action.strip() in ("", "unknown"):
            return "showed unknown behavior"
        return past_tense(action)

    # Bat IDs are kept verbatim (not humanized).
    emitter = normalize_slot_value("emitter", slots.get("emitter"))
    addressee = normalize_slot_value("addressee", slots.get("addressee"))
    context = _label("context")

    # Sentence 1: interaction
    s1 = f"Bat {emitter} vocalized toward bat {addressee}"
    if context != "unknown":
        # Pick "an" before a vowel so e.g. "isolation" reads correctly.
        article = "an" if context.startswith(("a", "e", "i", "o", "u")) else "a"
        s1 += f" during {article} {context} interaction."
    else:
        s1 += "."

    # Sentence 2: before
    s2 = (
        f"Before vocalizing, the emitter {_did(_label('emitter_pre'))} "
        f"and the addressee {_did(_label('addressee_pre'))}."
    )

    # Sentence 3: after
    s3 = (
        f"Afterward, the emitter {_did(_label('emitter_post'))} "
        f"while the addressee {_did(_label('addressee_post'))}."
    )

    return " ".join([s1, s2, s3])


In [None]:
def tokens_to_slots(token_caption: str) -> dict:
    """
    Parse a whitespace-separated token caption into a slot dict.

    Example input:
      'EMITTER_111 ADDRESSEE_221 CONTEXT_Fighting E_PRE_Present A_PRE_Present E_POST_Fly_away A_POST_Stay'

    Each token is matched against the known prefixes; the text after the
    prefix becomes the slot value. Tokens with no known prefix are ignored,
    and a repeated slot keeps its last value. The result is compatible with
    slots_to_sentence.
    """
    # Checked in this order; the first matching prefix wins.
    prefix_to_slot = (
        ("EMITTER_", "emitter"),
        ("ADDRESSEE_", "addressee"),
        ("CONTEXT_", "context"),
        ("E_PRE_", "emitter_pre"),
        ("A_PRE_", "addressee_pre"),
        ("E_POST_", "emitter_post"),
        ("A_POST_", "addressee_post"),
    )

    slots = {}
    # split() with no argument already drops empty and whitespace-only pieces.
    for token in str(token_caption).split():
        for prefix, slot in prefix_to_slot:
            if token.startswith(prefix):
                slots[slot] = token[len(prefix):]
                break
    return slots

In [None]:
def build_b3_from_ground_truth(labels_raw: dict, style: str = "narrative"):
    """
    Verbalize every row of ground-truth labels.

    labels_raw: dict with an indexable value under each name in SLOTS. The
                row count is the length of the first value in the dict.
    style:      forwarded to slots_to_sentence.

    Returns a list with one caption per row.
    """
    first_column = next(iter(labels_raw.values()))
    return [
        slots_to_sentence(
            {name: labels_raw[name][row] for name in SLOTS},
            style=style,
        )
        for row in range(len(first_column))
    ]

def build_b3_from_a2_predictions(pred_labels_raw: dict, style: str = "narrative"):
    """
    Verbalize every row of A2 predicted labels.

    pred_labels_raw: dict with an indexable value under each name in SLOTS.
                     The row count is the length of the first value in the
                     dict.
    style:           forwarded to slots_to_sentence.

    Returns a list with one caption per row.
    """
    first_column = next(iter(pred_labels_raw.values()))
    return [
        slots_to_sentence(
            {name: pred_labels_raw[name][row] for name in SLOTS},
            style=style,
        )
        for row in range(len(first_column))
    ]

def build_b3_from_b2a_token_captions(token_captions: list, style: str = "narrative"):
    """
    Verbalize B2a token captions.

    Each caption is parsed with tokens_to_slots and rendered with
    slots_to_sentence (``style`` is forwarded). Returns one sentence per
    input caption, in input order.
    """
    return [
        slots_to_sentence(tokens_to_slots(caption), style=style)
        for caption in token_captions
    ]

In [None]:
# Ground truth B3
# NOTE(review): labels_raw, pred_labels_raw and b2a_token_captions are not
# defined in this section; presumably earlier cells create them.
b3_gt = build_b3_from_ground_truth(labels_raw, style="narrative")

print("GT example:\n", b3_gt[0])

b3_a2 = build_b3_from_a2_predictions(pred_labels_raw, style="narrative")

b3_b2a = build_b3_from_b2a_token_captions(b2a_token_captions, style="narrative")

# Compare a few indices. Indices beyond the available captions are skipped so
# the cell does not raise IndexError on datasets with fewer than 101 rows.
idxs = [i for i in (0, 1, 2, 100) if i < len(b3_gt)]
for i in idxs:
    print("\n" + "="*80)
    print("Index:", i)
    print("GT :", b3_gt[i])
    # The globals() checks let the A2 / B2a lines above be commented out.
    if 'b3_a2' in globals() and i < len(b3_a2): print("A2 :", b3_a2[i])
    if 'b3_b2a' in globals() and i < len(b3_b2a): print("B2a:", b3_b2a[i])

GT example:
 Bat 216 vocalized toward bat 221 during a general interaction. Before vocalizing, the emitter was present and the addressee crawled in. Afterward, the emitter stayed while the addressee stayed.

Index: 0
GT : Bat 216 vocalized toward bat 221 during a general interaction. Before vocalizing, the emitter was present and the addressee crawled in. Afterward, the emitter stayed while the addressee stayed.
A2 : Bat 216 vocalized toward bat 208 during a feeding interaction. Before vocalizing, the emitter was present and the addressee crawled in. Afterward, the emitter crawled away while the addressee stayed.
B2a: Bat 111 vocalized toward bat 221 during a fighting interaction. Before vocalizing, the emitter was present and the addressee was present. Afterward, the emitter flyed while the addressee stayed.

Index: 1
GT : Bat 215 vocalized toward bat 220 during a sleeping interaction. Before vocalizing, the emitter was present and the addressee was present. Afterward, the emitter sta