<a href="https://colab.research.google.com/github/suleiman-odeh/NLP_Project_Team16/blob/main/Llama_3/zero_shot_indirect_Llama_3_8B.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip -q install -U transformers bitsandbytes accelerate huggingface_hub scikit-learn tqdm

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.0/44.0 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.0/12.0 MB[0m [31m42.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.1/59.1 MB[0m [31m15.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.9/8.9 MB[0m [31m27.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
"""
Model loader (4-bit).
Keep output cleared before pushing to GitHub.
"""

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from huggingface_hub import login

login()

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

print(f"Loading {model_id} in 4-bit...")
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

model.eval()
print("Model loaded.")


In [7]:
"""
Load QEvasion cleaned file and select test split.
"""

import pandas as pd

DATA_FILE = "QEvasion_cleaned.jsonl"

df = pd.read_json(DATA_FILE, lines=True)
test_df = df[df["split_type"] == "test"].copy()

print("Test rows:", len(test_df))
print(test_df[["question", "cleaned_answer", "annotator1_id", "annotator2_id", "annotator3_id"]].head(2))


Test rows: 308
                                               question  \
3448   Inquiring about the status or information reg...   
3449  Will you invite them to the White House to neg...   

                                         cleaned_answer  annotator1_id  \
3448  Well, the world has made it clear that these t...              4   
3449  I think that anytime and anyplace that they ar...              3   

      annotator2_id  annotator3_id  
3448              6              4  
3449              6              6  


In [8]:
"""
Indirect prompt.
We ask for a single number 1..9 to keep parsing stable.
"""

import re

# Map model output code dataset evasion_id
CODE_TO_EVASION_ID = {
    1: 5,  # Explicit
    2: 7,  # Implicit
    3: 6,  # General
    4: 8,  # Partial/half-answer
    5: 4,  # Dodging
    6: 3,  # Deflection
    7: 2,  # Declining to answer
    8: 0,  # Claims ignorance
    9: 1,  # Clarification
}

ID_TO_LABEL = {
    0: "Claims ignorance",
    1: "Clarification",
    2: "Declining to answer",
    3: "Deflection",
    4: "Dodging",
    5: "Explicit",
    6: "General",
    7: "Implicit",
    8: "Partial/half-answer",
}

def make_indirect_prompt(question, answer):
    return f"""You are given an interview question and answer.
Classify the answer using exactly one of the following evasion strategies:

1. Explicit
2. Implicit
3. General
4. Partial/half-answer
5. Dodging
6. Deflection
7. Declining to answer
8. Claims ignorance
9. Clarification

Return ONLY the number (1-9). No explanation.

Answer: {answer}

Question: {question}

Taxonomy code:"""

def parse_indirect_output(text):
    t = str(text).strip().lower()

    # Grab first digit 1..9 anywhere in the output
    m = re.search(r"\b([1-9])\b", t)
    if not m:
        return -1

    code = int(m.group(1))
    return CODE_TO_EVASION_ID.get(code, -1)


In [9]:
"""
Zero-shot loop.
We keep max_new_tokens small because we want only a digit.
"""

import torch
from tqdm import tqdm

@torch.no_grad()
def llama_generate(prompt, max_new_tokens=5):
    messages = [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    inputs = tokenizer([text], return_tensors="pt").to(model.device)

    out = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=False,
        temperature=0.1,
        pad_token_id=tokenizer.eos_token_id,
        use_cache=True
    )

    # Decode only new tokens
    gen = out[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(gen, skip_special_tokens=True).strip()

pred_ids = []
raw_outs = []

print("Running Llama-3 indirect zero-shot...")

debug_n = 5
for i, row in tqdm(test_df.reset_index(drop=True).iterrows(), total=len(test_df), disable=False):
    prompt = make_indirect_prompt(row["question"], row["cleaned_answer"])
    raw = llama_generate(prompt, max_new_tokens=5)
    pred = parse_indirect_output(raw)

    if i < debug_n:
        print(f"\n[DEBUG {i+1}] raw={repr(raw)} | pred_id={pred} ({ID_TO_LABEL.get(pred,'?')})")

    raw_outs.append(raw)
    pred_ids.append(pred)

test_df["raw_output"] = raw_outs
test_df["pred_evasion_id"] = pred_ids
test_df["pred_evasion_label"] = test_df["pred_evasion_id"].map(ID_TO_LABEL)

print("\nDone.")
print("Parsing failures (-1):", sum(p == -1 for p in pred_ids))

Running Llama-3 indirect zero-shot...


  0%|          | 0/308 [00:00<?, ?it/s]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
  0%|          | 1/308 [00:03<15:25,  3.02s/it]


[DEBUG 1] raw='6' | pred_id=3 (Deflection)


  1%|          | 2/308 [00:05<12:39,  2.48s/it]


[DEBUG 2] raw='4' | pred_id=8 (Partial/half-answer)


  1%|          | 3/308 [00:08<15:16,  3.00s/it]


[DEBUG 3] raw='4' | pred_id=8 (Partial/half-answer)


  1%|▏         | 4/308 [00:12<16:36,  3.28s/it]


[DEBUG 4] raw='4' | pred_id=8 (Partial/half-answer)


  2%|▏         | 5/308 [00:14<14:29,  2.87s/it]


[DEBUG 5] raw='4' | pred_id=8 (Partial/half-answer)


100%|██████████| 308/308 [19:12<00:00,  3.74s/it]


Done.
Parsing failures (-1): 0





In [10]:
"""
Evaluation using annotator1_id/annotator2_id/annotator3_id.
Match-any scoring logic.
"""

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import pandas as pd

valid_df = test_df[test_df["pred_evasion_id"] != -1].copy()
print("Valid rows:", len(valid_df), "/", len(test_df))

def pick_ground_truth(row):
    pred = int(row["pred_evasion_id"])
    humans = [row["annotator1_id"], row["annotator2_id"], row["annotator3_id"]]

    human_ids = []
    for h in humans:
        if pd.notna(h):
            try:
                human_ids.append(int(h))
            except:
                pass

    if pred in human_ids:
        return pred
    return human_ids[0] if human_ids else -1

valid_df["true_evasion_id"] = valid_df.apply(pick_ground_truth, axis=1)
final_df = valid_df[valid_df["true_evasion_id"] != -1].copy()

y_true = final_df["true_evasion_id"].astype(int)
y_pred = final_df["pred_evasion_id"].astype(int)

acc = accuracy_score(y_true, y_pred)
prec_macro = precision_score(y_true, y_pred, average="macro", zero_division=0)
rec_macro  = recall_score(y_true, y_pred, average="macro", zero_division=0)
f1_macro   = f1_score(y_true, y_pred, average="macro")

prec_weighted = precision_score(y_true, y_pred, average="weighted", zero_division=0)
rec_weighted  = recall_score(y_true, y_pred, average="weighted", zero_division=0)
f1_weighted   = f1_score(y_true, y_pred, average="weighted")

print("\n" + "="*60)
print("INDIRECT RESULTS | Llama-3-8B-Instruct | Zero-shot | TEST")
print("="*60)
print(f"Accuracy:           {acc:.4f}")
print("-"*30)
print(f"Macro Precision:    {prec_macro:.4f}")
print(f"Macro Recall:       {rec_macro:.4f}")
print(f"Macro F1:           {f1_macro:.4f}")
print("-"*30)
print(f"Weighted Precision: {prec_weighted:.4f}")
print(f"Weighted Recall:    {rec_weighted:.4f}")
print(f"Weighted F1:        {f1_weighted:.4f}")
print("-"*60)

target_names = [
    "Claims ignorance (0)",
    "Clarification (1)",
    "Declining to answer (2)",
    "Deflection (3)",
    "Dodging (4)",
    "Explicit (5)",
    "General (6)",
    "Implicit (7)",
    "Partial/half-answer (8)",
]

print(classification_report(y_true, y_pred, labels=list(range(9)), target_names=target_names, zero_division=0))

Valid rows: 308 / 308

INDIRECT RESULTS | Llama-3-8B-Instruct | Zero-shot | TEST
Accuracy:           0.2045
------------------------------
Macro Precision:    0.3012
Macro Recall:       0.2174
Macro F1:           0.1544
------------------------------
Weighted Precision: 0.4870
Weighted Recall:    0.2045
Weighted F1:        0.2005
------------------------------------------------------------
                         precision    recall  f1-score   support

   Claims ignorance (0)       0.25      0.36      0.30        11
      Clarification (1)       0.00      0.00      0.00         4
Declining to answer (2)       0.50      0.20      0.29        10
         Deflection (3)       0.06      0.03      0.04        29
            Dodging (4)       0.75      0.05      0.10        56
           Explicit (5)       0.73      0.08      0.15        95
            General (6)       0.00      0.00      0.00        23
           Implicit (7)       0.39      0.55      0.46        74
Partial/half-answer (

In [11]:
"""
evaluation derived from the 9-class evasion predictions.
we map evasion_id to clarity_id then score to clarity_id.
"""

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

def evasion_to_clarity(eid: int) -> int:
    # clarity_id: 0 Clear Reply, 1 Ambivalent, 2 Clear Non-Reply
    if eid == 5:                 # Explicit
        return 0
    if eid in [7, 6, 8, 4, 3]:   # Implicit
        return 1
    if eid in [2, 0, 1]:         # Declining
        return 2
    return -1

# create predicted clarity from predicted evasion
clar_df = test_df[test_df["pred_evasion_id"] != -1].copy()
clar_df["pred_clarity_id"] = clar_df["pred_evasion_id"].astype(int).apply(evasion_to_clarity)
clar_df["true_clarity_id"] = clar_df["clarity_id"].astype(int)

# safety filter should keep all rows
clar_df = clar_df[clar_df["pred_clarity_id"] != -1].copy()

y_true_c = clar_df["true_clarity_id"]
y_pred_c = clar_df["pred_clarity_id"]

acc_c = accuracy_score(y_true_c, y_pred_c)

prec_macro_c = precision_score(y_true_c, y_pred_c, average="macro", zero_division=0)
rec_macro_c  = recall_score(y_true_c, y_pred_c, average="macro", zero_division=0)
f1_macro_c   = f1_score(y_true_c, y_pred_c, average="macro")

prec_weighted_c = precision_score(y_true_c, y_pred_c, average="weighted", zero_division=0)
rec_weighted_c  = recall_score(y_true_c, y_pred_c, average="weighted", zero_division=0)
f1_weighted_c   = f1_score(y_true_c, y_pred_c, average="weighted")

print("\n" + "="*60)
print("TASK 1: CLARITY (3-Class) — mapped from indirect (9-class) predictions")
print("="*60)
print(f"Accuracy:           {acc_c:.4f}")
print("-"*30)
print(f"Macro Precision:    {prec_macro_c:.4f}")
print(f"Macro Recall:       {rec_macro_c:.4f}")
print(f"Macro F1:           {f1_macro_c:.4f}")
print("-"*30)
print(f"Weighted Precision: {prec_weighted_c:.4f}")
print(f"Weighted Recall:    {rec_weighted_c:.4f}")
print(f"Weighted F1:        {f1_weighted_c:.4f}")
print("-"*60)

print(classification_report(
    y_true_c,
    y_pred_c,
    target_names=["Clear Reply (0)", "Ambivalent (1)", "Clear Non-Reply (2)"],
    zero_division=0
))



TASK 1: CLARITY (3-Class) — mapped from indirect (9-class) predictions
Accuracy:           0.6623
------------------------------
Macro Precision:    0.5092
Macro Recall:       0.4358
Macro F1:           0.4081
------------------------------
Weighted Precision: 0.6305
Weighted Recall:    0.6623
Weighted F1:        0.5905
------------------------------------------------------------
                     precision    recall  f1-score   support

    Clear Reply (0)       0.55      0.08      0.13        79
     Ambivalent (1)       0.70      0.93      0.80       206
Clear Non-Reply (2)       0.28      0.30      0.29        23

           accuracy                           0.66       308
          macro avg       0.51      0.44      0.41       308
       weighted avg       0.63      0.66      0.59       308



In [13]:
"""
Save outputs for later comparison
"""

test_df.to_csv("full_test_dataset_zs_indirect_llama3.csv", index=True)
print("Saved: full_test_dataset_zs_indirect_llama3.csv")

cols = [
    "pred_evasion_id",
    "pred_evasion_label",
    "pred_clarity_id_from_indirect",
    "clarity_id",
    "annotator1_id",
    "annotator2_id",
    "annotator3_id",
    "raw_output",
]

# Keep only columns that actually exist (prevents KeyError)
cols = [c for c in cols if c in test_df.columns]

mini_df = test_df[cols].copy()
mini_df.to_csv("predictions_comparison_indirect_llama3.csv", index=True)
print("Saved: predictions_comparison_indirect_llama3.csv")
print("Mini columns:", cols)


Saved: full_test_dataset_zs_indirect_llama3.csv
Saved: predictions_comparison_indirect_llama3.csv
Mini columns: ['pred_evasion_id', 'pred_evasion_label', 'clarity_id', 'annotator1_id', 'annotator2_id', 'annotator3_id', 'raw_output']
