In [6]:
from ipywidgets import Button, HBox, VBox, Output
from IPython.display import display, clear_output
import json
import os
import textwrap

# Tool for Manual Tagging of Experiments

In [7]:
# Window of zero-based line indexes to tag in every file: [start, end),
# i.e. the start index is included and the end index is excluded.
TAG_RANGE = [10, 20]

# Model whose responses are tagged; keep exactly one assignment active.
# MODEL_NAME = "Meta-Llama-3-3-70B-Instruct-AWQ-INT4"
# MODEL_NAME = "gpt-oss-20b"
MODEL_NAME = "Qwen3-30B-A3B-GPTQ-Int4"

### Set-up

In [8]:
# Set paths
DATA_PATH = f'../data/{MODEL_NAME}/chat/'
OUTPUT_FILE = os.path.join(DATA_PATH, "human_labels.jsonl")   # Where results are saved

# Load previously labeled entries (if any) so they are not presented again.
# Every key is a (dataset, file, line) tuple read from one saved record.
labeled_set = set()
if os.path.exists(OUTPUT_FILE):
    with open(OUTPUT_FILE, "r") as f:
        for line in f:
            try:
                entry = json.loads(line)
                key = (entry["dataset"], entry["file"], entry["line"])
                labeled_set.add(key)
            except (json.JSONDecodeError, KeyError, TypeError):
                # Ignore corrupted lines: text that is not valid JSON, or valid
                # JSON that is not a record carrying the three key fields.
                continue

# Collect every .jsonl file found in every dataset sub-directory of DATA_PATH.
# Each item is [full_path, dataset_name, file_name].
jsonl_files = []
# os.listdir returns names in arbitrary order; sorting keeps the tagging order
# the same from one run to the next.
for dataset in sorted(os.listdir(DATA_PATH)):
    dataset_dir = os.path.join(DATA_PATH, dataset)
    if not os.path.isdir(dataset_dir):
        continue
    # Extend (not assign) so that files from earlier dataset directories are
    # kept instead of being replaced by the last directory visited.
    jsonl_files.extend(
        [os.path.join(dataset_dir, f), dataset, f]
        for f in sorted(os.listdir(dataset_dir))
        if f.endswith(".jsonl")
    )

# Prepare all iterations: one tuple per entry that still needs a label, shaped as
# (dataset, file, line_index, parsed_object, position_in_this_run).
all_entries = []
running_count = 0
process_already = 0   # in-range lines that already have a saved label
range_start, range_end = TAG_RANGE[0], TAG_RANGE[1]
for fpath, fdataset, fname in jsonl_files:
    with open(fpath, 'r') as f:
        for i, line in enumerate(f):
            if i < range_start:
                # Before the window selected for this run
                continue
            if i >= range_end:
                # Past the window: indexes only grow, so nothing later in
                # this file can be in scope.
                break
            if (fdataset, fname, i) in labeled_set:
                # Already processed; a set lookup replaces scanning every key
                process_already += 1
                continue
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                print(f"Skipping invalid JSON in dataset {fdataset}, file {fname}, line {i}")
                continue
            all_entries.append((fdataset, fname, i, obj, running_count))
            running_count += 1
total_run = len(all_entries)

In [9]:
# Build the widgets: an output area and one button per available label
out = Output()
btn_correct = Button(button_style="success", description="Correct")
btn_wrong, btn_refusal, btn_qst_mistake = (
    Button(description=caption, button_style="danger")
    for caption in ("Wrong", "Refusal", "Question Mistake")
)
buttons_box = HBox([btn_correct, btn_wrong, btn_refusal, btn_qst_mistake])
main_box = VBox([buttons_box, out])

# Session state shared by the callbacks
current_entry = None            # tuple currently on screen, if any
entry_iter = iter(all_entries)  # walks the entries still to be labeled
results = []                    # label records produced during this session

def save_entry_to_file(entry):
    """Append `entry` to OUTPUT_FILE as one JSON document on its own line."""
    with open(OUTPUT_FILE, "a") as sink:
        # print() supplies the trailing newline that terminates the record
        print(json.dumps(entry), file=sink)

def show_next_entry():
    """Advance to the next entry to label and print it inside `out`.

    Takes the next tuple from `entry_iter`, stores it in the global
    `current_entry`, and prints its position in the run, file name, line
    index, question, target and first response (each response line wrapped
    to 120 columns).

    When `entry_iter` is exhausted, `current_entry` is reset to None and a
    completion message followed by every record in `results` is printed.

    Raises:
        ValueError: if the entry has neither 'target' nor 'answer', or its
            'doc' has no 'question'. It is raised inside the `out` context.
    """
    global current_entry
    try:
        current_entry = next(entry_iter)
    except StopIteration:
        # Forget the last entry: otherwise every further click would store
        # another label for it.
        current_entry = None
        with out:
            clear_output()
            print("✅ All lines processed.")
            print("Final results:")
            for r in results:
                print(r)
        return

    _, filename, line_number, obj, running_count = current_entry

    with out:
        clear_output(wait=True)
        print(f"Element {running_count+1} of {total_run}")
        print(f"File: {filename}\tQuestion: {line_number}")
        # The expected answer is read from 'target', falling back to 'answer'
        target = obj.get('target', None)
        if target is None:
            target = obj.get('answer', None)
        if target is None:
            raise ValueError(f"Target not found in: File: {filename}\tQuestion: {line_number}")
        question = obj['doc'].get('question', None)
        if question is None:
            raise ValueError(f"Question not found in: File: {filename}\tQuestion: {line_number}")
        print(f"\nQuestion: {question}")
        print(f"\nTarget: {target}")
        # Wrap line by line so the response's own line breaks are preserved
        lines = obj['resps'][0][0].splitlines()
        wrapped = "\n".join(textwrap.fill(line, width=120) for line in lines)
        print("\nResponse:\n", wrapped)

def on_click(label):
    """Build a button callback that stores `label` for the entry on screen.

    The returned callback ignores its argument. If `current_entry` is set, it
    appends a record with the dataset, file, line and label to `results` and
    writes it out with `save_entry_to_file`. It then always calls
    `show_next_entry`.
    """
    def handler(_button):
        if current_entry:
            ds_name, file_name, line_idx, _obj, _position = current_entry
            record = dict(dataset=ds_name, file=file_name, line=line_idx, label=label)
            results.append(record)
            save_entry_to_file(record)
        show_next_entry()
    return handler

# Attach to every button a callback that records that button's label
for _button, _label in (
    (btn_correct, "Correct"),
    (btn_wrong, "Wrong"),
    (btn_refusal, "Refusal"),
    (btn_qst_mistake, "Question Mistake"),
):
    _button.on_click(on_click(_label))

# Run the UI

In [10]:
# Display interface once; show_next_entry prints into the `out` widget it contains
display(main_box)
# Show the first entry to label (or the completion message if there is none)
show_next_entry()


VBox(children=(HBox(children=(Button(button_style='success', description='Correct', style=ButtonStyle()), Butt…