In [1]:
import json
import pandas as pd
from pathlib import Path
import ipywidgets as widgets
from IPython.display import display, Markdown

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [7]:
# Pick the most recent responses_*.jsonl file from the outputs directory.
# Sorting by name in descending order puts the newest first, presumably because
# the file names embed a sortable timestamp (e.g. responses_20250709_1906.jsonl).
output_dir = Path("../outputs")
output_files = sorted(output_dir.glob("responses_*.jsonl"), reverse=True)
output_path = output_files[0] if output_files else None

if output_path is None:
    # Report the directory that was actually searched so the message is actionable.
    raise FileNotFoundError(
        f"No 'responses_*.jsonl' files found in {output_dir.resolve()}"
    )

print(f"Loaded file: {output_path.name}")

Loaded file: responses_20250709_1906.jsonl


In [None]:
# Parse the JSONL file (one JSON object per line) into a list of records,
# then build the DataFrame in a single step.
rows = []
with open(output_path, "r", encoding="utf-8") as f:
    for line in f:
        # Blank lines (e.g. an extra newline at the end of the file) are not
        # valid JSON and would make json.loads raise, so skip them.
        if line.strip():
            rows.append(json.loads(line))

df = pd.DataFrame(rows)
df.head()

Unnamed: 0,timestamp,model,model_id,prompt,category,note,response
0,2025-07-09T19:06:46,gemma,gemma2-9b-it,"Translate the sentence 'Good morning, my frien...",translation,Tests indigenous language support and translat...,"There isn't a single perfect translation for ""..."
1,2025-07-09T19:06:47,llama,llama-3.1-8b-instant,"Translate the sentence 'Good morning, my frien...",translation,Tests indigenous language support and translat...,"To translate the sentence 'Good morning, my fr..."
2,2025-07-09T19:06:48,gemma,gemma2-9b-it,Translate this sentence to formal Spanish: 'Oy...,translation,Tests dialect and formality shift from colloqu...,"Here are a few ways to translate ""Oye, pásame ..."
3,2025-07-09T19:06:48,llama,llama-3.1-8b-instant,Translate this sentence to formal Spanish: 'Oy...,translation,Tests dialect and formality shift from colloqu...,"La traducción formal de la oración es: ""Escuch..."
4,2025-07-09T19:06:49,gemma,gemma2-9b-it,Translate the phrase 'Home is where the heart ...,translation,"Tests abstract, idiomatic translation handling.","The most common translation of ""Home is where ..."


In [None]:
# Reshape to one row per (prompt, category, note) with one column per model,
# so the models' responses sit side by side. When the same prompt/model pair
# occurs more than once, only the first response is kept.
pivot = pd.pivot_table(
    df,
    values="response",
    index=["prompt", "category", "note"],
    columns="model",
    aggfunc="first",
)
# Turn the grouping keys back into ordinary columns.
pivot = pivot.reset_index()

In [None]:
# Build a dropdown for choosing which category to show ("All" disables filtering).
# Records without a category show up as NaN in the column; drop them first,
# because sorted() cannot compare NaN (a float) with the category strings.
categories = sorted(df["category"].dropna().unique())
dropdown = widgets.Dropdown(options = ["All"] + categories, description = "Category:")

In [16]:
def display_responses(selected_category):
    """Render the prompts of one category with each model's response.

    Reads the module-level ``pivot`` frame (one row per prompt, one column per
    model) and the module-level ``df`` frame (for the list of model names), and
    emits Markdown through ``IPython.display.display``.

    Args:
        selected_category: Category to show. The special value ``"All"``
            shows every row of ``pivot``; any other value keeps only the rows
            whose ``category`` column equals it.

    Returns:
        None. All output is produced via ``display``.
    """
    display(Markdown(f"## Prompt Comparison – Category: `{selected_category}`"))

    filtered = pivot if selected_category == "All" else pivot[pivot["category"] == selected_category]
    models = df["model"].unique()

    for _, row in filtered.iterrows():
        display(Markdown(f"### Prompt: {row['prompt']}"))
        display(Markdown(f"- **Category:** {row['category']}  \n- **Note:** {row['note']}"))

        for model in models:
            content = row.get(model)
            # A model that did not answer this prompt appears as NaN in the
            # pivot. NaN is truthy, so a plain truthiness check would let it
            # through and .strip() would fail on a float. Only render real,
            # non-blank text.
            if not isinstance(content, str):
                continue
            text = content.strip()
            if not text:
                continue
            display(Markdown(f"<br> **{model.upper()}:**\n\n {text}\n"))
        display(Markdown("---\n\n"))

# Wire the dropdown to the renderer. The trailing semicolon keeps the cell from
# echoing interact's return value (the function repr) below the widget.
widgets.interact(display_responses, selected_category = dropdown);

interactive(children=(Dropdown(description='Category:', index=1, options=('All', 'critique', 'logic', 'math', …

<function __main__.display_responses(selected_category)>