# LLM Labeling Demo (API-only)

This notebook calls the OpenAI API directly.

Paste your API key, run the cells in order, and the notebook will:

1. label each review with structured JSON (`is_political`, `reasoning`), and
2. run a second-pass rewrite to enforce a word limit on `reasoning`.


In [None]:
# === API key (paste yours) ===
# Leaving the placeholder (or an empty value) here keeps the environment
# untouched, so an OPENAI_API_KEY that is already exported is not overwritten.
API_KEY = "PASTE_YOUR_KEY_HERE"  # e.g., "sk-..."

import os

# Export the key only when a real one was pasted above.
if API_KEY != "PASTE_YOUR_KEY_HERE" and API_KEY:
    os.environ.update(OPENAI_API_KEY=API_KEY)

# Recommended:
# !pip -q install -U openai pydantic pandas


In [None]:
from pathlib import Path
import pandas as pd
import random

# Location of the demo review CSV (relative to the working directory).
# The file is generated later in this cell when it does not exist yet.
DATA_PATH = Path("fake_movie_reviews.csv")

def maybe_make_fake_dataset(path: Path, n: int = 30, seed: int = 7):
    """Write a small synthetic movie-review dataset to *path* as CSV.

    Each row is drawn from one of two fixed sentence pools: with probability
    0.35 a "political" sentence (``true_is_political`` = 1), otherwise a
    "non-political" one (``true_is_political`` = 0). With probability 0.25 a
    short filler suffix is appended to the text.

    The file is always (re)written; checking whether it already exists is
    left to the caller.

    Args:
        path: Destination CSV file.
        n: Number of reviews to generate; ``review_id`` runs from 1 to ``n``.
        seed: Seed for the generator, so the same arguments always produce
            the same file.
    """
    # A private generator yields the same stream as ``random.seed(seed)``
    # followed by module-level calls, but does not reseed the process-wide
    # RNG behind the caller's back.
    rng = random.Random(seed)
    political = [
        "This is woke propaganda dressed as a movie.",
        "Another SJW agenda push. Hard pass.",
        "Left/right culture-war nonsense ruined the plot.",
        "Pure identity politics. Not cinema.",
        "Party politics in superhero form. Obvious agenda.",
        "This felt like partisan messaging, not storytelling.",
    ]
    nonpolitical = [
        "Bad pacing and weak dialogue.",
        "The acting was fine but the plot was messy.",
        "Great visuals, mediocre script.",
        "Too long; the third act dragged.",
        "Sound mixing was awful in my theater.",
        "Fun movie, not perfect, but enjoyable.",
    ]
    suffixes = ["Seriously.", "LOL.", "Just my opinion."]
    columns = ["review_id", "review_text", "true_is_political"]

    rows = []
    for i in range(1, n + 1):
        if rng.random() < 0.35:
            txt = rng.choice(political)
            y = 1
        else:
            txt = rng.choice(nonpolitical)
            y = 0
        if rng.random() < 0.25:
            txt += " " + rng.choice(suffixes)
        rows.append({"review_id": i, "review_text": txt.strip(), "true_is_political": y})

    # Explicit columns keep the header row even when n == 0; without them an
    # empty frame is written as a header-less file that read_csv cannot load.
    pd.DataFrame(rows, columns=columns).to_csv(path, index=False)

# Generate the synthetic dataset only when the CSV is absent, so a file that
# already exists at DATA_PATH is reused unchanged.
if not DATA_PATH.exists():
    maybe_make_fake_dataset(DATA_PATH)

# Load the reviews; the trailing expression previews the first rows when the
# cell is run in a notebook.
df = pd.read_csv(DATA_PATH)
df.head()


## Rubric + structured output

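We use the SDK's `parse()` helper with a Pydantic model as the response format, so the reply comes back as a validated object and we never call `json.loads(...)` ourselves.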


In [None]:
import re
from typing import Literal

from openai import OpenAI
from pydantic import BaseModel, Field

# Shared API client used by every request in this notebook.
client = OpenAI()  # reads OPENAI_API_KEY
# Model name passed to each chat-completion call.
MODEL = "gpt-5-mini"  # or "gpt-5"

# Labeling rule that build_prompt() interpolates into every labeling prompt.
# A review is political only when it uses explicit political framing; merely
# "preachy" or "message"-driven reviews are labeled 0.
RUBRIC = (
  "Label is_political=1 ONLY if the review explicitly uses political / ideology / culture-war framing "
  "(e.g., woke, SJW, propaganda, left/right, party politics, identity politics, agenda). "
  "If it's just 'preachy' or 'message' without explicit politics, label 0."
)

# Structured-output schema for one labeled review; passed as `response_format`
# so the SDK returns a validated instance instead of raw JSON text.
# (Documented with comments rather than a class docstring because Pydantic
# copies a model's docstring into the JSON schema that is sent to the API.)
class PoliticalLabel(BaseModel):
    # Literal restricts the label to exactly 0 or 1, both in the generated
    # JSON schema and in local validation; a plain `int` accepted any integer.
    is_political: Literal[0, 1] = Field(..., description="0 or 1")
    # Free-text justification; its length is not constrained by the schema.
    reasoning: str = Field(..., description="brief explanation")

def build_prompt(review_text: str, max_words: int = 50) -> str:
    """Build the user prompt for labeling a single review.

    The prompt contains the module-level ``RUBRIC``, the task description
    with the expected JSON fields, and the review text itself.

    Args:
        review_text: The review to label.
        max_words: Soft limit on the reasoning length that the prompt asks
            for. Defaults to 50, which reproduces the original prompt text.

    Returns:
        The complete prompt string.
    """
    return (
        f"Rubric: {RUBRIC}\n\n"
        # Not an f-string: the braces below are literal text in the prompt.
        "Task: Read the review and output JSON with fields {is_political, reasoning}.\n"
        "- is_political must be 0 or 1\n"
        f"- reasoning should be <= {max_words} words (soft)\n\n"
        f"Review: {review_text}"
    )

def word_count(s: str) -> int:
    """Return how many words *s* contains.

    A word is a run of regex word characters, so punctuation splits words:
    "it's" and "left/right" each count as two.
    """
    return sum(1 for _ in re.finditer(r"\b\w+\b", s))


In [None]:
def label_one(review_text: str) -> dict:
    """Label one review with the model and return the result as a plain dict.

    Sends the prompt from ``build_prompt`` to ``MODEL`` and asks the SDK to
    parse the reply into ``PoliticalLabel``.

    Args:
        review_text: The review to classify.

    Returns:
        The parsed label converted to a dict.

    Raises:
        RuntimeError: If the response message carries a refusal.
    """
    user_prompt = build_prompt(review_text)
    response = client.chat.completions.parse(
        model=MODEL,
        messages=[
            {"role": "system", "content": "Return JSON only."},
            {"role": "user", "content": user_prompt},
        ],
        response_format=PoliticalLabel,
    )
    message = response.choices[0].message

    refusal = getattr(message, "refusal", None)
    if refusal:
        raise RuntimeError(refusal)

    label = message.parsed
    # Prefer model_dump(); fall back to dict() when the object lacks it.
    if hasattr(label, "model_dump"):
        return label.model_dump()
    return label.dict()

def _truncate_to_word_limit(text: str, max_words: int) -> str:
    """Cut *text* right after its ``max_words``-th word; return it unchanged if short enough."""
    if max_words <= 0:
        return ""
    # Same pattern as word_count(), so the truncated text is guaranteed to
    # satisfy word_count(text) <= max_words. Splitting on whitespace instead
    # would under-count tokens such as "it's" or "left/right".
    word_ends = [m.end() for m in re.finditer(r"\b\w+\b", text)]
    if len(word_ends) <= max_words:
        return text
    return text[: word_ends[max_words - 1]]


def enforce_reasoning_limit(result: dict, max_words: int = 50) -> dict:
    """Run a second model pass that shortens the reasoning, then enforce the limit locally.

    The first-pass ``result`` is replayed to the model as an assistant turn,
    followed by a request to rewrite the reasoning within ``max_words`` words.
    Two local guards are applied to the reply, because the instructions in
    the prompt are only a request:

    * ``is_political`` is reset to the first-pass value, so the rewrite pass
      can never flip the label;
    * ``reasoning`` is truncated to at most ``max_words`` words, counted the
      same way as ``word_count``.

    Args:
        result: First-pass output, normally ``{"is_political", "reasoning"}``.
        max_words: Maximum number of words allowed in the returned reasoning.

    Returns:
        A new dict with the rewritten (and, if needed, truncated) reasoning.

    Raises:
        RuntimeError: If the response message carries a refusal.
    """
    prompt_2 = f"Rewrite reasoning to <= {max_words} words. Keep is_political unchanged. JSON only."
    completion = client.chat.completions.parse(
        model=MODEL,
        messages=[
            {"role": "system", "content": "Return JSON only."},
            # NOTE(review): str(result) is a Python dict repr, not strict JSON;
            # kept as-is so the text sent to the model does not change.
            {"role": "assistant", "content": str(result)},
            {"role": "user", "content": prompt_2},
        ],
        response_format=PoliticalLabel,
    )
    msg = completion.choices[0].message
    if getattr(msg, "refusal", None):
        raise RuntimeError(msg.refusal)
    parsed = msg.parsed
    out = parsed.model_dump() if hasattr(parsed, "model_dump") else parsed.dict()

    # Hard guard 1: the label decided in the first pass is authoritative.
    if "is_political" in result:
        out["is_political"] = result["is_political"]

    # Hard guard 2: enforce the word limit even if the model ignored it.
    out["reasoning"] = _truncate_to_word_limit(out["reasoning"], max_words)
    return out


In [None]:
# Label every review: the first pass produces label + reasoning, the second
# pass shortens the reasoning. One output record is collected per input row.
rows = []
for _, r in df.iterrows():
    out = label_one(r["review_text"])
    out = enforce_reasoning_limit(out, max_words=50)
    rows.append(
        dict(
            review_id=int(r["review_id"]),
            review_text=r["review_text"],
            true_is_political=int(r["true_is_political"]),
            pred_is_political=int(out["is_political"]),
            reasoning=out["reasoning"],
            # Recomputed here so the output shows the final reasoning length.
            reasoning_words=word_count(out["reasoning"]),
        )
    )

# Collect the records into a frame and preview the first rows.
out_df = pd.DataFrame(rows)
out_df.head()


In [None]:
# Accuracy: share of reviews whose predicted label equals the dataset label.
acc = out_df["true_is_political"].eq(out_df["pred_is_political"]).mean()
acc


In [None]:
# Persist the labeled reviews (without the index column) and confirm on stdout.
output_csv = "labeled_reviews_output.csv"
out_df.to_csv(output_csv, index=False)
print(f"Wrote {output_csv}")
