# NPC Studio Data Analysis

This notebook loads the **npcs.json** file from the workspace and analyzes each NPC's **role**, persona, capabilities, and lore to support NPC Studio design and consistency.

## 1. Load data

In [None]:
import json
from pathlib import Path

# Locate npcs.json relative to the working directory: prefer ./data (kernel
# started at the repo root), otherwise fall back to ../data (kernel started
# from notebooks/).
base = Path.cwd()
npcs_path = base / "data" / "npcs.json"
if not npcs_path.exists():
    npcs_path = base / ".." / "data" / "npcs.json"

# Parse the whole file into memory; UTF-8 is requested explicitly.
with npcs_path.open("r", encoding="utf-8") as f:
    npcs = json.load(f)

print(f"Loaded {len(npcs)} NPCs from {npcs_path.resolve()}")
npcs[:1]  # display only the first record as a quick sanity check

## 2. Flatten for analysis (roles, persona, rules, capabilities, lore)

In [None]:
import pandas as pd

# Build one flat record per NPC so the nested JSON can be analysed as a table.
# Every nested lookup uses an `or <empty>` fallback so that both a missing key
# and an explicit JSON null collapse to the same empty value.
rows = []
for n in npcs:
    p = n.get("persona") or {}
    r = n.get("rules") or {}
    c = n.get("capabilities") or {}
    lore = n.get("lore_facts") or []
    do_not = r.get("do_not") or []

    # `dict.get(key, "")` only applies its default when the key is absent; a
    # key present with a null value would return None and break `len()` below.
    backstory = p.get("backstory") or ""
    goals = p.get("goals") or ""
    voice_style = p.get("voice_style") or ""

    rows.append({
        "id": n.get("id"),
        "name": n.get("name"),
        "role": n.get("role"),
        "backstory": backstory,
        "goals": goals,
        "voice_style": voice_style,
        "backstory_len": len(backstory),
        "goals_len": len(goals),
        "lore_facts_count": len(lore),
        "do_not_count": len(do_not),
        "num_gestures": len(c.get("allowed_gestures") or []),
        "num_actions": len(c.get("allowed_actions") or []),
        # True only when a non-blank spoiler policy string is present.
        "has_spoiler_policy": bool((r.get("spoiler_policy") or "").strip()),
    })

df = pd.DataFrame(rows)
df

## 3. Role distribution — who populates the world?

In [None]:
# Count how many NPCs carry each role (value_counts orders by frequency).
role_counts = df["role"].value_counts()

# Text table first, then the same counts as a bar chart.
print("NPCs by role (contribution to NPC Studio world):", role_counts.to_string(), sep="\n")
role_counts.plot.bar(title="NPCs by role", xlabel="Role", ylabel="Count", figsize=(8, 4));

## 4. Understanding each NPC's role — summary table

In [None]:
# Per-NPC summary: name, role, short persona hint, lore count, capabilities
BACKSTORY_PREVIEW_CHARS = 60
GOALS_PREVIEW_CHARS = 50


def _preview(text, limit):
    """Return a truncated preview of each string in a Series.

    Args:
        text: pandas Series of strings (missing values are treated as empty).
        limit: maximum number of characters kept from each string.

    Returns:
        A Series where empty text is shown as "—", text longer than `limit`
        is cut to `limit` characters followed by "...", and shorter text is
        returned unchanged.
    """
    text = text.fillna("")
    head = text.str[:limit].replace("", "—")
    # Only flag truncation when characters were actually dropped.
    ellipsis = (text.str.len() > limit).map({True: "...", False: ""})
    return head + ellipsis


summary = df[["name", "role", "backstory", "goals", "voice_style", "lore_facts_count", "num_gestures", "num_actions"]].copy()
# Both previews share one rule so a short or empty backstory is no longer
# shown with a misleading trailing "...".
summary["backstory_preview"] = _preview(summary["backstory"], BACKSTORY_PREVIEW_CHARS)
summary["goals_preview"] = _preview(summary["goals"], GOALS_PREVIEW_CHARS)
summary[["name", "role", "backstory_preview", "goals_preview", "voice_style", "lore_facts_count", "num_gestures", "num_actions"]].style.set_caption("NPC role summary for NPC Studio")

## 5. Persona depth — backstory and goals length

In [None]:
# Character counts of each NPC's backstory and goals, as a table and a chart.
persona_len_cols = ["backstory_len", "goals_len"]

print("Persona depth (characters):")
print(df[["name", "role", *persona_len_cols]].to_string(index=False))
df.plot.bar(
    x="name",
    y=persona_len_cols,
    figsize=(10, 4),
    title="Backstory & goals length by NPC",
    xlabel="",
);

## 6. Lore and rules — who has the most world-building?

In [None]:
# Lore and rule coverage per NPC: text table first, then lore counts as bars.
lore_report_cols = ["name", "role", "lore_facts_count", "do_not_count", "has_spoiler_policy"]

print("Lore facts count (world-building per NPC):")
print(df[lore_report_cols].to_string(index=False))
df.plot.bar(
    x="name",
    y="lore_facts_count",
    title="Lore facts per NPC",
    xlabel="",
    figsize=(10, 4),
    legend=False,
);

## 7. Role-by-role breakdown for NPC Studio

In [None]:
# Print, for every distinct role, which NPCs hold it and their average
# lore / gesture / action counts.
for role in df["role"].unique():
    # A missing role never compares equal to itself, so `== role` would select
    # no rows and report an empty group with NaN averages; match nulls
    # explicitly instead.
    role_mask = df["role"].isna() if pd.isna(role) else df["role"] == role
    subset = df[role_mask]
    # Coerce to str so an NPC without a name does not break the join.
    names = ", ".join(map(str, subset["name"]))
    avg_lore = subset["lore_facts_count"].mean()
    avg_gestures = subset["num_gestures"].mean()
    avg_actions = subset["num_actions"].mean()
    print(f"**{role}** ({len(subset)}): {names}")
    print(f"   Avg lore facts: {avg_lore:.1f}  |  Avg gestures: {avg_gestures:.1f}  |  Avg actions: {avg_actions:.1f}")
    print()

## 8. Export a flat summary for reports or Databricks

In [None]:
# Write the flat table next to the data/ directory: the current directory if
# it contains data/, otherwise its parent.
cwd = Path.cwd()
if (cwd / "data").exists():
    out_dir = cwd
else:
    out_dir = cwd.parent

out_path = out_dir / "npc_studio_summary.csv"
df.to_csv(out_path, index=False)  # index omitted: it is just the row number
print(f"Saved summary to {out_path.resolve()}")