# Pipeline Diagnosis — Cross-Conversation LoCoMo Data

Step-by-step execution of the personal memory pipeline on **2 LoCoMo conversations sharing a speaker (John)**, 2 sessions each.

| Conversation | Speakers | Sessions used |
|-------------|----------|---------------|
| conv-41 | John x Maria | 2 of 32 |
| conv-43 | Tim x John | 2 of 29 |

Pipeline per conversation: **Coref → Chunking → GSW Extraction → SpaceTimeLinker → Layer-1 → Layer-2**

Then cross-conversation: **Layer-3 agentic reconciliation** merges "John" entities across both conversations.

In [1]:
import copy
import json
import os
import sys

# Make sure the package is importable
repo_root = os.path.abspath(os.path.join(os.getcwd(), '../../../../..'))
if repo_root not in sys.path:
    sys.path.insert(0, repo_root)

from dotenv import load_dotenv
load_dotenv(os.path.join(repo_root, '.env'))

from openai import OpenAI

from gsw_memory.personal_memory.data_ingestion.locomo import LoCoMoLoader, Session, Turn
from gsw_memory.personal_memory.chunker import TopicBoundaryChunker
from gsw_memory.personal_memory.models import ConversationMemory
from gsw_memory.personal_memory.reconciler import ConversationReconciler
from gsw_memory.memory.models import GSWStructure
from gsw_memory.memory.reconciler import Reconciler
from gsw_memory.memory.operator_utils.spacetime import apply_spacetime_to_gsw
from gsw_memory.prompts.operator_prompts import (
    CorefPrompts,
    ConversationalOperatorPrompts,
    SpaceTimePrompts,
)

client = OpenAI()
MODEL = 'gpt-4o'

# ---- Full GSW schema (including spacetime) — this is what the real pipeline uses ----
gsw_schema_full = GSWStructure.model_json_schema()


def print_full_gsw(gsw, label='GSW'):
    """Print a complete GSWStructure in a readable format."""
    SEP = '=' * 70
    SEP2 = '-' * 70
    print(f'\n{SEP}')
    print(f'  {label}')
    print(SEP)

    # Entities
    print(f'\n  ENTITIES ({len(gsw.entity_nodes)})')
    print(f'  {SEP2}')
    for e in gsw.entity_nodes:
        print(f'  [{e.id}] {e.name!r}  speaker={e.speaker_id!r}')
        for r in e.roles:
            states_str = ', '.join(r.states) if r.states else '\u2014'
            ev_str = ', '.join(r.evidence_turn_ids) if r.evidence_turn_ids else '\u2014'
            print(f'        role={r.role!r}  states=[{states_str}]')
            print(f'               speaker={r.speaker_id!r}  evidence=[{ev_str}]')

    # Verb phrases + questions
    print(f'\n  VERB PHRASES ({len(gsw.verb_phrase_nodes)})')
    print(f'  {SEP2}')
    for vp in gsw.verb_phrase_nodes:
        print(f'  [{vp.id}] "{vp.phrase}"')
        for q in vp.questions:
            ans_str = ', '.join(q.answers) if q.answers else '\u2014'
            ev_str = ', '.join(q.evidence_turn_ids) if q.evidence_turn_ids else '\u2014'
            print(f'    Q: {q.text}')
            print(f'       answers=[{ans_str}]  speaker={q.speaker_id!r}  evidence=[{ev_str}]')

    # Space nodes + edges
    print(f'\n  SPACE NODES ({len(gsw.space_nodes)})  /  SPACE EDGES ({len(gsw.space_edges)})')
    print(f'  {SEP2}')
    for sn in gsw.space_nodes:
        linked = [e[0] for e in gsw.space_edges if e[1] == sn.id]
        linked_names = []
        for eid in linked:
            entity = next((x for x in gsw.entity_nodes if x.id == eid), None)
            linked_names.append(entity.name if entity else eid)
        print(f'  [{sn.id}] {sn.current_name!r}  \u2192 entities: {linked_names}')
    if not gsw.space_nodes:
        print('  (none)')

    # Time nodes + edges
    print(f'\n  TIME NODES ({len(gsw.time_nodes)})  /  TIME EDGES ({len(gsw.time_edges)})')
    print(f'  {SEP2}')
    for tn in gsw.time_nodes:
        linked = [e[0] for e in gsw.time_edges if e[1] == tn.id]
        linked_names = []
        for eid in linked:
            entity = next((x for x in gsw.entity_nodes if x.id == eid), None)
            linked_names.append(entity.name if entity else eid)
        print(f'  [{tn.id}] {tn.current_name!r}  \u2192 entities: {linked_names}')
    if not gsw.time_nodes:
        print('  (none)')

    # Similarity edges
    if gsw.similarity_edges:
        print(f'\n  SIMILARITY EDGES ({len(gsw.similarity_edges)})')
        print(f'  {SEP2}')
        for e1_id, e2_id in gsw.similarity_edges:
            e1 = next((x.name for x in gsw.entity_nodes if x.id == e1_id), e1_id)
            e2 = next((x.name for x in gsw.entity_nodes if x.id == e2_id), e2_id)
            print(f'  {e1!r} \u2194 {e2!r}')

    print(f'\n{SEP}\n')


print('Imports OK')

Imports OK


In [2]:
# ---- Load LoCoMo data ----
loader = LoCoMoLoader(os.path.join(repo_root, 'gsw-memory/data/personal_memory/locomo/data/locomo10.json'))
conversations = loader.load()

# John appears in conv-41 (idx 2), conv-43 (idx 4), conv-47 (idx 6)
CONV_INDICES = [2, 4]  # conv-41 (John x Maria), conv-43 (Tim x John)
NUM_SESSIONS = 2
FOCAL_PERSON = "John"

selected_convs = [conversations[i] for i in CONV_INDICES]

print(f'Focal person: {FOCAL_PERSON}')
print(f'Conversations: {len(selected_convs)}, Sessions per conv: {NUM_SESSIONS}\n')
for conv in selected_convs:
    sessions_sel = conv.sessions[:NUM_SESSIONS]
    print(f'{conv.sample_id}: {conv.speaker_a} x {conv.speaker_b}  ({len(conv.sessions)} total sessions, using {NUM_SESSIONS})')
    for s in sessions_sel:
        print(f'  Session {s.session_id}: {s.date_time}  ({len(s.turns)} turns)')

Focal person: John
Conversations: 2, Sessions per conv: 2

conv-41: John x Maria  (32 total sessions, using 2)
  Session 1: 11:01 am on 17 December, 2022  (16 turns)
  Session 2: 6:10 pm on 22 December, 2022  (28 turns)
conv-43: Tim x John  (29 total sessions, using 2)
  Session 1: 7:48 pm on 21 May, 2023  (20 turns)
  Session 2: 5:08 pm on 15 June, 2023  (19 turns)


## Steps 1-5: Per-Conversation Pipeline

For each conversation, run: **Coref → Chunking → GSW Extraction → SpaceTimeLinker → Layer-1 → Layer-2**

Results stored in `conversation_memories` dict (conv_id → ConversationMemory) for Layer-3.

In [3]:
chunker = TopicBoundaryChunker(model_name=MODEL, max_turns_per_chunk=15)

# ---- Accumulators across all conversations ----
conversation_memories = {}          # conv_id -> ConversationMemory
all_conv_stats = {}                 # conv_id -> {per-chunk, per-session, layer2 stats}
bug1_detected = False               # Spacetime leak from GSW extraction


def derive_entity_speaker_ids(gsw):
    """Derive EntityNode.speaker_id from its roles' speaker_ids (in-place).

    If all non-None role speaker_ids agree → set entity.speaker_id to that value.
    Otherwise (mixed speakers or all None) → leave as None.
    """
    for entity in gsw.entity_nodes:
        role_speakers = {r.speaker_id for r in entity.roles if r.speaker_id is not None}
        if len(role_speakers) == 1:
            entity.speaker_id = role_speakers.pop()


for conv in selected_convs:
    sessions = conv.sessions[:NUM_SESSIONS]
    speaker_a, speaker_b = conv.speaker_a, conv.speaker_b
    conv_id = conv.sample_id
    speaker_context = f'Speaker A: {speaker_a}, Speaker B: {speaker_b}'

    print(f'\n{"#" * 70}')
    print(f'  CONVERSATION: {conv_id} ({speaker_a} x {speaker_b})')
    print(f'{"#" * 70}')

    # ================================================================
    # Step 1: Coreference Resolution
    # ================================================================
    print(f'\n--- Step 1: Coref Resolution ---')
    resolved_texts = []
    for sess_idx, sess in enumerate(sessions):
        raw_text = sess.to_document()
        response = client.chat.completions.create(
            model=MODEL,
            messages=[
                {'role': 'system', 'content': CorefPrompts.SYSTEM_PROMPT},
                {'role': 'user', 'content': CorefPrompts.USER_PROMPT_TEMPLATE.format(text=raw_text)},
            ],
            temperature=0,
            max_tokens=4000,
        )
        resolved = response.choices[0].message.content
        resolved_texts.append(resolved)

        orig_lines = raw_text.splitlines()
        res_lines = resolved.splitlines()
        changed = sum(1 for o, r in zip(orig_lines, res_lines) if o.strip() != r.strip())
        changed += abs(len(orig_lines) - len(res_lines))
        print(f'  Session {sess.session_id}: {changed}/{max(len(orig_lines), len(res_lines))} lines changed')

    # ================================================================
    # Step 2: Topic-Boundary Chunking
    # ================================================================
    print(f'\n--- Step 2: Chunking ---')
    session_chunks = []
    for sess_idx, (sess, resolved_text) in enumerate(zip(sessions, resolved_texts)):
        chunks = chunker.chunk_session_from_text(resolved_text, sess)
        session_chunks.append(chunks)
        print(f'  Session {sess.session_id}: {len(chunks)} chunk(s)')
        for ci, chunk in enumerate(chunks):
            n_lines = len(chunk.splitlines())
            print(f'    Chunk {ci}: {n_lines} lines, {len(chunk)} chars')

    # ================================================================
    # Step 3: GSW Extraction (CONVERSATIONAL prompt)
    # ================================================================
    print(f'\n--- Step 3: GSW Extraction ---')
    all_gsws = []
    flat_idx = 0
    for sess_idx, chunks in enumerate(session_chunks):
        for chunk_idx, chunk_text in enumerate(chunks):
            resp = client.chat.completions.create(
                model=MODEL,
                messages=[
                    {'role': 'system', 'content': ConversationalOperatorPrompts.SYSTEM_PROMPT},
                    {'role': 'user', 'content': ConversationalOperatorPrompts.USER_PROMPT_TEMPLATE.format(
                        speaker_context=speaker_context,
                        input_text=chunk_text,
                        background_context='',
                    )},
                ],
                response_format={
                    'type': 'json_schema',
                    'json_schema': {'name': 'GSWStructure', 'strict': False, 'schema': gsw_schema_full},
                },
                temperature=0,
                max_tokens=4000,
            )
            gsw_dict = json.loads(resp.choices[0].message.content)
            gsw = GSWStructure(**gsw_dict)

            # Bug #1 diagnostic
            leaked = len(gsw.space_nodes) + len(gsw.time_nodes) + len(gsw.space_edges) + len(gsw.time_edges)
            if leaked:
                bug1_detected = True
                print(f'  S{sessions[sess_idx].session_id} C{chunk_idx}: {len(gsw.entity_nodes)} entities, {len(gsw.verb_phrase_nodes)} VPs  [BUG#1: {leaked} leaked spacetime fields]')
            else:
                print(f'  S{sessions[sess_idx].session_id} C{chunk_idx}: {len(gsw.entity_nodes)} entities, {len(gsw.verb_phrase_nodes)} VPs')

            # Clear hallucinated spacetime
            gsw.space_nodes = []
            gsw.time_nodes = []
            gsw.space_edges = []
            gsw.time_edges = []
            gsw.similarity_edges = []

            # Derive entity-level speaker_id from role-level speaker_ids
            derive_entity_speaker_ids(gsw)

            all_gsws.append(gsw)
            flat_idx += 1

    # ================================================================
    # Step 3b: SpaceTimeLinker
    # ================================================================
    print(f'\n--- Step 3b: SpaceTimeLinker ---')
    pre_spacetime_gsws = [copy.deepcopy(g) for g in all_gsws]

    flat_idx = 0
    for sess_idx, chunks in enumerate(session_chunks):
        session_date = sessions[sess_idx].date_time
        session_ctx = f'Session date: {session_date}' if session_date else ''

        for chunk_idx, chunk_text in enumerate(chunks):
            gsw = all_gsws[flat_idx]

            if not gsw.entity_nodes:
                print(f'  S{sessions[sess_idx].session_id} C{chunk_idx}: (no entities, skipping)')
                flat_idx += 1
                continue

            operator_output = {'entity_nodes': [e.model_dump() for e in gsw.entity_nodes]}
            operator_output_json = json.dumps(operator_output, indent=2)

            resp = client.chat.completions.create(
                model=MODEL,
                messages=[
                    {'role': 'system', 'content': SpaceTimePrompts.SYSTEM_PROMPT},
                    {'role': 'user', 'content': SpaceTimePrompts.USER_PROMPT_TEMPLATE.format(
                        text_chunk_content=chunk_text,
                        operator_output_json=operator_output_json,
                        session_context=session_ctx,
                    )},
                ],
                temperature=0,
                max_tokens=1000,
            )
            answer_text = resp.choices[0].message.content.strip()

            if '```json' in answer_text:
                json_content = answer_text.split('```json')[1].split('```')[0].strip()
            elif '```' in answer_text:
                json_content = answer_text.split('```')[1].split('```')[0].strip()
            else:
                json_content = answer_text

            parsed = json.loads(json_content)
            links = parsed.get('spatio_temporal_links', [])

            chunk_id = f's{sess_idx}_c{chunk_idx}'
            apply_spacetime_to_gsw(gsw, links, chunk_id=chunk_id)
            print(f'  S{sessions[sess_idx].session_id} C{chunk_idx}: {len(links)} links -> {len(gsw.space_nodes)} space, {len(gsw.time_nodes)} time nodes')
            flat_idx += 1

    post_spacetime_gsws = [copy.deepcopy(g) for g in all_gsws]

    # ================================================================
    # Step 4: Layer-1 Reconciliation (per session)
    # ================================================================
    print(f'\n--- Step 4: Layer-1 Reconciliation ---')
    session_gsws = []
    gsw_cursor = 0

    for sess_idx, chunks in enumerate(session_chunks):
        n = len(chunks)
        sess_chunk_gsws = all_gsws[gsw_cursor:gsw_cursor + n]
        gsw_cursor += n

        reconciler = Reconciler(matching_approach='exact')
        for chunk_idx, (chunk_text, chunk_gsw) in enumerate(zip(chunks, sess_chunk_gsws)):
            chunk_id_l1 = f'session{sess_idx}_chunk{chunk_idx}'
            gsw_copy = copy.deepcopy(chunk_gsw)
            reconciler.reconcile(new_gsw=gsw_copy, chunk_id=chunk_id_l1, new_chunk_text=chunk_text)

        session_gsw = reconciler.global_memory
        if session_gsw is None:
            session_gsw = GSWStructure(entity_nodes=[], verb_phrase_nodes=[])

        # Stamp conversation_id
        for entity in session_gsw.entity_nodes:
            entity.conversation_id = conv_id
            for role in entity.roles:
                role.conversation_id = conv_id

        session_gsws.append(session_gsw)
        print(f'  Session {sessions[sess_idx].session_id}: {len(session_gsw.entity_nodes)} entities, '
              f'{len(session_gsw.verb_phrase_nodes)} VPs, '
              f'{len(session_gsw.space_nodes)} space, {len(session_gsw.time_nodes)} time nodes')

    # ================================================================
    # Step 5: Layer-2 Cross-Session Reconciliation
    # ================================================================
    print(f'\n--- Step 5: Layer-2 Reconciliation ---')
    conv_reconciler = ConversationReconciler()
    conversation_gsw = conv_reconciler.reconcile_sessions(
        session_gsws=session_gsws,
        speaker_a=speaker_a,
        speaker_b=speaker_b,
    )

    print(f'  {len(conversation_gsw.entity_nodes)} entities, '
          f'{len(conversation_gsw.verb_phrase_nodes)} VPs, '
          f'{len(conversation_gsw.space_nodes)} space, {len(conversation_gsw.time_nodes)} time nodes')

    print_full_gsw(conversation_gsw, label=f'{conv_id} AFTER LAYER-2')

    # Store results
    conv_memory = ConversationMemory(
        conversation_id=conv_id,
        speaker_a=speaker_a,
        speaker_b=speaker_b,
        gsw=conversation_gsw,
        session_gsws=session_gsws,
    )
    conversation_memories[conv_id] = conv_memory

    # Stats for summary table
    all_conv_stats[conv_id] = {
        'post_spacetime_gsws': post_spacetime_gsws,
        'session_gsws': session_gsws,
        'session_chunks': session_chunks,
        'sessions': sessions,
        'conversation_gsw': conversation_gsw,
    }

print(f'\n{"=" * 70}')
print(f'Steps 1-5 complete for {len(conversation_memories)} conversations.')
for cid, cm in conversation_memories.items():
    print(f'  {cid}: {len(cm.gsw.entity_nodes)} entities, {len(cm.gsw.verb_phrase_nodes)} VPs')


######################################################################
  CONVERSATION: conv-41 (John x Maria)
######################################################################

--- Step 1: Coref Resolution ---
  Session 1: 11/18 lines changed
  Session 2: 55/57 lines changed

--- Step 2: Chunking ---
  Session 1: 2 chunk(s)
    Chunk 0: 7 lines, 688 chars
    Chunk 1: 13 lines, 1545 chars
  Session 2: 2 chunk(s)
    Chunk 0: 16 lines, 2287 chars
    Chunk 1: 16 lines, 1997 chars

--- Step 3: GSW Extraction ---
  S1 C0: 8 entities, 4 VPs
  S1 C1: 6 entities, 5 VPs  [BUG#1: 4 leaked spacetime fields]
  S2 C0: 6 entities, 3 VPs  [BUG#1: 2 leaked spacetime fields]
  S2 C1: 8 entities, 3 VPs  [BUG#1: 6 leaked spacetime fields]

--- Step 3b: SpaceTimeLinker ---
  S1 C0: 3 links -> 1 space, 2 time nodes
  S1 C1: 4 links -> 2 space, 2 time nodes
  S2 C0: 3 links -> 1 space, 2 time nodes
  S2 C1: 3 links -> 1 space, 2 time nodes

--- Step 4: Layer-1 Reconciliation ---


  Session 1: 11 entities, 9 VPs, 3 space, 3 time nodes


Generating train split: 0 examples [00:00, ? examples/s]

Output()

  Session 2: 11 entities, 6 VPs, 2 space, 3 time nodes

--- Step 5: Layer-2 Reconciliation ---


  19 entities, 15 VPs, 5 space, 6 time nodes

  conv-41 AFTER LAYER-2

  ENTITIES (19)
  ----------------------------------------------------------------------
  [session0_John::session0_chunk0::e1] 'John'  speaker='John'
        role='speaker'  states=[—]
               speaker='John'  evidence=[D1:2, D1:4]
        role='traveler'  states=[just got back from a family road trip]
               speaker='John'  evidence=[D1:2]
        role='kickboxer'  states=[doing kickboxing, kickboxing is giving so much energy]
               speaker='John'  evidence=[D1:4]
        role='aspiring politician'  states=[hoping to get into local politics, passionate about improving education and infrastructure]
               speaker='John'  evidence=[D1:1, D1:3]
        role='community helper'  states=[loves helping the community, making the community a better place]
               speaker='John'  evidence=[D1:1]
        role='campaigner'  states=[networking with people, motivated to improve education]
 

Generating train split: 0 examples [00:00, ? examples/s]

Output()

Updated 3 questions with new answers.


Generating train split: 0 examples [00:00, ? examples/s]

Output()

Updated 4 questions with new answers.
  Session 1: 15 entities, 14 VPs, 3 space, 4 time nodes


Generating train split: 0 examples [00:00, ? examples/s]

Output()

Updated 1 questions with new answers.


Generating train split: 0 examples [00:00, ? examples/s]

Output()

Updated 1 questions with new answers.
  Session 2: 14 entities, 8 VPs, 1 space, 4 time nodes

--- Step 5: Layer-2 Reconciliation ---


  27 entities, 22 VPs, 4 space, 8 time nodes

  conv-43 AFTER LAYER-2

  ENTITIES (27)
  ----------------------------------------------------------------------
  [session0_Tim::session0_chunk0::e1] 'Tim'  speaker='Tim'
        role='speaker'  states=[—]
               speaker='Tim'  evidence=[D1:2, D1:4, D1:6, D1:8, D1:10, D1:12]
        role='collaborator'  states=[discussing collaborations for a Harry Potter fan project]
               speaker='Tim'  evidence=[D1:4]
        role='speaker'  states=[discussing Harry Potter fan project]
               speaker='Tim'  evidence=[D1:2]
        role='speaker'  states=[discussing Harry Potter universe, visited Harry Potter place in London, went on a tour]
               speaker='Tim'  evidence=[D1:1, D1:3]
        role='forum participant'  states=[joined a fantasy literature forum, had a great talk about favorite books]
               speaker='Tim'  evidence=[D2:1]
        role='supporter'  states=[supports John's endorsement journey]
       

## Step 6 — Layer-3 Agentic Cross-Conversation Reconciliation

`ConversationReconciler.reconcile_conversations_agentic()` uses an LLM agent with tool calls
to reason about entity identity **across conversations** and produce a unified global GSW.

With 2 conversations (conv-41: John x Maria, conv-43: Tim x John), the agent should identify
that "John" appears in both and merge those entities while keeping Maria, Tim, etc. separate.

In [4]:
from gsw_memory.personal_memory.reconciler import _LAYER3_SYSTEM, _LAYER3_USER

# conversation_memories is already populated from the pipeline cell above

# 1. Collect all entities and VPs from all conversations
all_entities = []
for cid, cm in conversation_memories.items():
    for entity in cm.gsw.entity_nodes:
        if entity.conversation_id is None:
            entity.conversation_id = cid
        all_entities.append(entity)

all_vps = []
for cm in conversation_memories.values():
    all_vps.extend(cm.gsw.verb_phrase_nodes)

# Deduplicate VPs by id
seen_vp_ids = set()
deduped_vps = []
for vp in all_vps:
    if vp.id not in seen_vp_ids:
        seen_vp_ids.add(vp.id)
        deduped_vps.append(vp)
if len(deduped_vps) < len(all_vps):
    print(f'  [Deduplication] Removed {len(all_vps) - len(deduped_vps)} duplicate VPs (same id)')
all_vps = deduped_vps

print(f'Layer-3 input: {len(all_entities)} entities, {len(all_vps)} VPs from {len(conversation_memories)} conversations\n')

# 2. Build entity summary
reconciler_l3 = ConversationReconciler()
entity_summary = reconciler_l3._build_entity_summary(all_entities)
print('Entity summary for agent:')
print(entity_summary)

# 3. Set up agent
reconciler_l3._all_vps = all_vps

tool_definitions = [
    {
        "type": "function",
        "name": "get_entity_timeline",
        "description": "Get all roles, states, and associated verb phrase questions for a specific entity by index.",
        "parameters": {
            "type": "object",
            "properties": {
                "entity_index": {"type": "integer", "description": "0-based entity index from the summary"},
            },
            "required": ["entity_index"],
        },
    },
    {
        "type": "function",
        "name": "compare_entities",
        "description": "Compare two entities by index to assess if they refer to the same real-world entity.",
        "parameters": {
            "type": "object",
            "properties": {
                "index_a": {"type": "integer"},
                "index_b": {"type": "integer"},
            },
            "required": ["index_a", "index_b"],
        },
    },
    {
        "type": "function",
        "name": "detect_contradictions",
        "description": "Detect contradictory states between two entities.",
        "parameters": {
            "type": "object",
            "properties": {
                "index_a": {"type": "integer"},
                "index_b": {"type": "integer"},
            },
            "required": ["index_a", "index_b"],
        },
    },
    {
        "type": "function",
        "name": "get_unanswered_questions",
        "description": "List all unanswered (None) questions for an entity, with cross-references from other VPs with same phrase.",
        "parameters": {
            "type": "object",
            "properties": {
                "entity_index": {"type": "integer", "description": "0-based entity index"},
            },
            "required": ["entity_index"],
        },
    },
    {
        "type": "function",
        "name": "get_duplicate_verb_phrases",
        "description": "List groups of VPs with the same phrase text, showing each VP's questions and answers side-by-side.",
        "parameters": {"type": "object", "properties": {}},
    },
    {
        "type": "function",
        "name": "get_reconciliation_summary",
        "description": "Get current reconciliation state: entity count, unanswered questions, duplicate VPs.",
        "parameters": {"type": "object", "properties": {}},
    },
    {
        "type": "function",
        "name": "resolve_question",
        "description": "Set the answer for a specific question.",
        "parameters": {
            "type": "object",
            "properties": {
                "vp_id": {"type": "string"},
                "question_id": {"type": "string"},
                "new_answers": {
                    "type": "array",
                    "items": {"type": "string"},
                },
            },
            "required": ["vp_id", "question_id", "new_answers"],
        },
    },
    {
        "type": "function",
        "name": "merge_verb_phrases",
        "description": "Merge two verb phrases that represent the same event.",
        "parameters": {
            "type": "object",
            "properties": {
                "keep_vp_id": {"type": "string"},
                "discard_vp_id": {"type": "string"},
            },
            "required": ["keep_vp_id", "discard_vp_id"],
        },
    },
    {
        "type": "function",
        "name": "finish_reconciliation",
        "description": "Complete reconciliation with a list of entity merge pairs. Call this as the final step.",
        "parameters": {
            "type": "object",
            "properties": {
                "merge_pairs": {
                    "type": "array",
                    "items": {
                        "type": "array",
                        "items": {"type": "integer"},
                        "minItems": 2,
                        "maxItems": 2,
                    },
                },
            },
            "required": ["merge_pairs"],
        },
    },
]

messages = [
    {"role": "system", "content": _LAYER3_SYSTEM},
    {
        "role": "user",
        "content": _LAYER3_USER.format(
            person_id=FOCAL_PERSON,
            entity_summary=entity_summary,
        ),
    },
]

print(f'\n{"=" * 70}')
print(f'  LAYER-3 AGENT TRACE (focal person: {FOCAL_PERSON})')
print(f'{"=" * 70}')

merge_pairs = []
finished = False
max_iterations = 20

for iteration in range(max_iterations):
    print(f'\n{"─" * 70}')
    print(f'  Iteration {iteration + 1}')
    print(f'{"─" * 70}')

    response = client.responses.create(
        model=MODEL,
        input=messages,
        tools=tool_definitions,
        tool_choice="auto",
        temperature=0,
    )

    tool_calls_in_turn = []
    for item in response.output:
        item_type = getattr(item, 'type', None)

        if item_type == "message":
            for part in item.content:
                part_type = getattr(part, 'type', None)
                if part_type == "output_text":
                    print(f'\n  AGENT TEXT:')
                    for line in part.text.splitlines():
                        print(f'    {line}')
                elif part_type == "refusal":
                    print(f'\n  AGENT REFUSAL: {part.refusal}')

        elif item_type == "reasoning":
            if hasattr(item, 'summary') and item.summary:
                for s in item.summary:
                    if hasattr(s, 'text'):
                        print(f'\n  AGENT REASONING:')
                        for line in s.text.splitlines():
                            print(f'    {line}')

        elif item_type == "function_call":
            tool_calls_in_turn.append(item)
            args = json.loads(item.arguments) if isinstance(item.arguments, str) else item.arguments
            print(f'\n  TOOL CALL: {item.name}({json.dumps(args, indent=2)})')

    if not tool_calls_in_turn:
        print('  (no tool calls -- agent finished)')
        break

    messages.extend(response.output)

    tool_results = []
    for tc in tool_calls_in_turn:
        args = json.loads(tc.arguments) if isinstance(tc.arguments, str) else tc.arguments
        result = reconciler_l3._dispatch_tool(tc.name, args, all_entities)

        print(f'  RESULT ({tc.name}): {json.dumps(result, indent=2)}')

        if tc.name == "finish_reconciliation":
            merge_pairs = args.get("merge_pairs", [])
            finished = True

        tool_results.append({
            "type": "function_call_output",
            "call_id": tc.call_id,
            "output": json.dumps(result),
        })

    messages.extend(tool_results)

    if finished:
        print(f'\n  RECONCILIATION FINISHED')
        print(f'  Merge pairs: {merge_pairs}')
        break

# Apply merges
global_gsw_entities = reconciler_l3._apply_merges(all_entities, merge_pairs, all_vps)

# Collect spacetime from all conversations
all_space_nodes, all_time_nodes = [], []
all_space_edges, all_time_edges = [], []
for cm in conversation_memories.values():
    all_space_nodes.extend(cm.gsw.space_nodes)
    all_time_nodes.extend(cm.gsw.time_nodes)
    all_space_edges.extend(cm.gsw.space_edges)
    all_time_edges.extend(cm.gsw.time_edges)

# Remap spacetime edges for merged entities
entity_id_remap = getattr(reconciler_l3, '_entity_id_remap', {})
if entity_id_remap:
    print(f'\n  Entity merges applied: {entity_id_remap}')
    all_space_edges = [(entity_id_remap.get(eid, eid), sid) for eid, sid in all_space_edges]
    all_time_edges = [(entity_id_remap.get(eid, eid), tid) for eid, tid in all_time_edges]

global_gsw = GSWStructure(
    entity_nodes=global_gsw_entities,
    verb_phrase_nodes=reconciler_l3._all_vps,
    space_nodes=all_space_nodes,
    time_nodes=all_time_nodes,
    space_edges=all_space_edges,
    time_edges=all_time_edges,
)

# Show before/after counts
total_l2_entities = sum(len(cm.gsw.entity_nodes) for cm in conversation_memories.values())
print(f'\n{"=" * 70}')
print(f'Layer-3 complete.  Iterations: {iteration + 1}')
print(f'Entities: {total_l2_entities} (Layer-2 total) -> {len(global_gsw.entity_nodes)} (Layer-3)')
print(f'Verb phrases: {len(global_gsw.verb_phrase_nodes)}')
print(f'Space nodes: {len(global_gsw.space_nodes)}, Time nodes: {len(global_gsw.time_nodes)}')

print_full_gsw(global_gsw, label='AFTER LAYER-3 (agentic cross-conversation reconciliation)')

  [Deduplication] Removed 15 duplicate VPs (same id)
Layer-3 input: 46 entities, 22 VPs from 2 conversations

Entity summary for agent:
0. [conv-41 / John] John: speaker; traveler [just got back from a family road trip]; kickboxer [doing kickboxing, kickboxing is giving so much energy]; aspiring politician [hoping to get into local politics, passionate about improving education and infrastructure]; community helper [loves helping the community, making the community a better place]; campaigner [networking with people, motivated to improve education]; family member [motivated by family, supported by family]; family member [enjoys climbing, sliding, and playing games, makes pizza with family, shares meals with family]
1. [conv-41 / John] family road trip: event [fun]
2. [conv-41 / John] kickboxing: workout [giving so much energy]
3. [conv-41 / John] yesterday: temporal reference [John got back from road trip]
4. [conv-41 / John] local leaders and organizations: potential supporters [John 

## Diagnosis Summary

Summary table of counts at each pipeline stage, plus bug detection flags.

In [5]:
print(f'{"=" * 90}')
print(f'{"DIAGNOSIS SUMMARY":^90}')
print(f'{"=" * 90}')

header = f'{"Stage":40} {"Entities":>10} {"VPs":>6} {"SpaceN":>8} {"TimeN":>7} {"SpaceE":>8} {"TimeE":>7}'

for conv_id, stats in all_conv_stats.items():
    cm = conversation_memories[conv_id]
    sessions_conv = stats['sessions']
    session_chunks_conv = stats['session_chunks']
    post_st = stats['post_spacetime_gsws']
    sess_gsws = stats['session_gsws']
    conv_gsw = stats['conversation_gsw']

    print(f'\n  {conv_id} ({cm.speaker_a} x {cm.speaker_b})')
    print(f'  {"─" * 86}')
    print(f'  {header}')
    print(f'  {"─" * 86}')

    # Per-chunk stats
    flat_idx = 0
    for sess_idx, chunks in enumerate(session_chunks_conv):
        for chunk_idx in range(len(chunks)):
            g = post_st[flat_idx]
            label = f'S{sessions_conv[sess_idx].session_id} C{chunk_idx} (post-ST)'
            print(f'  {label:40} {len(g.entity_nodes):>10} {len(g.verb_phrase_nodes):>6} '
                  f'{len(g.space_nodes):>8} {len(g.time_nodes):>7} '
                  f'{len(g.space_edges):>8} {len(g.time_edges):>7}')
            flat_idx += 1

    print(f'  {"─" * 86}')
    for sess_idx, sg in enumerate(sess_gsws):
        label = f'Session {sessions_conv[sess_idx].session_id} (Layer-1)'
        print(f'  {label:40} {len(sg.entity_nodes):>10} {len(sg.verb_phrase_nodes):>6} '
              f'{len(sg.space_nodes):>8} {len(sg.time_nodes):>7} '
              f'{len(sg.space_edges):>8} {len(sg.time_edges):>7}')

    print(f'  {"─" * 86}')
    label = f'{conv_id} (Layer-2)'
    print(f'  {label:40} {len(conv_gsw.entity_nodes):>10} {len(conv_gsw.verb_phrase_nodes):>6} '
          f'{len(conv_gsw.space_nodes):>8} {len(conv_gsw.time_nodes):>7} '
          f'{len(conv_gsw.space_edges):>8} {len(conv_gsw.time_edges):>7}')

# Global (Layer-3) summary
print(f'\n  {"=" * 86}')
print(f'  CROSS-CONVERSATION (Layer-3)')
print(f'  {"─" * 86}')

# Layer-2 totals
total_entities = sum(len(cm.gsw.entity_nodes) for cm in conversation_memories.values())
total_vps = sum(len(cm.gsw.verb_phrase_nodes) for cm in conversation_memories.values())
label = 'Layer-2 total (all convs)'
print(f'  {label:40} {total_entities:>10} {total_vps:>6}')

label = 'Layer-3 global'
print(f'  {label:40} {len(global_gsw.entity_nodes):>10} {len(global_gsw.verb_phrase_nodes):>6} '
      f'{len(global_gsw.space_nodes):>8} {len(global_gsw.time_nodes):>7} '
      f'{len(global_gsw.space_edges):>8} {len(global_gsw.time_edges):>7}')

reduction = total_entities - len(global_gsw.entity_nodes)
print(f'\n  Entities merged by Layer-3: {reduction} ({reduction/max(total_entities,1)*100:.0f}% reduction)')

print(f'\n{"=" * 90}')

# Bug flags
print(f'\nBug Detection:')
print(f'  #1 Spacetime leak from GSW extraction:  {"YES" if bug1_detected else "NO"}')

# Check spacetime across all conversations
has_spacetime = False
for stats in all_conv_stats.values():
    if any(g.space_nodes or g.time_nodes for g in stats['post_spacetime_gsws']):
        has_spacetime = True
        break
print(f'  #2 SpaceTimeLinker produced nodes:       {"YES" if has_spacetime else "NO (bug!)"}')

# Dangling edges across all Layer-2 + Layer-3
dangling_count = 0
gsws_to_check = [cm.gsw for cm in conversation_memories.values()] + [global_gsw]
for gsw in gsws_to_check:
    entity_ids = {e.id for e in gsw.entity_nodes}
    space_node_ids = {s.id for s in gsw.space_nodes}
    time_node_ids = {t.id for t in gsw.time_nodes}
    for eid, sid in gsw.space_edges:
        if eid not in entity_ids or sid not in space_node_ids:
            dangling_count += 1
    for eid, tid in gsw.time_edges:
        if eid not in entity_ids or tid not in time_node_ids:
            dangling_count += 1
print(f'  Dangling spacetime edges (all L2+L3):    {dangling_count}')

                                    DIAGNOSIS SUMMARY                                     

  conv-41 (John x Maria)
  ──────────────────────────────────────────────────────────────────────────────────────
  Stage                                      Entities    VPs   SpaceN   TimeN   SpaceE   TimeE
  ──────────────────────────────────────────────────────────────────────────────────────
  S1 C0 (post-ST)                                   8      4        1       2        2       6
  S1 C1 (post-ST)                                   6      5        2       2        5       6
  S2 C0 (post-ST)                                   6      3        1       2        2       5
  S2 C1 (post-ST)                                   8      3        1       2        3       8
  ──────────────────────────────────────────────────────────────────────────────────────
  Session 1 (Layer-1)                              11      9        3       3        7       9
  Session 2 (Layer-1)                         