# NPC Cognitive Pipeline Testing

This notebook provides full observability into the NPC cognitive pipeline using the new LangGraph architecture with proper Godot action schemas.

In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import json
import time
import asyncio
from typing import Any
from dataclasses import dataclass
import logging

# Import the new cognitive architecture
from mind.cognitive_architecture.pipeline import CognitivePipeline
from mind.cognitive_architecture.state import PipelineState
from mind.cognitive_architecture.models import (
    ObservationContext, 
    AvailableAction, 
    Action, 
    ActionType,
    Memory
)
from mind.cognitive_architecture.memory.store import MemoryStore

# Import LangChain LLM wrapper
from mind.apis.langchain_llm import get_llm, LangChainModel

# Set up logging to see all intermediate steps.
# force=True replaces any handlers already attached to the root logger. Without
# it, basicConfig() silently does nothing when logging was configured earlier
# (for example by an imported library or a previous run of this cell), and the
# INFO-level messages would not be shown in the configured format.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    force=True,
)
logger = logging.getLogger(__name__)

## Token Counter

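A simple utility for estimating token usage at each step. Counts are computed with tiktoken's GPT-4 encoding, so they are approximations for models that use a different tokenizer.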

In [None]:
import tiktoken

@dataclass
class TokenUsage:
    """Token counts and latency recorded for one LLM invocation."""
    step: str           # label identifying the call
    input_tokens: int   # token count of the input text
    output_tokens: int  # token count of the output text
    total_tokens: int   # combined count (input plus output)
    duration_ms: int    # duration of the call, in milliseconds
    
class TokenTracker:
    """Accumulate approximate token counts and durations for a series of LLM calls.

    Counts come from tiktoken's GPT-4 encoding, so they are estimates for
    models that use a different tokenizer.
    """

    def __init__(self):
        # GPT-4 encoding is used as an approximation for other models' tokenizers.
        self.encoder = tiktoken.encoding_for_model("gpt-4")
        self.usage_history: list[TokenUsage] = []

    def count_tokens(self, text: str) -> int:
        """Return the number of tokens ``text`` encodes to.

        Empty or missing text counts as zero tokens. Special-token markers
        (e.g. ``<|endoftext|>``) that happen to appear in the text are encoded
        as ordinary text instead of raising ``ValueError``, which is tiktoken's
        default reaction to them.
        """
        if not text:
            return 0
        return len(self.encoder.encode(text, disallowed_special=()))

    def track_call(self, step: str, input_text: str, output_text: str, duration_ms: int) -> "TokenUsage":
        """Record one LLM call and return the ``TokenUsage`` entry created for it.

        Args:
            step: Label identifying the call.
            input_text: Prompt text sent to the model.
            output_text: Text returned by the model.
            duration_ms: Duration of the call in milliseconds.
        """
        # Tokenize each text exactly once and derive the total from the two counts.
        input_tokens = self.count_tokens(input_text)
        output_tokens = self.count_tokens(output_text)
        usage = TokenUsage(
            step=step,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            total_tokens=input_tokens + output_tokens,
            duration_ms=duration_ms
        )
        self.usage_history.append(usage)
        return usage

    def summary(self) -> dict[str, Any]:
        """Return totals over all recorded calls plus a per-step breakdown."""
        total_input = sum(u.input_tokens for u in self.usage_history)
        total_output = sum(u.output_tokens for u in self.usage_history)
        total_time = sum(u.duration_ms for u in self.usage_history)

        return {
            "total_input_tokens": total_input,
            "total_output_tokens": total_output,
            "total_tokens": total_input + total_output,
            "total_duration_ms": total_time,
            "steps": [{
                "step": u.step,
                "tokens": u.total_tokens,
                "duration_ms": u.duration_ms
            } for u in self.usage_history]
        }

## Initialize Pipeline Components

In [None]:
# Build the LLM client via OpenRouter.
# Swap the LangChainModel constant to try a different model.
llm = get_llm(LangChainModel.GEMINI_FLASH_LITE)

# Memory store handed to the pipeline
memory_store = MemoryStore(collection_name="test_memories")

# Wire the LLM and the memory store into the cognitive pipeline
pipeline = CognitivePipeline(llm=llm, memory_store=memory_store)

print("Pipeline initialized with nodes:")
for position, node_name in enumerate(
    ("Memory Query", "Memory Retrieval", "Cognitive Update", "Action Selection"),
    start=1,
):
    print(f"  {position}. {node_name}")

## Test Scenario Setup

Create a realistic test scenario using proper Godot action schemas.

In [None]:
# Memories the store is seeded with for the test scenario
initial_memories = [
    "Yesterday I shaped the blade and got it to the right length",
    "The customer wants a ceremonial sword with intricate engravings",
    "My apprentice has been learning quickly and can now help with basic tasks",
    "The forge needs more coal soon, running low on supplies",
    "Last week I completed a set of horseshoes for the stable",
    "The village festival is coming up in three days",
    "I promised my wife I would come home early today",
    "The apprentice's name is Tom and he's been working with me for 3 months",
    "I have a bad back from years of smithing work",
    "The local inn keeper is my best customer",
]

# Empty the store first, then insert every seed memory with the same importance
memory_store.clear()
for seed_text in initial_memories:
    memory = memory_store.add_memory(seed_text, importance=5.0)
    preview = seed_text[:50]
    print(f"Added memory: {preview}...")

memory_count = memory_store.collection.count()
print(f"\nTotal memories in store: {memory_count}")

In [None]:
# Test observation context built with proper Godot action schemas.
# Note: This mirrors the actual format from EventFormatter.get_available_actions() in the Godot client

# Markdown status report shown to the agent
blacksmith_observation_text = """# Current Status

## Position
You are at position (5,10).

## Needs
- Hunger: 75%
- Energy: 60%
- Fun: 40%
- Hygiene: 80%

## Visible Items

### Tom (at position (6,10))
Interactions:
- **conversation**: Talk with Tom Effects: Fun (+), Energy (-)

### Sword Blade (at position (5,11))
Interactions:
- **craft**: Continue working on the sword blade Effects: Fun (+), Energy (-)

### Forge (at position (4,10))
Interactions:
- **restart_fire**: Restart the forge fire Effects: Energy (-)

# Environment

The morning sun streams through the workshop window. Your apprentice Tom just arrived and greets you cheerfully.
The half-finished sword blade lies on your workbench. The forge fire has died down overnight."""

# (name, description, parameters) for every action offered to the agent
godot_action_specs = [
    ("MOVE_TO", "Move to a specific cell", {"destination": "Target position to move to"}),
    (
        "MOVE_DIRECTION",
        "Start or stop continuous movement in a direction",
        {"direction": "Direction to move (e.g., (0,-1) for up)"},
    ),
    (
        "INTERACT_WITH",
        "Interact with an item or NPC",
        {
            "entity_id": "Target entity ID",
            "interaction_name": "Name of interaction to request",
        },
    ),
    ("WANDER", "Move to a random location", {"range": "Maximum wander distance from current position"}),
    ("WAIT", "Wait for a specified duration", {"duration": "How long to wait in seconds"}),
    ("CONTINUE", "Continue current activity", {}),
    (
        "ACT_IN_INTERACTION",
        "Take an action within the current interaction",
        {
            "message": "Message to send (for conversations)",
            "action_type": "Type of action within interaction",
        },
    ),
    ("CANCEL_INTERACTION", "Stop the current interaction", {}),
    (
        "RESPOND_TO_INTERACTION_BID",
        "Accept or reject an interaction request",
        {
            "bid_id": "ID of the bid to respond to",
            "accept": "True to accept, false to reject",
            "reason": "Optional reason for rejection",
        },
    ),
]

observation_context = ObservationContext(
    agent_id="blacksmith_001",
    observation_text=blacksmith_observation_text,
    available_actions=[
        AvailableAction(name=action_name, description=action_summary, parameters=action_params)
        for action_name, action_summary, action_params in godot_action_specs
    ],
    personality_traits=["hardworking", "perfectionist", "patient teacher", "caring mentor"],
)

# Working memory the agent starts the scenario with (three lines of text)
initial_working_memory = (
    "I am a blacksmith in the village. I've been working on a sword commission for the past two days. \n"
    "This morning I feel well-rested despite my back acting up a bit. My apprentice should help today.\n"
    "I need to finish the blade today and start on the handle tomorrow."
)

# Short confirmation of what was just set up
print("Test scenario created:")
print("  Agent: " + f"{observation_context.agent_id}")
print("  Available actions: " + f"{len(observation_context.available_actions)}")
print("  Personality traits: " + ', '.join(observation_context.personality_traits))

## Display Available Actions

Show how the actions format themselves for the LLM.

In [None]:
banner = "=" * 60
print("Available Actions (as formatted for LLM):")
print(banner)
for position, action in enumerate(observation_context.available_actions, start=1):
    # One numbered entry followed by a blank separator line
    print(f"{position}. {action!s}\n")

## Run Cognitive Pipeline

Execute the pipeline with full observability of each step.

In [None]:
async def run_pipeline_with_observation(pipeline, observation_context, working_memory):
    """Run the pipeline once and print the intermediate result of every stage.

    Args:
        pipeline: Object whose awaitable ``process(state)`` executes the pipeline.
        observation_context: Observation handed to ``PipelineState``.
        working_memory: Working-memory text the run starts from.

    Returns:
        The state returned by ``pipeline.process``, so later cells can inspect it.
    """
    section_rule = "=" * 70
    step_rule = "-" * 40
    preview_limit = 200  # characters of working memory shown in the preview

    # Create initial state
    state = PipelineState(
        observation_context=observation_context,
        working_memory=working_memory
    )

    print(section_rule)
    print("STARTING COGNITIVE PIPELINE")
    print(section_rule)
    print("\nInitial State:")
    print(f"  Working Memory: {len(state.working_memory)} chars")
    print(f"  Observation: {len(state.observation_context.observation_text)} chars")
    print()

    # Run the pipeline. perf_counter() is a monotonic clock, so the measured
    # duration cannot be distorted by system clock adjustments during the run.
    start_time = time.perf_counter()
    result_state = await pipeline.process(state)
    total_time = time.perf_counter() - start_time

    # Display results from each step
    print("\n" + section_rule)
    print("PIPELINE STEPS COMPLETED")
    print(section_rule)

    print("\n1. MEMORY QUERIES GENERATED:")
    print(step_rule)
    for i, query in enumerate(result_state.memory_queries, 1):
        print(f"  {i}. {query}")
    print(f"\nTime: {result_state.time_ms.get('memory_query', 0)}ms")

    print("\n2. MEMORIES RETRIEVED:")
    print(step_rule)
    for i, memory in enumerate(result_state.retrieved_memories, 1):
        print(f"  {i}. {memory.content}")
        print(f"     (importance: {memory.importance:.1f})")
    print(f"\nTime: {result_state.time_ms.get('memory_retrieval', 0)}ms")

    print("\n3. COGNITIVE UPDATE:")
    print(step_rule)
    cognitive_context = result_state.cognitive_context
    if cognitive_context:
        # Tolerate a missing/None goal list and non-string goal entries.
        goals = cognitive_context.get('current_goals') or []
        print(f"  Situation: {cognitive_context.get('situation_assessment', 'N/A')}")
        print(f"  Current Goals: {', '.join(str(goal) for goal in goals)}")
        print(f"  Emotional State: {cognitive_context.get('emotional_state', 'N/A')}")
    updated_memory = result_state.working_memory
    # Only mark the preview as truncated when text was actually cut off.
    truncation_mark = "..." if len(updated_memory) > preview_limit else ""
    print("\n  Updated Working Memory Preview:")
    print(f"  {updated_memory[:preview_limit]}{truncation_mark}")
    print(f"\nTime: {result_state.time_ms.get('cognitive_update', 0)}ms")

    print("\n4. ACTION SELECTED:")
    print(step_rule)
    if result_state.chosen_action:
        print(f"  Action: {result_state.chosen_action.action}")
        print(f"  Parameters: {result_state.chosen_action.parameters}")
        print(f"  Formatted: {str(result_state.chosen_action)}")
    else:
        print("  No action selected")
    print(f"\nTime: {result_state.time_ms.get('action_selection', 0)}ms")

    print("\n" + section_rule)
    print("PIPELINE COMPLETE")
    print(section_rule)
    print(f"Total Time: {total_time:.2f} seconds")
    print(f"Total Time (tracked): {sum(result_state.time_ms.values())}ms")

    return result_state

# Run the pipeline
# `await` is used at the top level of the cell, so this line only works where
# top-level await is supported (as in an IPython/Jupyter kernel).
# `result` is read again by later cells (MCP formatting, urgent scenario).
result = await run_pipeline_with_observation(pipeline, observation_context, initial_working_memory)

## Format Action for MCP Protocol

Show how the selected action would be sent back to Godot via MCP.

In [None]:
if result.chosen_action:
    chosen = result.chosen_action

    # Format as MCP response
    mcp_response = {
        "action": chosen.action,
        "parameters": chosen.parameters
    }

    print("MCP Response to Godot:")
    print("="*40)
    print(json.dumps(mcp_response, indent=2))

    # Normalise the action identifier before matching. The scenario declares
    # action names in upper case ("MOVE_TO"), and the value may be an enum
    # member rather than a plain string, so compare on a lower-cased string.
    action_key = str(getattr(chosen.action, "value", chosen.action)).lower()
    params = chosen.parameters or {}

    print("\nThis action would cause the NPC to:")
    if action_key == "interact_with":
        entity = params.get("entity_id", "unknown")
        interaction = params.get("interaction_name", "unknown")
        print(f"  → Request a '{interaction}' interaction with entity '{entity}'")
        print("  → Transition to NpcBiddingState in the controller")
    elif action_key == "move_to":
        dest = params.get("destination", "unknown")
        print(f"  → Navigate to grid position {dest}")
        print("  → Transition to MovingState in the controller")
    elif action_key == "wait":
        duration = params.get("duration", 1.0)
        print(f"  → Pause for {duration} seconds")
        print("  → Transition to WaitingState in the controller")
    elif action_key == "wander":
        print("  → Randomly explore the environment")
        print("  → Transition to WanderingState in the controller")
    elif action_key == "continue":
        print("  → Continue current behavior unchanged")
        print("  → No state transition")
    else:
        # Actions such as MOVE_DIRECTION or CANCEL_INTERACTION have no
        # description here; say so instead of printing an empty section.
        print(f"  → Perform '{action_key}' (no description available in this notebook)")
else:
    print("No action was selected by the pipeline")

## Test Different Scenarios

Try the pipeline with different observations to see how it responds.

In [None]:
# Urgent scenario - low on supplies

# Markdown status report shown to the agent in the urgent scenario
urgent_observation_text = """# Current Status

## Position
You are at position (5,10).

## Needs
- Hunger: 70%
- Energy: 55%
- Fun: 30%
- Hygiene: 75%

## Visible Items

### Tom (at position (6,10))
Interactions:
- **conversation**: Talk with Tom about getting more coal urgently Effects: Fun (+), Energy (-)

# Environment

The forge fire suddenly sputters and dies completely. You realize you're completely out of coal.
The sword commission is due tomorrow and you need the forge to finish it. Tom looks worried."""

# (name, description, parameters) for the reduced action set of this scenario
urgent_action_specs = [
    (
        "INTERACT_WITH",
        "Interact with an item or NPC",
        {
            "entity_id": "Target entity ID",
            "interaction_name": "Name of interaction to request",
        },
    ),
    ("MOVE_TO", "Move to a specific cell", {"destination": "Target position to move to"}),
    ("WAIT", "Wait for a specified duration", {"duration": "How long to wait in seconds"}),
    ("CONTINUE", "Continue current activity", {}),
]

urgent_observation = ObservationContext(
    agent_id="blacksmith_001",
    observation_text=urgent_observation_text,
    available_actions=[
        AvailableAction(name=action_name, description=action_summary, parameters=action_params)
        for action_name, action_summary, action_params in urgent_action_specs
    ],
    personality_traits=["hardworking", "perfectionist", "patient teacher", "resourceful"],
)

urgent_state = PipelineState(
    observation_context=urgent_observation,
    working_memory=result.working_memory  # Use updated working memory from previous run
)

print("Testing URGENT scenario:")
print("="*40)
urgent_result = await pipeline.process(urgent_state)

if urgent_result.chosen_action:
    print(f"\nChosen Action: {urgent_result.chosen_action.action}")
    print(f"Parameters: {urgent_result.chosen_action.parameters}")
    print(f"\nCognitive Context:")
    print(f"  Situation: {urgent_result.cognitive_context.get('situation_assessment', 'N/A')}")
    print(f"  Emotional: {urgent_result.cognitive_context.get('emotional_state', 'N/A')}")

## Memory Analysis

Query the memory store directly with a few hand-written queries to see which memories each one retrieves.

In [None]:
print("Memory Retrieval Analysis")
print("="*60)

# Test memory retrieval with different queries
test_queries = [
    "apprentice Tom teaching",
    "sword blade commission",
    "coal forge supplies",
    "wife promise home"
]

for query in test_queries:
    memories = await memory_store.search(query, top_k=2)
    print(f"\nQuery: '{query}'")
    print("-"*40)
    for i, memory in enumerate(memories, 1):
        print(f"  {i}. {memory.content}")
        print(f"     Importance: {memory.importance:.1f}")

## Performance Summary

Analyze the performance characteristics of the pipeline.

In [None]:
# Collect timing and token data from multiple runs
timing_results = []

print("Running pipeline 3 times to measure performance...")
print("="*60)

for i in range(3):
    test_state = PipelineState(
        observation_context=observation_context,
        working_memory=initial_working_memory
    )
    
    start = time.time()
    test_result = await pipeline.process(test_state)
    elapsed = (time.time() - start) * 1000  # Convert to ms
    
    timing_results.append({
        "run": i + 1,
        "total_ms": elapsed,
        "memory_query_ms": test_result.time_ms.get('memory_query', 0),
        "memory_retrieval_ms": test_result.time_ms.get('memory_retrieval', 0),
        "cognitive_update_ms": test_result.time_ms.get('cognitive_update', 0),
        "action_selection_ms": test_result.time_ms.get('action_selection', 0),
        "memory_query_tokens": test_result.tokens_used.get('memory_query', 0),
        "cognitive_update_tokens": test_result.tokens_used.get('cognitive_update', 0),
        "action_selection_tokens": test_result.tokens_used.get('action_selection', 0),
    })
    print(f"Run {i+1}: {elapsed:.0f}ms total, {sum(test_result.tokens_used.values())} tokens")

print("\nPerformance Summary:")
print("="*60)

# Calculate averages
avg_total = sum(r['total_ms'] for r in timing_results) / len(timing_results)
avg_query = sum(r['memory_query_ms'] for r in timing_results) / len(timing_results)
avg_retrieval = sum(r['memory_retrieval_ms'] for r in timing_results) / len(timing_results)
avg_cognitive = sum(r['cognitive_update_ms'] for r in timing_results) / len(timing_results)
avg_action = sum(r['action_selection_ms'] for r in timing_results) / len(timing_results)

avg_query_tokens = sum(r['memory_query_tokens'] for r in timing_results) / len(timing_results)
avg_cognitive_tokens = sum(r['cognitive_update_tokens'] for r in timing_results) / len(timing_results)
avg_action_tokens = sum(r['action_selection_tokens'] for r in timing_results) / len(timing_results)
avg_total_tokens = avg_query_tokens + avg_cognitive_tokens + avg_action_tokens

print(f"Average Total Time: {avg_total:.0f}ms")
print(f"\nBreakdown by Step:")
print(f"  Memory Query:     {avg_query:.0f}ms ({avg_query/avg_total*100:.1f}%) | {avg_query_tokens:.0f} tokens")
print(f"  Memory Retrieval: {avg_retrieval:.0f}ms ({avg_retrieval/avg_total*100:.1f}%) | 0 tokens (vector search)")
print(f"  Cognitive Update: {avg_cognitive:.0f}ms ({avg_cognitive/avg_total*100:.1f}%) | {avg_cognitive_tokens:.0f} tokens")
print(f"  Action Selection: {avg_action:.0f}ms ({avg_action/avg_total*100:.1f}%) | {avg_action_tokens:.0f} tokens")

print(f"\nToken Usage:")
print(f"  Total: {avg_total_tokens:.0f} tokens per decision")
print(f"  Cost estimate (Gemini Flash Lite): ${avg_total_tokens * 0.000000075:.6f} per decision")
print(f"  Cost estimate (Claude Sonnet 4): ${avg_total_tokens * 0.000003:.6f} per decision")

print(f"\nObservations:")
llm_time = avg_query + avg_cognitive + avg_action
print(f"  - LLM calls account for {llm_time/avg_total*100:.1f}% of total time")
print(f"  - Memory retrieval (vector search) is {avg_retrieval/avg_total*100:.1f}% of total time")
if avg_cognitive > avg_action:
    print(f"  - Cognitive update is the slowest LLM step")
else:
    print(f"  - Action selection is the slowest LLM step")
