In [1]:
%load_ext autoreload
%autoreload 2

import pprint
import sys
import textwrap

In [2]:
import config
from dataclasses import dataclass

from llama_index.core import Document, Settings, VectorStoreIndex, ChatPromptTemplate
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.anthropic import Anthropic
from npc.prompts import NpcPrompt
from npc.prompts.common import Prompt

api_keys = config.Config("../../../api_keys.cfg")

Settings.llm = None

  from .autonotebook import tqdm as notebook_tqdm


LLM is explicitly disabled. Using MockLLM.


In [3]:
anthropic_api_key = api_keys["ANTHROPIC_API_KEY"]
small_llm = Anthropic(model="claude-3-haiku-20240307", api_key=anthropic_api_key, max_tokens=4096)

In [4]:
# response = small_llm.complete("What is the square root of 44? Manually do this using Newton's method.")
# response_text = response.text
# print(response_text)

# NPC Agent POC

In [5]:
from npc.interfaces import SimulatorInterface, SimulatorResponse, SimulatorRequest
from npc.llm_response_generator import LLMResponseGenerator

In [6]:
# test_working_memory = (
#     "I am walking through a forest. The trees are tall and the air is fresh. I feel a sense of peace and tranquility.",
# )
# test_observations = (
#     "I notice a squirrel running up a tree. The sunlight filters through the leaves, creating dappled patterns on the ground.",
#     "I hear the chirping of birds and the rustling of leaves in the gentle breeze.",
#     "I feel the coolness of the air on my skin and the softness of the ground beneath my feet.",
#     "I smell the earthy scent of moss and the sweet fragrance of wildflowers.",
# )

# # <query_1>peaceful forest walk</query_1>
# # <query_2>tranquil woodland experience</query_2>
# # <query_3>nature's sights and sounds</query_3>
# # <query_4>sensations of outdoor environment</query_4>
# # <query_5>childhood memories of forests</query_5>
# # <query_6>relaxing outdoor activities</query_6>
# # <query_7>wildlife in forest settings</query_7>
# # <query_8>seasonal changes in forests</query_8>
# # <query_9>favorite outdoor retreats</query_9>
# # <query_10>environmental awareness and appreciation</query_10>
# test_retrieved_memories = (
#     "I remember a peaceful forest walk I took last summer. The trees were tall and the air was fresh, creating a sense of tranquility.",
#     "I recall a childhood memory of exploring a woodland area near my home. The sights and sounds of nature left a lasting impression on me.",
#     "I have a vivid memory of a camping trip where I experienced the beauty of nature up close. The wildlife and seasonal changes were fascinating.",
#     "I think back to a favorite outdoor retreat I visited with friends. The environmental awareness and appreciation we shared was memorable.",
# )

In [7]:
# TODO: ask LLM to output in a structured reasoning template. Try to simplify the prompts (remove some of the less important items with claude's help)
#       and incorporate others into reasoning steps

# query_response_generator = LLMResponseGenerator(NpcPrompt.MEMORY_QUERY_FORMULATION.value, small_llm)
# query_response = query_response_generator.generate_response(
#     working_memory=test_working_memory,
#     observations=test_observations,
# )
# pprint.pprint(query_response, width=120)

In [8]:
# memory_report_synthesis_generator = LLMResponseGenerator(NpcPrompt.MEMORY_REPORT_SYNTHESIS.value, small_llm)
# memory_report_synthesis_response = memory_report_synthesis_generator.generate_response(
#     working_memory=test_working_memory,
#     retrieved_memories=test_retrieved_memories,
# )
# pprint.pprint(memory_report_synthesis_response, width=120)

In [9]:
# working_memory_generator = LLMResponseGenerator(NpcPrompt.WORKING_MEMORY_UPDATE.value, small_llm)
# working_memory_response = working_memory_generator.generate_response(
#     working_memory=test_working_memory,
#     memory_report=memory_report_synthesis_response,
# )
# pprint.pprint(working_memory_response, width=120)

In [10]:
# action_decision_generator = LLMResponseGenerator(NpcPrompt.ACTION_DECISION.value, small_llm)
# action_decision_response = action_decision_generator.generate_response(
#     working_memory=working_memory_response,
#     actions=[
#         "1. Explore the forest to discover new sights and sounds.",
#         "2. Sit down and meditate to deepen the sense of peace and tranquility.",
#         "3. Take out a notebook and start sketching the trees and wildlife around you.",
#     ]
# )
# pprint.pprint(action_decision_response, width=120)

In [11]:
class MemoryDatabase:
    def __init__(self, initial_memories: list[str]):
        self.index = VectorStoreIndex.from_documents(
            [Document(text=memory) for memory in initial_memories],
            embed_model=HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5"),
        )

    def add_memories(self, memories: list[str]):
        for memory in memories:
            self.index.insert(Document(text=memory))
    
    def retrieve(self, query: str, top_k: int = 5):
        retriever = VectorIndexRetriever(index=self.index, similarity_top_k=top_k)
        return retriever.retrieve(query)


@dataclass
class LLMConfig:
    small_llm: Anthropic
    large_llm: Anthropic

from npc.interfaces.text_adventure_interface import TextAdventureInterface
from typing import Generic, TypeVar

SimulatorRequestType = TypeVar('RequestType', bound=SimulatorRequest)
SimulatorResponseType = TypeVar('ResponseType', bound=SimulatorResponse)
SimulatorType = TypeVar('SimulatorType')

class Agent(Generic[SimulatorRequestType, SimulatorResponseType, SimulatorType]):
    def __init__(
            self, 
            simulator_interface: SimulatorInterface[SimulatorRequestType, SimulatorResponseType, SimulatorType],
            llm_config: LLMConfig,
            initial_working_memory: str = "",
            initial_long_term_memories: list[str] = [],
        ):
        self.simulator_interface = simulator_interface
        self.prev_simulator_response = None

        self.working_memory = initial_working_memory
        self.long_term_memory = MemoryDatabase(initial_long_term_memories)
        # TODO: need to represent personality and background. Use a couple different personality test results to give as reference, as well as some core background information

        self.query_generator = LLMResponseGenerator(NpcPrompt.MEMORY_QUERY_FORMULATION.value, llm_config.small_llm)
        self.memory_report_generator = LLMResponseGenerator(NpcPrompt.MEMORY_REPORT_SYNTHESIS.value, llm_config.small_llm)
        self.working_memory_generator = LLMResponseGenerator(NpcPrompt.WORKING_MEMORY_UPDATE.value, llm_config.small_llm)
        self.action_decision_generator = LLMResponseGenerator(NpcPrompt.ACTION_DECISION.value, llm_config.small_llm)

    def update_state(self, prev_simulator_response: SimulatorResponseType) -> SimulatorRequestType:
        self.prev_simulator_response = prev_simulator_response
        self.update_working_memory()
        # TODO: update long-term memory based on working memory
        # TODO: save to long term memory. This can be done asynchronously to avoid blocking the agent
        # - Unlike the Generative Agents paper, use magnitude estimation to get more accurate and actionable importance scores (will need to retrieve examples to set a baseline)

    def update_working_memory(self) -> None:
        # Formulate queries and retrieve from long-term memory
        query_response = self.query_generator.generate_response(
            working_memory=self.working_memory,
            observation=self.prev_simulator_response.observation,
        )
        retrieved_memories = []
        for query in query_response.get("queries", []):
            query_memories = self.long_term_memory.retrieve(query)
            # TODO: have some sort of LLM filtering / reranking process here
            if query_memories:
                retrieved_memories.append(query_memories[0])

        # Draft memory report based on working memory and retrieved memories
        memory_report_response = self.memory_report_generator.generate_response(
            working_memory=self.working_memory,
            observation=self.prev_simulator_response.observation,
            retrieved_memories=retrieved_memories,
        )

        # Update working memory based on memory report
        working_memory_response = self.working_memory_generator.generate_response(
            working_memory=self.working_memory,
            memory_report=memory_report_response["memory_report"],
        )
        if working_memory_response["updated_working_memory"]:
            self.working_memory = working_memory_response["updated_working_memory"]
        
        # TODO: remove debug print statements. Replace with loguru logging
        print("\n".join([
            "Updated working memory:",
            textwrap.fill(self.working_memory, width=120),
            "",
        ]))

    def choose_action(self) -> SimulatorRequestType:
        next_action = self.action_decision_generator.generate_response(
            working_memory=self.working_memory,
            available_actions=self.prev_simulator_response.available_actions_llm_str(),
            action_request_documentation=self.simulator_interface.request_class().documentation_llm_str(),
        )
        # TODO: recover when invalid json causes a ValidationError
        return self.simulator_interface.request_class.parse_json(next_action["action_decision"])


# Sandbox

In [34]:
memories = [
    "I am a human.",
    "I had an apple for breakfast.",
    "My name is John.",
    "My favorite color is blue.",
    "I have a pet cat.",
    "I am 25 years old.",
    "I am a software engineer.",
]

db = MemoryDatabase(memories)

In [None]:
query = "What did I have for breakfast?"
results = db.retrieve(query, top_k=3)
for result in results:
    print(result)

In [None]:
db.add_memories(["I had a salad for lunch."])
results = db.retrieve("What did I have for lunch?", top_k=3)
for result in results:
    print(result)

In [None]:
# Use LLM to generate a consistent set of memories, etc to bootstrap an agent. Cache to a text file to save time.
def bootstrap_test_agent():
    pass

# Test the agent on a set of observations. Action space can be a set of possible responses at the level of detail of a choose-your-own-adventure game.
def test_agent():
    pass

# If this turns out to be a decent evaluation methodology, could optimize the agent architecture using a genetic algorithm or other optimization technique

# Text Adventure LLM Interface

In [14]:
from npc.interfaces.text_adventure_interface import TextAdventureInterface, TextAdventureRequest, TextAdventureResponse
from npc.simulators.text_adventure import TextAdventureSimulator

simulator = TextAdventureSimulator(small_llm)

In [15]:
# TODO: generalize this so that it does not have any text adventure specific code. The only mixing of simulator logic and agent logic should be in the interface

def run_text_adventure_simulation(simulator: TextAdventureSimulator, config: LLMConfig, max_steps: int = 3):
    simulator_interface = TextAdventureInterface(simulator)
    agent = Agent(simulator_interface=simulator_interface, llm_config=config)

    # TODO: don't touch simulator except through the interface
    simulator_response = TextAdventureResponse(
        success=True,
        message="Initial state",
        observation=simulator.state.observation,
        available_actions=simulator.state.available_actions,
    )

    for _ in range(max_steps):
        agent.update_state(simulator_response)
        simulator_request = agent.choose_action()
        simulator_response = simulator_interface.execute(simulator_request)
        
        if not simulator_response.success:
            print(f"Error: {simulator_response.message}")
            break
        
        if simulator.is_story_ended():
            print("Story has reached its conclusion.")
            break

    print("Adventure completed!")

run_text_adventure_simulation(simulator, LLMConfig(small_llm, small_llm), max_steps=2)

Updated working memory:
Based on the memory report and the current empty working memory, the following key elements are prioritized and
integrated into the updated working memory:  1. Urgent Situation: You are being pursued by powerful corporate enforcers,
suggesting a high-stakes conflict or confrontation with a megacorporation that rules the dystopian urban environment you
and goal-relevant piece of information that is now a key part of your working memory.  3. Gaps in Accessible Memories:
The lack of any retrieved memories at this time suggests that the information about your past actions and the events
leading up to this situation may be either not readily accessible or intentionally suppressed. This gap in your working
memory represents an important area for further retrieval and inference.  4. Heightened Emotional State: The observation
conveys a strong sense of unease, urgency, and the weight of consequences, with your heart racing and an acute awareness
of the risks and uncerta

In [None]:
# TODO: move most of the code out of __init__.py and into separate files

# Text Adventure Human Interface

In [22]:
from npc.simulators.text_adventure import TextAdventureSimulator

simulator = TextAdventureSimulator(small_llm)

In [23]:
from rich.console import Console
from rich.markdown import Markdown
from rich.panel import Panel
from rich.prompt import IntPrompt

console = Console()

def print_wrapped_text(text: str):
    console.print(Markdown(text))

while not simulator.is_story_ended():
    state = simulator.state

    # Display the current game state
    console.rule("Game State", style="cyan")
    print_wrapped_text(state.observation)

    # Display available actions
    action_text = "\n\n".join([f"**Option {i}**  \n{action}" for i, action in state.available_actions.items()])
    actions_panel = Panel(Markdown(action_text), title="[bold cyan]Available Actions[/bold cyan]", border_style="bright_blue")
    console.print(actions_panel)

    # Take an action
    action_index = IntPrompt.ask("Choose an action", choices=[str(i) for i in state.available_actions.keys()])
    console.print(f"\nYou chose: [bold green]Option {action_index}[/bold green]\n")
    new_game_state = simulator.take_action(action_index)

KeyboardInterrupt: Interrupted by user