In [None]:
! pip install langchain-anthropic langgraph

In [1]:
import os, getpass

def _set_env(var: str):
    """Prompt for the value of environment variable `var` unless it is already set.

    An unset or empty variable triggers a hidden-input prompt labelled with the
    variable name; the entered text is written to ``os.environ``.

    Args:
        var (str): Name of the environment variable to populate.
    """
    # Already present and non-empty: leave it alone
    if os.environ.get(var):
        return
    os.environ[var] = getpass.getpass(f"{var}: ")

# Ensure ANTHROPIC_API_KEY is set; prompts interactively only when it is missing or empty
_set_env("ANTHROPIC_API_KEY")

## Memory Collection

You can think of this as a collection of facts or "semantic" memory. 

These are things that the LLM extracts over time from the user's journal. 

Let's build on what we did in the previous lessons.

First, we define a general schema for any memory.

The schema has a `memory_type` and a `memory_content`.

![](./img/memory_course_semantic_collection.png)


In [1]:
from pydantic import BaseModel, Field
from typing import List
from langchain_anthropic import ChatAnthropic

# Structured-output schema: a single extracted memory (a type label plus its content).
# Both fields default to None, so an instance may come back with either one unset.
class Memory(BaseModel):
    memory_type: str = Field(default=None, description="Type of memory to extract.")
    memory_content: str = Field(default=None, description="Specific content of the memory.")

# Wrapper schema: the list of Memory items extracted from one input.
# `memories` defaults to None (not an empty list), so guard before iterating.
class Memories(BaseModel):
    memories: List[Memory] = Field(default=None, description="List of memories to extract.")

# Define and augment LLM with structured output:
# `llm` is the plain chat model; `structured_llm` wraps it with the `Memories` schema
# and is what `extract_memories` invokes below.
llm = ChatAnthropic(model="claude-3-5-sonnet-latest")
structured_llm = llm.with_structured_output(Memories)

Now let's load our instructions for memory extraction and examples.

In [None]:
import sys
# Make the parent directory importable so the `src.memory_course` package resolves
# (assumes the notebook is run from a folder that sits next to `src/` -- TODO confirm)
sys.path.append('..')
import src.memory_course.prompts
import src.memory_course.examples
import src.memory_course.utils

# Reload the modules so edits made to them after the first import are picked up
# without restarting the kernel
import importlib
importlib.reload(src.memory_course.prompts)
importlib.reload(src.memory_course.examples)
importlib.reload(src.memory_course.utils)

# Then import the instructions and examples (after the reload, so the names bind to the fresh modules)
from src.memory_course.utils import format_few_shot_examples
from src.memory_course.examples import example_input, example_output
from src.memory_course.prompts import memory_collection_extraction_instructions, memory_search_instructions, collection_extraction_input

In [3]:
# Save instructions to the store
from datetime import datetime
from langgraph.store.memory import InMemoryStore

# One store instance is shared by all later cells (instructions, examples, extracted memories)
in_memory_store = InMemoryStore()

# Extraction instructions live under a fixed key so `extract_memories` can fetch them by name
namespace = ("journal","instructions")
key = "instructions_extraction"
in_memory_store.put(namespace, key, {"instructions": memory_collection_extraction_instructions})

# Few-shot example: keyed by the current time (second resolution), stored as an input/output pair
namespace = ("journal","examples")
key = f"example_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
in_memory_store.put(namespace, key, {"input": example_input, "output": example_output})

As before, we define a function that extracts memories from a journal entry using both the instructions and the few-shot examples.

In [5]:
from langchain_core.messages import HumanMessage, SystemMessage

def extract_memories(journal_entry: str) -> Memories:
    """Run structured memory extraction over a single journal entry.

    The system prompt is assembled from the extraction instructions and the
    few-shot examples currently saved in ``in_memory_store``; the journal entry
    itself is sent as the human message.

    Args:
        journal_entry (str): Raw text of the journal entry to analyze.

    Returns:
        Memories: The result of invoking ``structured_llm`` on the prompt
        (a ``Memories`` object wrapping the extracted ``Memory`` items).
    """
    # Procedural memory: the extraction instructions saved under a fixed key
    stored_instructions = in_memory_store.get(("journal", "instructions"), "instructions_extraction")

    # Episodic memory: saved examples returned by the store, rendered as few-shot text
    stored_examples = in_memory_store.search(("journal", "examples"))
    examples_text = format_few_shot_examples(stored_examples)

    # Fill the system-prompt template with both pieces
    system_prompt = collection_extraction_input.format(
        procedural_memory_instructions=stored_instructions.value['instructions'],
        few_shot_examples_formatted=examples_text,
    )

    # The entry goes in the human turn, wrapped in <context> tags
    messages = [
        SystemMessage(content=system_prompt),
        HumanMessage(content=f"Extract memory from <context> {journal_entry} </context>"),
    ]
    return structured_llm.invoke(messages)

In [6]:
# Test extraction of memories
# Sample entry mixing mood, a task list, and an idea, so several memory types can be extracted
journal_entry = """
Really productive coding session this morning! The new refactoring approach is working better than expected, though I hit a few snags with the database migrations.

Important tasks for this week:
- Set up meeting with DevOps team about deployment strategy
- Fix unit tests for the payment module by Friday
- Order new laptop charger
- Update documentation for API changes

During the team standup, had a fascinating idea about improving our testing workflow. What if we implemented automatic test generation using LLMs? Could save hours of manual test writing. Would need to evaluate cost and accuracy first.

Even though there's a lot on my plate, feeling pretty confident about the sprint goals. The recent architecture changes are already showing positive results."""

# Run extraction (calls the LLM; `memories` is reused by the storage cell further down)
memories = extract_memories(journal_entry)

# Review: print each memory's type (upper-cased) followed by its content
for memory in memories.memories:
    print("=== Extracted Memory ===")
    print(f"\n{memory.memory_type.upper()}:")
    print(f"- {memory.memory_content}\n")

=== Extracted Memory ===

SENTIMENT:
- Highly positive and confident mood despite workload. Enthusiasm about technical progress and sprint goals. Key phrases: 'productive', 'working better than expected', 'feeling pretty confident', 'positive results'

=== Extracted Memory ===

TODO:
- Set up meeting with DevOps team about deployment strategy

=== Extracted Memory ===

TODO:
- Fix unit tests for the payment module (Deadline: Friday)

=== Extracted Memory ===

TODO:
- Order new laptop charger

=== Extracted Memory ===

TODO:
- Update documentation for API changes

=== Extracted Memory ===

IDEA:
- Implement automatic test generation using LLMs to reduce manual test writing time. Requirements: Evaluate cost and accuracy feasibility



With memories extracted from the journal entry, we can store them easily.

In [16]:
import uuid

def store_memory_collection(memories):
    """Persist each extracted memory into its per-type collection in the store.

    Every memory is written to ``in_memory_store`` under the namespace
    ``("journal", "memory", "collection", <memory_type lower-cased>)`` with a
    fresh ``memory_<uuid4>`` key, so memories of the same type land in the same
    collection. The stored value holds the memory content plus the ISO-format
    timestamp of the moment it was stored (``datetime.now()``, i.e. naive
    local time).

    Args:
        memories (Memories): Extraction result whose ``memories`` attribute is
            a list of ``Memory`` objects, each exposing:
            - memory_type: str, the collection label (e.g. "SENTIMENT", "TODO", "IDEA")
            - memory_content: str, the extracted content
            ``memories.memories`` may be ``None`` (the schema default) or
            empty; nothing is stored in that case.

    Returns:
        None: Memories are stored in the in-memory store as a side effect.
    """
    # `or []` guards against the schema's None default so an empty extraction is a no-op
    for memory in memories.memories or []:

        # Collection is selected by the lower-cased memory type
        namespace = ("journal", "memory", "collection", memory.memory_type.lower())

        # A fresh UUID per memory keeps keys unique within the collection
        key = f"memory_{uuid.uuid4()}"

        value = {
            "memory": memory.memory_content,
            "timestamp": datetime.now().isoformat(),
        }

        in_memory_store.put(namespace, key, value)

# Persist the memories extracted from the sample journal entry
store_memory_collection(memories)

In [9]:
# Search: list the items stored in the "todo" collection (same namespace layout used when storing)
namespace = ("journal", "memory", "collection", "todo")
in_memory_store.search(namespace)

[Item(namespace=['journal', 'semantic', 'collection', 'todo'], key='memory_5951f267-9b73-4198-9b17-c017ffc71808', value={'memory': 'Set up meeting with DevOps team about deployment strategy', 'timestamp': '2025-02-03T13:48:57.451657'}, created_at='2025-02-03T21:48:57.451663+00:00', updated_at='2025-02-03T21:48:57.451664+00:00', score=None),
 Item(namespace=['journal', 'semantic', 'collection', 'todo'], key='memory_9337cc2e-d7b3-4351-90f0-902acd7dffb1', value={'memory': 'Fix unit tests for the payment module (Deadline: Friday)', 'timestamp': '2025-02-03T13:48:57.451675'}, created_at='2025-02-03T21:48:57.451680+00:00', updated_at='2025-02-03T21:48:57.451681+00:00', score=None),
 Item(namespace=['journal', 'semantic', 'collection', 'todo'], key='memory_0fb65069-c8ea-4b22-b06e-7c83e15ca681', value={'memory': 'Order new laptop charger', 'timestamp': '2025-02-03T13:48:57.451695'}, created_at='2025-02-03T21:48:57.451700+00:00', updated_at='2025-02-03T21:48:57.451700+00:00', score=None),
 Item

Now, we also want the ability to search for memories across all collections in natural language.

In [20]:
# Structured-output schema: the single collection name chosen for a user query
class MemorySearch(BaseModel):
    collection: str = Field(default=None, description="Name of the memory collection to search")

In [23]:
def get_collection_to_search(user_input):
    """Ask the LLM which stored memory collection best matches a user query.

    The collections that currently exist under
    ``("journal", "memory", "collection")`` are listed in the system prompt
    together with the memory-classification instructions; the model answers
    with a ``MemorySearch`` object.

    Args:
        user_input (str): Natural-language question from the user.

    Returns:
        str: The ``collection`` field of the model's structured answer.
    """
    # Enumerate collections from the namespaces themselves. Deriving them from
    # `search()` results only sees the store's first page of items (search is
    # limited by default), so collections could be missed as memories pile up.
    prefix = ("journal", "memory", "collection")
    collection_namespaces = in_memory_store.list_namespaces(prefix=prefix)
    # The last namespace element is the collection type; skip the bare prefix itself
    unique_collections = sorted(
        {ns[-1] for ns in collection_namespaces if len(ns) > len(prefix)}
    )

    # Local name chosen so the module-level `structured_llm` is not shadowed
    collection_picker = llm.with_structured_output(MemorySearch)
    search_instructions_formatted = memory_search_instructions.format(
        available_collections=unique_collections,
        memory_classification_prompt=memory_collection_extraction_instructions,
    )

    # Get the collection to search
    choice = collection_picker.invoke([
        SystemMessage(content=search_instructions_formatted),
        HumanMessage(content=f"<User Input>{user_input}</User Input>"),
    ])
    return choice.collection

# Quick check: route a sample question to a collection name (displayed as the cell output)
get_collection_to_search("What are my ToDos for the day?")


'todo'

Now, we can easily search for memories in the given collection.

In [25]:
# Extract semantic collection
def search_and_format_semantic_collection(collection_type):
    """Return the memories of one collection as a readable, oldest-first bullet list.

    Args:
        collection_type (str): Collection name as used in the store namespace,
            i.e. the lower-cased memory type such as "todo".

    Returns:
        str: Three header lines (collection type in upper case, sort order,
        "Items:") followed by one "• ..." bullet per stored memory, joined
        with newlines.
    """
    # Must match the namespace written by `store_memory_collection`:
    # ("journal", "memory", "collection", <type>). The previous "semantic"
    # segment pointed at a namespace nothing writes to, so no items were found.
    items = in_memory_store.search(("journal", "memory", "collection", collection_type))

    # Oldest first, by the store's creation timestamp
    oldest_first = sorted(items, key=lambda item: item.created_at)

    contents = []
    for item in oldest_first:
        stored = item.value.get('memory')
        # A value may hold a Memory object or just its plain content
        contents.append(stored.memory_content if isinstance(stored, Memory) else stored)

    lines = [
        f"Collection Type: {collection_type.upper()}",
        "Sorted by: Creation Time (Oldest to Newest)",
        "\nItems:",
        *[f"• {content}" for content in contents],
    ]

    return "\n".join(lines)

# Route the question to a collection name, then format that collection's memories
collection_type = get_collection_to_search("What are my ToDos for the day?")
collection_formatted = search_and_format_semantic_collection(collection_type)

In [26]:
# Display the formatted collection string built in the previous cell
collection_formatted

'Collection Type: TODO\nSorted by: Creation Time (Oldest to Newest)\n\nItems:\n• Set up meeting with DevOps team about deployment strategy\n• Fix unit tests for the payment module (Deadline: Friday)\n• Order new laptop charger\n• Update documentation for API changes'

So now we can:

1. Extract memories from a journal entry given a set of instructions (system message)
2. Update those instructions based upon user feedback directly 
3. Add few shot examples to improve the quality of the extraction
4. Update those few shot examples based upon user feedback directly 
5. Save a collection of extracted memories to the store
6. Search for memories in a given collection in natural language

This is a simple example of using and editing both "procedural memory" (system prompt/instructions) and "episodic memory" (few shot examples). 

In addition, we save "semantic memories" (facts) about the user in specific collections.

And we can search for memories in a given collection in natural language. 
