# Types of Memories


In [45]:
from dotenv import load_dotenv, find_dotenv
# Locate the .env file first, then load its variables into the environment.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)

True

In [46]:
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.core.vector_stores import SimpleVectorStore
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import Settings

# Embedding model, also registered as the global default via Settings.
embedding = OpenAIEmbedding(model="text-embedding-3-small")
Settings.embed_model = embedding

# Chat model; passed explicitly to the components that need an LLM.
llm = OpenAI(model="gpt-4.1-mini")

In [47]:
from llama_index.core.tools import FunctionTool


def get_weather(location: str) -> str:
    """Useful for getting the weather for a given location.

    Args:
        location: Name of the place to report the weather for.

    Returns:
        A short sentence describing the weather at ``location``.
    """
    # Demo stub: always reports the same pleasant weather, whatever the location.
    return f"The weather at {location} is very nice with not much rain."

# Wrap the plain Python function as a tool.
tool = FunctionTool.from_defaults(fn=get_weather)

In [48]:
from llama_index.core.agent.workflow import FunctionAgent
from llama_index.core.memory import Memory, InsertMethod
from llama_index.core.memory import (
    StaticMemoryBlock,
    FactExtractionMemoryBlock,
    VectorMemoryBlock,
)

# Fixed profile text served by the static block.
CORE_INFO = "Name: John Doe, Age: 30, Location: New York, Occupation: Software Engineer"

static_memory_block = StaticMemoryBlock(
    name="core_info",
    static_content=CORE_INFO,
    priority=0,
)

# Fact-extraction block: uses `llm` and is capped at 50 facts.
facts_block = FactExtractionMemoryBlock(
    name="facts",
    llm=llm,
    max_facts=50,
    priority=1,
)

In [49]:
# First, start Qdrant locally using Docker (run this in terminal):
# docker run -p 6333:6333 -p 6334:6334 qdrant/qdrant

from qdrant_client import QdrantClient
from qdrant_client.http import models
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.embeddings.openai import OpenAIEmbedding

# Embedding model handed to the vector memory block below. It uses the same
# model name as the `embedding` instance created in the earlier setup cell.
# Its output dimension must match the `vector_size` of the Qdrant collection.
embed_model = OpenAIEmbedding(model="text-embedding-3-small")

def setup_qdrant_with_collection(
    collection_name="vector_memory",
    vector_size=1536,
    url="http://localhost:6333",
):
    """Connect to Qdrant and return a vector store backed by ``collection_name``.

    The collection is created (cosine distance, ``vector_size`` dimensions)
    when it does not exist yet; an existing collection is reused untouched.

    Args:
        collection_name: Name of the Qdrant collection to use.
        vector_size: Dimension of the stored vectors. This should match the
            output dimension of the embedding model.
        url: HTTP endpoint of the Qdrant server.

    Returns:
        A ``(vector_store, client)`` tuple, or ``(None, None)`` when the
        connection or the collection setup fails (the error is printed).
    """
    try:
        client = QdrantClient(url=url)

        # Ask the server whether the collection exists instead of treating
        # *any* get_collection() failure as "missing": an unrelated error would
        # otherwise trigger a create attempt on a collection that is already there.
        if client.collection_exists(collection_name):
            print(f"Collection '{collection_name}' already exists")
        else:
            client.create_collection(
                collection_name=collection_name,
                vectors_config=models.VectorParams(
                    size=vector_size,  # This should match your embedding dimension
                    distance=models.Distance.COSINE,
                ),
            )
            print(f"Created collection '{collection_name}'")

        # Create the vector store on top of the (now existing) collection.
        vector_store = QdrantVectorStore(
            collection_name=collection_name,
            client=client,
            prefer_grpc=False,  # Use HTTP instead of gRPC for simplicity
        )

        return vector_store, client

    except Exception as e:
        # Best-effort setup: report the problem and let the caller decide
        # how to proceed without a vector store.
        print(f"Error connecting to Qdrant: {e}")
        return None, None

# Connect to Qdrant, then wrap the resulting store in a vector memory block.
vector_store, qdrant_client = setup_qdrant_with_collection()

if vector_store is None:
    # Qdrant setup failed: record the missing block explicitly as None.
    print("Skipping VectorMemoryBlock creation due to Qdrant connection issues")
    vector_memory_block = None
else:
    vector_memory_block = VectorMemoryBlock(
        name="vector_memory",
        priority=2,
        vector_store=vector_store,
        embed_model=embed_model,
    )
    print("VectorMemoryBlock created successfully with Qdrant!")

Collection 'vector_memory' already exists
VectorMemoryBlock created successfully with Qdrant!


We've set up three memory blocks:

- `core_info`: A static memory block that stores some core information about the user. The static content can either be a string or a list of `ContentBlock` objects like `TextBlock`, `ImageBlock`, etc. This information will always be inserted into the memory.
- `facts`: A fact-extraction memory block that will extract information from the chat history. Here we've passed in the `llm` to use to extract facts from the flushed chat history, and set `max_facts` to 50. If the number of extracted facts exceeds this limit, the facts will be automatically summarized and reduced to leave room for new information.
- `vector_memory`: A vector memory block that will store and retrieve batches of chat messages from a vector database. Each batch is a list of the flushed chat messages. Here we've passed in the `vector_store` and `embed_model` to use to store and retrieve the chat messages.

You'll also notice that we've set the `priority` for each block. This is used to determine the handling when the memory blocks' content (i.e. long-term memory) + short-term memory exceeds the token limit on the `Memory` object.

When memory blocks get too long, they are automatically "truncated". By default, this just means they are removed from memory until there is room again. This can be customized with subclasses of memory blocks that implement their own truncation logic.

- `priority=0`: This block will always be kept in memory.
- `priority=1, 2, 3`, etc: This determines the order in which memory blocks are truncated when the memory exceeds the token limit, to help the overall short-term memory + long-term memory content be less than or equal to the token_limit.


In [50]:
# `vector_memory_block` is None when the Qdrant setup failed, so keep only the
# blocks that were actually created instead of passing None into memory_blocks.
active_memory_blocks = [
    block
    for block in (static_memory_block, facts_block, vector_memory_block)
    if block is not None
]

memory = Memory.from_defaults(
    session_id="my_session",
    token_limit=40000,
    memory_blocks=active_memory_blocks,
    insert_method="system",
)

As the memory is used, the short-term memory will fill up. Once the short-term memory exceeds the `chat_history_token_ratio`, the oldest messages that fit into the `token_flush_size` will be flushed and sent to each memory block for processing.

When memory is retrieved, the short-term and long-term memories are merged together. The `Memory` object will ensure that the short-term memory + long-term memory content is less than or equal to the `token_limit`. If it is longer, the `.truncate()` method will be called on the memory blocks, using the priority to determine the truncation order.


In [51]:
# Fetch the messages currently held by the memory and print them one per line.
# NOTE(review): presumably get_all() returns only the stored chat history,
# without the memory-block content merged in — confirm; memory.get() may be
# the call that shows the merged view described above.
messages = memory.get_all()

for message in messages:
    print(message)