<a href="https://colab.research.google.com/github/skaiworlwide-oss/agensgraph-ai/blob/main/llama-index/examples/vector_stores/agensgraph_metadata_filter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Agensgraph Vector Store - Metadata Filter

If you're opening this notebook on Colab, you will probably need to install LlamaIndex 🦙.

In [43]:
%%capture
# %%capture hides this cell's output, including pip's install log.
# The AgensGraph integration imported later in this notebook (llama_index_agensgraph)
# is not covered by the active %pip line below; uncomment the next line and point it
# at your local wheel before running the rest of the notebook.
# %pip install /path/to/llama_index_agensgraph-0.1.0-py3-none-any.whl
%pip install llama_index simplejson llama-index-llms-azure-openai llama-index-embeddings-azure-openai

In [44]:
# OPTIONAL: uncomment the lines below to print log records (INFO and above) to stdout.

# import logging
# import sys
# import os

# logging.basicConfig(stream=sys.stdout, level=logging.INFO)
# logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

Build an AgensGraph vector store and connect to it

In [45]:
import os
from llama_index_agensgraph.vector_stores.agensgraph.base import AgensgraphVectorStore

# Connection string for the AgensGraph database.
# Prefer supplying it through the AGENSGRAPH_URL environment variable so that real
# credentials are never saved into the notebook; the literal below is only a
# placeholder and is used when the variable is not set.
url = os.environ.get(
    "AGENSGRAPH_URL",
    "postgresql://username:password@host:port/database",  # Replace with your actual connection string
)

# Vector size handed to the store as `embedding_dimension`.
# NOTE(review): presumably this must equal the output size of the embedding model
# in use (1536 here) — confirm if you switch embedding models.
embed_dim = 1536

vector_store = AgensgraphVectorStore(url=url, embedding_dimension=embed_dim)

Set the environment variables

In [46]:
from llama_index.core import VectorStoreIndex, StorageContext

# Make an OpenAI API key available through the environment.
# setdefault keeps a key that is already exported instead of overwriting it with
# the placeholder; replace "sk-..." with your key only if none is set.
os.environ.setdefault("OPENAI_API_KEY", "sk-...")

# In case of Azure OpenAI, set the following
# from llama_index.core import Settings
# from llama_index.llms.azure_openai import AzureOpenAI
# from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding

# llm = AzureOpenAI(
#     deployment_name="",
#     model="",
#     api_key="",
#     azure_endpoint="",
#     api_version="",
# )
# embedding = AzureOpenAIEmbedding(
#     deployment_name="",
#     model="",
#     api_key="",
#     azure_endpoint="",
#     api_version=""
# )
# Settings.llm = llm
# Settings.embed_model = embedding

Create the nodes and build the VectorStoreIndex

In [47]:
from llama_index.core.schema import TextNode

# Demo corpus: one TextNode per title. Each entry pairs the node text with its
# metadata dict; "theme" and "year" are the keys the metadata filters in this
# notebook match on.
nodes = [
    TextNode(text=title, metadata=metadata)
    for title, metadata in (
        (
            "The Shawshank Redemption",
            {"author": "Stephen King", "theme": "Friendship", "year": 1994},
        ),
        (
            "The Godfather",
            {"director": "Francis Ford Coppola", "theme": "Mafia", "year": 1972},
        ),
        (
            "Inception",
            {"director": "Christopher Nolan", "theme": "Fiction", "year": 2010},
        ),
        (
            "To Kill a Mockingbird",
            {"author": "Harper Lee", "theme": "Mafia", "year": 1960},
        ),
        (
            "1984",
            {"author": "George Orwell", "theme": "Totalitarianism", "year": 1949},
        ),
        (
            "The Great Gatsby",
            {"author": "F. Scott Fitzgerald", "theme": "The American Dream", "year": 1925},
        ),
        (
            "Harry Potter and the Sorcerer's Stone",
            {"author": "J.K. Rowling", "theme": "Fiction", "year": 1997},
        ),
    )
]

In [48]:
# Route index storage through the AgensGraph vector store created earlier,
# then build the index from the node list.
# NOTE(review): presumably this embeds every node and writes it to the store, so
# re-running the cell may insert duplicates — confirm.
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex(nodes=nodes, storage_context=storage_context)

Define metadata filters

In [49]:
from llama_index.core.vector_stores import (
    MetadataFilter,
    MetadataFilters,
    FilterOperator,
)

# A single filter: keep only nodes whose "theme" metadata equals "Fiction".
filters = MetadataFilters(
    filters=[MetadataFilter(key="theme", value="Fiction", operator=FilterOperator.EQ)],
)

Retrieve from vector store with filters

In [50]:
# Attach the metadata filters to a retriever, then run a similarity query;
# the bare last expression makes the notebook display the returned nodes.
retriever = index.as_retriever(filters=filters)
retriever.retrieve("What is inception about?")

[NodeWithScore(node=TextNode(id_='1d53f175-5ff1-43e2-bfbd-478713d0a485', embedding=None, metadata={'director': 'Christopher Nolan', 'theme': 'Fiction', 'year': 2010}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='Inception', mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'), score=0.406222468243691)]

Multiple Metadata Filters with `AND` condition

In [51]:
from llama_index.core.vector_stores import FilterOperator, FilterCondition

# Both conditions must hold: theme "Fiction" (the first filter uses MetadataFilter's
# default operator, presumably equality) AND year strictly greater than 1997.
# GT is strict, so "Harry Potter and the Sorcerer's Stone" (year 1997) is filtered
# out even though the query mentions it; of the Fiction titles only "Inception"
# (2010) remains.
filters = MetadataFilters(
    condition=FilterCondition.AND,
    filters=[
        MetadataFilter(key="theme", value="Fiction"),
        MetadataFilter(key="year", operator=FilterOperator.GT, value=1997),
    ],
)

retriever = index.as_retriever(filters=filters)
retriever.retrieve("Harry Potter?")

[NodeWithScore(node=TextNode(id_='1d53f175-5ff1-43e2-bfbd-478713d0a485', embedding=None, metadata={'director': 'Christopher Nolan', 'theme': 'Fiction', 'year': 2010}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='Inception', mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'), score=0.1951608417976154)]

Multiple Metadata Filters with `OR` condition

In [52]:
from llama_index.core.vector_stores import FilterOperator, FilterCondition


# Either condition is enough: theme "Fiction" OR year strictly greater than 1997.
# In this corpus the candidates are "Inception" and "Harry Potter and the
# Sorcerer's Stone" (both Fiction); how many are returned depends on the
# retriever's top-k setting.
filters = MetadataFilters(
    condition=FilterCondition.OR,
    filters=[
        MetadataFilter(key="theme", value="Fiction"),
        MetadataFilter(key="year", operator=FilterOperator.GT, value=1997),
    ],
)

retriever = index.as_retriever(filters=filters)
retriever.retrieve("Harry Potter?")

[NodeWithScore(node=TextNode(id_='0c4740c9-785e-489c-89ec-8b0cdb954e38', embedding=None, metadata={'author': 'J.K. Rowling', 'theme': 'Fiction', 'year': 1997}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text="Harry Potter and the Sorcerer's Stone", mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'), score=0.5251325838386345)]