<a href="https://colab.research.google.com/github/run-llama/llama_index/blob/main/docs/docs/examples/vector_stores/WeaviateIndex_metadata_filter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Weaviate Vector Store Metadata Filter

If you're opening this notebook on Colab, you will probably need to install LlamaIndex 🦙.

In [None]:
# Install the LlamaIndex integration package that provides WeaviateVectorStore.
%pip install llama-index-vector-stores-weaviate

In [None]:
!pip install llama-index weaviate-client

#### Creating a Weaviate Client

In [None]:
import os
from getpass import getpass

import openai

# Reuse a key that is already exported in the environment instead of
# overwriting it with an empty placeholder; prompt only when it is missing so
# the secret is never stored in the notebook source.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")
openai.api_key = os.environ["OPENAI_API_KEY"]

In [None]:
import logging
import sys

# basicConfig installs a single stdout handler on the root logger at INFO
# level. Do not attach a second StreamHandler: it would print every record
# twice, and one more time for each re-run of this cell.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

In [None]:
import weaviate

# cloud: fill in the URL and API key of your hosted Weaviate cluster.
# NOTE(review): avoid committing a real API key here; consider reading it
# from an environment variable instead.
cluster_url = ""
api_key = ""

# Connect to the cluster at `cluster_url`, authenticating with the API key.
# `client` is reused by the vector store created later in the notebook.
client = weaviate.connect_to_wcs(
    cluster_url=cluster_url,
    auth_credentials=weaviate.auth.AuthApiKey(api_key),
)

# local: uncomment to connect to a locally running Weaviate instance instead.
# client = weaviate.connect_to_local()

INFO:httpx:HTTP Request: GET https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/meta "HTTP/1.1 200 OK"
HTTP Request: GET https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/meta "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://pypi.org/pypi/weaviate-client/json "HTTP/1.1 200 OK"
HTTP Request: GET https://pypi.org/pypi/weaviate-client/json "HTTP/1.1 200 OK"


#### Load documents, build the VectorStoreIndex

In [None]:
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.weaviate import WeaviateVectorStore
from IPython.display import Markdown, display

## Metadata Filtering

Let's insert a few dummy nodes with metadata, and then filter so that only the nodes matching the filter are returned.

In [None]:
from llama_index.core.schema import TextNode

# One (text, metadata) pair per node. The metadata keys ("author"/"director",
# "theme", "year") are what the filter examples below match on.
node_specs = [
    (
        "The Shawshank Redemption",
        {"author": "Stephen King", "theme": "Friendship", "year": 1994},
    ),
    (
        "The Godfather",
        {"director": "Francis Ford Coppola", "theme": "Mafia", "year": 1972},
    ),
    (
        "Inception",
        {"director": "Christopher Nolan", "theme": "Fiction", "year": 2010},
    ),
    (
        "To Kill a Mockingbird",
        {"author": "Harper Lee", "theme": "Mafia", "year": 1960},
    ),
    (
        "1984",
        {"author": "George Orwell", "theme": "Totalitarianism", "year": 1949},
    ),
    (
        "The Great Gatsby",
        {
            "author": "F. Scott Fitzgerald",
            "theme": "The American Dream",
            "year": 1925,
        },
    ),
    (
        "Harry Potter and the Sorcerer's Stone",
        {"author": "J.K. Rowling", "theme": "Fiction", "year": 1997},
    ),
]

# Materialise the pairs as TextNode objects, preserving the order above.
nodes = [
    TextNode(text=node_text, metadata=node_metadata)
    for node_text, node_metadata in node_specs
]

In [None]:
from llama_index.core import StorageContext

# Wrap the Weaviate client in a LlamaIndex vector store that uses the
# "LlamaIndex_filter" index name.
vector_store = WeaviateVectorStore(
    weaviate_client=client, index_name="LlamaIndex_filter"
)
# Build the index from `nodes`, with the Weaviate store as its storage backend.
# In the recorded run this step created the schema and called the OpenAI
# embeddings endpoint, so valid credentials are needed by this point.
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex(nodes, storage_context=storage_context)

INFO:httpx:HTTP Request: GET https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/schema/LlamaIndex_filter "HTTP/1.1 404 Not Found"
HTTP Request: GET https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/schema/LlamaIndex_filter "HTTP/1.1 404 Not Found"
INFO:httpx:HTTP Request: POST https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/schema "HTTP/1.1 200 OK"
HTTP Request: POST https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/schema "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/nodes "HTTP/1.1 200 OK"
HTTP Request: GET https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/nodes "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/nodes "HTTP/1.1 200 OK"
HTTP Request: GET https://llamaindex-pythonv4-dh

In [None]:
# Baseline: retrieve without any metadata filter, for comparison with the
# filtered retrievals below.
retriever = index.as_retriever()
retriever.retrieve("What is inception?")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/schema/LlamaIndex_filter "HTTP/1.1 200 OK"
HTTP Request: GET https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/schema/LlamaIndex_filter "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/schema/LlamaIndex_filter "HTTP/1.1 200 OK"
HTTP Request: GET https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/schema/LlamaIndex_filter "HTTP/1.1 200 OK"


[NodeWithScore(node=TextNode(id_='df310070-1480-46c1-8ec0-1052c172905e', embedding=[0.0031030464451760054, -0.024837113916873932, -0.022581512108445168, -0.03652292117476463, -0.007072651758790016, 0.011845098808407784, -0.04032048583030701, -0.027602458372712135, -0.01594213955104351, 0.007690712343901396, 0.02783184126019478, 0.02994726411998272, 0.018847661092877388, -0.0044156285002827644, 0.004122527781873941, 0.004409256856888533, 0.027449535205960274, -0.007537790108472109, -0.0030807452276349068, -0.012775375507771969, -0.005791928619146347, -0.019370146095752716, 0.001938607543706894, 0.008990551345050335, 0.0020947156008332968, -0.012953785248100758, 0.013661050237715244, -0.029386550188064575, 0.015011862851679325, -0.019382888451218605, 0.022173719480633736, -0.009353741072118282, -0.0222119502723217, -0.009194447658956051, -0.009340997785329819, -0.004332795739173889, -0.011940675787627697, -0.02732210047543049, 0.01604408770799637, -0.00805390253663063, 0.0143237132579088

In [None]:
from llama_index.core.vector_stores import (
    FilterOperator,
    MetadataFilter,
    MetadataFilters,
)

# Restrict retrieval to nodes whose "theme" metadata equals "Mafia".
mafia_theme = MetadataFilter(
    key="theme",
    operator=FilterOperator.EQ,
    value="Mafia",
)
filters = MetadataFilters(filters=[mafia_theme])

# The query asks about Inception, but only nodes passing the filter can be
# returned.
retriever = index.as_retriever(filters=filters)
retriever.retrieve("What is inception about?")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/schema/LlamaIndex_filter "HTTP/1.1 200 OK"
HTTP Request: GET https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/schema/LlamaIndex_filter "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/schema/LlamaIndex_filter "HTTP/1.1 200 OK"
HTTP Request: GET https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/schema/LlamaIndex_filter "HTTP/1.1 200 OK"


[NodeWithScore(node=TextNode(id_='34d778a1-b6bf-4a24-a1bf-ac659a9959ea', embedding=[-0.0017794573213905096, -0.023969227448105812, -0.01290263794362545, -0.035538844764232635, -0.00970841757953167, 0.02575497329235077, -0.0005831966991536319, 0.0009125220822170377, -0.02186909131705761, -0.0278173815459013, 0.023969227448105812, 0.018712596967816353, 0.028471317142248154, -0.0018627711106091738, 0.006259539630264044, 0.015468074008822441, 0.029024647548794746, -0.007985550910234451, 0.010418943129479885, -0.00027961216983385384, 0.010318337008357048, 0.006847452372312546, -0.029955245554447174, -0.0007384276250377297, 0.004885647911578417, -0.0011467438889667392, 0.004489514045417309, -0.026987388730049133, 0.021567273885011673, -0.017505332827568054, 0.012072643265128136, -0.024069832637906075, -0.006407303735613823, 0.0021127124782651663, 0.010173717513680458, -0.0029820057097822428, 0.005731361452490091, -0.010488108731806278, 0.0010052676079794765, 0.014700958505272865, 0.014021872

In [None]:
# MetadataFilter is imported here as well, so this cell names everything it
# uses instead of relying on an import made by an earlier cell.
from llama_index.core.vector_stores import (
    ExactMatchFilter,
    MetadataFilter,
    MetadataFilters,
)


# Two filters with no explicit operator or condition, so the library defaults
# apply (presumably exact match, combined with AND - confirm against the
# MetadataFilter / MetadataFilters documentation).
filters = MetadataFilters(
    filters=[
        MetadataFilter(key="theme", value="Mafia"),
        MetadataFilter(key="year", value=1972),
    ]
)

retriever = index.as_retriever(filters=filters)
retriever.retrieve("What is inception?")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/schema/LlamaIndex_filter "HTTP/1.1 200 OK"
HTTP Request: GET https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/schema/LlamaIndex_filter "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/schema/LlamaIndex_filter "HTTP/1.1 200 OK"
HTTP Request: GET https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/schema/LlamaIndex_filter "HTTP/1.1 200 OK"


[NodeWithScore(node=TextNode(id_='34d778a1-b6bf-4a24-a1bf-ac659a9959ea', embedding=[-0.0017794573213905096, -0.023969227448105812, -0.01290263794362545, -0.035538844764232635, -0.00970841757953167, 0.02575497329235077, -0.0005831966991536319, 0.0009125220822170377, -0.02186909131705761, -0.0278173815459013, 0.023969227448105812, 0.018712596967816353, 0.028471317142248154, -0.0018627711106091738, 0.006259539630264044, 0.015468074008822441, 0.029024647548794746, -0.007985550910234451, 0.010418943129479885, -0.00027961216983385384, 0.010318337008357048, 0.006847452372312546, -0.029955245554447174, -0.0007384276250377297, 0.004885647911578417, -0.0011467438889667392, 0.004489514045417309, -0.026987388730049133, 0.021567273885011673, -0.017505332827568054, 0.012072643265128136, -0.024069832637906075, -0.006407303735613823, 0.0021127124782651663, 0.010173717513680458, -0.0029820057097822428, 0.005731361452490091, -0.010488108731806278, 0.0010052676079794765, 0.014700958505272865, 0.014021872

In [None]:
from llama_index.core.vector_stores import FilterCondition, FilterOperator

# Match nodes that either have the "Fiction" theme or a "year" greater than
# 1997; the two filters are combined with OR.
fiction_theme = MetadataFilter(key="theme", value="Fiction")
year_after_1997 = MetadataFilter(
    key="year",
    value=1997,
    operator=FilterOperator.GT,
)
filters = MetadataFilters(
    filters=[fiction_theme, year_after_1997],
    condition=FilterCondition.OR,
)

retriever = index.as_retriever(filters=filters)
retriever.retrieve("Harry Potter?")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/schema/LlamaIndex_filter "HTTP/1.1 200 OK"
HTTP Request: GET https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/schema/LlamaIndex_filter "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/schema/LlamaIndex_filter "HTTP/1.1 200 OK"
HTTP Request: GET https://llamaindex-pythonv4-dhqgeqxq.weaviate.network/v1/schema/LlamaIndex_filter "HTTP/1.1 200 OK"


[NodeWithScore(node=TextNode(id_='b9a4dffd-b9f1-4d83-9c13-f4402d1036b8', embedding=[0.012515314854681492, -0.014948848634958267, -0.04071340337395668, -0.006991580594331026, -0.010674070566892624, 0.016596956178545952, -0.029305409640073776, -0.050885315984487534, -0.021270886063575745, -0.01666133478283882, 0.024966251105070114, 0.013841526582837105, 0.017202120274305344, 0.0007604792481288314, -0.010571063496172428, -0.000707366387359798, 0.022494090721011162, -0.01047449465841055, 0.01530937198549509, -0.014923096634447575, -0.016712838783860207, -0.009611813351511955, -0.008382171392440796, 0.010004526935517788, -0.010493808425962925, -0.0017655993578955531, 0.02235245518386364, -0.04220699891448021, 0.019970426335930824, 0.0035215418320149183, 0.00806027464568615, -0.0053756628185510635, -0.025931939482688904, -0.022506965324282646, -0.03512528911232948, 0.00804739911109209, -0.026833247393369675, -0.009341420605778694, 0.00688857352361083, -0.0037597448099404573, 0.03002645634114