<a href="https://colab.research.google.com/github/run-llama/llama_index/blob/main/docs/examples/vector_stores/pinecone_metadata_filter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Pinecone Vector Store - Metadata Filter

If you're opening this notebook on Colab, you will probably need to install LlamaIndex 🦙.

In [None]:
# Install the LlamaIndex integration package for the Pinecone vector store.
%pip install llama-index-vector-stores-pinecone

In [None]:
# Optional: uncomment to install/upgrade the core packages as well.
# The specifiers are quoted because an unquoted ">=" is parsed by the shell as
# an output redirect, which silently drops the version bounds.
# %pip install "llama-index>=0.9.31" "pinecone-client>=3.0.0"

In [None]:
import logging
import sys
import os

# basicConfig already attaches a stdout StreamHandler to the root logger;
# adding a second one by hand makes every record print twice.
# force=True replaces any handlers that were installed before this cell ran,
# so the configuration takes effect even when the root logger is not empty.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

In [None]:
import os

# Placeholder credentials: replace both values with real keys before running
# the cells that talk to Pinecone or OpenAI.
os.environ.update(
    {
        "PINECONE_API_KEY": "<Your Pinecone API key, from app.pinecone.io>",
        "OPENAI_API_KEY": "sk-...",
    }
)

Build a Pinecone Index and connect to it

In [None]:
from pinecone import Pinecone, ServerlessSpec

# Read the key set in the environment earlier in the notebook (raises KeyError
# if it is missing) and build the client used by the index cells below.
api_key = os.environ["PINECONE_API_KEY"]
pc = Pinecone(api_key=api_key)

In [None]:
# Uncomment to delete an existing "quickstart-index" before re-creating it
# pc.delete_index("quickstart-index")

In [None]:
# Dimensions are for text-embedding-ada-002
index_name = "quickstart-index"

# Creating an index whose name is already taken fails, so only create it when
# it is missing. This keeps the cell safe under "Restart & Run All".
if index_name not in pc.list_indexes().names():
    pc.create_index(
        index_name,
        dimension=1536,
        metric="euclidean",
        spec=ServerlessSpec(cloud="aws", region="us-west-2"),
    )

In [None]:
# Handle to the index; it is passed to PineconeVectorStore further down.
pinecone_index = pc.Index(name="quickstart-index")

Build the PineconeVectorStore and VectorStoreIndex

In [None]:
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.pinecone import PineconeVectorStore

In [None]:
from llama_index.core.schema import TextNode

# Sample corpus as (text, metadata) pairs. Every entry carries "theme" and
# "year"; entries have either an "author" or a "director" key.
# NOTE(review): "To Kill a Mockingbird" is tagged theme "Mafia", which looks
# like a copy-paste slip. It is kept as-is because the Pinecone-specific
# filter example queries theme == "Mafia" -- confirm before changing.
_SAMPLE_WORKS = [
    (
        "The Shawshank Redemption",
        {"author": "Stephen King", "theme": "Friendship", "year": 1994},
    ),
    (
        "The Godfather",
        {"director": "Francis Ford Coppola", "theme": "Mafia", "year": 1972},
    ),
    (
        "Inception",
        {"director": "Christopher Nolan", "theme": "Fiction", "year": 2010},
    ),
    (
        "To Kill a Mockingbird",
        {"author": "Harper Lee", "theme": "Mafia", "year": 1960},
    ),
    (
        "1984",
        {"author": "George Orwell", "theme": "Totalitarianism", "year": 1949},
    ),
    (
        "The Great Gatsby",
        {
            "author": "F. Scott Fitzgerald",
            "theme": "The American Dream",
            "year": 1925,
        },
    ),
    (
        "Harry Potter and the Sorcerer's Stone",
        {"author": "J.K. Rowling", "theme": "Fiction", "year": 1997},
    ),
]

# One TextNode per entry, in the order listed above.
nodes = [
    TextNode(text=title, metadata=meta) for title, meta in _SAMPLE_WORKS
]

In [None]:
# Wrap the Pinecone index handle in the LlamaIndex vector-store adapter,
# using a dedicated namespace.
vector_store = PineconeVectorStore(
    namespace="test_05_14",
    pinecone_index=pinecone_index,
)

# Building the index embeds the nodes and upserts them into the store
# (see the embeddings request and "Upserted vectors" progress in the output).
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex(nodes=nodes, storage_context=storage_context)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Upserted vectors:   0%|          | 0/7 [00:00<?, ?it/s]

Define metadata filters

In [None]:
from llama_index.core.vector_stores import (
    MetadataFilter,
    MetadataFilters,
    FilterOperator,
)

# Exact-match filter: keep only nodes whose "theme" metadata equals "Fiction".
fiction_only = MetadataFilter(
    key="theme",
    value="Fiction",
    operator=FilterOperator.EQ,
)
filters = MetadataFilters(filters=[fiction_only])

Retrieve from vector store with filters

In [None]:
# Retrieve with the metadata filter applied on the vector-store side.
retriever = index.as_retriever(filters=filters)
retrieved = retriever.retrieve("What is inception about?")

# Show only the readable fields: the default repr prints each node's full
# embedding vector, which buries the evidence that the filter worked.
[(hit.node.get_content(), hit.node.metadata, hit.score) for hit in retrieved]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


[NodeWithScore(node=TextNode(id_='7fed3d0b-e2d7-432a-9231-3ac02130c00f', embedding=[0.00310940156, -0.0246712118, -0.0222742166, -0.0364649445, -0.00715911388, 0.0117236068, -0.0400604382, -0.0275654588, -0.0157589763, 0.00740136392, 0.0278459582, 0.029962454, 0.0187679715, -0.00440511806, 0.00412780605, 0.00442743069, 0.0276674572, -0.00760536361, -0.00303608901, -0.012947605, -0.00570242899, -0.01923972, 0.00208462193, 0.0092118606, 0.00205274695, -0.0128328558, 0.0136297289, -0.0292994548, 0.0149939768, -0.0193289705, 0.0219937172, -0.00944136083, -0.0221977159, -0.00928198546, -0.00940948538, -0.00429993076, -0.0119786067, -0.0272339582, 0.0158864763, -0.0080261128, 0.0143437283, 0.00407999381, -0.00648655277, -0.0170722231, 0.00394293154, -0.0149939768, -0.00831298716, -0.0212669671, -0.0228479654, 0.0105123585, 0.0167662241, 0.0527849197, -0.0421514362, -0.0262139607, -0.00156506011, -0.00443380559, -0.00724836392, 0.00597655354, 0.0354959443, -0.00842773728, -0.00259143347, -0.0

Multiple Metadata Filters with `AND` condition

In [None]:
from llama_index.core.vector_stores import FilterOperator, FilterCondition

# AND: a node must satisfy every filter in the list.
# The first filter passes no operator, so the library default applies.
# NOTE: GT is a strict comparison, so the "Harry Potter" node (year 1997)
# does not satisfy the year filter even though the query mentions it.
filters = MetadataFilters(
    filters=[
        MetadataFilter(key="theme", value="Fiction"),
        MetadataFilter(key="year", value=1997, operator=FilterOperator.GT),
    ],
    condition=FilterCondition.AND,
)

retriever = index.as_retriever(filters=filters)
retrieved = retriever.retrieve("Harry Potter?")

# Show only the readable fields: the default repr prints each node's full
# embedding vector, which buries the evidence that the filter worked.
[(hit.node.get_content(), hit.node.metadata, hit.score) for hit in retrieved]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


[NodeWithScore(node=TextNode(id_='7fed3d0b-e2d7-432a-9231-3ac02130c00f', embedding=[0.00310940156, -0.0246712118, -0.0222742166, -0.0364649445, -0.00715911388, 0.0117236068, -0.0400604382, -0.0275654588, -0.0157589763, 0.00740136392, 0.0278459582, 0.029962454, 0.0187679715, -0.00440511806, 0.00412780605, 0.00442743069, 0.0276674572, -0.00760536361, -0.00303608901, -0.012947605, -0.00570242899, -0.01923972, 0.00208462193, 0.0092118606, 0.00205274695, -0.0128328558, 0.0136297289, -0.0292994548, 0.0149939768, -0.0193289705, 0.0219937172, -0.00944136083, -0.0221977159, -0.00928198546, -0.00940948538, -0.00429993076, -0.0119786067, -0.0272339582, 0.0158864763, -0.0080261128, 0.0143437283, 0.00407999381, -0.00648655277, -0.0170722231, 0.00394293154, -0.0149939768, -0.00831298716, -0.0212669671, -0.0228479654, 0.0105123585, 0.0167662241, 0.0527849197, -0.0421514362, -0.0262139607, -0.00156506011, -0.00443380559, -0.00724836392, 0.00597655354, 0.0354959443, -0.00842773728, -0.00259143347, -0.0

Multiple Metadata Filters with `OR` condition

In [None]:
from llama_index.core.vector_stores import FilterOperator, FilterCondition


# OR: a node matches when at least one filter in the list holds.
# The first filter passes no operator, so the library default applies.
filters = MetadataFilters(
    filters=[
        MetadataFilter(key="theme", value="Fiction"),
        MetadataFilter(key="year", value=1997, operator=FilterOperator.GT),
    ],
    condition=FilterCondition.OR,
)

retriever = index.as_retriever(filters=filters)
retrieved = retriever.retrieve("Harry Potter?")

# Show only the readable fields: the default repr prints each node's full
# embedding vector, which buries the evidence that the filter worked.
[(hit.node.get_content(), hit.node.metadata, hit.score) for hit in retrieved]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


[NodeWithScore(node=TextNode(id_='6b7bdb44-9dc2-48f6-b9cb-05dcaea4889b', embedding=[0.0131883333, -0.0137667693, -0.0396421254, -0.00729471678, -0.0107653309, 0.0171731133, -0.0282276608, -0.0507480912, -0.0210422054, -0.01610622, 0.0254768785, 0.0143452045, 0.0170060098, 0.00128059229, -0.0103797065, -0.000858012936, 0.0219162852, -0.0112602143, 0.015424951, -0.0149622029, -0.0174430497, -0.00926139764, -0.00690909289, 0.0097370008, -0.0101161972, -0.00156177639, 0.0225204285, -0.0414159931, 0.0201038532, 0.00370198837, 0.00786672533, -0.00535053, -0.0264537912, -0.0238444041, -0.0343205184, 0.00850300491, -0.0269165393, -0.00934494939, 0.00577793, -0.0037951807, 0.0304385703, 0.0138181858, -0.0188184399, -0.0150521817, -0.0169160292, -0.0138567481, 0.011915775, -0.0171988215, -0.0302843209, 0.00484921923, 0.017700132, 0.0325209387, -0.0286389925, -0.023600176, -0.00468532881, 0.000876490725, -0.00141154369, 0.00646241195, 0.00751323672, -0.016556114, -0.0100455, -0.00316372188, -0.00

Use keyword arguments specific to Pinecone

In [None]:
# Pass a raw Pinecone-style filter dict via vector_store_kwargs instead of
# the MetadataFilters abstraction used in the earlier cells.
retriever = index.as_retriever(
    vector_store_kwargs={"filter": {"theme": "Mafia"}}
)
retrieved = retriever.retrieve("What is inception about?")

# Show only the readable fields: the default repr prints each node's full
# embedding vector, which buries the evidence that the filter worked.
[(hit.node.get_content(), hit.node.metadata, hit.score) for hit in retrieved]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


[NodeWithScore(node=TextNode(id_='54f65cf7-a12f-4366-88fe-6239c71920d2', embedding=[-0.00178836891, -0.0238407217, -0.0128082475, -0.0354147442, -0.00969749317, 0.0257046539, -0.000490778184, 0.000809174, -0.0218256582, -0.0278834421, 0.0238407217, 0.01876528, 0.0284375846, -0.0019048648, 0.00637263851, 0.0153522659, 0.029117668, -0.00807284843, 0.0104090627, -0.000399667391, 0.0102390414, 0.00693307817, -0.0297725648, -0.000678116106, 0.00477633, -0.00108309672, 0.00440165447, -0.0270018522, 0.021548586, -0.0175058655, 0.0120022222, -0.0240674149, -0.00652376842, 0.0020103408, 0.0100942096, -0.003102883, 0.00582164479, -0.0105350045, 0.000989427674, 0.0146092111, 0.0140172858, 0.00744314119, -0.0082617607, -0.0168761574, 0.0058814669, -0.00278488081, 0.0226190891, -0.0117125567, -0.0136142736, 0.0145840226, 0.00707791094, 0.0314853676, -0.0147855291, -0.0302259531, 0.0201254468, 0.00941412523, 0.00496524246, -0.0163472034, 0.00379083841, -0.0177955311, 0.0108057782, 0.00447092252, -0.