# Pinecone Vector Store - Metadata Filter

#### Creating a Pinecone Index

In [1]:
import logging
import sys
import os

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

In [2]:
import pinecone

api_key = os.environ['PINECONE_API_KEY']
pinecone.init(api_key=api_key, environment="eu-west1-gcp")

  from tqdm.autonotebook import tqdm


In [3]:
# dimensions are for text-embedding-ada-002
pinecone.create_index("quickstart-index", dimension=1536, metric="euclidean", pod_type="p1")

ApiException: (400)
Reason: Bad Request
HTTP response headers: HTTPHeaderDict({'content-type': 'text/plain; charset=UTF-8', 'date': 'Wed, 10 May 2023 20:10:35 GMT', 'x-envoy-upstream-service-time': '358', 'content-length': '131', 'server': 'envoy'})
HTTP response body: The index exceeds the project quota of 1 pods by 1 pods. Upgrade your account or change the project settings to increase the quota.


In [4]:
pinecone_index = pinecone.Index("quickstart-index")

#### Load documents, build the PineconeVectorStore and GPTVectorStoreIndex

In [5]:
from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader, StorageContext
from llama_index.vector_stores import PineconeVectorStore

INFO:numexpr.utils:Note: NumExpr detected 12 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
Note: NumExpr detected 12 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO:numexpr.utils:NumExpr defaulting to 8 threads.
NumExpr defaulting to 8 threads.


In [6]:
# load documents
documents = SimpleDirectoryReader('../paul_graham_essay/data').load_data()

In [7]:
from llama_index.data_structs.node import Node

nodes = [
    Node('The Shawshank Redemption', extra_info={
        "author": "Stephen King",
        "theme": "Friendship",
    }),
    Node('The Godfather', extra_info={
        "director": "Francis Ford Coppola",
        "theme": ["Mafia", "Family"],
    }),
    Node("Inception", extra_info={
        "director": "Christopher Nolan",
    })
]

In [15]:
vector_store = PineconeVectorStore(pinecone_index=pinecone_index, namespace='test_05_10')
storage_context = StorageContext.from_defaults(vector_store=vector_store)

In [16]:
index = GPTVectorStoreIndex(nodes, storage_context=storage_context)

INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens
> [build_index_from_nodes] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 45 tokens
> [build_index_from_nodes] Total embedding token usage: 45 tokens


In [17]:

from llama_index.vector_stores.types import ExactMatchFilter, MetadataFilters


filters = MetadataFilters(
    [
        ExactMatchFilter(key='theme', value='Mafia')
    ]
)

retriever = index.as_retriever(filters=filters)
retriever.retrieve('What is inception about?')

{'theme': 'Mafia'}
INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens
> [retrieve] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 5 tokens
> [retrieve] Total embedding token usage: 5 tokens


[NodeWithScore(node=Node(text="director: Francis Ford Coppola\ntheme: ['Mafia', 'Family']\n\nThe Godfather", doc_id='323fbb53-e32c-4044-87a3-7741c8e520b5', embedding=None, doc_hash='d3571e3753428ef1f34f702646320b7b16d20f0e1c73fa60d23dd07614c9d1d2', extra_info={'director': 'Francis Ford Coppola', 'doc_id': 'None', 'document_id': 'None', 'id': '323fbb53-e32c-4044-87a3-7741c8e520b5', 'ref_doc_id': 'None', 'text': "director: Francis Ford Coppola\ntheme: ['Mafia', 'Family']\n\nThe Godfather", 'theme': ['Mafia', 'Family']}, node_info=None, relationships={}), score=0.763082862)]