# Pinecone Vector Store - Auto Retriever

#### Creating a Pinecone Index

In [1]:
import logging
import sys
import os

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

In [2]:
import openai
openai.api_base = "https://oai.hconeai.com/v1"


INFO:numexpr.utils:Note: NumExpr detected 12 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
Note: NumExpr detected 12 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO:numexpr.utils:NumExpr defaulting to 8 threads.
NumExpr defaulting to 8 threads.


In [3]:
import pinecone

api_key = os.environ['PINECONE_API_KEY']
pinecone.init(api_key=api_key, environment="eu-west1-gcp")

  from tqdm.autonotebook import tqdm


In [3]:
# dimensions are for text-embedding-ada-002
pinecone.create_index("quickstart-index", dimension=1536, metric="euclidean", pod_type="p1")

ApiException: (400)
Reason: Bad Request
HTTP response headers: HTTPHeaderDict({'content-type': 'text/plain; charset=UTF-8', 'date': 'Thu, 11 May 2023 03:54:14 GMT', 'x-envoy-upstream-service-time': '492', 'content-length': '131', 'server': 'envoy'})
HTTP response body: The index exceeds the project quota of 1 pods by 1 pods. Upgrade your account or change the project settings to increase the quota.


In [4]:
pinecone_index = pinecone.Index("quickstart-index")

#### Load documents, build the PineconeVectorStore and GPTVectorStoreIndex

In [5]:
from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader, StorageContext
from llama_index.vector_stores import PineconeVectorStore

In [6]:
# load documents
documents = SimpleDirectoryReader('../paul_graham_essay/data').load_data()

In [7]:
from llama_index.data_structs.node import Node

nodes = [
    Node('The Shawshank Redemption', extra_info={
        "author": "Stephen King",
        "theme": "Friendship",
    }),
    Node('The Godfather', extra_info={
        "director": "Francis Ford Coppola",
        "theme": "Mafia",
    }),
    Node("Inception", extra_info={
        "director": "Christopher Nolan",
    })
]

In [8]:
from langchain import OpenAI
from llama_index.indices.service_context import ServiceContext
from llama_index.llm_predictor.base import LLMPredictor


vector_store = PineconeVectorStore(pinecone_index=pinecone_index, namespace='test_05_11')
storage_context = StorageContext.from_defaults(vector_store=vector_store)
service_context = ServiceContext.from_defaults(
  llm_predictor=LLMPredictor(
    llm=OpenAI(
      headers={
        "Helicone-Auth": "Bearer sk-ss3no7a-o2zeqpi-qlqeipy-it3e4zi"
      }
    )
  )
)

                    headers was transfered to model_kwargs.
                    Please confirm that headers is what you intended.
                    headers was transfered to model_kwargs.
                    Please confirm that headers is what you intended.


In [9]:
index = GPTVectorStoreIndex(nodes, storage_context=storage_context, service_context=service_context)

INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens
> [build_index_from_nodes] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 39 tokens
> [build_index_from_nodes] Total embedding token usage: 39 tokens


In [10]:

from llama_index.vector_stores.types import ExactMatchFilter, MetadataFilters


filters = MetadataFilters(
    filters=[
        ExactMatchFilter(key='theme', value='Mafia')
    ]
)

retriever = index.as_retriever(filters=filters)
retriever.retrieve('What is inception about?')

INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens
> [retrieve] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 5 tokens
> [retrieve] Total embedding token usage: 5 tokens


[NodeWithScore(node=Node(text='director: Francis Ford Coppola\ntheme: Mafia\n\nThe Godfather', doc_id='e1055b83-d7f5-47bb-b9bb-b8097791deb7', embedding=None, doc_hash='a1b8ddfd0393ce3259e625168434f5e20061c659b95f4859bc060a5292702bca', extra_info={'director': 'Francis Ford Coppola', 'doc_id': 'None', 'document_id': 'None', 'id': 'e1055b83-d7f5-47bb-b9bb-b8097791deb7', 'ref_doc_id': 'None', 'text': 'director: Francis Ford Coppola\ntheme: Mafia\n\nThe Godfather', 'theme': 'Mafia'}, node_info=None, relationships={}), score=0.768340707),
 NodeWithScore(node=Node(text='director: Francis Ford Coppola\ntheme: Mafia\n\nThe Godfather', doc_id='c95296bb-743e-430c-aa41-856fbcec9c47', embedding=None, doc_hash='008bc3fb72ea60a2a40842e552028832f973f4d42b23d42bc09b4117d4f65234', extra_info={'director': 'Francis Ford Coppola', 'doc_id': 'None', 'document_id': 'None', 'id': 'c95296bb-743e-430c-aa41-856fbcec9c47', 'ref_doc_id': 'None', 'text': 'director: Francis Ford Coppola\ntheme: Mafia\n\nThe Godfathe

In [11]:
from llama_index.indices.vector_store.auto_retriever.auto_retriever import VectorIndexAutoRetriever
from llama_index.indices.vector_store.auto_retriever.schema import MetadataInfo, VectorStoreInfo


vector_store_info = VectorStoreInfo(
    content_info='movie reviews',
    metadata_info=[
        MetadataInfo(name='author', type='str', description='Author of the movie'),
        MetadataInfo(name='director', type='str', description='Director of the movie'),
        MetadataInfo(name='theme', type='str', description='Theme of the movie')
    ]
)
retriever = VectorIndexAutoRetriever(index, vector_store_info=vector_store_info)

In [12]:
retriever.retrieve('Sci-fi movie ranking')

AttributeError: 'list' object has no attribute 'filters'