In [None]:
#| hide
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# pinecone

> Pinecone is a vector database. You can query it with a vector to quickly find other, semantically similar vectors. Semantic search over a vector database is often used to retrieve relevant text that is then added to LLM prompts, giving the model a form of long-term 'memory'.

In [None]:
#| default_exp pinecone

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
def foo():
    """Placeholder function: takes no arguments, does nothing, returns None."""
    return None

In [None]:
#| hide
import os

# Project-local module.
# NOTE(review): presumably imported for its side effect of making the secrets
# read from os.environ below available — confirm against jarvis/secrets.
import jarvis.secrets

In [None]:
#| hide
# Required settings are read from the process environment. Indexing os.environ
# directly means a missing variable raises KeyError in this cell.
PINECONE_API_KEY = os.environ["PINECONE_API_KEY"]
PINECONE_ENV = os.environ["PINECONE_ENV"]
# Namespace passed to every Pinecone call in this notebook (upload, query, delete).
PINECONE_TEST_NAMESPACE = os.environ["PINECONE_TEST_NAMESPACE"]
# NOTE(review): OPENAI_API_KEY is not referenced again in the visible cells;
# presumably OpenAIEmbeddings reads the variable from the environment itself and
# this lookup only verifies that it is set — confirm.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

In [None]:
# NOTE(review): presumably the dimensionality of the embedding vectors stored in
# the index (the raw Pinecone query later in this notebook sends a 1536-element
# vector) — confirm against the embedding model and index configuration.
EMBEDDING_LENGTH = 1536

In [None]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Pinecone
from langchain.document_loaders import TextLoader

In [None]:
from langchain.document_loaders import TextLoader

In [None]:
# Load the example essay from disk through LangChain's TextLoader.
loader = TextLoader('../examples/data/pg_essay_beyond_smart.txt')
documents = loader.load()
# Split with chunk_size=500 and chunk_overlap=100. The recorded output of this
# cell shows that some chunks still come out longer than the requested size.
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=100)
docs = text_splitter.split_documents(documents)[:10]  # keep only the first 10 chunks for this example
print(len(docs))

# Embeddings object handed to the vector store in a later cell.
# NOTE(review): presumably picks up the OpenAI API key from the environment — confirm.
embeddings = OpenAIEmbeddings()

Created a chunk of size 549, which is longer than the specified 500
Created a chunk of size 667, which is longer than the specified 500
Created a chunk of size 767, which is longer than the specified 500


10


In [None]:
import pinecone 

  from tqdm.autonotebook import tqdm


In [None]:
# Initialize the Pinecone client with the credentials read from the environment.
pinecone.init(
    api_key=PINECONE_API_KEY,  # find at app.pinecone.io
    environment=PINECONE_ENV  # next to api key in console
)

# Name of the Pinecone index this example works against.
# NOTE(review): presumably the index must already exist in the account — confirm.
index_name = "pinecone-index-1"

# Build a LangChain Pinecone vector store from the document chunks, scoped to
# the test namespace.
# NOTE(review): presumably this embeds the chunks and uploads them to the
# index — confirm against the LangChain documentation.
docsearch = Pinecone.from_documents(docs, embeddings, index_name=index_name, namespace=PINECONE_TEST_NAMESPACE)

# Run a similarity search for a natural-language question.
query = "What are the keys to being smart?"
# NOTE: this rebinds `docs` from the input chunks to the search results, so
# re-running this cell on its own would pass the search results (not the
# original chunks) to `Pinecone.from_documents`.
docs = docsearch.similarity_search(query)

In [None]:
# Print the text of the first document returned by the similarity search.
print(docs[0].page_content)

Why do so many smart people fail to discover anything new? Viewed from that direction, the question seems a rather depressing one. But there's another way to look at it that's not just more optimistic, but more interesting as well. Clearly intelligence is not the only ingredient in having new ideas. What are the other ingredients? Are they things we could cultivate?


In [None]:
import pinecone

In [None]:
# Query the same index directly through the Pinecone client, without the
# LangChain wrapper. `index_name` and `EMBEDDING_LENGTH` are defined in earlier
# cells; reusing them keeps this query in sync with the index that was
# populated instead of repeating the literal name and vector size.
pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_ENV)
index = pinecone.Index(index_name)

# A constant vector is used as the probe: it only has to have the right length
# so that the query returns up to `top_k` stored vectors from the namespace.
query_response = index.query(
    namespace=PINECONE_TEST_NAMESPACE,
    top_k=10,
    include_values=True,
    include_metadata=True,
    vector=[0.1] * EMBEDDING_LENGTH,
    filter=None,
)
# Number of matches returned (0 when the response has no 'matches' entry).
print(len(query_response.get('matches', [])))

10


**Pinecone query and response** (example; the `values` list in the response is truncated for brevity)
```
query_response = index.query(
    namespace='',
    top_k=10,
    include_values=True,
    include_metadata=True,
    vector=[0.1]*1536,
    filter=None
)

RESPONSE:
{'matches': [{'id': '263a129b-e46d-4102-a093-379575b45947',
              'metadata': {'source': '../examples/data/pg_essay_beyond_smart.txt',
                           'text': 'Notes\n'
                                   '\n'
                                   '[1] What wins in conversation depends on '
                                   'who with. It ranges from mere '
                                   'aggressiveness at the bottom, through '
                                   'quick-wittedness in the middle, to '
                                   'something closer to actual intelligence at '
                                   'the top, though probably always with some '
                                   'component of quick-wittedness.'},
              'score': -0.0292761475,
              'values': [-0.0114761228,
                         0.0137850493,
                         -0.0456852466]}],
 'namespace': ''}
 ```

In [None]:
# Clean up: gather the id of every match returned by the raw query and delete
# those vectors from the test namespace. Nothing is sent when there are no ids.
ids = [match['id'] for match in query_response.get('matches', [])]
if ids:
    index.delete(ids=ids, namespace=PINECONE_TEST_NAMESPACE)

In [None]:
#| hide
# Run nbdev's export step.
# NOTE(review): presumably writes the cells marked `#| export` to the module
# named by the `#| default_exp` directive — confirm against the nbdev docs.
import nbdev; nbdev.nbdev_export()