In [2]:
import os
import pinecone
from langchain.vectorstores import Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings

`Init Pinecone`

In [3]:
# Authenticate the Pinecone client; the API key is read from the environment.
pinecone.init(api_key=os.environ.get('PINECONE_API_KEY'), environment="us-east1-gcp")

# Attach to the existing index, embedding with OpenAI.
embeddings = OpenAIEmbeddings()
index_name = "langchain-test"
vectorstore = Pinecone.from_existing_index(
    index_name=index_name,
    embedding=embeddings,
)

`Test pipeline`

In [4]:
from langchain.docstore.document import Document
from langchain.document_manager.example_interface import _TEST_DOCUMENTS
from langchain.document_manager.in_memory import InMemoryDocumentManager
from langchain.text_splitter import CharacterTextSplitter

def create_document_manager():
    """Build an in-memory document manager.

    The manager is constructed with a ``CharacterTextSplitter`` that uses a
    single space as its separator.

    Returns:
        The newly created ``InMemoryDocumentManager``.
    """
    splitter = CharacterTextSplitter(separator=" ")
    return InMemoryDocumentManager(splitter)

def add_documents(document_manager):
    """Register the ``_TEST_DOCUMENTS`` fixture under ids '1', '2' and '3'.

    Args:
        document_manager: Object exposing ``add(documents, ids)``.

    Returns:
        The same ``document_manager`` that was passed in; the value returned
        by ``add`` is not used.
    """
    fixture_ids = ['1', '2', '3']
    document_manager.add(_TEST_DOCUMENTS, fixture_ids)
    return document_manager

def update_documents(document_manager):
    """Call ``update`` with one modified document for id '1'.

    Args:
        document_manager: Object exposing ``update(documents, ids)``.

    Returns:
        The same ``document_manager`` that was passed in; the value returned
        by ``update`` is not used.
    """
    replacement = Document(page_content='This is a modified test document.')
    document_manager.update([replacement], ['1'])
    return document_manager

def update_truncate_documents(document_manager):
    """Call ``update_truncate`` with two final documents for ids '1' and '2'.

    Args:
        document_manager: Object exposing ``update_truncate(documents, ids)``.
            NOTE(review): presumably this also drops documents whose ids are
            not listed -- confirm against the manager implementation.

    Returns:
        The same ``document_manager`` that was passed in; the value returned
        by ``update_truncate`` is not used.
    """
    final_texts = (
        'This is the final test document.',
        'This is another final test document',
    )
    final_docs = [Document(page_content=text) for text in final_texts]
    document_manager.update_truncate(final_docs, ['1', '2'])
    return document_manager

def document_manager_pipeline():
    """Run the full test sequence: create, add, update, then update_truncate.

    Each stage receives the manager returned by the previous stage.

    Returns:
        The document manager returned by the last stage.
    """
    manager = create_document_manager()
    for stage in (add_documents, update_documents, update_truncate_documents):
        manager = stage(manager)
    return manager

`Test vectorDB upsert`

In [5]:
# Run the create -> add -> update -> update_truncate pipeline
d = document_manager_pipeline()

# Check: show each document's text next to its hash.
# The label says "content" because the value printed is doc.page_content, not an id.
for doc, hash_ in d.lazy_load_all_docs():
    print(f"Document content: {doc.page_content}, hash: {hash_}")

# Upsert the managed documents into the vector store
d.add_documents_to_vectorstore(vectorstore)

Document id: This is a test document., hash: 53a92d14131d800674aa83617843757f860d07dc33c2ced1e1391183908945d3
Document id: This is another test document., hash: 669ee43d4936ba74cee191c8552885aa98e40b4d17d630eedb9fc64bf0ec07b1
Document id: This is a third test document., hash: e664370f801474e643a87f3f903af1280eda09c3db80a90ca8cda1818d2250d2


Upserted vectors:   0%|          | 0/1 [00:00<?, ?it/s]

Upserted vectors:   0%|          | 0/1 [00:00<?, ?it/s]

Upserted vectors:   0%|          | 0/1 [00:00<?, ?it/s]

`Concern`

If we pass the document `hash` as the vector `id` on write, I expect existing vectors with that id to simply be updated rather than duplicated, but this still needs to be confirmed.

`vectorstore.add_texts(texts=[doc.page_content],metadatas=[doc.metadata],ids=[doc_hash])`

I do not see an option for removing vectors from the vector store!