### Load API keys for Pinecone and Hugging Face


In [1]:
import os
from dotenv import load_dotenv

# Load the key/value pairs stored in the local `api.env` file so they can be
# read back through `os.environ` below.
load_dotenv("api.env")

# Subscript access (rather than `.get`) is deliberate: a missing key raises
# KeyError right here instead of surfacing later as an authentication failure.
HUGGINGFACE_API_KEY = os.environ["HUGGINGFACE_API_KEY"]
PINECONE_API_KEY = os.environ["PINECONE_API_KEY"]

### If you don't have an index in Pinecone yet, create one using the cell below

In [None]:
from indox.vector_stores import PineconeVectorStore

# One-time setup: create the Pinecone index used by the rest of this notebook.
# Skip this cell if the index already exists.
#
# The index name must match PINECONE_INDEX_NAME ('testindex3') that the
# connection cell further down uses; previously this cell created a
# differently named index, so a fresh run would create one index and then try
# to connect to another.
PineconeVectorStore.create_index(
    index_name="testindex3",
    # 768 matches the `word_embedding_dimension` reported by the
    # multi-qa-mpnet-base-cos-v1 embedding model used below.
    dimension=768,
    metric="cosine",
    # NOTE(review): cloud/region are passed straight through to indox;
    # adjust them to whatever your Pinecone project supports.
    cloud="aws",
    region="us-east-1"
)

### Embedding function

In [2]:
from indox.embeddings import HuggingFaceEmbedding
# Name of the Pinecone index that the following cells connect to.
PINECONE_INDEX_NAME = "testindex3"

# Embedding backend used both when storing documents and when embedding
# search queries.
embedding_function = HuggingFaceEmbedding(
    model="multi-qa-mpnet-base-cos-v1",
    api_key=HUGGINGFACE_API_KEY,
)

[32mINFO[0m: [1mInitialized HuggingFaceEmbedding with model: multi-qa-mpnet-base-cos-v1[0m


### Connecting to Pinecone

In [3]:
from indox.vector_stores import PineconeVectorStore
# Open a handle to the existing Pinecone index. `text_key='content'` is the
# field name handed to the store for the document text.
try:
    pinecone_store = PineconeVectorStore(
        index_name=PINECONE_INDEX_NAME,
        embedding_function=embedding_function,
        text_key='content'
    )
    print(f"Successfully connected to Pinecone index: {PINECONE_INDEX_NAME}")
except Exception as e:
    print(f"Error connecting to Pinecone: {e}")
    print("Please check your API key and index name.")
    # Re-raise instead of calling exit(1): inside a Jupyter/IPython kernel
    # `exit` asks the kernel to shut down, which discards all notebook state
    # and hides the traceback. Raising stops "Run All" at this cell and keeps
    # the original error visible for debugging.
    raise


Successfully connected to Pinecone index: testindex3


### Adding Documents

In [4]:
from indox.core import  Document
# (text, metadata) pairs for the three sample documents stored in the index.
_doc_specs = [
    ("The quick brown fox jumps over the lazy dog", {"animal": "fox"}),
    ("A journey of a thousand miles begins with a single step", {"type": "proverb"}),
    ("To be or not to be, that is the question", {"type": "quote", "author": "Shakespeare"}),
]
docs = [Document(page_content=text, metadata=meta) for text, meta in _doc_specs]

print("Adding documents...")
try:
    # The value returned by `add` is treated as a sized collection of IDs,
    # one per stored document.
    added_ids = pinecone_store.add(docs)
    print(f"Successfully added {len(added_ids)} documents.")
except Exception as e:
    # Failures are reported but do not stop the notebook.
    print(f"Error adding documents: {e}")

Adding documents...
[32mINFO[0m: [1mEmbedding documents[0m
[32mINFO[0m: [1mStarting to fetch embeddings for texts using model: SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: MPNetModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)[0m
Successfully added 3 documents.


### Performing similarity search

In [5]:
# Ask a natural-language question and fetch the single closest stored document.
# `query` is reused by the deletion-verification cell later on.
query = "What did the fox do?"

print("\nPerforming similarity search...")
results = pinecone_store.similarity_search(query, k=1)

for doc in results:
    print(f"Content: {doc.page_content}")
    # The double newline reproduces the blank separator line after each hit.
    print(f"Metadata: {doc.metadata}", end="\n\n")


Performing similarity search...
[32mINFO[0m: [1mEmbedding documents[0m
[32mINFO[0m: [1mStarting to fetch embeddings for texts using model: SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: MPNetModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)[0m
Content: The quick brown fox jumps over the lazy dog
Metadata: {'id': 'fd5a41e6-ff07-4c70-ac4d-18ed6fed39d7', 'metadata': "{'animal': 'fox'}"}



### Testing deletion

In [6]:
# Look up the Shakespeare quote by its exact text and delete the best match.
# The vector ID is read from the 'id' entry of the returned document's metadata.
results = pinecone_store.similarity_search(
    "To be or not to be, that is the question",
    k=1,
)
if not results:
    print("No results found to delete")
else:
    doc_to_delete = results[0]
    id_to_delete = doc_to_delete.metadata.get('id')
    if not id_to_delete:
        print("No ID found for the document to delete")
    else:
        pinecone_store.delete([id_to_delete])
        print(f"Deleted document with content: {doc_to_delete.page_content}")

# Verify deletion: re-run the earlier `query` with k=2 and show whatever
# documents the store still returns.
print("\nVerifying deletion...")
new_results = pinecone_store.similarity_search(query, k=2)
for doc in new_results:
    print(f"Content: {doc.page_content}")
    print(f"Metadata: {doc.metadata}", end="\n\n")

# Report how many documents the store holds after the deletion.
print(f"Total documents in store: {len(pinecone_store)}")

[32mINFO[0m: [1mEmbedding documents[0m
[32mINFO[0m: [1mStarting to fetch embeddings for texts using model: SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: MPNetModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)[0m
Deleted document with content: To be or not to be, that is the question

Verifying deletion...
[32mINFO[0m: [1mEmbedding documents[0m
[32mINFO[0m: [1mStarting to fetch embeddings for texts using model: SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: MPNetModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens