In [1]:
from dotenv import load_dotenv
from pinecone import Pinecone
import os
os.environ["HF_TOKEN"]=os.getenv("HF_TOKEN")
pinecone_api_key=os.getenv("PINECONE_API_KEY")

In [2]:
from langchain_huggingface import HuggingFaceEmbeddings

embeddings_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
pinecone_api_key

'pcsk_3Dm9T3_A8cFarMo9nBZfQG4Jfp5qeM2oxrEDVdRqBc3JnqMZbYxe2uNLU4AzytZPpLNXcX'

In [4]:
pc=Pinecone(api_key=pinecone_api_key)

In [5]:
from pinecone import ServerlessSpec

In [6]:
index_name = "agenticbatch2"

In [7]:
pc.has_index(index_name)

False

In [8]:
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=768,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws",region="us-east-1")
    )

In [9]:
index=pc.Index(index_name)

In [11]:
from langchain_pinecone import PineconeVectorStore

In [12]:
vector_store = PineconeVectorStore(index=index, embedding=embeddings_model)

In [13]:
from uuid import uuid4
from langchain_core.documents import Document

document_1 = Document(
    page_content="The stock market reached an all-time high today.",
    metadata={"source": "news"}
)
document_2 = Document(
    page_content="The local football team won their championship game last night.",
    metadata={"source": "sports"}
)
document_3 = Document(
    page_content="Scientists have discovered a new species of bird in the Amazon.",
    metadata={"source": "news"}
)
document_4 = Document(
    page_content="Just saw the most amazing sunset! #blessed",
    metadata={"source": "tweet"}
)
document_5 = Document(
    page_content="Breaking: Major earthquake shakes the city center.",
    metadata={"source": "news"}
)
document_6 = Document(
    page_content="Can't believe my team lost in the last minute! #sportsfan",
    metadata={"source": "tweet"}
)
document_7 = Document(
    page_content="Olympic swimmer sets new world record in 100m freestyle.",
    metadata={"source": "sports"}
)
document_8 = Document(
    page_content="Elections results are in: incumbent wins by a narrow margin.",
    metadata={"source": "news"}
)
document_9 = Document(
    page_content="Just finished a 5k run, feeling great! #fitness",
    metadata={"source": "tweet"}
)
document_10 = Document(
    page_content="Basketball finals tonightâ€”who's watching?",
    metadata={"source": "tweet"}
)

documents = [
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10
]

In [14]:
uuids = [str(uuid4()) for _ in range(len(documents))]

In [15]:
uuids

['87fc467f-20e5-487f-b62c-f2970d668312',
 'be58861e-0ad4-4da2-bbde-119c9b3bb7db',
 '4256ad3b-18fc-4bb8-994f-5a92ae7c4f7d',
 '10f27707-61e6-47e7-bed8-b5c4070501e1',
 'ba36113a-1291-404f-a282-7a24e485f614',
 '7f1a1457-b2a1-49bb-a886-b033e1f8a4de',
 '5d8c75bf-0367-4008-9935-cd3a01a1e5dd',
 '58c87d44-730c-4c87-8351-3bedcaa7e895',
 '3f9dfee4-3920-41ac-9518-50e979c5da56',
 '953e9885-5cbe-4b56-b68b-90f3b0b6829b']

In [16]:
vector_store.add_documents(documents = documents, ids = uuids)

['87fc467f-20e5-487f-b62c-f2970d668312',
 'be58861e-0ad4-4da2-bbde-119c9b3bb7db',
 '4256ad3b-18fc-4bb8-994f-5a92ae7c4f7d',
 '10f27707-61e6-47e7-bed8-b5c4070501e1',
 'ba36113a-1291-404f-a282-7a24e485f614',
 '7f1a1457-b2a1-49bb-a886-b033e1f8a4de',
 '5d8c75bf-0367-4008-9935-cd3a01a1e5dd',
 '58c87d44-730c-4c87-8351-3bedcaa7e895',
 '3f9dfee4-3920-41ac-9518-50e979c5da56',
 '953e9885-5cbe-4b56-b68b-90f3b0b6829b']

In [17]:
results = vector_store.similarity_search("Tell me som news")

In [18]:
results

[Document(id='3f9dfee4-3920-41ac-9518-50e979c5da56', metadata={'source': 'tweet'}, page_content='Just finished a 5k run, feeling great! #fitness'),
 Document(id='58c87d44-730c-4c87-8351-3bedcaa7e895', metadata={'source': 'news'}, page_content='Elections results are in: incumbent wins by a narrow margin.'),
 Document(id='10f27707-61e6-47e7-bed8-b5c4070501e1', metadata={'source': 'tweet'}, page_content='Just saw the most amazing sunset! #blessed'),
 Document(id='ba36113a-1291-404f-a282-7a24e485f614', metadata={'source': 'news'}, page_content='Breaking: Major earthquake shakes the city center.')]

In [19]:
results = vector_store.similarity_search("Tell me som news",filter={"source":"news"})
results

[Document(id='58c87d44-730c-4c87-8351-3bedcaa7e895', metadata={'source': 'news'}, page_content='Elections results are in: incumbent wins by a narrow margin.'),
 Document(id='ba36113a-1291-404f-a282-7a24e485f614', metadata={'source': 'news'}, page_content='Breaking: Major earthquake shakes the city center.'),
 Document(id='87fc467f-20e5-487f-b62c-f2970d668312', metadata={'source': 'news'}, page_content='The stock market reached an all-time high today.'),
 Document(id='4256ad3b-18fc-4bb8-994f-5a92ae7c4f7d', metadata={'source': 'news'}, page_content='Scientists have discovered a new species of bird in the Amazon.')]

In [None]:
retreiver = vector_store.as_retriever(
    search_type = "similarity_score_threshold",
    search_kwargs={"k":3, "score_threshold":0.6}    
)

In [34]:
retreiver.invoke("news")

[Document(id='58c87d44-730c-4c87-8351-3bedcaa7e895', metadata={'source': 'news'}, page_content='Elections results are in: incumbent wins by a narrow margin.'),
 Document(id='ba36113a-1291-404f-a282-7a24e485f614', metadata={'source': 'news'}, page_content='Breaking: Major earthquake shakes the city center.')]