In [5]:
from langchain_huggingface import HuggingFaceEmbeddings
from dotenv import load_dotenv
from langchain.schema import Document
from pinecone import Pinecone, ServerlessSpec
import os

os.environ['HF_HOME'] = 'D:/NLP/huggingface_cache'

In [2]:
load_dotenv()

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [14]:
documents = [
    Document(page_content="I am from Pakistan."),
    Document(page_content="Islamabad is the capital of Pakistan"),
    Document(page_content="Lahore is in Pakistan"),
    Document(page_content="our software house is in lahore")
]

In [15]:
# Initialize Pinecone client
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Create index (if not already exists)
index_name = "myappfortesting"
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1")
    )

# Connect to index
index = pc.Index(index_name)

In [16]:
from langchain_pinecone import PineconeVectorStore

vectorstore = PineconeVectorStore.from_documents(
    documents=documents,
    embedding=embeddings,
    index_name="myappfortesting"  
)

In [17]:
retriever = vectorstore.as_retriever(search_kwargs={'k':2})

In [18]:
query = "What is islamabad?"
res = retriever.invoke(query)

In [19]:
res

[Document(id='d5b9e1bc-55f0-4e92-bc11-e77b5967e2eb', metadata={}, page_content='Islamabad is the capital of Pakistan'),
 Document(id='83f45bfc-cc7a-4b51-984c-53d7bfff3883', metadata={}, page_content='Lahore is in Pakistan')]