### PineCone

In [28]:
import os
from dotenv import load_dotenv
load_dotenv()

os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN')
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')
os.environ['GOOGLE_API_KEY'] = os.getenv('GOOGLE_API_KEY')
os.environ['PINECONE_API_KEY']=os.getenv('PINECONE_API_KEY')

In [35]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI

embeddings = HuggingFaceEmbeddings(model_name = "sentence-transformers/all-mpnet-base-v2")
embeddings

HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, query_encode_kwargs={}, multi_process=False, show_progress=False)

In [None]:
import getpass
import os

if not os.environ.get("GOOGLE_API_KEY"):
  os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter API key for Google Gemini: ")

from langchain_google_genai import GoogleGenerativeAIEmbeddings

embeddings_go = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [36]:
try:
    doc = embeddings_go.embed_query("Hello world")
    print(len(doc))
except Exception as e:
    print(f"Error embedding query: {e}")

768


In [38]:
from pinecone import Pinecone
pinecone_api_key = os.getenv('PINECONE_API_KEY')
pc = Pinecone(api_key=pinecone_api_key)

In [39]:
index_name = 'langchain-agentic-ai'

In [None]:
## Check if the index exists
pc.has_index(index_name)

False

In [41]:
## Serverless means it will be managed by the cloud provider
from pinecone import ServerlessSpec

## Create the index if it does not exist
index_name = 'langchain-agentic-ai'

if not pc.has_index(index_name):
    pc.create_index(
        name = index_name,
        dimension = 768,
        metric='cosine',
        spec = ServerlessSpec(cloud='aws',region='us-east-1')
    )


In [42]:
## loading the index
index = pc.Index(index_name)

In [43]:
from langchain_pinecone import PineconeVectorStore

## Creating Pinecone Vector Index
vector_store = PineconeVectorStore(index=index, embedding=embeddings_go)

In [44]:
## uuid4 is used to generate unique identifiers for the documents
from uuid import uuid4
from langchain_core.documents import Document

document_1 = Document(
    page_content="I had chocolate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},
)

document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    metadata={"source": "news"},
)

document_3 = Document(
    page_content="Building an exciting new project with LangChain - come check it out!",
    metadata={"source": "tweet"},
)

document_4 = Document(
    page_content="Robbers broke into the city bank and stole $1 million in cash.",
    metadata={"source": "news"},
)

document_5 = Document(
    page_content="Wow! That was an amazing movie. I can't wait to see it again.",
    metadata={"source": "tweet"},
)

document_6 = Document(
    page_content="Is the new iPhone worth the price? Read this review to find out.",
    metadata={"source": "website"},
)

document_7 = Document(
    page_content="The top 10 soccer players in the world right now.",
    metadata={"source": "website"},
)

document_8 = Document(
    page_content="LangGraph is the best framework for building stateful, agentic applications!",
    metadata={"source": "tweet"},
)

document_9 = Document(
    page_content="The stock market is down 500 points today due to fears of a recession.",
    metadata={"source": "news"},
)

document_10 = Document(
    page_content="I have a bad feeling I am going to get deleted :(",
    metadata={"source": "tweet"},
)

documents = [
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
]

## Generating unique identifiers for each document
## This is important for FAISS to keep track of the documents.
uuids = [str(uuid4()) for _ in range(len(documents))]
vector_store.add_documents(documents=documents,ids=uuids)

['2e61b34d-3b2f-4cd3-a963-f7f1e37efe69',
 '78bdc4c1-a363-4960-bca8-469e0857dcc1',
 '05d124ab-8e2e-4405-ba94-a8473c8ca8f5',
 '6cf267e5-0f9e-4534-8a22-f2d939e2b7fc',
 'c4a32634-25a3-40fa-9469-195c5725e18e',
 '852e5419-712f-4a04-9d27-e30e226b770f',
 '1058036a-e2b9-4936-86fa-bc0fb8169b28',
 'ceedcfcb-e714-4f5c-ad96-788f8555761d',
 '78b9db2f-12d0-469f-b7a8-8e8f5380767b',
 'b6b0d6e4-8fec-4403-b3f3-713ec4199cb8']

In [50]:
results = vector_store.similarity_search(
    'what langchain provides to us?',
    k=4,
    filter={'source':'tweet'}
)

for res in results:
    print(f'* {res.page_content}-[{res.metadata}]')

* Building an exciting new project with LangChain - come check it out!-[{'source': 'tweet'}]
* LangGraph is the best framework for building stateful, agentic applications!-[{'source': 'tweet'}]
* Wow! That was an amazing movie. I can't wait to see it again.-[{'source': 'tweet'}]
* I had chocolate chip pancakes and scrambled eggs for breakfast this morning.-[{'source': 'tweet'}]


In [51]:
results_score = vector_store.similarity_search_with_score(
    'what langchain provides to us?',
    k=4,
    filter = {'source':'tweet'}
)

for res,scr in results_score:
    print(f'*[SIM:{scr}] {res.page_content}-[{res.metadata}]')

*[SIM:0.692204058] Building an exciting new project with LangChain - come check it out!-[{'source': 'tweet'}]
*[SIM:0.664477229] LangGraph is the best framework for building stateful, agentic applications!-[{'source': 'tweet'}]
*[SIM:0.492769241] Wow! That was an amazing movie. I can't wait to see it again.-[{'source': 'tweet'}]
*[SIM:0.491890401] I had chocolate chip pancakes and scrambled eggs for breakfast this morning.-[{'source': 'tweet'}]


In [74]:
retriever = vector_store.as_retriever(
    search_type='similarity_score_threshold',
    search_kwargs = {'k':1, 'score_threshold':0.5}
)

In [80]:
retriever.invoke("LangChain provides abstractions to make working with LLMs easy")

[Document(id='ceedcfcb-e714-4f5c-ad96-788f8555761d', metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic applications!')]

In [76]:
from langchain import hub
prompt = hub.pull('rlm/rag-prompt')
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])

In [77]:
model= ChatOpenAI(model='gpt-4o-mini')

In [78]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {'context': retriever | format_docs, 'question':RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

In [81]:
rag_chain.invoke('will it be hot tomorrow')

'It will not be hot tomorrow, as the forecast predicts a high of 62 degrees. The weather is expected to be cloudy and overcast.'