#### Simple Gen AI APP Using Langchain

In [3]:
import os
from dotenv import load_dotenv
load_dotenv()

os.environ['OPENAI_API_KEY']=os.getenv("OPENAI_API_KEY")
## Langsmith Tracking
os.environ["LANGCHAIN_API_KEY"]=os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"]="true"
os.environ["LANGCHAIN_PROJECT"]=os.getenv("LANGCHAIN_PROJECT")

In [4]:
## Data Ingestion--From the website we need to scrape the data
from langchain_community.document_loaders import WebBaseLoader

In [5]:
loader=WebBaseLoader("https://docs.smith.langchain.com/tutorials/Administrators/manage_spend")
loader

<langchain_community.document_loaders.web_base.WebBaseLoader at 0x10d7878b0>

In [34]:
docs=loader.load()
docs

[Document(metadata={'source': 'https://docs.smith.langchain.com/tutorials/Administrators/manage_spend', 'title': '404: NOT_FOUND', 'language': 'en'}, page_content='404: NOT_FOUND404: NOT_FOUND\nCode: NOT_FOUND\nID: bom1::9q56w-1738351159294-8617784dce41Read our documentation to learn more about this error.')]

In [16]:
### Load Data--> Docs-->Divide our Docuemnts into chunks dcouments-->text-->vectors-->Vector Embeddings--->Vector Store DB
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
documents=text_splitter.split_documents(docs)

In [17]:
documents

[Document(metadata={'source': 'https://docs.smith.langchain.com/tutorials/Administrators/manage_spend', 'title': '404: NOT_FOUND', 'language': 'en'}, page_content='404: NOT_FOUND404: NOT_FOUND\nCode: NOT_FOUND\nID: bom1::4n2jv-1738262637365-406ddef7db54Read our documentation to learn more about this error.')]

In [18]:
from langchain_openai import OpenAIEmbeddings
embeddings=OpenAIEmbeddings()

In [19]:
from langchain_community.vectorstores import FAISS
vectorstoredb=FAISS.from_documents(documents,embeddings)

In [20]:
vectorstoredb

<langchain_community.vectorstores.faiss.FAISS at 0x1299a86a0>

In [21]:
## Query From a vector db
query="LangSmith has two usage limits: total traces and extended"
result=vectorstoredb.similarity_search(query)
result[0].page_content

'404: NOT_FOUND404: NOT_FOUND\nCode: NOT_FOUND\nID: bom1::4n2jv-1738262637365-406ddef7db54Read our documentation to learn more about this error.'

In [22]:
from langchain_openai import ChatOpenAI
llm=ChatOpenAI(model="gpt-4o")

In [23]:
## Retrieval Chain, Document chain

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

prompt=ChatPromptTemplate.from_template(
    """
Answer the following question based only on the provided context:
<context>
{context}
</context>


"""
)

document_chain=create_stuff_documents_chain(llm,prompt)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following question based only on the provided context:\n<context>\n{context}\n</context>\n\n\n'), additional_kwargs={})])
| ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x1299ab670>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x1299a9210>, root_client=<openai.OpenAI object at 0x1299a8c40>, root_async_client=<openai.AsyncOpenAI object at 0x1299ab6a0>, model_name='gpt-4o', model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputParser(), kwargs={}, config={'run_name': 'stuff_documents_chain'}, config_fact

In [24]:
from langchain_core.documents import Document
document_chain.invoke({
    "input":"LangSmith has two usage limits: total traces and extended",
    "context":[Document(page_content="LangSmith has two usage limits: total traces and extended traces. These correspond to the two metrics we've been tracking on our usage graph. ")]
})

'LangSmith has two usage limits: total traces and extended traces.'

However, we want the documents to first come from the retriever we just set up. That way, we can use the retriever to dynamically select the most relevant documents and pass those in for a given question.

In [25]:
### Input--->Retriever--->vectorstoredb

vectorstoredb

<langchain_community.vectorstores.faiss.FAISS at 0x1299a86a0>

In [26]:
retriever=vectorstoredb.as_retriever()
from langchain.chains import create_retrieval_chain
retrieval_chain=create_retrieval_chain(retriever,document_chain)


In [27]:
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x1299a86a0>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following question based only on the provided context:\n<context>\n{context}\n</context>\n\n\n'), additional_kwargs={})])
            | ChatOpe

In [28]:
## Get the response form the LLM
response=retrieval_chain.invoke({"input":"LangSmith has two usage limits: total traces and extended"})
response['answer']

'It seems that the provided context contains an error message indicating a "404: NOT_FOUND" status. This typically means that the requested resource could not be found. Without additional information or context, it\'s not possible to answer any specific question related to the content of the document that was supposed to be referenced.'

In [29]:

response

{'input': 'LangSmith has two usage limits: total traces and extended',
 'context': [Document(id='79d16f57-3979-48f7-9076-31e2dc87aa32', metadata={'source': 'https://docs.smith.langchain.com/tutorials/Administrators/manage_spend', 'title': '404: NOT_FOUND', 'language': 'en'}, page_content='404: NOT_FOUND404: NOT_FOUND\nCode: NOT_FOUND\nID: bom1::4n2jv-1738262637365-406ddef7db54Read our documentation to learn more about this error.')],
 'answer': 'It seems that the provided context contains an error message indicating a "404: NOT_FOUND" status. This typically means that the requested resource could not be found. Without additional information or context, it\'s not possible to answer any specific question related to the content of the document that was supposed to be referenced.'}

In [30]:
response['context']

[Document(id='79d16f57-3979-48f7-9076-31e2dc87aa32', metadata={'source': 'https://docs.smith.langchain.com/tutorials/Administrators/manage_spend', 'title': '404: NOT_FOUND', 'language': 'en'}, page_content='404: NOT_FOUND404: NOT_FOUND\nCode: NOT_FOUND\nID: bom1::4n2jv-1738262637365-406ddef7db54Read our documentation to learn more about this error.')]