#### Simple Gen AI APP Using Langchain

In [4]:
import os
from dotenv import load_dotenv
load_dotenv()

os.environ['OPENAI_API_KEY']=os.getenv("OPENAI_API_KEY")
## Langsmith Tracking
os.environ["LANGCHAIN_API_KEY"]=os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"]="true"
os.environ["LANGCHAIN_PROJECT"]=os.getenv("LANGCHAIN_PROJECT")

In [9]:
## Data Ingestion--From the website we need to scrape the data
from langchain_community.document_loaders import WebBaseLoader

In [10]:
loader=WebBaseLoader("https://docs.smith.langchain.com/tutorials/Administrators/manage_spend")
loader

<langchain_community.document_loaders.web_base.WebBaseLoader at 0x107f0b9d0>

In [11]:
docs=loader.load()
docs

[Document(metadata={'source': 'https://docs.smith.langchain.com/tutorials/Administrators/manage_spend', 'title': '404: NOT_FOUND', 'language': 'en'}, page_content='404: NOT_FOUND404: NOT_FOUND\nCode: NOT_FOUND\nID: bom1::48sb6-1738351955235-85420bef0ef6Read our documentation to learn more about this error.')]

In [12]:
### Load Data--> Docs-->Divide our Docuemnts into chunks dcouments-->text-->vectors-->Vector Embeddings--->Vector Store DB
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
documents=text_splitter.split_documents(docs)

In [13]:
documents

[Document(metadata={'source': 'https://docs.smith.langchain.com/tutorials/Administrators/manage_spend', 'title': '404: NOT_FOUND', 'language': 'en'}, page_content='404: NOT_FOUND404: NOT_FOUND\nCode: NOT_FOUND\nID: bom1::48sb6-1738351955235-85420bef0ef6Read our documentation to learn more about this error.')]

In [14]:
from langchain_openai import OpenAIEmbeddings
embeddings=OpenAIEmbeddings()

In [15]:
from langchain_community.vectorstores import FAISS
vectorstoredb=FAISS.from_documents(documents,embeddings)

In [16]:
vectorstoredb

<langchain_community.vectorstores.faiss.FAISS at 0x12825ab00>

In [17]:
## Query From a vector db
query="LangSmith has two usage limits: total traces and extended"
result=vectorstoredb.similarity_search(query)
result[0].page_content

'404: NOT_FOUND404: NOT_FOUND\nCode: NOT_FOUND\nID: bom1::48sb6-1738351955235-85420bef0ef6Read our documentation to learn more about this error.'

In [18]:
from langchain_openai import ChatOpenAI
llm=ChatOpenAI(model="gpt-4o")

In [19]:
## Retrieval Chain, Document chain

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

prompt=ChatPromptTemplate.from_template(
    """
Answer the following question based only on the provided context:
<context>
{context}
</context>


"""
)

document_chain=create_stuff_documents_chain(llm,prompt)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following question based only on the provided context:\n<context>\n{context}\n</context>\n\n\n'), additional_kwargs={})])
| ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x128029e70>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x12802bf40>, root_client=<openai.OpenAI object at 0x12825ad70>, root_async_client=<openai.AsyncOpenAI object at 0x128029ed0>, model_name='gpt-4o', model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputParser(), kwargs={}, config={'run_name': 'stuff_documents_chain'}, config_fact

In [21]:
from langchain_core.documents import Document
document_chain.invoke({
    "input":"LangSmith has two usage limits: total traces and extended",
    "context":[Document(page_content="LangSmith has two usage limits: total traces and extended traces. These correspond to the two metrics we've been tracking on our usage graph. ")]
})

'LangSmith tracks usage based on two limits: total traces and extended traces. These are the metrics used for monitoring usage on their usage graph.'

However, we want the documents to first come from the retriever we just set up. That way, we can use the retriever to dynamically select the most relevant documents and pass those in for a given question.

In [22]:
### Input--->Retriever--->vectorstoredb

vectorstoredb

<langchain_community.vectorstores.faiss.FAISS at 0x12825ab00>

In [23]:
retriever=vectorstoredb.as_retriever()
from langchain.chains import create_retrieval_chain
retrieval_chain=create_retrieval_chain(retriever,document_chain)


In [24]:
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x12825ab00>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following question based only on the provided context:\n<context>\n{context}\n</context>\n\n\n'), additional_kwargs={})])
            | ChatOpe

In [25]:
## Get the response form the LLM
response=retrieval_chain.invoke({"input":"LangSmith has two usage limits: total traces and extended"})
response['answer']

'It seems that the context provided is an error message indicating a "404: NOT_FOUND" situation, which typically means that the requested resource could not be found on the server. Unfortunately, without additional information or context, I can\'t provide a specific answer to your question. If you have a different question or need help with something else, please let me know!'

In [26]:

response

{'input': 'LangSmith has two usage limits: total traces and extended',
 'context': [Document(id='8f1408d8-c498-4356-99fc-581574fba039', metadata={'source': 'https://docs.smith.langchain.com/tutorials/Administrators/manage_spend', 'title': '404: NOT_FOUND', 'language': 'en'}, page_content='404: NOT_FOUND404: NOT_FOUND\nCode: NOT_FOUND\nID: bom1::48sb6-1738351955235-85420bef0ef6Read our documentation to learn more about this error.')],
 'answer': 'It seems that the context provided is an error message indicating a "404: NOT_FOUND" situation, which typically means that the requested resource could not be found on the server. Unfortunately, without additional information or context, I can\'t provide a specific answer to your question. If you have a different question or need help with something else, please let me know!'}

In [27]:
response['context']

[Document(id='8f1408d8-c498-4356-99fc-581574fba039', metadata={'source': 'https://docs.smith.langchain.com/tutorials/Administrators/manage_spend', 'title': '404: NOT_FOUND', 'language': 'en'}, page_content='404: NOT_FOUND404: NOT_FOUND\nCode: NOT_FOUND\nID: bom1::48sb6-1738351955235-85420bef0ef6Read our documentation to learn more about this error.')]