#### Simple Gen AI APP Using Langchain

In [4]:
import os
from dotenv import load_dotenv
load_dotenv()

os.environ['OPENAI_API_KEY']=os.getenv("OPENAI_API_KEY_1")
## Langsmith Tracking
os.environ["LANGCHAIN_API_KEY"]=os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"]="true"
os.environ["LANGCHAIN_PROJECT"]=os.getenv("LANGCHAIN_PROJECT")

In [5]:
## Data Ingestion--From the website we need to scrape the data
from langchain_community.document_loaders import WebBaseLoader

  from .autonotebook import tqdm as notebook_tqdm
USER_AGENT environment variable not set, consider setting it to identify your requests.


In [6]:
loader=WebBaseLoader("https://docs.langchain.com/oss/python/langchain/rag")
loader

<langchain_community.document_loaders.web_base.WebBaseLoader at 0x2105ed09540>

In [7]:
docs=loader.load()
docs



In [8]:
### Load Data--> Docs-->Divide our Docuemnts into chunks dcouments-->text-->vectors-->Vector Embeddings--->Vector Store DB
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
documents=text_splitter.split_documents(docs)

In [9]:
documents

[Document(metadata={'source': 'https://docs.langchain.com/oss/python/langchain/rag', 'title': 'Build a RAG agent with LangChain - Docs by LangChain', 'language': 'en'}, page_content='Build a RAG agent with LangChain - Docs by LangChainSkip to main contentDocs by LangChain home pageLangChain + LangGraphSearch...‚åòKAsk AIGitHubTry LangSmithTry LangSmithSearch...NavigationLangChainBuild a RAG agent with LangChainLangChainLangGraphDeep AgentsIntegrationsLearnReferenceContributePythonLearnTutorialsLangChainSemantic searchRAG agentSQL agentVoice agentMulti-agentLangGraphConceptual overviewsLangChain vs. LangGraph vs. Deep AgentsComponent architectureMemoryContextGraph APIFunctional APIAdditional resourcesLangChain AcademyCase studiesGet helpOn this pageOverviewConceptsPreviewSetupInstallationLangSmithComponents1. IndexingLoading documentsSplitting documentsStoring documents2. Retrieval and generationRAG agentsRAG chainsNext stepsTutorialsLangChainBuild a RAG agent with LangChainCopy pageCop

In [10]:
from langchain_openai import OpenAIEmbeddings
embeddings=OpenAIEmbeddings()

In [11]:
from langchain_community.vectorstores import FAISS
vectorstoredb=FAISS.from_documents(documents,embeddings)

In [12]:
vectorstoredb

<langchain_community.vectorstores.faiss.FAISS at 0x2102e86a2c0>

In [15]:
## Query From a vector db
query="We will need to select three components from LangChain‚Äôs suite of integrations."
result=vectorstoredb.similarity_search(query)
result[0].page_content

'Or, set them in Python:\nCopyimport getpass\nimport os\n\nos.environ["LANGSMITH_TRACING"] = "true"\nos.environ["LANGSMITH_API_KEY"] = getpass.getpass()\n\n\u200bComponents\nWe will need to select three components from LangChain‚Äôs suite of integrations.\nSelect a chat model:\n OpenAI Anthropic Azure Google Gemini AWS Bedrock HuggingFaceüëâ Read the OpenAI chat model integration docsCopypip install -U "langchain[openai]"\ninit_chat_modelModel ClassCopyimport os\nfrom langchain.chat_models import init_chat_model\n\nos.environ["OPENAI_API_KEY"] = "sk-..."\n\nmodel = init_chat_model("gpt-4.1")\nüëâ Read the Anthropic chat model integration docsCopypip install -U "langchain[anthropic]"\ninit_chat_modelModel ClassCopyimport os\nfrom langchain.chat_models import init_chat_model\n\nos.environ["ANTHROPIC_API_KEY"] = "sk-..."'

In [16]:
from langchain_openai import ChatOpenAI
llm=ChatOpenAI(model="gpt-4o")

In [20]:
## Retrieval Chain, Document chain

from langchain_classic.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

prompt=ChatPromptTemplate.from_template(
    """
Answer the following question based only on the provided context:
<context>
{context}
</context>


"""
)

document_chain=create_stuff_documents_chain(llm,prompt)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following question based only on the provided context:\n<context>\n{context}\n</context>\n\n\n'), additional_kwargs={})])
| ChatOpenAI(profile={'max_input_tokens': 128000, 'max_output_tokens': 16384, 'image_inputs': True, 'audio_inputs': False, 'video_inputs': False, 'image_outputs': False, 'audio_outputs': False, 'video_outputs': False, 'reasoning_output': False, 'tool_calling': True, 'structured_output': True, 'image_url_inputs': True, 'pdf_inputs': True, 'pdf_tool_message': True, 'image_tool_message': True, 'tool_choice': True}, client=<openai.resources.chat.completions.co

In [25]:
from langchain_core.documents import Document
document_chain.invoke({
    "input":"LangChain‚Äôs suite of integrations.",
    "context":[Document(page_content="Components We will need to select three components from LangChain‚Äôs suite of integrations.")]
})

"Sure, please provide more details or specify the question you need answered about selecting components from LangChain's suite of integrations."

However, we want the documents to first come from the retriever we just set up. That way, we can use the retriever to dynamically select the most relevant documents and pass those in for a given question.

In [22]:
### Input--->Retriever--->vectorstoredb

vectorstoredb

<langchain_community.vectorstores.faiss.FAISS at 0x2102e86a2c0>

In [26]:
retriever=vectorstoredb.as_retriever()
from langchain_classic.chains import create_retrieval_chain
retrieval_chain=create_retrieval_chain(retriever,document_chain) ## document chain gives context information

In [27]:
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000002102E86A2C0>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following question based only on the provided context:\n<context>\n{context}\n</context>\n\n\n'), additional_kwargs={})])
            | 

In [28]:
## Get the response form the LLM
response=retrieval_chain.invoke({"input":"LangSmith has two usage limits: total traces and extended"})
response['answer']

'What are some drawbacks of using the agentic RAG formulation in LangSmith?\n\nBased on the provided context, the drawbacks of using the agentic RAG formulation in LangSmith are:\n\n1. **Two inference calls**: When a search is performed, it requires one call to generate the query and another to produce the final response.\n2. **Reduced control**: The LLM may skip searches when they are actually needed or issue extra searches when they are unnecessary.'

In [29]:

response

{'input': 'LangSmith has two usage limits: total traces and extended',
 'context': [Document(id='f19fac4b-a130-4759-a95e-ece4f5019ade', metadata={'source': 'https://docs.langchain.com/oss/python/langchain/rag', 'title': 'Build a RAG agent with LangChain - Docs by LangChain', 'language': 'en'}, page_content='For more details, see our Installation guide.\n\u200bLangSmith\nMany of the applications you build with LangChain will contain multiple steps with multiple invocations of LLM calls. As these applications get more complex, it becomes crucial to be able to inspect what exactly is going on inside your chain or agent. The best way to do this is with LangSmith.\nAfter you sign up at the link above, make sure to set your environment variables to start logging traces:\nCopyexport LANGSMITH_TRACING="true"\nexport LANGSMITH_API_KEY="..."\n\nOr, set them in Python:\nCopyimport getpass\nimport os\n\nos.environ["LANGSMITH_TRACING"] = "true"\nos.environ["LANGSMITH_API_KEY"] = getpass.getpass()')

In [30]:
response['context']

[Document(id='f19fac4b-a130-4759-a95e-ece4f5019ade', metadata={'source': 'https://docs.langchain.com/oss/python/langchain/rag', 'title': 'Build a RAG agent with LangChain - Docs by LangChain', 'language': 'en'}, page_content='For more details, see our Installation guide.\n\u200bLangSmith\nMany of the applications you build with LangChain will contain multiple steps with multiple invocations of LLM calls. As these applications get more complex, it becomes crucial to be able to inspect what exactly is going on inside your chain or agent. The best way to do this is with LangSmith.\nAfter you sign up at the link above, make sure to set your environment variables to start logging traces:\nCopyexport LANGSMITH_TRACING="true"\nexport LANGSMITH_API_KEY="..."\n\nOr, set them in Python:\nCopyimport getpass\nimport os\n\nos.environ["LANGSMITH_TRACING"] = "true"\nos.environ["LANGSMITH_API_KEY"] = getpass.getpass()'),
 Document(id='291041d2-c4af-4803-987c-9cc20992728e', metadata={'source': 'https:/