In [None]:
import os
from dotenv import load_dotenv
load_dotenv()

def export_env_var(target, *fallbacks):
    """Export the first non-empty value found under ``target`` or ``fallbacks``.

    The value is written to ``os.environ[target]``. Returns True when a value
    was found and exported, False otherwise. A missing variable is left unset
    instead of raising ``TypeError`` (``os.environ`` rejects ``None``), which is
    what ``os.environ[name] = os.getenv(name)`` does when ``name`` is absent.
    """
    for name in (target, *fallbacks):
        value = os.getenv(name)
        if value:
            os.environ[target] = value
            return True
    return False


## OpenAI
# langchain_openai looks the key up under OPENAI_API_KEY. The previously used
# (misspelled) OPEN_API_KEY is still accepted as a fallback source so an
# existing .env file keeps working.
if not export_env_var('OPENAI_API_KEY', 'OPEN_API_KEY'):
    print("Warning: OPENAI_API_KEY is not set; OpenAI embedding/chat calls will fail.")

## Langsmith Tracking

for langsmith_var in ('LANGCHAIN_API_KEY', 'LANGCHAIN_PROJECT'):
    export_env_var(langsmith_var)
os.environ['LANGCHAIN_TRACING_V2']="true"

In [2]:
## Data Ingestion: the page content has to be scraped from the website.
## For that we use the BeautifulSoup library (through WebBaseLoader).

from langchain_community.document_loaders import WebBaseLoader

# Loader bound to the LangSmith observability quickstart page.
loader = WebBaseLoader(
    "https://docs.langchain.com/langsmith/observability-quickstart"
)
loader

<langchain_community.document_loaders.web_base.WebBaseLoader at 0x110973110>

In [4]:
# Run the loader; the result is a list of Document objects, each carrying
# page_content plus metadata (source, title, language).
docs = loader.load()
docs

[Document(metadata={'source': 'https://docs.langchain.com/langsmith/observability-quickstart', 'title': 'Tracing quickstart - Docs by LangChain', 'language': 'en'}, page_content='Tracing quickstart - Docs by LangChainSkip to main contentWe\'ve raised a $125M Series B to build the platform for agent engineering. Read more.Docs by LangChain home pageLangSmithSearch...⌘KGitHubTry LangSmithTry LangSmithSearch...NavigationTracing quickstartGet startedObservabilityEvaluationPrompt engineeringDeploymentAgent BuilderHostingOverviewQuickstartConceptsTrace a RAG applicationTracing setupIntegrationsManual instrumentationThreadsConfiguration & troubleshootingProject & environment settingsAdvanced tracing techniquesData & privacyTroubleshooting guidesViewing & managing tracesFilter tracesQuery traces (SDK)Compare tracesShare or unshare a trace publiclyView server logs for a traceBulk export trace dataAutomationsSet up automation rulesConfigure webhook notifications for rulesFeedback & evaluationLog

In [None]:
## Pipeline: Load data --> Docs --> Split docs into chunks --> Create embeddings
##           --> Store embeddings in a vector database --> Create retrieval QA chain --> Ask questions

from langchain_text_splitters import RecursiveCharacterTextSplitter

# chunk_size caps the length of each chunk; chunk_overlap is how much
# neighbouring chunks share so context is not cut off at a chunk boundary.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
documents = text_splitter.split_documents(docs)
documents

[Document(metadata={'source': 'https://docs.langchain.com/langsmith/observability-quickstart', 'title': 'Tracing quickstart - Docs by LangChain', 'language': 'en'}, page_content="Tracing quickstart - Docs by LangChainSkip to main contentWe've raised a $125M Series B to build the platform for agent engineering. Read more.Docs by LangChain home pageLangSmithSearch...⌘KGitHubTry LangSmithTry LangSmithSearch...NavigationTracing quickstartGet startedObservabilityEvaluationPrompt engineeringDeploymentAgent BuilderHostingOverviewQuickstartConceptsTrace a RAG applicationTracing setupIntegrationsManual instrumentationThreadsConfiguration & troubleshootingProject & environment settingsAdvanced tracing techniquesData & privacyTroubleshooting guidesViewing & managing tracesFilter tracesQuery traces (SDK)Compare tracesShare or unshare a trace publiclyView server logs for a traceBulk export trace dataAutomationsSet up automation rulesConfigure webhook notifications for rulesFeedback & evaluationLog 

In [8]:
from langchain_openai import OpenAIEmbeddings

# Default-configured OpenAI embeddings client; it is handed to the vector store
# to turn the document chunks into vectors.
embeddings = OpenAIEmbeddings()


In [11]:
from langchain_community.vectorstores import FAISS

# Build the FAISS index from the chunks using the embeddings client.
# NOTE: this step calls the OpenAI API, so it raises RateLimitError
# (code 429, insufficient_quota) when the account has no remaining quota.
vectorstoreDB = FAISS.from_documents(documents, embeddings)
vectorstoreDB

RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [None]:
from langchain_openai import ChatOpenAI

# Chat model passed to the document chain to generate the answers.
llm = ChatOpenAI(model="gpt-4o")


In [None]:
## Retrievers and Chain
## First a direct similarity search against the vector store, as a sanity check.

query = "Explain about Langchain Observability Quickstart?"
# Fixed method name: `simlarity_search` does not exist on the vector store
# and raised AttributeError.
result = vectorstoreDB.similarity_search(query)
# Text of the best-matching chunk.
result[0].page_content

In [None]:
## Retrieval Chain, Document chain

# create_stuff_documents_chain "stuffs" the supplied documents into the
# {context} placeholder of the prompt.
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# {context} is replaced by the relevant documents from the vector database.
# {input} carries the user's question. It was missing before, so the question
# passed as "input" never reached the model, which only saw the context.
prompt = ChatPromptTemplate.from_template(
    """Answer the following Question Based only on the context below.
<context>
{context}
</context>

Question: {input}"""
)

# document_chain formats the documents into the prompt and sends it to the llm.
document_chain = create_stuff_documents_chain(llm, prompt)
document_chain

In [None]:
# Document was used here without being imported, which raised NameError.
from langchain_core.documents import Document

# Here the context is provided manually; later it is supplied automatically
# by the retriever.
document_chain.invoke({
    "input": "Explain about Langchain Observability Quickstart?",
    "context": [
        Document(
            page_content=(
                "Observability is a critical requirement for applications built with "
                "large language models (LLMs). LLMs are non-deterministic, which means "
                "that the same prompt can produce different responses. This behavior "
                "makes debugging and monitoring more challenging than with traditional "
                "software."
            )
        )
    ],
})


In [2]:
### Retriever - an interface for fetching the relevant documents from the vector
### database for a given query. With it we no longer call similarity search
### ourselves; the chain asks the retriever instead.

from langchain.chains import create_retrieval_chain

# Keep the retriever object. Previously the result of as_retriever() was
# discarded, so `retriever` below was undefined and raised NameError.
retriever = vectorstoreDB.as_retriever()

# The retriever fetches the relevant documents from the vector database and
# document_chain places them into the prompt as context.
retrieval_chain = create_retrieval_chain(retriever, document_chain)

NameError: name 'vectorstoreDB' is not defined

In [None]:
## Get the response from llm

# "input" is the question being asked.
question = {"input": "Explain about Langchain Observability Quickstart?"}
response = retrieval_chain.invoke(question)

# Final answer from the llm, based on the context retrieved from the vector database.
response["answer"]

In [None]:
# The context that was used to produce the final answer can be inspected too.
response["context"]