#### Simple GENAI APPLICATION

In [1]:
import os
from dotenv import load_dotenv
load_dotenv()

# Credentials are expected in the process environment or in the .env file read
# by load_dotenv() above. Writing os.getenv(...) straight back into os.environ
# fails with an opaque "TypeError: str expected, not NoneType" when a variable
# is absent, so check up front and fail with a message that names what is missing.
required_env_vars = ("GOOGLE_API_KEY", "LANGCHAIN_API_KEY", "LANGCHAIN_PROJECT")
missing_env_vars = [name for name in required_env_vars if not os.getenv(name)]
if missing_env_vars:
    raise EnvironmentError(
        "Missing required environment variable(s): "
        + ", ".join(missing_env_vars)
        + ". Define them in your shell or in a .env file before running this notebook."
    )

# Enable LangSmith tracing for the chains invoked in this notebook.
os.environ["LANGCHAIN_TRACING_V2"] = "true"

In [2]:
### Data Ingestion -- From the website we need to scrape the data

# langchain_community warns "USER_AGENT environment variable not set" when the
# web loader is imported without it. Provide a default identifier before the
# import; setdefault leaves any value the user already configured untouched.
os.environ.setdefault("USER_AGENT", "simple-genai-app/0.1")

from langchain_community.document_loaders import WebBaseLoader

# Loader for the single tutorial page that serves as the knowledge source.
loader = WebBaseLoader("https://docs.langchain.com/langsmith/evaluate-chatbot-tutorial")
loader

USER_AGENT environment variable not set, consider setting it to identify your requests.


<langchain_community.document_loaders.web_base.WebBaseLoader at 0x2004d2c3ce0>

In [3]:
# Fetch the page and parse it into a list of Document objects
# (each carries `metadata` and `page_content`).
docs = loader.load()
# Display the loaded documents for inspection.
docs

[Document(metadata={'source': 'https://docs.langchain.com/langsmith/evaluate-chatbot-tutorial', 'title': 'Evaluate a chatbot - Docs by LangChain', 'language': 'en'}, page_content='Evaluate a chatbot - Docs by LangChainOur new LangChain Academy course on Deep Agents is now live! Enroll for free.Docs by LangChain home pagePythonSearch...⌘KLangSmithPlatform for LLM observability and evaluationOverviewConceptsEvaluation approachesDatasetsCreate a datasetManage datasetsSet up evaluationsRun an evaluationEvaluation typesFrameworks & integrationsEvaluation techniquesImprove evaluatorsTutorialsEvaluate a chatbotEvaluate a RAG applicationTest a ReAct agent with Pytest/Vitest and LangSmithEvaluate a complex agentRun backtests on a new version of an agentAnalyze experiment resultsAnalyze an experimentCompare experiment resultsFilter experiments in the UIFetch performance metrics for an experimentUpload experiments run outside of LangSmithAnnotation & human feedbackUse annotation queuesSet up feed

In [4]:
### Load Data --> Docs --> Divide our text into chunks --> Embeddings --> Vector DB
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded page into chunks of size 1000 with an overlap of 200 between
# neighbouring chunks, so text cut at a boundary still appears whole in one chunk.
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200)
# `documents` holds the chunked Document objects that get embedded later.
documents = text_splitter.split_documents(docs)
documents


[Document(metadata={'source': 'https://docs.langchain.com/langsmith/evaluate-chatbot-tutorial', 'title': 'Evaluate a chatbot - Docs by LangChain', 'language': 'en'}, page_content='Evaluate a chatbot - Docs by LangChainOur new LangChain Academy course on Deep Agents is now live! Enroll for free.Docs by LangChain home pagePythonSearch...⌘KLangSmithPlatform for LLM observability and evaluationOverviewConceptsEvaluation approachesDatasetsCreate a datasetManage datasetsSet up evaluationsRun an evaluationEvaluation typesFrameworks & integrationsEvaluation techniquesImprove evaluatorsTutorialsEvaluate a chatbotEvaluate a RAG applicationTest a ReAct agent with Pytest/Vitest and LangSmithEvaluate a complex agentRun backtests on a new version of an agentAnalyze experiment resultsAnalyze an experimentCompare experiment resultsFilter experiments in the UIFetch performance metrics for an experimentUpload experiments run outside of LangSmithAnnotation & human feedbackUse annotation queuesSet up feed

In [5]:
from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings
# Embedding model used to vectorise the document chunks for the vector store.
# NOTE(review): presumably authenticates via GOOGLE_API_KEY from the environment - confirm.
embeddings = GoogleGenerativeAIEmbeddings(model="gemini-embedding-001")

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
from langchain_community.vectorstores import FAISS

# Embed every chunk and index the resulting vectors in a FAISS vector store.
vectorstoredb = FAISS.from_documents(documents, embeddings)

In [7]:
# Display the vector store object to confirm it was created.
vectorstoredb

<langchain_community.vectorstores.faiss.FAISS at 0x2006f496600>

In [8]:
## Query from vector db
# Sanity-check retrieval: search the store with a sentence taken from the page.
query = " Each datapoint should consist of, at the very least, the inputs to the application."
result = vectorstoredb.similarity_search(query)
# Show the text of the first returned match.
result[0].page_content

'Schema: Each datapoint should consist of, at the very least, the inputs to the application. If you are able, it is also very helpful to define the expected outputs - these represent what you would expect a properly functioning application to output. Often times you cannot define the perfect output - that’s okay! Evaluation is an iterative process. Sometimes you may also want to define more information for each example - like the expected documents to fetch in RAG, or the expected steps to take as an agent. LangSmith datasets are very flexible and allow you to define arbitrary schemas.\nHow many: There’s no hard and fast rule for how many you should gather. The main thing is to make sure you have proper coverage of edge cases you may want to guard against. Even 10-50 examples can provide a lot of value! Don’t worry about getting a large number to start - you can (and should) always add over time!'

In [9]:
from langchain_google_genai import ChatGoogleGenerativeAI
# Chat model that generates the final answers.
# temperature=0 is presumably chosen to keep responses as repeatable as possible.
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0)

In [10]:
## Retrieval Chain, Document Chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_template(
    """
Answer the following question based on the provided context: 
<context>{context}</context>
"""
)

document_chain = create_stuff_documents_chain(llm, prompt)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following question based on the provided context: \n<context>{context}</context>\n'), additional_kwargs={})])
| ChatGoogleGenerativeAI(model='models/gemini-2.5-flash', google_api_key=SecretStr('**********'), temperature=0.0, client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x000002006F497FB0>, default_metadata=(), model_kwargs={})
| StrOutputParser(), kwargs={}, config={'run_name': 'stuff_documents_chain'}, config_factories=[])

In [12]:
from langchain_core.documents import Document

# Exercise the document chain on its own, using one hand-written Document as
# the context instead of retrieved results.
document_chain.invoke(
    {
        "input": "Each datapoint should consist of, at the very least, the inputs to the application.",
        "context": [
            Document(
                page_content="Each datapoint should consist of, at the very least, the inputs to the application.If you are able, it is also very helpful to define the expected outputs - these represent what you would expect a properly functioning application to output."
            )
        ],
    }
)

'Based on the provided context, each datapoint should consist of, at the very least, the inputs to the application. It is also very helpful, if possible, to define the expected outputs.'

However, we want the documents to come from the retriever we just set up. That way, the
retriever can dynamically select the most relevant documents for a given question and pass them to the chain.

In [13]:
## Input --> Retriever --> Vectorstore Db --> Relevant Docs --> output
from langchain.chains import create_retrieval_chain

# Expose the vector store through the retriever interface.
retriever = vectorstoredb.as_retriever()

# Combine retrieval and answering: the retriever supplies the context
# documents and the document chain produces the answer from them.
retriever_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)
retriever_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000002006F496600>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following question based on the provided context: \n<context>{context}</context>\n'), additional_kwargs={})])
            | 

In [14]:
## Get the response from the LLM
# Run the full pipeline for one query; the result is a dict and the generated
# text is stored under its "answer" key.
response = retriever_chain.invoke(
    {"input": "Each datapoint should consist of, at the very least, the inputs to the application."}
)
response["answer"]

'There is no hard and fast rule for how many datapoints you should gather. The main thing is to ensure proper coverage of edge cases you may want to guard against. Even 10-50 examples can provide a lot of value, and you can always add more over time.\n\nFor the specific tutorial mentioned in the context, they will create 5 datapoints to evaluate.'

In [15]:
# Inspect the whole result dict: the original "input", the retrieved "context"
# documents, and the generated "answer".
response

{'input': 'Each datapoint should consist of, at the very least, the inputs to the application.',
 'context': [Document(id='b2c4fbda-846c-4110-8662-18cba36fead4', metadata={'source': 'https://docs.langchain.com/langsmith/evaluate-chatbot-tutorial', 'title': 'Evaluate a chatbot - Docs by LangChain', 'language': 'en'}, page_content='Schema: Each datapoint should consist of, at the very least, the inputs to the application. If you are able, it is also very helpful to define the expected outputs - these represent what you would expect a properly functioning application to output. Often times you cannot define the perfect output - that’s okay! Evaluation is an iterative process. Sometimes you may also want to define more information for each example - like the expected documents to fetch in RAG, or the expected steps to take as an agent. LangSmith datasets are very flexible and allow you to define arbitrary schemas.\nHow many: There’s no hard and fast rule for how many you should gather. The