## Simple Gen AI App Using Langchain

In [44]:
import os
from dotenv import load_dotenv
load_dotenv()

os.environ['PPLX_API_KEY'] = os.getenv('PPLX_API_KEY')
os.environ['GEMINI_API_KEY'] = os.getenv('GEMINI_API_KEY')
# Langsmith Tracking
os.environ['LANGCHAIN_API_KEY'] = os.getenv('LANGCHAIN_API_KEY')
os.environ['LANGCHAIN_TRACING_V2'] = "true"
os.environ['LANGCHAIN_PROJECT'] = os.getenv('LANGCHAIN_PROJECT')

In [45]:
## Data Ingestion -- from the website we need to scrape the data
from langchain_community.document_loaders import WebBaseLoader
from langchain_google_genai import GoogleGenerativeAIEmbeddings

In [46]:
loader = WebBaseLoader("https://docs.langchain.com/langsmith/observability-quickstart")
loader

<langchain_community.document_loaders.web_base.WebBaseLoader at 0x25121072930>

In [47]:
docs =loader.load()
docs

[Document(metadata={'source': 'https://docs.langchain.com/langsmith/observability-quickstart', 'title': 'Tracing quickstart - Docs by LangChain', 'language': 'en'}, page_content='Tracing quickstart - Docs by LangChainOur new LangChain Academy Course Deep Research with LangGraph is now live! Enroll for free.Docs by LangChain home pagePythonSearch...⌘KLangSmithPlatform for LLM observability and evaluationOverviewQuickstartsTrace an applicationEvaluate an applicationTest promptsAPI & SDKsAPI referencePython SDKJS/TS SDKPricingPlansPricing FAQOur new LangChain Academy Course Deep Research with LangGraph is now live! Enroll for free.Docs by LangChain home pagePythonSearch...⌘KAsk AIForumForumSearch...NavigationQuickstartsTracing quickstartGet startedObservabilityEvaluationPrompt engineeringSelf-hostingAdministrationGet startedObservabilityEvaluationPrompt engineeringSelf-hostingAdministrationForumOn this pageGet started1. Install Dependencies2. Create an API key3. Set up environment variabl

In [48]:
## Load Data --> Docs -->Divide our text into chunks --> vectors --> Vector Embeddings --> Vector Store DB
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200)
documents = text_splitter.split_documents(docs)

In [49]:
documents

[Document(metadata={'source': 'https://docs.langchain.com/langsmith/observability-quickstart', 'title': 'Tracing quickstart - Docs by LangChain', 'language': 'en'}, page_content='Tracing quickstart - Docs by LangChainOur new LangChain Academy Course Deep Research with LangGraph is now live! Enroll for free.Docs by LangChain home pagePythonSearch...⌘KLangSmithPlatform for LLM observability and evaluationOverviewQuickstartsTrace an applicationEvaluate an applicationTest promptsAPI & SDKsAPI referencePython SDKJS/TS SDKPricingPlansPricing FAQOur new LangChain Academy Course Deep Research with LangGraph is now live! Enroll for free.Docs by LangChain home pagePythonSearch...⌘KAsk AIForumForumSearch...NavigationQuickstartsTracing quickstartGet startedObservabilityEvaluationPrompt engineeringSelf-hostingAdministrationGet startedObservabilityEvaluationPrompt engineeringSelf-hostingAdministrationForumOn this pageGet started1. Install Dependencies2. Create an API key3. Set up environment variabl

In [50]:
import getpass
import os

if "PPLX_API_KEY" not in os.environ:
    os.environ["PPLX_API_KEY"] = getpass.getpass("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")

In [51]:
embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001",google_api_key=os.environ['GEMINI_API_KEY'])

In [52]:
from langchain_community.vectorstores import FAISS

vectorstoredb = FAISS.from_documents(documents, embeddings)

In [53]:
vectorstoredb

<langchain_community.vectorstores.faiss.FAISS at 0x251519d90a0>

In [54]:
## Query from a vector db

query = "This example uses OpenAI, but you can adapt it to use any LLM provider. If you’re using Anthropic"
result = vectorstoredb.similarity_search(query)
result[0].page_content

'\u200b2. Create an API key\nTo create an API key head to the LangSmith settings page. Then click + API Key.\n\u200b3. Set up environment variables\nThis example uses OpenAI, but you can adapt it to use any LLM provider.\nIf you’re using Anthropic, use the Anthropic wrapper to trace your calls. For other providers, use the traceable wrapper.\nCopyAsk AIexport LANGSMITH_TRACING=true\nexport LANGSMITH_API_KEY="<your-langsmith-api-key>"\nexport OPENAI_API_KEY="<your-openai-api-key>"\n\n\u200b4. Define your application\nWe will instrument a simple RAG application for this tutorial, but feel free to use your own code if you’d like - just make sure it has an LLM call!\nApplication CodePythonTypeScriptCopyAsk AIfrom openai import OpenAI\nopenai_client = OpenAI()\n\n# This is the retriever we will use in RAG\n# This is mocked out, but it could be anything we want\ndef retriever(query: str):\n    results = ["Harrison worked at Kensho"]\n    return results'

In [55]:
from langchain_perplexity import ChatPerplexity
llm = ChatPerplexity(model="sonar-pro", temperature=0) 

In [56]:
## Retrieval Chain, Document chain

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_template(
    """
    Answer the following question based on the provided context:
    <context>
    {context}
    </context>
    """
)

document_chain = create_stuff_documents_chain(llm,prompt)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\n    Answer the following question based on the provided context:\n    <context>\n    {context}\n    </context>\n    '), additional_kwargs={})])
| ChatPerplexity(client=<openai.OpenAI object at 0x0000025121073170>, model='sonar-pro', temperature=0.0, model_kwargs={}, pplx_api_key=SecretStr('**********'))
| StrOutputParser(), kwargs={}, config={'run_name': 'stuff_documents_chain'}, config_factories=[])

In [57]:
from langchain_core.documents import Document 
document_chain.invoke({
    "input":"This example uses OpenAI, but you can adapt it to use any LLM provider. If you’re using Anthropic",
    "context": [Document(page_content="This example uses OpenAI, but you can adapt it to use any LLM provider. If you’re using Anthropic")]
})

'You can adapt the example that uses **OpenAI** to work with **Anthropic** by leveraging Anthropic’s Model Context Protocol (MCP), which provides a standardized, interoperable way to connect AI models to external tools and resources[1][2]. This involves switching from OpenAI’s centralized API approach to Anthropic’s protocol-focused, client-server architecture.\n\nTo adapt your integration:\n\n- **Replace OpenAI-specific API calls** with MCP-compatible requests. Anthropic’s MCP allows you to connect your LLM client (which could be OpenAI, Anthropic, or another provider) to MCP servers that expose tools and resources[2].\n- **Initiate an MCP client** and connect it to the relevant MCP server (e.g., for database access or other tools)[2].\n- **Load tools and resources** from the MCP server, converting them into function-calling formats compatible with your LLM (often using JSON Schema)[2].\n- **Customize system messages** and prompts to reflect the available MCP features and tools[2].\n-

However, we want the documents to first come from the retriever we just set up. That way, we can use the retreiver to dynamically select the most relevent documents and pass those in for a given question.

In [None]:
## Input --->Retreival ---> VectorStoreDB

vectorstoredb

<langchain_community.vectorstores.faiss.FAISS at 0x251519d90a0>

In [61]:
retriever =vectorstoredb.as_retriever()
from langchain.chains import create_retrieval_chain
retrieval_chain = create_retrieval_chain(retriever,document_chain)

In [62]:
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000251519D90A0>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\n    Answer the following question based on the provided context:\n    <context>\n    {context}\n    </context>\n    '), additional_kwar

In [63]:
## Get the response from the LLM
response =retrieval_chain.invoke({
    "input":"This example uses OpenAI, but you can adapt it to use any LLM provider. If you’re using Anthropic"
})

In [65]:
response['answer']

'To trace OpenAI calls in your RAG application using LangSmith, you need to wrap your OpenAI client with LangSmith’s tracing wrapper and set the appropriate environment variables, including your LangSmith API key.\n\n**Key steps:**\n\n- **Create an API key:** Go to the LangSmith settings page and generate an API key[5][1].\n- **Set environment variables:**  \n  - For recent LangSmith setups, use:\n    ```\n    export LANGSMITH_TRACING=true\n    export LANGSMITH_API_KEY="<your-langsmith-api-key>"\n    export OPENAI_API_KEY="<your-openai-api-key>"\n    ```\n  - Note: In older documentation, the variable was called `LANGCHAIN_API_KEY`, but for new projects, use `LANGSMITH_API_KEY`[3].\n- **Wrap your OpenAI client:**  \n  - Instead of using the OpenAI client directly, import and use the LangSmith wrapper:\n    ```python\n    from openai import OpenAI\n    from langsmith.wrappers import wrap_openai\n\n    openai_client = wrap_openai(OpenAI())\n    ```\n  - This ensures all OpenAI calls are 

In [66]:
response

{'input': 'This example uses OpenAI, but you can adapt it to use any LLM provider. If you’re using Anthropic',
 'context': [Document(id='22ff76a5-1574-40ec-bcef-1b95a3d3d92e', metadata={'source': 'https://docs.langchain.com/langsmith/observability-quickstart', 'title': 'Tracing quickstart - Docs by LangChain', 'language': 'en'}, page_content='\u200b2. Create an API key\nTo create an API key head to the LangSmith settings page. Then click + API Key.\n\u200b3. Set up environment variables\nThis example uses OpenAI, but you can adapt it to use any LLM provider.\nIf you’re using Anthropic, use the Anthropic wrapper to trace your calls. For other providers, use the traceable wrapper.\nCopyAsk AIexport LANGSMITH_TRACING=true\nexport LANGSMITH_API_KEY="<your-langsmith-api-key>"\nexport OPENAI_API_KEY="<your-openai-api-key>"\n\n\u200b4. Define your application\nWe will instrument a simple RAG application for this tutorial, but feel free to use your own code if you’d like - just make sure it ha

In [67]:
response['context']

[Document(id='22ff76a5-1574-40ec-bcef-1b95a3d3d92e', metadata={'source': 'https://docs.langchain.com/langsmith/observability-quickstart', 'title': 'Tracing quickstart - Docs by LangChain', 'language': 'en'}, page_content='\u200b2. Create an API key\nTo create an API key head to the LangSmith settings page. Then click + API Key.\n\u200b3. Set up environment variables\nThis example uses OpenAI, but you can adapt it to use any LLM provider.\nIf you’re using Anthropic, use the Anthropic wrapper to trace your calls. For other providers, use the traceable wrapper.\nCopyAsk AIexport LANGSMITH_TRACING=true\nexport LANGSMITH_API_KEY="<your-langsmith-api-key>"\nexport OPENAI_API_KEY="<your-openai-api-key>"\n\n\u200b4. Define your application\nWe will instrument a simple RAG application for this tutorial, but feel free to use your own code if you’d like - just make sure it has an LLM call!\nApplication CodePythonTypeScriptCopyAsk AIfrom openai import OpenAI\nopenai_client = OpenAI()\n\n# This is 