In [3]:
import os
from dotenv import load_dotenv
load_dotenv()

os.environ['OPENAI_API_KEY']=os.getenv('OPENAI_API_KEY')
os.environ['LANGCHAIN_API_KEY']=os.getenv('LANGCHAIN_API_KEY')
os.environ['LANGCHAIN_PROJECT']=os.getenv('LANGCHAIN_PROJECT')
os.environ['LANGCHAIN_TRACING_V2']='true'

In [13]:
from langchain_openai import OpenAIEmbeddings
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI

In [5]:
loader=WebBaseLoader("https://docs.smith.langchain.com/tutorials/Administrators/manage_spend")
doc=loader.load()

In [7]:
text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
documents=text_splitter.split_documents(doc)

In [9]:
embedding=OpenAIEmbeddings()
db=FAISS.from_documents(documents,embedding)


In [10]:
db

<langchain_community.vectorstores.faiss.FAISS at 0x1e5ec1625f0>

In [14]:
llm=ChatOpenAI(model='gpt-4o-mini')
llm


ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x000001E5EC11E7A0>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x000001E5EC11D780>, root_client=<openai.OpenAI object at 0x000001E5EC161E10>, root_async_client=<openai.AsyncOpenAI object at 0x000001E5EC11E800>, model_name='gpt-4o-mini', model_kwargs={}, openai_api_key=SecretStr('**********'))

In [15]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [17]:
prompt=ChatPromptTemplate.from_template(
    """
Answer the following questions based on provided context:
<context>
{context}
</context>

"""
)

document_chain=create_stuff_documents_chain(llm,prompt)

document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following questions based on provided context:\n<context>\n{context}\n</context>\n\n'), additional_kwargs={})])
| ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x000001E5EC11E7A0>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x000001E5EC11D780>, root_client=<openai.OpenAI object at 0x000001E5EC161E10>, root_async_client=<openai.AsyncOpenAI object at 0x000001E5EC11E800>, model_name='gpt-4o-mini', model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputParser(), kwargs={}, config={'run_name': 'stuff_documen

In [21]:
from langchain_core.documents import Document

document_chain.invoke({
    'input':'LangSmith has two usage limits: total traces and extended',
    'context':[Document(page_content="LangSmith has two usage limits: total traces and extended traces. These correspond to the two metrics we've been tracking on our usage graph.")]
})

'Sure! Please go ahead and ask your questions based on the provided context.'

In [22]:
from langchain_core.documents import Document
document_chain.invoke({
    "input":"LangSmith has two usage limits: total traces and extended",
    "context":[Document(page_content="LangSmith has two usage limits: total traces and extended traces. These correspond to the two metrics we've been tracking on our usage graph. ")]
})

'Sure! Please provide the questions you would like me to answer based on the provided context.'

In [23]:
retriver=db.as_retriever()

from langchain.chains import create_retrieval_chain

retriver_chain=create_retrieval_chain(retriver,document_chain)

In [24]:
retriver_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001E5EC1625F0>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following questions based on provided context:\n<context>\n{context}\n</context>\n\n'), additional_kwargs={})])
            | ChatOpenAI

In [25]:
response=retriver_chain.invoke({'input':'LangSmith has two usage limits: total traces and extended'})

response['answer']

'Based on the provided context, here are the answers to potential questions:\n\n1. **What is the primary focus of Optimization 2?**\n   - The primary focus of Optimization 2 is to limit usage to prevent future overspending.\n\n2. **What are the two usage limits that LangSmith has?**\n   - The two usage limits are total traces and extended retention traces.\n\n3. **Where can users set usage limits for their workspaces?**\n   - Users can set usage limits by navigating to Settings -> Usage and Billing -> Usage configuration.\n\n4. **Why is it suggested to start by setting limits on production usage?**\n   - It is suggested to start by setting limits on production usage because that is where the majority of spending occurs.\n\n5. **What does the usage graph allow users to examine?**\n   - The usage graph allows users to examine how much of each usage-based pricing metric they have consumed lately.\n\n6. **What are the two usage metrics that LangSmith charges for?**\n   - The two usage metr

In [26]:
response

{'input': 'LangSmith has two usage limits: total traces and extended',
 'context': [Document(metadata={'source': 'https://docs.smith.langchain.com/tutorials/Administrators/manage_spend', 'title': 'Optimize tracing spend on LangSmith | 🦜️🛠️ LangSmith', 'description': 'Before diving into this content, it might be helpful to read the following:', 'language': 'en'}, page_content='That\'s a cost reduction of nearly 75% per day!Optimization 2: limit usage\u200bIn the previous section, we managed data retention settings to optimize existing spend. In this section, we will\nuse usage limits to prevent future overspend.LangSmith has two usage limits: total traces and extended retention traces. These correspond to the two metrics we\'ve\nbeen tracking on our usage graph. We can use these in tandem to have granular control over spend.To set limits, we navigate back to Settings -> Usage and Billing -> Usage configuration. There is a table at the\nbottom of the page that lets you set usage limits p

In [27]:
response['context']

[Document(metadata={'source': 'https://docs.smith.langchain.com/tutorials/Administrators/manage_spend', 'title': 'Optimize tracing spend on LangSmith | 🦜️🛠️ LangSmith', 'description': 'Before diving into this content, it might be helpful to read the following:', 'language': 'en'}, page_content='That\'s a cost reduction of nearly 75% per day!Optimization 2: limit usage\u200bIn the previous section, we managed data retention settings to optimize existing spend. In this section, we will\nuse usage limits to prevent future overspend.LangSmith has two usage limits: total traces and extended retention traces. These correspond to the two metrics we\'ve\nbeen tracking on our usage graph. We can use these in tandem to have granular control over spend.To set limits, we navigate back to Settings -> Usage and Billing -> Usage configuration. There is a table at the\nbottom of the page that lets you set usage limits per workspace. For each workspace, the two limits appear, along\nwith a cost estimat