## Setup and Import Libraries

In [1]:
import os
from langchain_openai import ChatOpenAI
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_core.output_parsers import StrOutputParser
from dotenv import load_dotenv

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
load_dotenv()

True

In [3]:
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
os.environ["LANGSMITH_API_KEY"] = os.getenv("LANGSMITH_API_KEY")
os.environ["LANGSMITH_TRACING"] = os.getenv("LANGSMITH_TRACING")
os.environ["LANGCHAIN_PROJECT"] = os.getenv("LANGCHAIN_PROJECT")

In [4]:
llm = ChatOpenAI(model="gpt-4o")
llm

ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x00000198367471D0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x00000198371FF950>, root_client=<openai.OpenAI object at 0x00000198561352D0>, root_async_client=<openai.AsyncOpenAI object at 0x00000198561BBB50>, model_name='gpt-4o', model_kwargs={}, openai_api_key=SecretStr('**********'))

## Document Loader

In [5]:
loader = WebBaseLoader("https://docs.smith.langchain.com/administration/tutorials/manage_spend")
docs = loader.load()

## Text Splitting

In [6]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)

In [7]:
documents = text_splitter.split_documents(documents=docs)

In [8]:
# documents

## Embeddings and Vector Store

In [9]:
embeddings = OpenAIEmbeddings()

In [10]:
vector_store_db = FAISS.from_documents(
    documents=documents,
    embedding=embeddings
)

vector_store_db

<langchain_community.vectorstores.faiss.FAISS at 0x198561ae710>

## Query Vector Database

In [11]:
query = "LangSmith has two usage limits: total traces and extended"

In [12]:
result = vector_store_db.similarity_search(query=query)
result[0].page_content

'Lets start by setting limits on our production usage, since that is where the majority of spend comes from.\nSetting a good total traces limit\u200b\nPicking the right "total traces" limit depends on the expected load of traces that you will send to LangSmith. You should\nclearly think about your assumptions before setting a limit.\nFor example:\n\nCurrent Load: Our gen AI application is called between 1.2-1.5 times per second, and each API request has a trace associated with it,\nmeaning we log around 100,000-130,000 traces per day\nExpected Growth in Load: We expect to double in size in the near future.\n\nFrom these assumptions, we can do a quick back-of-the-envelope calculation to get a good limit of:\nlimit = current_load_per_day * expected_growth * days/month      = 130,000 * 2 * 30      = 7,800,000 traces / month\nWe click on the edit icon on the right side of the table for our Prod row, and can enter this limit as follows:'

## Retrieval Chain

In [13]:
prompt = ChatPromptTemplate.from_template(
    """
    Answer the following question based only on the provided context:
    <context>
    {context}
    </context>
    """
)

In [14]:
document_chain = create_stuff_documents_chain(
    llm=llm,
    prompt=prompt
)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\n    Answer the following question based only on the provided context:\n    <context>\n    {context}\n    </context>\n    '), additional_kwargs={})])
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x00000198367471D0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x00000198371FF950>, root_client=<openai.OpenAI object at 0x00000198561352D0>, root_async_client=<openai.AsyncOpenAI object at 0x00000198561BBB50>, model_name='gpt-4o', model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputParser(), 

In [15]:
document_chain.invoke(
    {
        "input":"LangSmith has two usage limits: total traces and extended",
        "context": [Document(page_content="LangSmith has two usage limits: total traces and extended etention traces. These correspond to the two metrics we've been tracking on our usage graph.")]
    }
)

'LangSmith has two usage limits: total traces and extended retention traces. These are the two metrics that are tracked on their usage graph.'

## Retriever

In [16]:
retriever = vector_store_db.as_retriever()

In [17]:
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain
)

retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000198561AE710>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\n    Answer the following question based only on the provided context:\n    <context>\n    {context}\n    </context>\n    '), additional_kwargs={})]

In [18]:
response = retrieval_chain.invoke(
    {
        "input":"LangSmith has two usage limits: total traces and extended"
    }
)

response

{'input': 'LangSmith has two usage limits: total traces and extended',
 'context': [Document(id='f9b7b843-b0e8-4a82-9a4b-453ee953a386', metadata={'source': 'https://docs.smith.langchain.com/administration/tutorials/manage_spend', 'title': 'Optimize tracing spend on LangSmith | 🦜️🛠️ LangSmith', 'description': 'Before diving into this content, it might be helpful to read the following:', 'language': 'en'}, page_content='Lets start by setting limits on our production usage, since that is where the majority of spend comes from.\nSetting a good total traces limit\u200b\nPicking the right "total traces" limit depends on the expected load of traces that you will send to LangSmith. You should\nclearly think about your assumptions before setting a limit.\nFor example:\n\nCurrent Load: Our gen AI application is called between 1.2-1.5 times per second, and each API request has a trace associated with it,\nmeaning we log around 100,000-130,000 traces per day\nExpected Growth in Load: We expect to 

In [19]:
response['answer']

'Based on the provided context, you can determine the appropriate "total traces" limit for LangSmith usage by considering the current load and expected growth. The current load is approximately 100,000-130,000 traces per day, and the expected growth is to double in size in the near future. Therefore, the limit can be calculated as follows:\n\n\\[ \\text{limit} = \\text{current\\_load\\_per\\_day} \\times \\text{expected\\_growth} \\times \\text{days/month} = 130,000 \\times 2 \\times 30 = 7,800,000 \\text{ traces/month} \\]\n\nThis calculation assumes a maximum of 130,000 traces per day and a monthly calculation period of 30 days.'

In [20]:
response['context']

[Document(id='f9b7b843-b0e8-4a82-9a4b-453ee953a386', metadata={'source': 'https://docs.smith.langchain.com/administration/tutorials/manage_spend', 'title': 'Optimize tracing spend on LangSmith | 🦜️🛠️ LangSmith', 'description': 'Before diving into this content, it might be helpful to read the following:', 'language': 'en'}, page_content='Lets start by setting limits on our production usage, since that is where the majority of spend comes from.\nSetting a good total traces limit\u200b\nPicking the right "total traces" limit depends on the expected load of traces that you will send to LangSmith. You should\nclearly think about your assumptions before setting a limit.\nFor example:\n\nCurrent Load: Our gen AI application is called between 1.2-1.5 times per second, and each API request has a trace associated with it,\nmeaning we log around 100,000-130,000 traces per day\nExpected Growth in Load: We expect to double in size in the near future.\n\nFrom these assumptions, we can do a quick bac