In [1]:
# `os` gives later cells access to the process environment (os.getenv / os.environ).
import os
from dotenv import load_dotenv
# python-dotenv: load variables from a `.env` file into the process environment.
# As the last expression of the cell, its boolean return value is displayed as
# the cell output.
load_dotenv()

True

In [2]:
# Validate credentials up front.
# The previous form, `os.environ[name] = os.getenv(name)`, was a no-op when the
# variable existed and raised an opaque "TypeError: str expected, not NoneType"
# when it did not. Checking explicitly gives a message that names the culprit.
_required_env = ("OPENAI_API_KEY", "LANGCHAIN_API_KEY")
_missing_env = [name for name in _required_env if not os.getenv(name)]
if _missing_env:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_env)
        + ". Define them in your .env file or shell before running this notebook."
    )

# Enable LangSmith tracing for the LangChain calls made in this kernel.
os.environ["LANGCHAIN_TRACING_V2"] = "true"

# LANGCHAIN_PROJECT is deliberately not required: when it is already set (for
# example via .env) it stays as-is, and when it is absent the notebook no longer
# crashes here. NOTE(review): LangSmith is documented to fall back to its
# "default" project in that case - confirm for the installed version.

In [4]:
# Document loader: fetch the LangSmith "manage spend" tutorial page over HTTP.
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader(
    web_path="https://docs.smith.langchain.com/administration/tutorials/manage_spend"
)
docs = loader.load()

In [6]:
# Chunking: break the loaded page into pieces of roughly 1000 characters,
# with a 200-character overlap between consecutive pieces.
from langchain_text_splitters import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
final_dos = splitter.split_documents(docs)

In [7]:
# Embedding model used to vectorise the chunks (library default settings).
# Presumably authenticates with OPENAI_API_KEY from the environment.
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()

In [8]:
# Vector store: embed every chunk and index the resulting vectors with FAISS.
from langchain_community.vectorstores import FAISS

db = FAISS.from_documents(
    documents=final_dos,
    embedding=embeddings,
)

In [9]:
# Sanity check of the index: fetch the chunks most similar to a sample query
# and display them as the cell output.
query = "LangSmith has two usage limits: total traces and extended retention traces"
result = db.similarity_search(query=query)
result

[Document(id='92ba4cb0-23cd-45cb-9712-945b218574f7', metadata={'source': 'https://docs.smith.langchain.com/administration/tutorials/manage_spend', 'title': 'Optimize tracing spend on LangSmith | 🦜️🛠️ LangSmith', 'description': 'Before diving into this content, it might be helpful to read the following:', 'language': 'en'}, page_content="The first metric tracks all traces that you send to LangSmith. The second tracks all traces that also have our Extended 400 Day Data Retention.\nFor more details, see our data retention conceptual docs. Notice that these graphs look\nidentical, which will come into play later in the tutorial.\nLangSmith Traces usage is measured per workspace, because workspaces often represent development environments (as in our example),\nor teams within an organization. As a LangSmith administrator, we want to understand spend granularly per each of these units. In\nthis case where we just want to cut spend, we can focus on the environment responsible for the majority

In [10]:
# Chat model that generates the final answer (library default settings).
from langchain_openai import ChatOpenAI

llm = ChatOpenAI()

In [12]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# Prompt for the answer-generation step.
#   {context} - replaced by the stuff-documents chain with the retrieved chunks.
#   {input}   - the user's question; create_retrieval_chain forwards it to the
#               combine-documents chain under this key.
# The earlier template had no {input} placeholder, so the model was asked to
# "answer the following question" without ever being shown one and could only
# summarise the context.
# Note: document_chain now needs both "context" and "input" when invoked directly.
promt = ChatPromptTemplate.from_template(
    """
Answer the following question based only on the provided context:
<context>
{context}
</context>

Question: {input}
"""
)

# Chain that formats the documents into the prompt and sends it to the LLM.
document_chain = create_stuff_documents_chain(llm, promt)

In [13]:
# Retrieval chain: the retriever pulls matching chunks from the vector store
# and hands them to document_chain to produce the answer.
from langchain.chains import create_retrieval_chain

retriver = db.as_retriever()
retreival_chain = create_retrieval_chain(
    retriever=retriver,
    combine_docs_chain=document_chain,
)

In [15]:
# Run the whole pipeline for one query; the result is a dict holding the
# original input, the retrieved context documents and the generated answer.
response = retreival_chain.invoke(
    {"input": "LangSmith has two usage limits: total traces and extended"}
)
response

{'input': 'LangSmith has two usage limits: total traces and extended',
 'context': [Document(id='7d78396e-b31d-4467-b65d-3f9e7f9b14af', metadata={'source': 'https://docs.smith.langchain.com/administration/tutorials/manage_spend', 'title': 'Optimize tracing spend on LangSmith | 🦜️🛠️ LangSmith', 'description': 'Before diving into this content, it might be helpful to read the following:', 'language': 'en'}, page_content='Lets start by setting limits on our production usage, since that is where the majority of spend comes from.\nSetting a good total traces limit\u200b\nPicking the right "total traces" limit depends on the expected load of traces that you will send to LangSmith. You should\nclearly think about your assumptions before setting a limit.\nFor example:\n\nCurrent Load: Our gen AI application is called between 1.2-1.5 times per second, and each API request has a trace associated with it,\nmeaning we log around 100,000-130,000 traces per day\nExpected Growth in Load: We expect to 

In [16]:
# Show only the generated answer text from the chain result.
response["answer"]

'The majority of spend comes from production usage, specifically from sending traces to LangSmith. In order to set a good total traces limit, it is important to consider the current load of traces being sent and the expected growth in load. By calculating the limit based on these factors, we can determine the appropriate limit for our usage. It is also important to understand current usage by utilizing tools such as the Usage Graph and Invoices provided by LangSmith. The workspace ID c27dd32c-7c80-4e8c-acde-bfcb67a90ab2 is highlighted as the workspace with the vast majority of usage, and focusing on this environment can lead to the greatest cost savings.'