In [11]:
import os
from dotenv import load_dotenv
load_dotenv()

os.environ['OPENAI_API_KEY']=os.getenv("OPENAI_API_KEY")
## Langsmith Tracking
os.environ["LANGCHAIN_API_KEY"]=os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"]="true"
os.environ["LANGCHAIN_PROJECT"]=os.getenv("LANGCHAIN_PROJECT")

In [12]:
## Data ingestion -- From  the web site we need to scrape the content

from langchain_community.document_loaders import WebBaseLoader

In [13]:
loader = WebBaseLoader("https://docs.smith.langchain.com/administration/tutorials/manage_spend")

loader

<langchain_community.document_loaders.web_base.WebBaseLoader at 0x12113aaa0>

In [14]:
docs=loader.load()
docs

[Document(metadata={'source': 'https://docs.smith.langchain.com/administration/tutorials/manage_spend', 'title': 'Optimize tracing spend on LangSmith | 🦜️🛠️ LangSmith', 'description': 'Before diving into this content, it might be helpful to read the following:', 'language': 'en'}, page_content='\n\n\n\n\nOptimize tracing spend on LangSmith | 🦜️🛠️ LangSmith\n\n\n\n\n\n\n\n\nSkip to main contentOur Building Ambient Agents with LangGraph course is now available on LangChain Academy!API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppGet StartedObservabilityEvaluationPrompt EngineeringDeployment (LangGraph Platform)AdministrationTutorialsOptimize tracing spend on LangSmithHow-to GuidesSetupConceptual GuideSelf-hostingPricingReferenceCloud architecture and scalabilityAuthz and AuthnAuthentication methodsdata_formatsEvaluationDataset transformationsRegions FAQsdk_referenceAdministrationTutorialsOptimize tracing spend on LangSmithOn this pageOptimize tracing spend on LangSmith\nRecommended R

In [15]:
### Load Data --> Docs --> Divide our text into chunks --> vectors using vector embedding --> vector store DB

from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
docs = text_splitter.split_documents(docs)
docs

[Document(metadata={'source': 'https://docs.smith.langchain.com/administration/tutorials/manage_spend', 'title': 'Optimize tracing spend on LangSmith | 🦜️🛠️ LangSmith', 'description': 'Before diving into this content, it might be helpful to read the following:', 'language': 'en'}, page_content='Optimize tracing spend on LangSmith | 🦜️🛠️ LangSmith\n\n\n\n\n\n\n\n\nSkip to main contentOur Building Ambient Agents with LangGraph course is now available on LangChain Academy!API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppGet StartedObservabilityEvaluationPrompt EngineeringDeployment (LangGraph Platform)AdministrationTutorialsOptimize tracing spend on LangSmithHow-to GuidesSetupConceptual GuideSelf-hostingPricingReferenceCloud architecture and scalabilityAuthz and AuthnAuthentication methodsdata_formatsEvaluationDataset transformationsRegions FAQsdk_referenceAdministrationTutorialsOptimize tracing spend on LangSmithOn this pageOptimize tracing spend on LangSmith\nRecommended ReadingBefo

In [16]:
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()

In [17]:
from langchain_community.vectorstores import FAISS

vector_store_db = FAISS.from_documents(docs, embeddings)
vector_store_db

<langchain_community.vectorstores.faiss.FAISS at 0x12289e170>

In [20]:
## Query From a vector db
query="LangSmith has two usage limits: total traces and extended"
results = vector_store_db.similarity_search(query)
results[0].page_content

'Optimization 2: limit usage\u200b\nIn the previous section, we managed data retention settings to optimize existing spend. In this section, we will\nuse usage limits to prevent future overspend.\nLangSmith has two usage limits: total traces and extended retention traces. These correspond to the two metrics we\'ve\nbeen tracking on our usage graph. We can use these in tandem to have granular control over spend.\nTo set limits, we navigate back to Settings -> Usage and Billing -> Usage configuration. There is a table at the\nbottom of the page that lets you set usage limits per workspace. For each workspace, the two limits appear, along\nwith a cost estimate:\n\nLets start by setting limits on our production usage, since that is where the majority of spend comes from.\nSetting a good total traces limit\u200b\nPicking the right "total traces" limit depends on the expected load of traces that you will send to LangSmith. You should\nclearly think about your assumptions before setting a lim

In [21]:
from langchain_openai import ChatOpenAI
llm=ChatOpenAI(model="gpt-4o")

In [22]:
## Retrieval Chain, Document Chain

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import ChatPromptTemplate

prompt=ChatPromptTemplate.from_template(
    """
    Answer the following question based only on the provided context
    <context>
    {context}
    </context>
    """
    )

document_chain=create_stuff_documents_chain(llm, prompt)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\n    Answer the following question based only on the provided context\n    <context>\n    {context}\n    </context>\n    '), additional_kwargs={})])
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x125df5cc0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x125df6050>, root_client=<openai.OpenAI object at 0x125df5fc0>, root_async_client=<openai.AsyncOpenAI object at 0x125df7040>, model_name='gpt-4o', model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputParser(), kwargs={}, config={'run_name'

In [24]:
from langchain_core.documents import Document
document_chain.invoke({
    "context": [Document(page_content="LangSmith has two usage limits: total traces and extended retention traces")],
    "input": 'LangSmith has two usage limits: total traces and extended'
})


'LangSmith has two usage limits: one pertaining to the total number of traces and another concerning extended retention traces.'

However, we want the documents to first come from the retriever we just set up. That way, we can use the retriever to dynamically select the most relevant documents and pass those in for a given question.

In [26]:
### Input ---> Retriever ---> VectorstoreDB

vector_store_db

<langchain_community.vectorstores.faiss.FAISS at 0x12289e170>

In [29]:
retriever = vector_store_db.as_retriever()
from langchain.chains import create_retrieval_chain
retrieval_chain = create_retrieval_chain(retriever, document_chain)

In [30]:
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x12289e170>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\n    Answer the following question based only on the provided context\n    <context>\n    {context}\n    </context>\n    '), additional_kwargs={})])
      

In [35]:
## Get the response from LLM

response = retrieval_chain.invoke({
    "input": 'LangSmith has two usage limits: total traces and extended'
})
response['answer']

'What are the two usage limits provided by LangSmith mentioned in the context? \n\nLangSmith provides two usage limits: "total traces" and "extended retention traces." These limits correspond to the metrics tracked on the usage graph and help in managing and controlling spend per workspace.'

In [36]:
response

{'input': 'LangSmith has two usage limits: total traces and extended',
 'context': [Document(id='33781cce-ea15-4b7c-8592-cd2aad649e59', metadata={'source': 'https://docs.smith.langchain.com/administration/tutorials/manage_spend', 'title': 'Optimize tracing spend on LangSmith | 🦜️🛠️ LangSmith', 'description': 'Before diving into this content, it might be helpful to read the following:', 'language': 'en'}, page_content='Optimization 2: limit usage\u200b\nIn the previous section, we managed data retention settings to optimize existing spend. In this section, we will\nuse usage limits to prevent future overspend.\nLangSmith has two usage limits: total traces and extended retention traces. These correspond to the two metrics we\'ve\nbeen tracking on our usage graph. We can use these in tandem to have granular control over spend.\nTo set limits, we navigate back to Settings -> Usage and Billing -> Usage configuration. There is a table at the\nbottom of the page that lets you set usage limits