# Experiments

### Setup

In [None]:
# You can set them inline
import os

# Fill in the two API keys before running; the remaining entries switch on
# LangSmith tracing and choose the project the traces are filed under.
os.environ.update(
    {
        "OPENAI_API_KEY": "",
        "LANGSMITH_API_KEY": "",
        "LANGSMITH_TRACING": "true",
        "LANGSMITH_PROJECT": "langsmith-academy",
    }
)

In [1]:
# Or you can use a .env file
from dotenv import load_dotenv
# Load variables from the .env file two directories above this notebook.
# NOTE(review): override=True presumably lets values from the file replace
# variables that are already set in the environment — confirm.
load_dotenv(dotenv_path="../../.env", override=True)

True

Here is the RAG Application that we've been working with throughout this course

In [5]:
import os
import tempfile
from typing import List

import nest_asyncio
from langchain_community.document_loaders.sitemap import SitemapLoader
from langchain_community.vectorstores import SKLearnVectorStore
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langsmith import traceable
from openai import OpenAI
from openai.types.chat import ChatCompletion

# TODO: Configure this model!
# Chat model name: sent to OpenAI as `model` and attached to LLM traces as
# `ls_model_name`.
MODEL_NAME = "gpt-3.5-turbo"
# Provider label attached to LLM traces as `ls_provider`.
MODEL_PROVIDER = "openai"
# NOTE(review): APP_VERSION is not referenced anywhere in the visible code.
APP_VERSION = 1.0
# System message placed ahead of the retrieved context and the user's question.
RAG_SYSTEM_PROMPT = """You are an assistant for question-answering tasks. 
Use the following pieces of retrieved context to answer the latest question in the conversation. 
If you don't know the answer, just say that you don't know. 
Use three sentences maximum and keep the answer concise.
"""

# Single client instance shared by every chat completion request.
# NOTE(review): presumably picks up OPENAI_API_KEY from the environment — confirm.
openai_client = OpenAI()

def get_vector_db_retriever():
    """Build a retriever over the LangSmith docs, reusing a cached index if present.

    The vector store is persisted as ``union.parquet`` in the system temp
    directory. When that file already exists the store is opened from it;
    otherwise the LangSmith documentation sitemap is loaded, split into
    chunks, embedded, and written to that path before the retriever is
    returned.

    Returns:
        The result of ``as_retriever`` on the loaded or freshly built store.
    """
    store_path = os.path.join(tempfile.gettempdir(), "union.parquet")
    embeddings = OpenAIEmbeddings(chunk_size=100)

    if os.path.exists(store_path):
        # A persisted index is available: reopen it instead of re-crawling.
        store = SKLearnVectorStore(
            embedding=embeddings,
            persist_path=store_path,
            serializer="parquet",
        )
    else:
        # No cached index yet: crawl the docs sitemap and build one.
        sitemap_loader = SitemapLoader(
            web_path="https://docs.smith.langchain.com/sitemap.xml",
            continue_on_failure=True,
        )
        raw_docs = sitemap_loader.load()

        splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
            chunk_size=500,
            chunk_overlap=0,
        )
        chunks = splitter.split_documents(raw_docs)

        store = SKLearnVectorStore.from_documents(
            documents=chunks,
            embedding=embeddings,
            persist_path=store_path,
            serializer="parquet",
        )
        store.persist()

    # NOTE(review): lambda_mult is passed directly to as_retriever here;
    # confirm it actually influences retrieval in the installed version.
    return store.as_retriever(lambda_mult=0)

# NOTE(review): nest_asyncio.apply() is called before the retriever is built,
# presumably so the sitemap loader can run inside the notebook's already
# running event loop — confirm. Keep this ordering.
nest_asyncio.apply()
# Module-level retriever shared by `retrieve_documents`; building it may crawl
# and embed the docs on the first run (see get_vector_db_retriever).
retriever = get_vector_db_retriever()

@traceable(run_type="chain")
def retrieve_documents(question: str):
    """Return documents fetched from the vector store for the user's question.

    Args:
        question: The user's question, passed to the retriever as the query.

    Returns:
        Whatever ``retriever.invoke`` returns for the question; callers in
        this file read ``page_content`` from each returned item.
    """
    return retriever.invoke(question)

@traceable(run_type="chain")
def generate_response(question: str, documents):
    """Format the retrieved documents into a prompt and call ``call_openai``.

    Args:
        question: The user's question.
        documents: Iterable of retrieved documents; each item must expose a
            ``page_content`` attribute.

    Returns:
        The value returned by ``call_openai`` for the assembled messages.
    """
    # Concatenate the text of all documents, separated by blank lines.
    formatted_docs = "\n\n".join(doc.page_content for doc in documents)
    messages = [
        {
            "role": "system",
            "content": RAG_SYSTEM_PROMPT,
        },
        {
            "role": "user",
            "content": f"Context: {formatted_docs} \n\n Question: {question}",
        },
    ]
    return call_openai(messages)

@traceable(
    run_type="llm",
    metadata={
        "ls_provider": MODEL_PROVIDER,
        "ls_model_name": MODEL_NAME,
    },
)
def call_openai(messages: List[dict]) -> ChatCompletion:
    """Return the chat completion output from OpenAI.

    Args:
        messages: Chat messages (dicts with ``role`` and ``content``) to send.

    Returns:
        The full completion object from ``chat.completions.create`` (not just
        the text); callers read ``.choices[0].message.content`` from it.
    """
    return openai_client.chat.completions.create(
        model=MODEL_NAME,
        messages=messages,
    )

@traceable(run_type="chain")
def langsmith_rag(question: str):
    """Answer a question with the RAG pipeline and return the model's text.

    Calls ``retrieve_documents`` to fetch documents, then
    ``generate_response`` to produce a completion grounded in them.

    Args:
        question: The user's question.

    Returns:
        The message content of the first choice in the model response.
    """
    documents = retrieve_documents(question)
    response = generate_response(question, documents)
    return response.choices[0].message.content


### Experiment

Here is a code snippet that should look similar to what you see from the starter code!

There are a few important components here.

1. We have defined an Evaluator
2. We use a target function to map each dataset example's inputs (a dict) to the input that our function `langsmith_rag` takes (a str)

In [6]:
from langsmith import evaluate, Client

# LangSmith client; used in later cells to list specific examples of the dataset.
client = Client()
# Name of the LangSmith dataset every experiment in this notebook runs against.
dataset_name = "Rag Application Golden Dataset"

def is_concise_enough(reference_outputs: dict, outputs: dict) -> dict:
    """Score whether the answer is shorter than 1.5x the reference answer.

    Args:
        reference_outputs: Reference outputs; ``"output"`` holds the expected answer.
        outputs: Outputs of the run; ``"output"`` holds the generated answer.

    Returns:
        ``{"key": "is_concise", "score": 1}`` when the generated answer's
        length is strictly below 1.5 times the reference length, otherwise
        the same dict with a score of ``0``.
    """
    length_budget = 1.5 * len(reference_outputs["output"])
    within_budget = len(outputs["output"]) < length_budget
    return {"key": "is_concise", "score": 1 if within_budget else 0}

def target_function(inputs: dict):
    """Adapt a dataset example's inputs to the string ``langsmith_rag`` expects.

    Args:
        inputs: Example inputs; must contain a ``"question"`` entry.

    Returns:
        The value returned by ``langsmith_rag`` for that question.
    """
    question = inputs["question"]
    return langsmith_rag(question)

# Run the application over every example in the dataset and score each answer.
# The experiment is labelled with MODEL_NAME rather than a hard-coded string so
# the label always matches the model the application actually called.
evaluate(
    target_function,
    data=dataset_name,
    evaluators=[is_concise_enough],
    experiment_prefix=MODEL_NAME,
)

View the evaluation results for experiment: 'gpt-3.5-turbo-822b5335' at:
https://eu.smith.langchain.com/o/05f40a35-2d5e-4cd4-9466-b540440921d2/datasets/41364ff4-0903-466e-95f3-b8e8bcf15805/compare?selectedSessions=131105b6-eba8-4169-9786-4d18812bedd8




11it [00:17,  1.62s/it]


Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,How do I set up tracing to LangSmith if I'm us...,To set up tracing to LangSmith while using Lan...,,To set up tracing with LangSmith while using L...,1,1.411467,47ea8bf4-a7fe-4ef3-8243-1dcd298701e9,6649b91d-0cc5-4901-b102-e7eee961319e
1,How can I trace with the @traceable decorator?,You can trace with the @traceable decorator by...,,To trace with the @traceable decorator in Pyth...,1,1.073368,70ade6fe-afe6-4f72-a04d-cc26a92424a4,f2ebad7e-8622-4e5d-8efc-db5af028c967
2,How do I create user feedback with the LangSmi...,To create user feedback using the LangSmith SD...,,To create user feedback with the LangSmith SDK...,1,1.812511,7b8a9ae4-26d1-40b5-890a-41c7ceffb6b5,495b411c-7021-4554-9270-4dfd69ae2c21
3,What testing capabilities does LangSmith have?,LangSmith offers prompt testing as part of its...,,LangSmith offers capabilities for creating dat...,1,1.157647,910f0e40-e0e3-4479-ad9a-7eaf4edf32a9,35accf3e-5aa3-4e42-a407-118933333903
4,Does LangSmith support offline evaluation?,"Yes, LangSmith supports offline evaluation. Of...",,"Yes, LangSmith supports offline evaluation thr...",1,0.975399,c1e9bb37-d2fb-41e5-a2e7-ae85d51c6ab2,ea3414ba-a58a-4981-a846-d4e3bbdbee91
5,Can LangSmith be used to evaluate agents?,"Yes, LangSmith can be used for evaluations of ...",,"Yes, LangSmith can be used to evaluate agents....",0,1.850759,630842ec-e521-40e7-adec-f338377d45b3,67973fb8-bc1a-42df-adf9-295cce51f503
6,Can LangSmith be used for finetuning and model...,"Yes, LangSmith can be used for evaluation and ...",,"Yes, LangSmith can be used for fine-tuning and...",0,1.262374,7b6a555a-7ce6-4905-b191-1827ff4c31a2,67f7dd8d-1cd9-4582-b637-b348cfaa0b3d
7,Does LangSmith support online evaluation?,"Yes, LangSmith supports online evaluation, whi...",,"Yes, LangSmith supports online evaluation as a...",0,2.291492,8cbc4fdb-a176-4a73-8440-7c32299c8eb6,c5d4c3c6-ffa6-4c35-83d5-65983c457601
8,How do I pass metadata in with @traceable?,"To pass metadata with @traceable, you can use ...",,You can pass metadata with the @traceable deco...,1,1.650393,b5646fed-27d3-4135-9469-0880a0dc236e,03857a9e-2799-488c-aee1-f0f48b5ae141
9,How do I set up tracing to LangSmith if I'm us...,To set up tracing to LangSmith while using Lan...,,To set up tracing to LangSmith while using Lan...,0,2.95803,d25a2c21-dc10-46f2-af77-6e59c95dff5b,a8523794-d0b8-42a0-8658-c4805df9a487


### Modifying your Application

Now, let's change our model to `gpt-3.5-turbo` (update `MODEL_NAME` in the application cell above and re-run that cell) and see how it performs!

Make this change, and then run this code snippet!

In [7]:
from langsmith import evaluate, Client
from langsmith.schemas import Example, Run

def target_function(inputs: dict):
    """Feed the example's ``"question"`` to ``langsmith_rag`` and return its answer.

    Args:
        inputs: Example inputs; must contain a ``"question"`` entry.

    Returns:
        The value returned by ``langsmith_rag`` for that question.
    """
    question = inputs["question"]
    return langsmith_rag(question)

# Re-run the experiment after changing the model. Using MODEL_NAME as the
# prefix keeps the experiment label in sync with whichever model is configured,
# instead of relying on a hand-edited string.
evaluate(
    target_function,
    data=dataset_name,
    evaluators=[is_concise_enough],
    experiment_prefix=MODEL_NAME,
)

View the evaluation results for experiment: 'gpt-3.5-turbo-52637291' at:
https://eu.smith.langchain.com/o/05f40a35-2d5e-4cd4-9466-b540440921d2/datasets/41364ff4-0903-466e-95f3-b8e8bcf15805/compare?selectedSessions=0babbaa1-cb4e-4095-80c0-176cf445124f




11it [00:17,  1.61s/it]


Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,How do I set up tracing to LangSmith if I'm us...,To trace your application to LangSmith when us...,,To set up tracing with LangSmith while using L...,1,1.52394,47ea8bf4-a7fe-4ef3-8243-1dcd298701e9,b1becff0-2017-4a3d-bea2-5ee90350b5e7
1,How can I trace with the @traceable decorator?,"To trace with the @traceable decorator, you ne...",,To trace with the @traceable decorator in Pyth...,1,1.747483,70ade6fe-afe6-4f72-a04d-cc26a92424a4,0bbad346-c0ac-4549-abf9-2c720d390b28
2,How do I create user feedback with the LangSmi...,To create user feedback using the LangSmith SD...,,To create user feedback with the LangSmith SDK...,1,2.190348,7b8a9ae4-26d1-40b5-890a-41c7ceffb6b5,87e76871-6669-453b-b1e7-6cbed5a0a607
3,What testing capabilities does LangSmith have?,"LangSmith offers prompt testing, allowing user...",,LangSmith offers capabilities for creating dat...,1,1.198164,910f0e40-e0e3-4479-ad9a-7eaf4edf32a9,84dc12ed-f361-4320-b6a8-03615ca4ccbb
4,Does LangSmith support offline evaluation?,"Yes, LangSmith supports offline evaluation. Of...",,"Yes, LangSmith supports offline evaluation thr...",1,1.350456,c1e9bb37-d2fb-41e5-a2e7-ae85d51c6ab2,1bea0209-a21a-474e-8175-ce96710ee073
5,Can LangSmith be used to evaluate agents?,"Yes, LangSmith can be used to evaluate agents ...",,"Yes, LangSmith can be used to evaluate agents....",1,1.732091,630842ec-e521-40e7-adec-f338377d45b3,194b1e4f-8185-41f1-a0a5-15bf485d4a46
6,Can LangSmith be used for finetuning and model...,"Yes, LangSmith can be used for finetuning and ...",,"Yes, LangSmith can be used for fine-tuning and...",0,1.549423,7b6a555a-7ce6-4905-b191-1827ff4c31a2,34f4194f-5af9-4b0a-a055-7ed3e8d30b64
7,Does LangSmith support online evaluation?,"Yes, LangSmith supports online evaluation whic...",,"Yes, LangSmith supports online evaluation as a...",0,1.399182,8cbc4fdb-a176-4a73-8440-7c32299c8eb6,754a6230-3615-4adc-9ba1-63aedb81d4e6
8,How do I pass metadata in with @traceable?,"To pass metadata with @traceable in LangSmith,...",,You can pass metadata with the @traceable deco...,1,1.240393,b5646fed-27d3-4135-9469-0880a0dc236e,ed4dbfdd-ea1c-4963-bbd7-67a75638460e
9,How do I set up tracing to LangSmith if I'm us...,To set up tracing to LangSmith when using Lang...,,To set up tracing to LangSmith while using Lan...,0,1.44805,d25a2c21-dc10-46f2-af77-6e59c95dff5b,c0f0e80b-f839-4eb8-a690-e780ccc8f755


### Running over Different pieces of Data

##### Dataset Version

You can run an experiment on a specific version of a dataset in the SDK by passing the `as_of` parameter to `list_examples`.

Let's try running on just our initial dataset.

In [8]:
# Fetch only the examples belonging to the "initial dataset" version.
initial_version_examples = client.list_examples(
    dataset_name=dataset_name,
    as_of="initial dataset",  # We use as_of to specify a version
)

# Evaluate against that version instead of the dataset's current contents.
evaluate(
    target_function,
    data=initial_version_examples,
    evaluators=[is_concise_enough],
    experiment_prefix="initial dataset version",
)

View the evaluation results for experiment: 'initial dataset version-80aa80a1' at:
https://eu.smith.langchain.com/o/05f40a35-2d5e-4cd4-9466-b540440921d2/datasets/41364ff4-0903-466e-95f3-b8e8bcf15805/compare?selectedSessions=d5550d62-580a-40f7-b7f1-e2db4793f1ee




10it [00:14,  1.47s/it]


Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,Can LangSmith be used to evaluate agents?,"Yes, LangSmith can be used to evaluate agents ...",,"Yes, LangSmith can be used to evaluate agents....",0,1.65577,630842ec-e521-40e7-adec-f338377d45b3,2a9407d9-8ad2-444a-ac91-a375d7b0b517
1,How can I trace with the @traceable decorator?,"To trace with the @traceable decorator, you ca...",,To trace with the @traceable decorator in Pyth...,1,1.101816,70ade6fe-afe6-4f72-a04d-cc26a92424a4,3769e6bb-7f93-49b4-a9f2-e55118be86d8
2,Can LangSmith be used for finetuning and model...,"Yes, LangSmith supports two types of evaluatio...",,"Yes, LangSmith can be used for fine-tuning and...",0,1.545229,7b6a555a-7ce6-4905-b191-1827ff4c31a2,92907c20-681f-44f6-8751-d68da20c2027
3,How do I create user feedback with the LangSmi...,To create user feedback with the LangSmith SDK...,,To create user feedback with the LangSmith SDK...,1,1.119963,7b8a9ae4-26d1-40b5-890a-41c7ceffb6b5,132de409-aac1-405d-bfb0-ba91462c3e06
4,Does LangSmith support online evaluation?,"Yes, LangSmith supports online evaluations for...",,"Yes, LangSmith supports online evaluation as a...",0,1.872792,8cbc4fdb-a176-4a73-8440-7c32299c8eb6,96eaf0dc-bbb1-48c9-a911-8bfdca36189c
5,What testing capabilities does LangSmith have?,LangSmith offers prompt testing capabilities f...,,LangSmith offers capabilities for creating dat...,1,1.498685,910f0e40-e0e3-4479-ad9a-7eaf4edf32a9,62bb834b-5602-424a-bb31-ebf70f8ed570
6,How do I pass metadata in with @traceable?,"To pass metadata with @traceable, you can add ...",,You can pass metadata with the @traceable deco...,1,1.130911,b5646fed-27d3-4135-9469-0880a0dc236e,a74f7260-d63b-4aee-884e-d0eac137626d
7,Does LangSmith support offline evaluation?,"Yes, LangSmith supports offline evaluation. Of...",,"Yes, LangSmith supports offline evaluation thr...",1,1.109829,c1e9bb37-d2fb-41e5-a2e7-ae85d51c6ab2,a93e6ab1-5eb6-4d75-8dc7-ad390f95d5a8
8,How do I set up tracing to LangSmith if I'm us...,To set up tracing to LangSmith when using Lang...,,To set up tracing to LangSmith while using Lan...,0,1.588179,d25a2c21-dc10-46f2-af77-6e59c95dff5b,f0306ea7-40a0-4be1-a61e-ebb1c195a6f4
9,What is LangSmith used for in three sentences?,"LangSmith is used for developing, debugging, a...",,LangSmith is a platform designed for the devel...,1,1.689152,e2e25f43-ec10-4aa1-8eb6-0fa4847e375d,5397f42a-54be-4db7-9f31-f82ca7a9753f


##### Dataset Split

You can run an experiment on a specific split of your dataset. Let's try running on the Crucial Examples split.

In [9]:
# Split names must match the names defined on the dataset exactly
# (this dataset's split is spelled "crutial examples").
split_names = ["crutial examples"]  # We pass in a list of Splits
split_examples = client.list_examples(
    dataset_name=dataset_name,
    splits=split_names,
)

# Evaluate only the examples that belong to the selected split(s).
evaluate(
    target_function,
    data=split_examples,
    evaluators=[is_concise_enough],
    experiment_prefix="Crucial Examples split",
)

View the evaluation results for experiment: 'Crucial Examples split-98294438' at:
https://eu.smith.langchain.com/o/05f40a35-2d5e-4cd4-9466-b540440921d2/datasets/41364ff4-0903-466e-95f3-b8e8bcf15805/compare?selectedSessions=43b37db5-0668-4680-a3c5-8279b9f5baf8




5it [00:07,  1.45s/it]


Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,How do I set up tracing to LangSmith if I'm us...,To set up tracing for LangSmith when using Lan...,,To set up tracing with LangSmith while using L...,1,1.419638,47ea8bf4-a7fe-4ef3-8243-1dcd298701e9,70da55eb-41be-45af-8df4-ca8e616ee67c
1,How can I trace with the @traceable decorator?,To trace with the @traceable decorator in Pyth...,,To trace with the @traceable decorator in Pyth...,1,1.137726,70ade6fe-afe6-4f72-a04d-cc26a92424a4,31eff6a9-ca4c-4dc6-89b0-bf15e1b5471c
2,How do I create user feedback with the LangSmi...,"To log user feedback using the LangSmith SDK, ...",,To create user feedback with the LangSmith SDK...,1,1.943951,7b8a9ae4-26d1-40b5-890a-41c7ceffb6b5,825b3659-a1ff-4de9-b0b1-4db85359a5da
3,What testing capabilities does LangSmith have?,"LangSmith offers prompt testing capabilities, ...",,LangSmith offers capabilities for creating dat...,1,1.223704,910f0e40-e0e3-4479-ad9a-7eaf4edf32a9,da2cc4d1-cd14-4dc2-ab52-9b7fcd5166d4
4,Does LangSmith support offline evaluation?,"Yes, LangSmith supports offline evaluation. Of...",,"Yes, LangSmith supports offline evaluation thr...",0,1.229976,c1e9bb37-d2fb-41e5-a2e7-ae85d51c6ab2,04493f03-e176-4b93-b50c-38e6f018bdcc


##### Specific Data Points

You can specify individual data points to run an experiment over as well

In [10]:
# We pass in a specific list of example_ids
# TODO: You will need to paste in your own example ids for this to work!
selected_example_ids = [
    "47ea8bf4-a7fe-4ef3-8243-1dcd298701e9",
    "630842ec-e521-40e7-adec-f338377d45b3",
]
selected_examples = client.list_examples(
    dataset_name=dataset_name,
    example_ids=selected_example_ids,
)

# Evaluate only the hand-picked examples.
evaluate(
    target_function,
    data=selected_examples,
    evaluators=[is_concise_enough],
    experiment_prefix="two specific example ids",
)

View the evaluation results for experiment: 'two specific example ids-0f9adecb' at:
https://eu.smith.langchain.com/o/05f40a35-2d5e-4cd4-9466-b540440921d2/datasets/41364ff4-0903-466e-95f3-b8e8bcf15805/compare?selectedSessions=999b6b86-f1fb-4841-a7ca-6b67e5146bcd




2it [00:03,  1.80s/it]


Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,How do I set up tracing to LangSmith if I'm us...,To set up tracing to LangSmith when using Lang...,,To set up tracing with LangSmith while using L...,1,1.962122,47ea8bf4-a7fe-4ef3-8243-1dcd298701e9,4395bfc4-708e-4a91-843e-f7dad54e4b0e
1,Can LangSmith be used to evaluate agents?,"Yes, LangSmith can be used to evaluate agents ...",,"Yes, LangSmith can be used to evaluate agents....",0,1.331448,630842ec-e521-40e7-adec-f338377d45b3,e47e0942-ecd8-4ee2-be09-fa344702a764


### Other Parameters

##### Repetitions

You can run an experiment several times to make sure you have consistent results

In [11]:
# How many times each example is run within the experiment.
repetitions = 2  # This field defaults to 1

evaluate(
    target_function,
    data=dataset_name,
    evaluators=[is_concise_enough],
    num_repetitions=repetitions,
    experiment_prefix="two repetitions",
)

View the evaluation results for experiment: 'two repetitions-62a41054' at:
https://eu.smith.langchain.com/o/05f40a35-2d5e-4cd4-9466-b540440921d2/datasets/41364ff4-0903-466e-95f3-b8e8bcf15805/compare?selectedSessions=aa80a613-fc57-4f8c-aa7c-f1f3a9bcd57f




22it [00:32,  1.47s/it]


Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,How do I set up tracing to LangSmith if I'm us...,To set up tracing to LangSmith when using Lang...,,To set up tracing with LangSmith while using L...,1,1.66699,47ea8bf4-a7fe-4ef3-8243-1dcd298701e9,9952e38b-d130-4286-bac2-5cb8d5fbb501
1,How can I trace with the @traceable decorator?,To trace with the @traceable decorator in Pyth...,,To trace with the @traceable decorator in Pyth...,1,1.221715,70ade6fe-afe6-4f72-a04d-cc26a92424a4,778f0b8c-9b81-403d-abbc-b721d331072d
2,How do I create user feedback with the LangSmi...,To create user feedback with the LangSmith SDK...,,To create user feedback with the LangSmith SDK...,1,1.826089,7b8a9ae4-26d1-40b5-890a-41c7ceffb6b5,2f3f4e93-b65c-4799-be40-9fd6623eaf10
3,What testing capabilities does LangSmith have?,LangSmith offers prompt testing capabilities w...,,LangSmith offers capabilities for creating dat...,1,1.123871,910f0e40-e0e3-4479-ad9a-7eaf4edf32a9,b241f8de-94c0-401e-886c-6db3465deccb
4,Does LangSmith support offline evaluation?,"Yes, LangSmith supports offline evaluation whe...",,"Yes, LangSmith supports offline evaluation thr...",1,1.13931,c1e9bb37-d2fb-41e5-a2e7-ae85d51c6ab2,66f53999-9e86-41a1-8e76-c4b77079a0e2
5,Can LangSmith be used to evaluate agents?,"Yes, LangSmith can be used to evaluate agents ...",,"Yes, LangSmith can be used to evaluate agents....",0,1.970588,630842ec-e521-40e7-adec-f338377d45b3,fed6a7d1-60f7-4bbc-b0b6-210d39bcce51
6,Can LangSmith be used for finetuning and model...,LangSmith can be used for fine-tuning and mode...,,"Yes, LangSmith can be used for fine-tuning and...",0,1.810288,7b6a555a-7ce6-4905-b191-1827ff4c31a2,da5a9871-44a4-450b-ac7b-da0a78f7c8ea
7,Does LangSmith support online evaluation?,"Yes, LangSmith supports online evaluations whi...",,"Yes, LangSmith supports online evaluation as a...",0,1.840066,8cbc4fdb-a176-4a73-8440-7c32299c8eb6,e3ad4ae3-688a-47fd-8a46-29566921ea8f
8,How do I pass metadata in with @traceable?,"To pass metadata in with @traceable, you can p...",,You can pass metadata with the @traceable deco...,1,1.050038,b5646fed-27d3-4135-9469-0880a0dc236e,9827f441-fee4-43bf-a4ae-c7d96e54aa77
9,How do I set up tracing to LangSmith if I'm us...,To set up tracing to LangSmith when using Lang...,,To set up tracing to LangSmith while using Lan...,0,1.724533,d25a2c21-dc10-46f2-af77-6e59c95dff5b,c5e91671-491d-44b0-8fd5-ae639e5c288e


##### Concurrency
You can also kick off concurrent threads of execution to make your experiments finish faster!

In [12]:
# Upper bound on how many examples are processed at the same time.
concurrency_limit = 3  # This defaults to None, so this is an improvement!

evaluate(
    target_function,
    data=dataset_name,
    evaluators=[is_concise_enough],
    max_concurrency=concurrency_limit,
    experiment_prefix="concurrency",
)

View the evaluation results for experiment: 'concurrency-ad538f9e' at:
https://eu.smith.langchain.com/o/05f40a35-2d5e-4cd4-9466-b540440921d2/datasets/41364ff4-0903-466e-95f3-b8e8bcf15805/compare?selectedSessions=130acbab-1050-420e-9c88-16fbb237f4f4




11it [00:05,  1.85it/s]


Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,How do I create user feedback with the LangSmi...,To create user feedback using the LangSmith SD...,,To create user feedback with the LangSmith SDK...,1,1.115952,7b8a9ae4-26d1-40b5-890a-41c7ceffb6b5,1fac70db-4580-4ee0-b997-cc7bee97ef30
1,How can I trace with the @traceable decorator?,"To trace with the @traceable decorator, you ca...",,To trace with the @traceable decorator in Pyth...,1,1.245757,70ade6fe-afe6-4f72-a04d-cc26a92424a4,f466bb15-dcba-4e7e-8350-d1e46863b07f
2,How do I set up tracing to LangSmith if I'm us...,To set up tracing to LangSmith when using Lang...,,To set up tracing with LangSmith while using L...,1,1.407075,47ea8bf4-a7fe-4ef3-8243-1dcd298701e9,08967697-7e3f-4af1-a1b5-f470e8e64f54
3,Does LangSmith support offline evaluation?,"Yes, LangSmith supports offline evaluation. Of...",,"Yes, LangSmith supports offline evaluation thr...",1,1.093075,c1e9bb37-d2fb-41e5-a2e7-ae85d51c6ab2,34c72c5a-18af-448a-bd41-ec8062734aaa
4,What testing capabilities does LangSmith have?,"LangSmith offers prompt testing capabilities, ...",,LangSmith offers capabilities for creating dat...,1,1.375596,910f0e40-e0e3-4479-ad9a-7eaf4edf32a9,2a9b0871-e269-4ed5-8be3-da2443c36871
5,Can LangSmith be used to evaluate agents?,"Yes, LangSmith can be used to evaluate agents ...",,"Yes, LangSmith can be used to evaluate agents....",0,1.368974,630842ec-e521-40e7-adec-f338377d45b3,736de6de-0da9-473e-8794-1714fcb8ae46
6,Can LangSmith be used for finetuning and model...,"LangSmith can be used for evaluations, but it ...",,"Yes, LangSmith can be used for fine-tuning and...",1,1.108328,7b6a555a-7ce6-4905-b191-1827ff4c31a2,6e8c30b5-8b7f-4896-af8a-bbb1bae5b2bc
7,How do I pass metadata in with @traceable?,To pass metadata with @traceable in the LangSm...,,You can pass metadata with the @traceable deco...,1,1.288133,b5646fed-27d3-4135-9469-0880a0dc236e,efee4578-2bde-43d6-820e-313c27baf25a
8,Does LangSmith support online evaluation?,"Yes, LangSmith supports online evaluation. Onl...",,"Yes, LangSmith supports online evaluation as a...",0,1.63804,8cbc4fdb-a176-4a73-8440-7c32299c8eb6,d874d5a2-a142-4ecd-bf64-b0bf6ccb6d07
9,How do I set up tracing to LangSmith if I'm us...,To set up tracing for LangSmith when using Lan...,,To set up tracing to LangSmith while using Lan...,1,1.115497,d25a2c21-dc10-46f2-af77-6e59c95dff5b,5e681e61-15a0-40e9-a7b8-e9b412a952ee


##### Metadata 

You can (and should) add metadata to your experiments, to make them easier to find in the UI

In [14]:
# We can pass custom metadata for the experiment, such as the model name
experiment_metadata = {"model_name": MODEL_NAME}

evaluate(
    target_function,
    data=dataset_name,
    evaluators=[is_concise_enough],
    metadata=experiment_metadata,
    experiment_prefix="metadata added and try LLM AJ evaluator",
)

View the evaluation results for experiment: 'metadata added and try LLM AJ evaluator-ed01e5b1' at:
https://eu.smith.langchain.com/o/05f40a35-2d5e-4cd4-9466-b540440921d2/datasets/41364ff4-0903-466e-95f3-b8e8bcf15805/compare?selectedSessions=6982ada4-6453-4068-bea1-35f0dce52f78




11it [00:25,  2.35s/it]


Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,How do I set up tracing to LangSmith if I'm us...,To set up tracing to LangSmith when using Lang...,,To set up tracing with LangSmith while using L...,1,2.968302,47ea8bf4-a7fe-4ef3-8243-1dcd298701e9,fc918607-f84b-470e-b990-0243181b2d20
1,How can I trace with the @traceable decorator?,To trace with the @traceable decorator in Pyth...,,To trace with the @traceable decorator in Pyth...,1,2.326632,70ade6fe-afe6-4f72-a04d-cc26a92424a4,08b2ee54-8048-4bfd-b3a9-9d0d8b508be1
2,How do I create user feedback with the LangSmi...,To create user feedback with the LangSmith SDK...,,To create user feedback with the LangSmith SDK...,1,2.687511,7b8a9ae4-26d1-40b5-890a-41c7ceffb6b5,321726b4-71d1-4e39-8d11-91b5ade267be
3,What testing capabilities does LangSmith have?,LangSmith offers prompt testing capabilities t...,,LangSmith offers capabilities for creating dat...,1,2.708043,910f0e40-e0e3-4479-ad9a-7eaf4edf32a9,c6ae505a-fdda-45ad-845f-0e4ed27addb4
4,Does LangSmith support offline evaluation?,"Yes, LangSmith supports offline evaluation for...",,"Yes, LangSmith supports offline evaluation thr...",0,2.682485,c1e9bb37-d2fb-41e5-a2e7-ae85d51c6ab2,1d41356e-9c01-4e7c-8db8-1d47e0dc5fd0
5,Can LangSmith be used to evaluate agents?,"Yes, LangSmith can be used to evaluate agents ...",,"Yes, LangSmith can be used to evaluate agents....",1,2.661048,630842ec-e521-40e7-adec-f338377d45b3,ba9e7ee6-fa1d-4728-8a0a-7053050450c3
6,Can LangSmith be used for finetuning and model...,LangSmith can be used for model evaluation thr...,,"Yes, LangSmith can be used for fine-tuning and...",0,1.466531,7b6a555a-7ce6-4905-b191-1827ff4c31a2,7f1fa9f2-066f-4f95-81b9-55fad9a347cb
7,Does LangSmith support online evaluation?,"Yes, LangSmith supports online evaluations to ...",,"Yes, LangSmith supports online evaluation as a...",0,2.215579,8cbc4fdb-a176-4a73-8440-7c32299c8eb6,0c9fac1f-b29e-49ff-b9ba-5ee70b17e7aa
8,How do I pass metadata in with @traceable?,To pass metadata in with @traceable in the Lan...,,You can pass metadata with the @traceable deco...,1,2.420169,b5646fed-27d3-4135-9469-0880a0dc236e,cd06db4e-3d12-43a8-9f89-1b537fd452bb
9,How do I set up tracing to LangSmith if I'm us...,To set up tracing to LangSmith when using Lang...,,To set up tracing to LangSmith while using Lan...,1,1.819222,d25a2c21-dc10-46f2-af77-6e59c95dff5b,f458fadb-6694-4f07-8975-db76b6c639b3
