# Experiments

### Setup

In [None]:
# You can set them inline
import os
os.environ["OPENAI_API_KEY"] = ""
os.environ["LANGSMITH_API_KEY"] = ""
os.environ["LANGSMITH_TRACING"] = "true"
os.environ["LANGSMITH_PROJECT"] = "langsmith-academy"

In [1]:
# Or you can use a .env file
from dotenv import load_dotenv
load_dotenv(dotenv_path="../../.env", override=True)

True

Here is the RAG Application that we've been working with throughout this course

In [5]:
import os
import tempfile
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders.sitemap import SitemapLoader
from langchain_community.vectorstores import SKLearnVectorStore
from langchain_openai import OpenAIEmbeddings
from langsmith import traceable
from openai import OpenAI
from typing import List
import nest_asyncio

# TODO: Configure this model!
MODEL_NAME = "gpt-3.5-turbo"
MODEL_PROVIDER = "openai"
APP_VERSION = 1.0
RAG_SYSTEM_PROMPT = """You are an assistant for question-answering tasks. 
Use the following pieces of retrieved context to answer the latest question in the conversation. 
If you don't know the answer, just say that you don't know. 
Use three sentences maximum and keep the answer concise.
"""

openai_client = OpenAI()

def get_vector_db_retriever():
    persist_path = os.path.join(tempfile.gettempdir(), "union.parquet")
    embd = OpenAIEmbeddings()

    # If vector store exists, then load it
    if os.path.exists(persist_path):
        vectorstore = SKLearnVectorStore(
            embedding=embd,
            persist_path=persist_path,
            serializer="parquet"
        )
        return vectorstore.as_retriever(lambda_mult=0)

    # Otherwise, index LangSmith documents and create new vector store
    ls_docs_sitemap_loader = SitemapLoader(web_path="https://docs.smith.langchain.com/sitemap.xml", continue_on_failure=True)
    ls_docs = ls_docs_sitemap_loader.load()

    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=500, chunk_overlap=0
    )
    doc_splits = text_splitter.split_documents(ls_docs)

    vectorstore = SKLearnVectorStore.from_documents(
        documents=doc_splits,
        embedding=embd,
        persist_path=persist_path,
        serializer="parquet"
    )
    vectorstore.persist()
    return vectorstore.as_retriever(lambda_mult=0)

nest_asyncio.apply()
retriever = get_vector_db_retriever()

"""
retrieve_documents
- Returns documents fetched from a vectorstore based on the user's question
"""
@traceable(run_type="chain")
def retrieve_documents(question: str):
    return retriever.invoke(question)

"""
generate_response
- Calls `call_openai` to generate a model response after formatting inputs
"""
@traceable(run_type="chain")
def generate_response(question: str, documents):
    formatted_docs = "\n\n".join(doc.page_content for doc in documents)
    messages = [
        {
            "role": "system",
            "content": RAG_SYSTEM_PROMPT
        },
        {
            "role": "user",
            "content": f"Context: {formatted_docs} \n\n Question: {question}"
        }
    ]
    return call_openai(messages)

"""
call_openai
- Returns the chat completion output from OpenAI
"""
@traceable(
    run_type="llm",
    metadata={
        "ls_provider": MODEL_PROVIDER,
        "ls_model_name": MODEL_NAME
    }
)
def call_openai(messages: List[dict]) -> str:
    return openai_client.chat.completions.create(
        model=MODEL_NAME,
        messages=messages,
    )

"""
langsmith_rag
- Calls `retrieve_documents` to fetch documents
- Calls `generate_response` to generate a response based on the fetched documents
- Returns the model response
"""
@traceable(run_type="chain")
def langsmith_rag(question: str):
    documents = retrieve_documents(question)
    response = generate_response(question, documents)
    return response.choices[0].message.content


### Experiment

Here is a code snippet that should look similar to what you see from the starter code!

There are a few important components here.

1. We have defined an Evaluator
2. We pipe our dataset examples (dict) to the shape of input that our function `langsmith_rag` takes (str) using a target function

In [4]:
from langsmith import evaluate, Client

client = Client()
dataset_name = "Rag application golden dataset"

def is_concise_enough(reference_outputs: dict, outputs: dict) -> dict:
    score = len(outputs["output"]) < 1.5 * len(reference_outputs["output"])
    return {"key": "is_concise", "score": int(score)}

def target_function(inputs: dict):
    return langsmith_rag(inputs["question"])

evaluate(
    target_function,
    data=dataset_name,
    evaluators=[is_concise_enough],
    experiment_prefix="gpt-4o"
)

  from .autonotebook import tqdm as notebook_tqdm


View the evaluation results for experiment: 'gpt-4o-b90ed084' at:
https://smith.langchain.com/o/7c0013ad-db6b-4270-a51d-fd50bff1aee4/datasets/ce78dc7d-86c4-43f8-8302-564597140f19/compare?selectedSessions=67381d28-f5bc-4424-acd7-1c2c17795ffd




15it [00:21,  1.45s/it]


Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,How many smell receptors do dogs have compared...,I don't know.,,Dogs have about 300 million smell receptors co...,1,1.744435,7934f8be-0fff-445b-af03-3d5ab63acd61,18901045-12b6-4e80-a9b5-d0c651910e15
1,What's the only mammal capable of true flight?,The only mammal capable of true flight is the ...,,Bats are the only mammals capable of true flig...,1,1.203533,87d99b5b-494c-42c6-8619-b21e92c47b3f,75dbe9c4-59fa-4af8-8710-e222b41fe571
2,What is the capital city of Australia?,The capital city of Australia is Canberra.,,The capital city of Australia is Canberra! It ...,1,1.659772,886dc989-9af7-4e60-bed6-9835ffc005d4,a8785980-7c14-4050-9523-5b5f6860cb79
3,Which ocean is the deepest point on Earth loca...,The deepest point on Earth is located in the P...,,The deepest point on Earth is the Challenger D...,1,1.740848,d056f4df-af36-4942-a406-6e9e014cc55a,64cd1ce1-5861-4bd9-8956-93ab9b1a1613
4,What animal can sleep for up to 22 hours a day?,The animal that can sleep for up to 22 hours a...,,Koalas can sleep for up to 22 hours a day! The...,1,1.173784,f6c8615d-9c9d-45a8-917f-37ddd577adce,5b615c4f-a9fe-47d5-93cf-47483a3e82d0
5,What organ is responsible for detoxifying chem...,The liver is responsible for detoxifying chemi...,,The liver is the organ responsible for detoxif...,1,1.500325,1040ae4e-5b0e-47ff-8e04-f8ef80b88f8a,33a2820d-c960-4a3a-87d3-1f12579b2ccf
6,How do bees communicate with each other?,I don't know.,,Bees communicate with each other through a ser...,1,1.637665,7b455631-f65a-41ea-bd5d-3db230379a19,6cf8007c-76b1-4f24-8a9b-9e83e58caae1
7,What is the largest desert in the world?,The largest desert in the world is the Antarct...,,The largest desert in the world is the Antarct...,1,1.478703,3b8c26e5-b9bb-438c-a2eb-daa9a5d63bc2,cf6d9b11-fdd8-481f-9d25-67d949f55897
8,What is the longest river in the world?,I don't know.,,The Nile River is often considered the longest...,1,1.713337,098e6b5b-bb32-4548-83ae-e6407eec198c,0a2a7751-3175-4e56-aff0-17c5b81d10e9
9,What's the only food that never spoils?,The only food that never spoils is honey.,,Honey is the only food that never spoils! Arch...,1,1.250243,1f37f9ce-17c7-4dd1-b166-927f8fcabb96,bc55a7e1-67aa-462b-9d8f-4ded242f23d8


### Modifying your Application

Now, let's change our model to gpt-35-turbo and see how it performs!

Make this change, and then run this code snippet!

In [14]:
from langsmith import evaluate, Client
from langsmith.schemas import Example, Run

def target_function(inputs: dict):
    return langsmith_rag(inputs["question"])

evaluate(
    target_function,
    data=dataset_name,
    evaluators=[is_concise_enough],
    experiment_prefix="gpt-3.5-turbo"
)

View the evaluation results for experiment: 'gpt-3.5-turbo-12129ce3' at:
https://smith.langchain.com/o/7c0013ad-db6b-4270-a51d-fd50bff1aee4/datasets/ce78dc7d-86c4-43f8-8302-564597140f19/compare?selectedSessions=e7fad9ee-4b0b-48cd-80de-a3a05817b1b4




15it [00:20,  1.36s/it]


Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,How many smell receptors do dogs have compared...,I don't know.,,Dogs have about 300 million smell receptors co...,1,1.205546,7934f8be-0fff-445b-af03-3d5ab63acd61,9b2f8e60-bb61-4e8b-8bca-4198473c424b
1,What's the only mammal capable of true flight?,The only mammal capable of true flight is the ...,,Bats are the only mammals capable of true flig...,1,1.012239,87d99b5b-494c-42c6-8619-b21e92c47b3f,b8e66ddc-ce8d-4be4-8cf8-fcce24423502
2,What is the capital city of Australia?,I don't know the answer to this question.,,The capital city of Australia is Canberra! It ...,1,1.329574,886dc989-9af7-4e60-bed6-9835ffc005d4,0aa46ff5-89d9-4935-95d1-3d57340df303
3,Which ocean is the deepest point on Earth loca...,The Mariana Trench in the Pacific Ocean is whe...,,The deepest point on Earth is the Challenger D...,1,1.356985,d056f4df-af36-4942-a406-6e9e014cc55a,c8497681-97d7-4299-ab6c-c860b7daeccd
4,What animal can sleep for up to 22 hours a day?,The animal that can sleep for up to 22 hours a...,,Koalas can sleep for up to 22 hours a day! The...,1,1.421149,f6c8615d-9c9d-45a8-917f-37ddd577adce,c1a54afe-00c9-4641-aa92-4c4e405ac750
5,What organ is responsible for detoxifying chem...,The liver is the organ responsible for detoxif...,,The liver is the organ responsible for detoxif...,1,1.231652,1040ae4e-5b0e-47ff-8e04-f8ef80b88f8a,9e802b6e-6a59-42e8-82f6-ec68d119d41a
6,How do bees communicate with each other?,I don't have information on how bees communica...,,Bees communicate with each other through a ser...,1,1.18695,7b455631-f65a-41ea-bd5d-3db230379a19,6baeaea2-eeb8-4d7c-9689-41489fbd0d74
7,What is the largest desert in the world?,The largest desert in the world is the Antarct...,,The largest desert in the world is the Antarct...,1,1.479175,3b8c26e5-b9bb-438c-a2eb-daa9a5d63bc2,1917b528-f585-4f1a-aac3-2663244e7597
8,What is the longest river in the world?,The longest river in the world is the Nile Riv...,,The Nile River is often considered the longest...,1,1.319651,098e6b5b-bb32-4548-83ae-e6407eec198c,e0926e65-2e85-4cf2-b2f0-64b0a2c7ebf7
9,What's the only food that never spoils?,Honey is the only food that never spoils due t...,,Honey is the only food that never spoils! Arch...,0,1.389882,1f37f9ce-17c7-4dd1-b166-927f8fcabb96,3fde962e-9240-48e9-b9c3-574bd2ae8a7e


### Running over Different pieces of Data

##### Dataset Version

You can execute an experiment on a specific version of a dataset in the sdk by using the `as_of` parameter in `list_examples`

Let's try running on just our initial dataset.

In [9]:
evaluate(
    target_function,
    data=client.list_examples(dataset_name=dataset_name, as_of="initial datasett"),   # We use as_of to specify a version
    evaluators=[is_concise_enough],
    experiment_prefix="initial dataset version"
)

View the evaluation results for experiment: 'initial dataset version-2fb6bdd6' at:
https://smith.langchain.com/o/7c0013ad-db6b-4270-a51d-fd50bff1aee4/datasets/ce78dc7d-86c4-43f8-8302-564597140f19/compare?selectedSessions=315d4048-6563-4ebd-bf44-f2dd144a842d




10it [00:11,  1.19s/it]


Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,What's the only food that never spoils?,Honey is the only food that never spoils due t...,,Honey is the only food that never spoils! Arch...,1,1.665942,1f37f9ce-17c7-4dd1-b166-927f8fcabb96,dd73c975-1a46-4c5f-a2f6-7ebcbd4bba3c
1,What's the fastest land animal in the world?,The fastest land animal in the world is the ch...,,"The cheetah is the fastest land animal, capabl...",1,1.268562,4d3b31bd-a497-4ff7-aa65-65a31543687c,50cf61ee-37ee-418c-a0ad-60f72013c94c
2,Which planet has the most moons in our solar s...,I don't know the answer to that question.,,Saturn has the most moons in our solar system ...,1,0.939871,64daf27c-4313-470d-b5fa-d137941b9d6a,9d6a7b67-ce88-49d9-a85b-afc9e5fd4efb
3,How many hearts does an octopus have?,An octopus has three hearts.,,An octopus has three hearts! Two pump blood to...,1,0.941597,775ff707-d8ec-4bf4-b494-375cd39c83ee,228d9110-3dfb-4881-8793-d3e791c5cafd
4,How many smell receptors do dogs have compared...,I don't know.,,Dogs have about 300 million smell receptors co...,1,1.234641,7934f8be-0fff-445b-af03-3d5ab63acd61,3debf385-2812-43de-93b4-7ebf12959a00
5,What's the only mammal capable of true flight?,The only mammal capable of true flight is the ...,,Bats are the only mammals capable of true flig...,1,1.025148,87d99b5b-494c-42c6-8619-b21e92c47b3f,aaf4a50e-7b19-45dd-bef6-01de5290b060
6,Which ocean is the deepest point on Earth loca...,The deepest point on Earth is located in the P...,,The deepest point on Earth is the Challenger D...,1,1.147205,d056f4df-af36-4942-a406-6e9e014cc55a,8f9cae1e-a0d1-4571-a086-78e1ade23bed
7,What animal can sleep for up to 22 hours a day?,The animal that can sleep for up to 22 hours a...,,Koalas can sleep for up to 22 hours a day! The...,1,1.007888,f6c8615d-9c9d-45a8-917f-37ddd577adce,75e50c1f-d91e-4e96-9b18-7f7c8d4ceac0
8,Which fruit was once considered poisonous in E...,I don't have the specific information about wh...,,Tomatoes were once considered poisonous in Eur...,1,1.073188,fd284fc9-cdef-4d44-b36e-6b77bc918a0d,19b07828-df80-403b-9bc0-e7ed7af5fe9d
9,What's the smallest bone in the human body?,The smallest bone in the human body is the sta...,,The stapes bone in the middle ear is the small...,1,1.102862,ff762f26-8c37-40f8-ae80-1607a7dea735,3ee98965-1b2b-4c47-8888-9303a0b5fea6


##### Dataset Split

You can run an experiment on a specific split of your dataset, let's try running on the Crucial Examples split.

In [8]:
evaluate(
    target_function,
    data=client.list_examples(dataset_name=dataset_name, splits=["important examples"]),  # We pass in a list of Splits
    evaluators=[is_concise_enough],
    experiment_prefix="Important Examples split"
)

View the evaluation results for experiment: 'Important Examples split-804c9432' at:
https://smith.langchain.com/o/7c0013ad-db6b-4270-a51d-fd50bff1aee4/datasets/ce78dc7d-86c4-43f8-8302-564597140f19/compare?selectedSessions=4fe593a3-c20f-4e32-b561-d88e77915161




5it [00:06,  1.28s/it]


Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,How many smell receptors do dogs have compared...,I don't have information on the number of smel...,,Dogs have about 300 million smell receptors co...,1,1.095136,7934f8be-0fff-445b-af03-3d5ab63acd61,facb0f4c-e84b-4a49-803c-1e12ab0674dd
1,What's the only mammal capable of true flight?,The only mammal capable of true flight is the ...,,Bats are the only mammals capable of true flig...,1,1.206093,87d99b5b-494c-42c6-8619-b21e92c47b3f,f00a8950-81e6-477d-9a80-9228301e0adf
2,What is the capital city of Australia?,I don't know the answer to that question based...,,The capital city of Australia is Canberra! It ...,1,1.062882,886dc989-9af7-4e60-bed6-9835ffc005d4,64ceb34e-b2bb-44e6-9b4f-b4c0d378b229
3,Which ocean is the deepest point on Earth loca...,The deepest point on Earth is located in the P...,,The deepest point on Earth is the Challenger D...,1,1.096721,d056f4df-af36-4942-a406-6e9e014cc55a,a187b4a4-26ac-4e2d-abfd-a779a10742ff
4,What animal can sleep for up to 22 hours a day?,The animal that can sleep for up to 22 hours a...,,Koalas can sleep for up to 22 hours a day! The...,1,1.437762,f6c8615d-9c9d-45a8-917f-37ddd577adce,a762548a-617a-4c4d-908f-79999d149729


##### Specific Data Points

You can specify individual data points to run an experiment over as well

In [10]:
evaluate(
    target_function,
    data=client.list_examples(
        dataset_name=dataset_name, 
        example_ids=[   # We pass in a specific list of example_ids
            # TODO: You will need to paste in your own example ids for this to work!
            "7934f8be-0fff-445b-af03-3d5ab63acd61",
            "87d99b5b-494c-42c6-8619-b21e92c47b3f"
        ]
    ),
    evaluators=[is_concise_enough],
    experiment_prefix="two specific example ids"
)

View the evaluation results for experiment: 'two specific example ids-813eb4bd' at:
https://smith.langchain.com/o/7c0013ad-db6b-4270-a51d-fd50bff1aee4/datasets/ce78dc7d-86c4-43f8-8302-564597140f19/compare?selectedSessions=7f1eb321-c248-452c-96d6-6bdd7d420c06




2it [00:03,  1.57s/it]


Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,How many smell receptors do dogs have compared...,I don't know the answer to that question.,,Dogs have about 300 million smell receptors co...,1,1.607761,7934f8be-0fff-445b-af03-3d5ab63acd61,4389bc09-9c14-4752-b6a0-4973243bc89c
1,What's the only mammal capable of true flight?,The only mammal capable of true flight is the ...,,Bats are the only mammals capable of true flig...,1,1.037257,87d99b5b-494c-42c6-8619-b21e92c47b3f,397ab264-f514-4403-bce5-808c9d7d7582


### Other Parameters

##### Repetitions

You can run an experiment several times to make sure you have consistent results

In [11]:
evaluate(
    target_function,
    data=dataset_name,
    evaluators=[is_concise_enough],
    experiment_prefix="two repetitions",
    num_repetitions=2   # This field defaults to 1
)

View the evaluation results for experiment: 'two repetitions-0d2f996e' at:
https://smith.langchain.com/o/7c0013ad-db6b-4270-a51d-fd50bff1aee4/datasets/ce78dc7d-86c4-43f8-8302-564597140f19/compare?selectedSessions=044a37db-aedf-4495-8713-c0ed7e579578




30it [00:38,  1.27s/it]


Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,How many smell receptors do dogs have compared...,I don't know.,,Dogs have about 300 million smell receptors co...,1,1.610485,7934f8be-0fff-445b-af03-3d5ab63acd61,fccc1b92-6a06-4f9d-a37b-d88724695763
1,What's the only mammal capable of true flight?,The only mammal capable of true flight is the ...,,Bats are the only mammals capable of true flig...,1,0.948,87d99b5b-494c-42c6-8619-b21e92c47b3f,b1e388f1-e95e-4411-b9c2-4f7984343d8f
2,What is the capital city of Australia?,I don't have enough information to answer the ...,,The capital city of Australia is Canberra! It ...,1,1.057234,886dc989-9af7-4e60-bed6-9835ffc005d4,e36d1e0f-dce4-42b1-b797-ee276c1eff35
3,Which ocean is the deepest point on Earth loca...,The Mariana Trench in the Pacific Ocean is the...,,The deepest point on Earth is the Challenger D...,1,1.275023,d056f4df-af36-4942-a406-6e9e014cc55a,a8c04a2b-a64b-429d-b947-f5636c1aa804
4,What animal can sleep for up to 22 hours a day?,The animal that can sleep for up to 22 hours a...,,Koalas can sleep for up to 22 hours a day! The...,1,1.150766,f6c8615d-9c9d-45a8-917f-37ddd577adce,206a29ed-17ba-4443-b210-187811a8e820
5,What organ is responsible for detoxifying chem...,The liver is responsible for detoxifying chemi...,,The liver is the organ responsible for detoxif...,1,1.968401,1040ae4e-5b0e-47ff-8e04-f8ef80b88f8a,9b608874-5284-4fff-8523-f9f60510f3e2
6,How do bees communicate with each other?,I don't have the information on how bees commu...,,Bees communicate with each other through a ser...,1,1.093987,7b455631-f65a-41ea-bd5d-3db230379a19,8ee3b22e-bedb-45b8-a590-58af53c14a75
7,What is the largest desert in the world?,The largest desert in the world is the Antarct...,,The largest desert in the world is the Antarct...,1,1.383987,3b8c26e5-b9bb-438c-a2eb-daa9a5d63bc2,6a130d88-efbf-4ab2-a90b-82021c03897f
8,What is the longest river in the world?,I don't know the answer to that question.,,The Nile River is often considered the longest...,1,1.190072,098e6b5b-bb32-4548-83ae-e6407eec198c,3e8273a6-ef8b-4308-943c-642ca850e78b
9,What's the only food that never spoils?,Honey is the only food that never spoils due t...,,Honey is the only food that never spoils! Arch...,1,1.305845,1f37f9ce-17c7-4dd1-b166-927f8fcabb96,8f4a367b-2c44-4fb5-99e6-067273c5a516


##### Concurrency
You can also kick off concurrent threads of execution to make your experiments finish faster!

In [12]:
evaluate(
    target_function,
    data=dataset_name,
    evaluators=[is_concise_enough],
    experiment_prefix="concurrency",
    max_concurrency=3,  # This defaults to None, so this is an improvement!
)

View the evaluation results for experiment: 'concurrency-cbc7a12f' at:
https://smith.langchain.com/o/7c0013ad-db6b-4270-a51d-fd50bff1aee4/datasets/ce78dc7d-86c4-43f8-8302-564597140f19/compare?selectedSessions=433c0e15-276b-4002-950e-8b05e1d05016




15it [00:07,  2.11it/s]


Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,How many smell receptors do dogs have compared...,I don't know.,,Dogs have about 300 million smell receptors co...,1,1.165901,7934f8be-0fff-445b-af03-3d5ab63acd61,7fd48c40-b994-43c2-9fb6-574bdea5c0d9
1,What's the only mammal capable of true flight?,The only mammal capable of true flight is the ...,,Bats are the only mammals capable of true flig...,1,1.178241,87d99b5b-494c-42c6-8619-b21e92c47b3f,767bf545-3b07-4712-a673-dda766a0181c
2,What is the capital city of Australia?,I don't know the answer to that question based...,,The capital city of Australia is Canberra! It ...,1,1.647043,886dc989-9af7-4e60-bed6-9835ffc005d4,8822162f-04b1-42d4-a93d-4a89f30b8c66
3,What animal can sleep for up to 22 hours a day?,The animal that can sleep for up to 22 hours a...,,Koalas can sleep for up to 22 hours a day! The...,1,1.10319,f6c8615d-9c9d-45a8-917f-37ddd577adce,01035e61-7df0-4b94-a151-77f398d646b1
4,Which ocean is the deepest point on Earth loca...,The deepest point on Earth is located in the P...,,The deepest point on Earth is the Challenger D...,1,1.282432,d056f4df-af36-4942-a406-6e9e014cc55a,c4c137c9-6b5a-4669-a73a-3babd7313dc3
5,What organ is responsible for detoxifying chem...,The liver is the organ responsible for detoxif...,,The liver is the organ responsible for detoxif...,1,0.939324,1040ae4e-5b0e-47ff-8e04-f8ef80b88f8a,63b316ed-83e8-4200-9e71-66e3269de8e2
6,How do bees communicate with each other?,I don't have information on how bees communica...,,Bees communicate with each other through a ser...,1,0.967625,7b455631-f65a-41ea-bd5d-3db230379a19,d5c8d478-c049-4508-9e3d-fdfe1663ecd8
7,What is the longest river in the world?,I don't know the answer to that question.,,The Nile River is often considered the longest...,1,0.897427,098e6b5b-bb32-4548-83ae-e6407eec198c,0865a465-099c-4909-a91b-4b1380b48902
8,What is the largest desert in the world?,The largest desert in the world is the Antarct...,,The largest desert in the world is the Antarct...,1,1.301124,3b8c26e5-b9bb-438c-a2eb-daa9a5d63bc2,786268a6-7d27-47b1-a7a2-9cac044a9e85
9,Which planet has the most moons in our solar s...,I don't know the answer to that question.,,Saturn has the most moons in our solar system ...,1,0.932402,64daf27c-4313-470d-b5fa-d137941b9d6a,2f2dc216-e699-4455-bfc8-aeb8a5ecbfb2


##### Metadata 

You can (and should) add metadata to your experiments, to make them easier to find in the UI

In [13]:
evaluate(
    target_function,
    data=dataset_name,
    evaluators=[is_concise_enough],
    experiment_prefix="metadata added",
    metadata={  # We can pass custom metadata for the experiment, such as the model name
        "model_name": MODEL_NAME
    }
)

View the evaluation results for experiment: 'metadata added-d916e33a' at:
https://smith.langchain.com/o/7c0013ad-db6b-4270-a51d-fd50bff1aee4/datasets/ce78dc7d-86c4-43f8-8302-564597140f19/compare?selectedSessions=5ac29eb4-f82f-47f1-8341-1a3ee10f719b




15it [00:19,  1.32s/it]


Unnamed: 0,inputs.question,outputs.output,error,reference.output,feedback.is_concise,execution_time,example_id,id
0,How many smell receptors do dogs have compared...,I don't know the answer to that question.,,Dogs have about 300 million smell receptors co...,1,0.985831,7934f8be-0fff-445b-af03-3d5ab63acd61,e0add679-8a1d-445d-a6f5-9c88137c826e
1,What's the only mammal capable of true flight?,The only mammal capable of true flight is the ...,,Bats are the only mammals capable of true flig...,1,1.147389,87d99b5b-494c-42c6-8619-b21e92c47b3f,ead65cd2-92d3-49a0-8919-018e0b65b69f
2,What is the capital city of Australia?,I don't know the answer to your question about...,,The capital city of Australia is Canberra! It ...,1,1.248038,886dc989-9af7-4e60-bed6-9835ffc005d4,a129e668-ba75-4f6e-8c60-6c199591397b
3,Which ocean is the deepest point on Earth loca...,The Mariana Trench in the Pacific Ocean is the...,,The deepest point on Earth is the Challenger D...,1,1.327816,d056f4df-af36-4942-a406-6e9e014cc55a,1862348b-4eb7-4330-896d-a201b7fba3e0
4,What animal can sleep for up to 22 hours a day?,The animal that can sleep for up to 22 hours a...,,Koalas can sleep for up to 22 hours a day! The...,1,1.096818,f6c8615d-9c9d-45a8-917f-37ddd577adce,54d1fd73-be2b-4abe-bf54-3d36acc221c6
5,What organ is responsible for detoxifying chem...,The liver is the organ responsible for detoxif...,,The liver is the organ responsible for detoxif...,0,1.62028,1040ae4e-5b0e-47ff-8e04-f8ef80b88f8a,bcdd8fce-5f63-4029-9965-8b2a8468ca53
6,How do bees communicate with each other?,I don't have information on how bees communica...,,Bees communicate with each other through a ser...,1,1.095404,7b455631-f65a-41ea-bd5d-3db230379a19,bc7047f3-29ba-474b-895d-7fedcb1590a8
7,What is the largest desert in the world?,The largest desert in the world is the Antarct...,,The largest desert in the world is the Antarct...,1,1.883521,3b8c26e5-b9bb-438c-a2eb-daa9a5d63bc2,a0c4e8d0-4ba7-417a-b4cc-b26cc95de228
8,What is the longest river in the world?,"I don't know the answer to ""What is the longes...",,The Nile River is often considered the longest...,1,1.028028,098e6b5b-bb32-4548-83ae-e6407eec198c,f1a4bccf-62c4-41c4-aed4-e3656e23d9a3
9,What's the only food that never spoils?,Honey is the only food that never spoils due t...,,Honey is the only food that never spoils! Arch...,0,2.343351,1f37f9ce-17c7-4dd1-b166-927f8fcabb96,2a51d5af-663c-4f3c-b973-42c6837188a0
