# Evaluating a Traceable Function

This notebook walks through evaluating a function decorated with LangSmith's `@traceable` using LangChain's `run_on_dataset` function.

In [1]:
import openai
from langchain.schema.runnable import RunnableLambda
from langsmith import traceable


@traceable(run_type="llm")
def complete(*args, **kwargs):
    """Forward all arguments to `openai.ChatCompletion.create` and return its result.

    The `@traceable(run_type="llm")` decorator records each call as an
    "llm" run.

    NOTE(review): `openai.ChatCompletion` looks like the legacy (pre-1.0)
    OpenAI SDK interface -- confirm the pinned `openai` version.
    """
    response = openai.ChatCompletion.create(*args, **kwargs)
    return response


@traceable()
async def my_model(question: str, context: str):
    """Answer `question` with gpt-3.5-turbo, using `context` in the system prompt.

    Args:
        question: Text sent as the user message.
        context: Text interpolated into the system message.

    Returns:
        The `message.content` of the first choice in the completion.
    """
    import asyncio

    system_message = {
        "role": "system",
        "content": f"Respond to the user, taking into account the context: {context}",
    }
    user_message = {"role": "user", "content": question}

    # Yield to the event loop briefly before issuing the (synchronous)
    # `complete` call.
    await asyncio.sleep(0.1)
    completion = complete(
        model="gpt-3.5-turbo",
        messages=[system_message, user_message],
    )

    # Extraction goes through a RunnableLambda rather than a plain attribute
    # access; presumably so the step shows up as its own run -- TODO confirm.
    extract_content = RunnableLambda(
        lambda response: response.choices[0].message.content
    )
    return extract_content.invoke(completion)

In [2]:
from langchain.smith.evaluation.runner_utils import RunnableTraceable

# Wrap the traceable `my_model` coroutine in a `RunnableTraceable` so it can be
# driven through the runnable interface (`abatch` is called on it further down).
runnable_traceable = RunnableTraceable(my_model)

In [3]:
from langsmith.run_helpers import get_run_tree_context

In [4]:
import uuid

from langsmith import Client

# `client` and `uid` are only referenced by the commented-out dataset code in
# this notebook; they are kept so that code can be re-enabled as-is.
client = Client()
uid = uuid.uuid4()
# Each example supplies the `question` and `context` arguments of `my_model`.
examples = [
    {
        "question": "What's 2+2",
        "context": "You are a pirate",
    },
    {
        "question": "Where did coffee originate from?",
        "context": "You are a knight of King Arthur.",
    },
]
# Disabled: dataset creation. Uncomment to upload `examples` as a dataset
# whose name is made unique by `uid`.
# dataset_name = f"Evaluating Traceables Walkthrough - {uid}"
# dataset = client.create_dataset(dataset_name)

# client.create_examples(
#     inputs=examples,
#     dataset_id=dataset.id,
# )

In [5]:
from langchain.callbacks.manager import trace_as_chain_group


@traceable()
async def group_traceable(examples):
    """Run all `examples` through `runnable_traceable` inside one chain group.

    Args:
        examples: Inputs passed to `runnable_traceable.abatch`.

    Returns:
        Whatever `abatch` returns for the given inputs.
    """
    with trace_as_chain_group("My Group") as group_callbacks:
        # Pass the group's callbacks in the config given to `abatch`.
        batch_config = {"callbacks": group_callbacks}
        outputs = await runnable_traceable.abatch(examples, batch_config)
    return outputs

In [6]:
# Top-level `await` is valid in a notebook cell; the awaited result is the cell's output.
await group_traceable(examples)

['Ahoy matey! 2+2 be 4, me hearty!',
 'Ah, fair traveler, the origins of coffee are said to lie in the lands of Ethiopia; a land filled with enchanting tales and rich history. According to legend, it was there that a goat herder named Kaldi first discovered the invigorating properties of the coffee plant. It is said that Kaldi noticed his goats became quite spirited after feasting on the red cherries of a certain shrub. Intrigued, he decided to try these cherries himself and discovered their stimulating effect. Word of this remarkable discovery spread, leading to the cultivation and eventual enjoyment of coffee throughout the kingdom and beyond.']

In [7]:
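# Disabled: this evaluation run needs `dataset_name`, which is only defined in
# the commented-out dataset-creation code earlier in the notebook.
# from langchain.smith import RunEvalConfig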

# evaluation = RunEvalConfig(
#     evaluators=["criteria"],
# )

# client.run_on_dataset(
#     dataset_name=dataset_name,
#     llm_or_chain_factory=my_model,
#     evaluation=evaluation,
# )