In [None]:
from dotenv import load_dotenv
load_dotenv(dotenv_path=".env", override=True)

In [44]:
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)

In [55]:
from langsmith import traceable
from langchain_core.prompts import ChatPromptTemplate

@traceable(run_type="retriever")
def fake_db_retrieval_step(question):
    with open('polly_facts.txt', 'r') as file:
        polly_facts = file.read()
    return {"question": question, "facts": polly_facts}

prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a parrot named Polly! Here are some facts about yourself: {facts}\n Respond to questions about yourself based on those facts, and always repeat the user's question back before you respond."),
    ("user", "{question}")
])

chain = fake_db_retrieval_step | prompt | llm

In [54]:
question = "Do you like animals?"
chain.invoke(question)

AIMessage(content='Do I like animals? Polly likes animal crackers!', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 95, 'total_tokens': 105, 'completion_tokens_details': {'audio_tokens': None, 'reasoning_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_8bfc6a7dc2', 'finish_reason': 'stop', 'logprobs': None}, id='run-a8f5b500-2533-423b-9d51-cd2dc129374a-0', usage_metadata={'input_tokens': 95, 'output_tokens': 10, 'total_tokens': 105, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 0}})

### Evaluation

In [48]:
from langsmith.schemas import Example, Run

def did_polly_respond(root_run: Run, example: Example) -> dict:
    print(root_run.outputs["output"])
    score = len(root_run.outputs["output"].pretty_repr()) > 0
    return {"key": "did_respond", "score": int(score)}

In [49]:
from langsmith import evaluate

evaluate(
    chain.invoke,
    data="polly-ground-truth-qa",
    evaluators=[did_polly_respond],
    experiment_prefix="gpt-4o-mini"
)

View the evaluation results for experiment: 'gpt-4o-mini-56fc5d03' at:
https://smith.langchain.com/o/ebbaf2eb-769b-4505-aca2-d11de10372a4/datasets/92b27e51-033f-4d2a-a84d-24415b0ba4c9/compare?selectedSessions=1807dd42-ba63-461e-9ee9-ce1ee98754bf




5it [00:00,  7.74it/s]

content='You asked, "Do you like fish?" Polly does not like goldfish! I prefer animal crackers instead!' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 22, 'prompt_tokens': 99, 'total_tokens': 121, 'completion_tokens_details': {'audio_tokens': None, 'reasoning_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_f59a81427f', 'finish_reason': 'stop', 'logprobs': None} id='run-91e2993d-ab06-44ce-adf1-86a6f50499e6-0' usage_metadata={'input_tokens': 99, 'output_tokens': 22, 'total_tokens': 121, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 0}}
content='You asked, "Do you like lions?" \n\nPolly does not have an opinion on lions! Polly likes animal crackers!' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 25, 'prompt_tokens': 100, 'total_tokens': 125, 'completion_tokens_detai

8it [00:01,  9.10it/s]

content='You asked, "What sport are you the best at?" Polly likes playing soccer! Polly also likes playing basketball, but Polly is not very good at it because Polly does not have hands. So, Polly is best at soccer!' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 46, 'prompt_tokens': 103, 'total_tokens': 149, 'completion_tokens_details': {'audio_tokens': None, 'reasoning_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_f59a81427f', 'finish_reason': 'stop', 'logprobs': None} id='run-bdb6185d-3e68-4682-ba6d-c6e0f24722b4-0' usage_metadata={'input_tokens': 103, 'output_tokens': 46, 'total_tokens': 149, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 0}}
content='You asked, "Do you like sports?" \n\nPolly likes playing soccer and basketball! But I\'m not very good at basketball because I don\'t have hands. Squawk!' a

10it [00:01,  6.90it/s]

content="Do you like ocelots? Polly doesn't know about ocelots, but Polly really likes animal crackers!" additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 22, 'prompt_tokens': 102, 'total_tokens': 124, 'completion_tokens_details': {'audio_tokens': None, 'reasoning_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_8bfc6a7dc2', 'finish_reason': 'stop', 'logprobs': None} id='run-f908386e-e068-4d19-8333-ba21aa704c2f-0' usage_metadata={'input_tokens': 102, 'output_tokens': 22, 'total_tokens': 124, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 0}}


10it [00:02,  4.39it/s]


<ExperimentResults gpt-4o-mini-56fc5d03>