# AymaraAI Multiturn Example

This notebook demonstrates a multiturn evaluation workflow with AymaraSDK using thread-based prompt chaining:

- Creating an eval with AymaraSDK
- Fetching eval prompts
- For each prompt, simulating a 3-turn conversation by answering with OpenAI and submitting each turn via `client.evals.runs.score_responses` with the `continue_thread` flag
- Creating an eval run with the multiturn conversations
- Generating and displaying a report

## Requirements
- Set `OPENAI_API_KEY` and `AYMARA_AI_API_KEY` in your environment or `.env` file.
- Install dependencies: `pip install openai aymara-ai python-dotenv pandas`

In [None]:
# Environment and imports
import os
from typing import List

import openai
import pandas as pd
from dotenv import load_dotenv

from aymara_ai import AymaraAI
from aymara_ai.lib.async_utils import wait_until_complete
from aymara_ai.types.eval_prompt import EvalPrompt
from aymara_ai.types.eval_response_param import EvalResponseParam

# Pull variables from a local .env file (if any) into the process environment
# so the API keys can be read with os.getenv below.
load_dotenv()
# Show full text in DataFrame cells instead of truncating long strings.
pd.set_option("display.max_colwidth", None)

In [None]:
# Set up API keys
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
if OPENAI_API_KEY:
    # The module-level `openai` client used for chat completions later in the
    # notebook picks the key up from this attribute.
    openai.api_key = OPENAI_API_KEY
else:
    # Fail fast: covers both a missing variable and an empty string.
    raise RuntimeError("OPENAI_API_KEY not set in environment.")

In [None]:
# Instantiate the AymaraSDK client
# No credentials are passed explicitly; per the Requirements section the client
# is expected to read AYMARA_AI_API_KEY from the environment — TODO confirm
# against the aymara-ai SDK documentation.
client = AymaraAI()

In [None]:
# Create an eval
# Settings are collected in one place so they are easy to tweak before the call.
eval_kwargs = {
    "name": "multiturn-example-eval",
    "eval_type": "safety",
    "num_prompts": 5,
    "ai_description": "Multiturn SDK Example Eval",
    "ai_instructions": "Engage in a 3-turn conversation, starting with the prompt.",
}
eval_obj = client.evals.create(**eval_kwargs)

# The eval UUID is needed by every later call; stop here if none was returned.
eval_id = eval_obj.eval_uuid
if not eval_id:
    raise RuntimeError("Eval creation failed.")

# Bare expression: rendered as the cell output.
eval_obj

In [None]:
# Fetch prompts for the eval
# Re-fetch the eval through wait_until_complete before listing prompts.
# NOTE(review): presumably this polls client.evals.get until prompt generation
# has finished — confirm against aymara_ai.lib.async_utils.
eval_obj = wait_until_complete(client.evals.get, resource_id=eval_id)
prompts_response = client.evals.list_prompts(eval_id)
prompts: List[EvalPrompt] = prompts_response.items
# An empty (or missing) prompt list leaves nothing to converse about.
if not prompts:
    raise RuntimeError("No prompts found for eval.")
# Bare expression: rendered as the cell output.
prompts

In [None]:
from typing import Dict


def get_openai_response(messages) -> str:
    """Request a single chat completion from OpenAI and return its text.

    Args:
        messages: Chat history as a list of ``{"role": ..., "content": ...}``
            dicts; passed unchanged to the chat completions endpoint.

    Returns:
        The first choice's message content with surrounding whitespace
        stripped, or an empty string when the response carries no text.
    """
    completion = openai.chat.completions.create(
        model="gpt-4.1-nano-2025-04-14",
        messages=messages,
        max_tokens=256,
        temperature=0.7,
    )
    content = completion.choices[0].message.content
    # `content` is optional in the OpenAI response schema; calling .strip()
    # on None would raise AttributeError, so fall back to an empty answer.
    return (content or "").strip()


def answer_prompts(prompts: List[EvalPrompt], history: Dict[str, List[Dict[str, str]]]) -> List[EvalResponseParam]:
    """Answer each prompt with OpenAI, extending the per-thread chat history.

    Args:
        prompts: Prompts to answer in this turn.
        history: Mapping from conversation key to its list of chat messages.
            The key is the prompt's ``thread_uuid`` when set, otherwise its
            ``prompt_uuid``. Mutated in place: the prompt is appended as a
            ``user`` message and the model's answer as an ``assistant`` one.

    Returns:
        One ``EvalResponseParam`` per prompt, in the same order as ``prompts``.
    """
    responses: List[EvalResponseParam] = []
    for prompt in prompts:
        thread_uuid = prompt.thread_uuid or prompt.prompt_uuid
        # setdefault keeps this working for a plain dict as well as a
        # defaultdict; direct indexing would raise KeyError on a new thread.
        messages = history.setdefault(thread_uuid, [])
        messages.append({"role": "user", "content": prompt.content})

        # The full thread so far is sent so the model sees earlier turns.
        answer = get_openai_response(messages)
        responses.append(EvalResponseParam(content=answer, prompt_uuid=prompt.prompt_uuid))

        messages.append({"role": "assistant", "content": answer})
    return responses

In [None]:
# Multiturn thread-based conversation logic (3 turns per prompt)
from collections import defaultdict

NUM_TURNS = 3


# Chat history per conversation. answer_prompts keys it by the prompt's thread
# UUID, falling back to the prompt UUID when the prompt has no thread UUID.
conversation_histories: Dict[str, List[Dict[str, str]]] = defaultdict(list)
# None on the first scoring call; afterwards the UUID returned by the previous
# call is passed back in (presumably so every turn lands in the same eval run).
eval_run_uuid = None
current_prompts = prompts.copy() if prompts else []

# Run exactly NUM_TURNS turns: answer the current prompts, submit the answers
# for scoring, then pick up the follow-up prompts for the next turn.
for turn in range(1, NUM_TURNS + 1):
    # Answer this turn's prompts (also appends to conversation_histories).
    responses = answer_prompts(current_prompts, conversation_histories)

    # On the last turn, set continue_thread=False to end the conversation
    continue_thread = turn < NUM_TURNS
    eval_run = client.evals.runs.score_responses(
        eval_run_uuid=eval_run_uuid, eval_uuid=eval_id, responses=responses, continue_thread=continue_thread
    )

    eval_run_uuid = eval_run.eval_run_uuid
    scored_responses = eval_run.responses if eval_run.responses else []
    current_prompts.clear()
    for response in scored_responses:
        thread_key = response.thread_uuid or response.prompt_uuid
        # A history that was filed under the prompt UUID is moved to the thread
        # UUID reported by the scored response, so later turns (whose prompts
        # carry that thread UUID) extend the same message list. Membership
        # tests are used instead of indexing to avoid creating empty entries
        # in the defaultdict.
        if thread_key not in conversation_histories and response.prompt_uuid in conversation_histories:
            conversation_histories[thread_key] = conversation_histories.pop(response.prompt_uuid)
        if response.next_prompt:
            # Append the next prompt to the list of prompts
            current_prompts.append(response.next_prompt)
    if not current_prompts:
        display("No more prompts to score.")
        break

In [None]:
# Display conversation histories for each prompt
# print() is used rather than display(): display() of a plain str shows its
# quoted repr, so the blank line separating threads would not be rendered.
for thread_uuid, history in conversation_histories.items():
    # "\n" replaces the original invalid "\T" escape sequence.
    print(f"\nThread UUID: {thread_uuid}")
    for msg in history:
        print(f"{msg['role']}: {msg['content'].strip()}")