In [3]:
# Pin minsearch to 0.0.9 in the project environment (the recorded output shows it replacing 0.0.7).
!uv add minsearch==0.0.9

[2mResolved [1m177 packages[0m [2min 4.59s[0m[0m
[2mPrepared [1m1 package[0m [2min 252ms[0m[0m
[2mUninstalled [1m1 package[0m [2min 33ms[0m[0m
[2mInstalled [1m1 package[0m [2min 53ms[0m[0m
 [31m-[39m [1mminsearch[0m[2m==0.0.7[0m
 [32m+[39m [1mminsearch[0m[2m==0.0.9[0m


In [1]:
from gitsource import GithubRepositoryDataReader
from minsearch import AppendableIndex

# Read every Markdown / MDX file from the evidentlyai/docs GitHub repository.
reader = GithubRepositoryDataReader(
    repo_name="docs",
    repo_owner="evidentlyai",
    allowed_extensions={"md", "mdx"},
)
files = reader.read()

# Parse each raw file; later cells read 'filename' and 'content' from the parsed records.
parsed_docs = [raw_file.parse() for raw_file in files]

# Index title / description / content as text and the filename as a keyword field.
index = AppendableIndex(
    keyword_fields=["filename"],
    text_fields=["title", "description", "content"],
)
index.fit(parsed_docs)

<minsearch.append.AppendableIndex at 0x203d337af90>

In [2]:
# Sample user question reused by the search, highlighting and agent cells below.
query = 'how do I use evidently to monitor my machine learning models?'

In [3]:
# Plain index lookup: no num_results is passed here, so the library default applies.
search_results = index.search(query)

In [21]:
from minsearch import Highlighter, Tokenizer
from minsearch.tokenizer import DEFAULT_ENGLISH_STOP_WORDS

In [24]:
# The knowledge base is entirely about Evidently, so the product name carries no
# signal for highlighting; treat it as an extra stop word.
stopwords = DEFAULT_ENGLISH_STOP_WORDS | {'evidently'}

# Highlight only the 'content' field: at most 3 snippets per document, snippet_size=50.
highlighter = Highlighter(
    tokenizer=Tokenizer(stop_words=stopwords, stemmer='snowball'),
    highlight_fields=['content'],
    snippet_size=50,
    max_matches=3,
)

In [25]:
# Display the results with each 'content' field replaced by its matched snippets
# and a total match count (see the recorded output below).
highlighter.highlight(query, search_results)

[{'title': 'Use HuggingFace models',
  'description': 'How to use models from HuggingFace as evaluators.',
  'content': {'matches': ['...downloading and using ML **models** from HuggingFace. This l...',
    '...criteria from the source **model**, e.g. classify texts by ...',
    '.... There are:\n\n* Ready-to-**use** descriptors that wrap a ...'],
   'total_matches': 33},
  'filename': 'metrics/customize_hf_descriptor.mdx'},
 {'title': 'What is Evidently?',
  'description': 'Welcome to the Evidently documentation.',
  'mode': 'wide',
  'content': {'matches': ['...elps evaluate, test, and **monitor** data and AI-powered syst...',
    '...ucts: from predictive ML **models** to complex LLM-powered s...',
    '.../Card>\n  <Card title="ML **monitoring**" icon="table" href="/qui...'],
   'total_matches': 4},
  'filename': 'introduction.mdx'},
 {'title': 'Why Evidently?',
  'description': 'Why choose Evidently.',
  'content': {'matches': ['...ucts: whether it’s an ML **model**, an LLM app, o

In [26]:
# Lookup table from document filename to its full content, used for whole-file retrieval.
file_index = dict(
    (parsed_doc['filename'], parsed_doc['content'])
    for parsed_doc in parsed_docs
)

In [27]:
# Spot-check: fetch one document's full content by its filename key.
# NOTE(review): this displays the entire file; consider slicing (e.g. [:500]) to keep the output short.
file_index['docs/setup/self-hosting.mdx']



In [38]:
from typing import Any, Dict, List


class SearchTools:
    """
    Provides search and file retrieval utilities over an indexed data store.

    The public methods (`search`, `get_file`) keep single-argument signatures;
    the number of search hits is configured once on the instance rather than
    per call.
    """

    def __init__(
        self,
        index: Any,
        highlighter: Any,
        file_index: Dict[str, str],
        num_results: int = 5,
    ) -> None:
        """
        Initialize the SearchTools instance.

        Args:
            index: Searchable index providing a `search(query=..., num_results=...)` method.
            highlighter: Highlighter providing a `highlight(query, results)` method.
            file_index (Dict[str, str]): Mapping of filenames to file contents.
            num_results (int): Maximum number of hits requested from the index
                for each search. Defaults to 5.

        Raises:
            ValueError: If `num_results` is smaller than 1.
        """
        if num_results < 1:
            raise ValueError(f"num_results must be >= 1, got {num_results}")

        self.index = index
        self.highlighter = highlighter
        self.file_index = file_index
        self.num_results = num_results

    def search(self, query: str) -> List[Dict[str, Any]]:
        """
        Search the index for results matching a query and highlight them.

        Args:
            query (str): The search query to look up in the index.

        Returns:
            List[Dict[str, Any]]: A list of highlighted search result objects.
        """
        search_results = self.index.search(
            query=query,
            num_results=self.num_results,
        )

        return self.highlighter.highlight(query, search_results)

    def get_file(self, filename: str) -> str:
        """
        Retrieve a file's contents by filename.

        Args:
            filename (str): The filename of the file to retrieve.

        Returns:
            str: The file contents if found, otherwise an error message.
        """
        if filename in self.file_index:
            return self.file_index[filename]
        # A missing file is reported as a message string rather than an
        # exception, so the caller always receives text.
        return f"file {filename} does not exist"


In [35]:
# Bundle the index, highlighter and filename lookup into the object whose
# methods are registered as agent tools.
search_tools = SearchTools(
    index=index,
    highlighter=highlighter,
    file_index=file_index,
)

In [45]:
# Developer prompt for the agent: a three-step search -> retrieve -> answer
# procedure restricted to the documentation knowledge base. The text is sent
# verbatim (passed as developer_prompt when the runner is built).
instructions = """
You're a documentation assistant.

Answer the user question using only the documentation knowledge base.

Make 3 iterations:

1) First iteration:
   - Perform one search using the search tool to identify potentially relevant documents.
   - Explain (in 2–3 sentences) why this search query is appropriate for the user question.

2) Second iteration:
   - Analyze the results from the previous search.
   - Based on the filenames or documents returned, perform:
       - Up to 2 additional search queries to refine or expand coverage, and
       - One or more get_file calls to retrieve the full content of the most relevant documents.
   - For each search or get_file call, explain (in 2–3 sentences) why this action is necessary and how it helps answer the question.

3) Third iteration:
   - Analyze the retrieved document contents from get_file.
   - Synthesize the information from these documents into a final answer to the user.

IMPORTANT:
- At every step, explicitly explain your reasoning for each search query or file retrieval.
- Use only facts found in the documentation knowledge base.
- Do not introduce outside knowledge or assumptions.
- If the answer cannot be found in the retrieved documents, clearly inform the user.

Additional notes:
- The knowledge base is entirely about Evidently, so you do not need to include the word "evidently" in search queries.
- Prefer retrieving and analyzing full documents (via get_file) before producing the final answer.
"""

In [46]:
from toyaikit.tools import Tools

# Register the methods of search_tools as agent tools and display the generated schemas.
# NOTE(review): the recorded output shows 'No description provided.' for both tools even
# though SearchTools.search / get_file have docstrings. The execution counts suggest
# search_tools was created (In [35]) before the class cell was last run (In [38]), so the
# schema may come from a stale instance — restart the kernel, run all cells and confirm.
agent_tools = Tools()
agent_tools.add_tools(search_tools)
agent_tools.get_tools()

[{'type': 'function',
  'name': 'get_file',
  'description': 'No description provided.',
  'parameters': {'type': 'object',
   'properties': {'filename': {'type': 'string',
     'description': 'filename parameter'}},
   'required': ['filename'],
   'additionalProperties': False}},
 {'type': 'function',
  'name': 'search',
  'description': 'No description provided.',
  'parameters': {'type': 'object',
   'properties': {'query': {'type': 'string',
     'description': 'query parameter'}},
   'required': ['query'],
   'additionalProperties': False}}]

In [47]:
from openai import OpenAI
from toyaikit.llm import OpenAIClient

from toyaikit.chat.runners import OpenAIResponsesRunner
from toyaikit.chat.interface import IPythonChatInterface
from toyaikit.chat.runners import DisplayingRunnerCallback

In [48]:
# Model client; OpenAI() is built without arguments, so no key is hard-coded here
# (presumably it is read from the environment — confirm OPENAI_API_KEY is set).
llm_client = OpenAIClient(
    client=OpenAI(),
    model="gpt-4o-mini",
)

# The runner callback is wired to the same IPython chat interface the agent uses.
chat_interface = IPythonChatInterface()
runner_callback = DisplayingRunnerCallback(chat_interface=chat_interface)

In [49]:
# Assemble the agent runner from the model client, the developer prompt,
# the registered tools and the chat interface.
agent = OpenAIResponsesRunner(
    llm_client=llm_client,
    developer_prompt=instructions,
    tools=agent_tools,
    chat_interface=chat_interface,
)

In [51]:
# Run the agent on the sample question with an explicit request for code appended.
# The callback is passed so progress is shown while the loop runs — presumably the
# source of the "-> Response received" lines in the recorded output.
result = agent.loop(
    query + ' show me the code',
    callback=runner_callback
)

-> Response received


-> Response received


-> Response received


-> Response received
