In [None]:
from openai import OpenAI
from pydantic import BaseModel
import json
import numpy as np
from tqdm.auto import tqdm
from dotenv import load_dotenv
load_dotenv()

openai_client = OpenAI()
from typing import List, Dict, Any
import requests 

In [None]:


# Download the FAQ dataset: a list of courses, each carrying its own list of
# documents under the 'documents' key.
docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'
# A timeout keeps the cell from hanging forever on a stalled connection.
docs_response = requests.get(docs_url, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Flatten to one record per FAQ entry, tagging each with its course name.
# Copies are built instead of mutating `documents_raw`, so re-running the cell
# is idempotent and no loop variables leak into the notebook namespace.
documents = [
    {**doc, 'course': course['course']}
    for course in documents_raw
    for doc in course['documents']
]


from minsearch import AppendableIndex

# Build the search index over the flattened FAQ documents.
# "question", "text" and "section" are registered as text fields, while
# "course" is registered as a keyword field (it is used for filtering in the
# search calls further down).
index = AppendableIndex(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

# Being the last expression in the cell, the return value of fit() is displayed.
index.fit(documents)


<minsearch.append.AppendableIndex at 0x17295965190>

In [14]:
def search(query):
    """
    Look up FAQ entries for the data-engineering-zoomcamp course.

    Delegates to the module-level ``index``: results are filtered to that
    course, the ``question`` field is boosted by 3.0 and the ``section``
    field by 0.5, and 5 results are requested.

    Args:
        query: Search query text.

    Returns:
        Whatever ``index.search`` returns for the query.
    """
    field_weights = {'question': 3.0, 'section': 0.5}
    course_filter = {'course': 'data-engineering-zoomcamp'}

    return index.search(
        query=query,
        filter_dict=course_filter,
        boost_dict=field_weights,
        num_results=5,
    )

In [15]:
# Hand-written function-tool schema for `search`: a single required string
# argument named "query", with no additional properties allowed.
search_tool = dict(
    type="function",
    name="search",
    description="Search the FAQ database",
    parameters=dict(
        type="object",
        properties=dict(
            query=dict(
                type="string",
                description="Search query text to look up in the course FAQ.",
            ),
        ),
        required=["query"],
        additionalProperties=False,
    ),
)

### Implement the agentic loop using the toyaikit package

In [16]:
from toyaikit.llm import OpenAIClient
from toyaikit.chat import IPythonChatInterface
from toyaikit.chat.runners import OpenAIResponsesRunner
from toyaikit.chat.runners import DisplayingRunnerCallback
from toyaikit.tools import Tools

In [17]:
# Initialize the tools container and register the `search` function together
# with its hand-written schema (`search_tool`).
agent_tools = Tools()
agent_tools.add_tool(search, search_tool)

In [18]:
# Developer prompt for the agent. It is passed to the runner as
# `developer_prompt` and asks the model to explain itself before a lookup.
instructions = "\n".join([
    "You're a course teaching assistant. ",
    "You're given a question from a course student and your task is to answer it.",
    "",
    "If you want to look up the answer, explain why before making the call",
])

In [19]:
# Create the runner configuration: wire together the registered tools, the
# developer prompt, the notebook chat interface and an OpenAI client.
chat_interface = IPythonChatInterface()

# The runner receives the Tools object that `agent_tools` refers to right now.
runner = OpenAIResponsesRunner(
    tools=agent_tools,
    developer_prompt=instructions,
    chat_interface=chat_interface,
    llm_client=OpenAIClient()
)

In [20]:
# Let's run the loop method. We will also use DisplayingRunnerCallback for displaying the results:
callback = DisplayingRunnerCallback(chat_interface)

# Ask a single question; the returned message list is kept in `loop_result`
# so that it can be passed back in as `previous_messages` for a follow-up.
question = 'how do I install kafka'
loop_result = runner.loop(prompt=question, callback=callback)

In [26]:
# NOTE(review): this constructs a second callback object that is only
# displayed and never assigned; it looks like a leftover cell. The `callback`
# variable created earlier is the one passed to runner.loop().
DisplayingRunnerCallback(chat_interface)

<toyaikit.chat.runners.DisplayingRunnerCallback at 0x17298b90770>

In [27]:
# Inspect the message list returned by runner.loop(): the developer prompt,
# the user question, then the model's output items.
loop_result

[{'role': 'developer',
  'content': "You're a course teaching assistant. \nYou're given a question from a course student and your task is to answer it.\n\nIf you want to look up the answer, explain why before making the call"},
 {'role': 'user', 'content': 'how do I install kafka'},
 ResponseOutputMessage(id='msg_0375f5e1095daa8d0068f41edffa588196a287d15b64817a38', content=[ResponseOutputText(annotations=[], text="Installing Apache Kafka involves several steps, and it can vary based on your operating system. I can look up specific installation instructions to ensure you get the most accurate and up-to-date information. This will include details for different platforms, like Windows, Mac, and Linux.\n\nLet's find that information!", type='output_text', logprobs=[])], role='assistant', status='completed', type='message'),
 ResponseFunctionToolCall(arguments='{"query":"how to install Kafka"}', call_id='call_4CDH426BEp8whqUaiYTeFkjx', name='search', type='function_call', id='fc_0375f5e1095

In [28]:
# Follow-up question: the earlier conversation is passed as `previous_messages`
# so the model has context for what "it" refers to.
question = 'but how do I run it in python?'
loop_result = runner.loop(prompt=question, previous_messages=loop_result, callback=callback)
# Note that in this case, it decided not to make any further function calls since it had sufficient information.

In [29]:
# Run the runner's chat session (presumably interactive through the chat
# interface; the cell output shows "Chat ended." once it finishes).
# The trailing `;` suppresses the cell's return value.
runner.run();

Chat ended.


In [30]:
# add more tools to the agent
def add_entry(question, answer):
    """
    Append a question/answer pair to the module-level ``index``.

    The entry is stored with the section 'user added' and the course
    'data-engineering-zoomcamp'.

    Args:
        question: The question text, stored under the 'question' key.
        answer: The answer text, stored under the 'text' key.

    Returns:
        None.
    """
    index.append(dict(
        question=question,
        text=answer,
        section='user added',
        course='data-engineering-zoomcamp',
    ))


# Hand-written function-tool schema for `add_entry`: two required string
# arguments, "question" and "answer", with no additional properties allowed.
add_entry_tool = dict(
    type="function",
    name="add_entry",
    description="Add an entry to the FAQ database",
    parameters=dict(
        type="object",
        properties=dict(
            question=dict(
                type="string",
                description="The question to be added to the FAQ database",
            ),
            answer=dict(
                type="string",
                description="The answer to the question",
            ),
        ),
        required=["question", "answer"],
        additionalProperties=False,
    ),
)


In [31]:
# Register `add_entry` with its schema on the existing Tools object, i.e. the
# same object that was handed to the runner when it was created.
agent_tools.add_tool(add_entry, add_entry_tool)

In [32]:
# Run the chat again now that `add_entry` has been registered as a tool.
# The trailing `;` suppresses the cell's return value.
runner.run();

Chat ended.


## Putting Tools in One Class
Instead of writing the tool function definition schema by hand, use the docstrings and type hints.

In [37]:
class SearchTools:
    """
    FAQ search and insert operations bound to one index and one course.

    The docstrings of the public methods are left unchanged on purpose: the
    tool descriptions displayed in this notebook are derived from them.
    """

    def __init__(self, index, course: str = 'data-engineering-zoomcamp'):
        """
        Args:
            index: Object providing ``search(...)`` and ``append(doc)``.
            course (str): Course used to filter searches and to tag newly
                added entries. Defaults to the course used throughout
                this notebook.
        """
        self.index = index
        # A single source of truth for the course, shared by search and add_entry.
        self.course = course

    def search(self, query: str) -> List[Dict[str, Any]]:
        """
        Search the FAQ database for entries matching the given query.

        Args:
            query (str): Search query text to look up in the course FAQ.

        Returns:
            List[Dict[str, Any]]: A list of search result entries, each containing relevant metadata.
        """
        boost = {'question': 3.0, 'section': 0.5}

        results = self.index.search(
            query=query,
            filter_dict={'course': self.course},
            boost_dict=boost,
            num_results=5,
            output_ids=True
        )

        return results

    def add_entry(self, question: str, answer: str) -> None:
        """
        Add a new entry to the FAQ database.

        Args:
            question (str): The question to be added to the FAQ database.
            answer (str): The corresponding answer to the question.
        """
        doc = {
            'question': question,
            'text': answer,
            'section': 'user added',
            'course': self.course
        }
        self.index.append(doc)

In [38]:
# Note: `agent_tools` is rebound to a brand-new Tools object below. A runner
# that was constructed earlier was given the previous object — presumably it
# keeps using that one until it is rebuilt (TODO confirm against toyaikit).
search_tools = SearchTools(index) # create an instance of the SearchTools class
agent_tools = Tools() # create a Tools container
agent_tools.add_tools(search_tools) # add the tools from the SearchTools instance, no need to define the schema manually


In [41]:
# Show the generated tool schemas: each description is taken from the
# method's docstring and the parameter types from its type hints.
agent_tools.get_tools()

[{'type': 'function',
  'name': 'add_entry',
  'description': 'Add a new entry to the FAQ database.\n\nArgs:\n    question (str): The question to be added to the FAQ database.\n    answer (str): The corresponding answer to the question.',
  'parameters': {'type': 'object',
   'properties': {'question': {'type': 'string',
     'description': 'question parameter'},
    'answer': {'type': 'string', 'description': 'answer parameter'}},
   'required': ['question', 'answer'],
   'additionalProperties': False}},
 {'type': 'function',
  'name': 'search',
  'description': 'Search the FAQ database for entries matching the given query.\n\nArgs:\n    query (str): Search query text to look up in the course FAQ.\n\nReturns:\n    List[Dict[str, Any]]: A list of search result entries, each containing relevant metadata.',
  'parameters': {'type': 'object',
   'properties': {'query': {'type': 'string',
     'description': 'query parameter'}},
   'required': ['query'],
   'additionalProperties': False}}]

In [42]:
# `agent_tools` now refers to a new Tools object holding the SearchTools
# methods, whereas the existing `runner` was constructed with the earlier
# Tools object. Rebuild the runner so this chat uses the class-based tools.
runner = OpenAIResponsesRunner(
    tools=agent_tools,
    developer_prompt=instructions,
    chat_interface=chat_interface,
    llm_client=OpenAIClient()
)

# The trailing `;` suppresses the cell's return value.
runner.run();

Chat ended.
