In [1]:
import requests 

docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'

# Bound the request so a stalled connection cannot hang the notebook, and fail
# loudly on an HTTP error instead of letting `.json()` choke on an error page.
docs_response = requests.get(docs_url, timeout=30)
docs_response.raise_for_status()
documents_raw = docs_response.json()

# The payload is a list of {'course': ..., 'documents': [...]} groups.
# Flatten it into one list, tagging every document with its course name.
# Each document is copied so `documents_raw` is left untouched and re-running
# this cell always yields the same result.
documents = [
    {**doc, 'course': course['course']}
    for course in documents_raw
    for doc in course['documents']
]

In [2]:
from minsearch import AppendableIndex

# Build the FAQ index over the flattened documents. `question`, `text` and
# `section` are registered as text fields; `course` is registered as a keyword
# field, which is what `filter_dict` in the search calls below matches on.
index = AppendableIndex(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

# `fit` is the last expression of the cell, so its return value (the index
# object itself, per the repr in the output) is what gets displayed.
index.fit(documents)

<minsearch.append.AppendableIndex at 0x79c13ea358e0>

In [3]:
def search(query):
    """Look up `query` in the FAQ index and return the matching documents.

    Only documents of the 'data-engineering-zoomcamp' course are considered,
    and at most five results are requested. The `question` field is boosted
    (3.0) and the `section` field is down-weighted (0.5).
    """
    return index.search(
        query=query,
        filter_dict={'course': 'data-engineering-zoomcamp'},
        boost_dict={'question': 3.0, 'section': 0.5},
        num_results=5,
    )

In [4]:
# JSON schema of the single argument accepted by `search`.
search_query_schema = {
    "type": "string",
    "description": "Search query text to look up in the course FAQ.",
}

# Parameter object: exactly one required property, nothing else allowed.
search_parameters_schema = {
    "type": "object",
    "properties": {"query": search_query_schema},
    "required": ["query"],
    "additionalProperties": False,
}

# Function-tool definition describing `search` to the LLM.
search_tool = {
    "type": "function",
    "name": "search",
    "description": "Search the FAQ database",
    "parameters": search_parameters_schema,
}

In [5]:
# Developer prompt for the agent; it is passed to the runner below as
# `developer_prompt`. `.strip()` removes the leading/trailing newlines that the
# triple-quoted layout introduces.
instructions = """
You're a course teaching assistant. 
You're given a question from a course student and your task is to answer it.

If you want to look up the answer, explain why before making the call
""".strip()

In [6]:
# Sample student question; used as the prompt of the one-shot `runner.loop` call.
question = 'I just discovered the course. Can I still join it?'

In [7]:
from toyaikit.llm import OpenAIClient
from toyaikit.chat import IPythonChatInterface
from toyaikit.chat.runners import OpenAIResponsesRunner
from toyaikit.chat.runners import DisplayingRunnerCallback
from toyaikit.tools import Tools

In [9]:
# Tool registry that is handed to the runner. `search` is registered together
# with its hand-written schema (`search_tool`).
agent_tools = Tools()
agent_tools.add_tool(search, search_tool)

In [10]:
# Chat front-end; the same object is also passed to the display callback.
chat_interface = IPythonChatInterface()

# Wire the agent together: tool registry, developer prompt, chat UI and LLM client.
# NOTE(review): OpenAIClient() is constructed without arguments - presumably it
# picks up credentials from the environment; confirm before running.
runner = OpenAIResponsesRunner(
    tools=agent_tools,
    developer_prompt=instructions,
    chat_interface=chat_interface,
    llm_client=OpenAIClient()
)

In [11]:
# Callback passed to `runner.loop`; it is built around the same chat interface
# as the runner. Presumably it renders intermediate steps there - confirm
# against the toyaikit documentation.
callback = DisplayingRunnerCallback(chat_interface)

In [12]:
# Run the agent once on the sample question (no interactive input). The
# returned object exposes a `.cost` attribute.
results = runner.loop(
    prompt=question,
    callback=callback
)

In [14]:
# Cost breakdown (input / output / total) reported for the `runner.loop` call.
results.cost

CostInfo(input_cost=0.00012764999999999999, output_cost=8.64e-05, total_cost=0.00021404999999999997)

In [16]:
# Start an interactive chat session; typing `stop` ends it (see the transcript
# in the output). The trailing `;` suppresses display of the return value.
runner.run();

You: I just discovered the course. Can I still join it?


You: stop


Chat ended.


In [16]:
def add_entry(question, answer):
    """Append a question/answer pair to the FAQ index.

    The new document is filed under the 'user added' section of the
    'data-engineering-zoomcamp' course, i.e. the same course that `search`
    filters on. Returns None.
    """
    index.append({
        'question': question,
        'text': answer,
        'section': 'user added',
        'course': 'data-engineering-zoomcamp',
    })

In [17]:
# JSON schemas of the two string arguments accepted by `add_entry`.
add_entry_properties = {
    "question": {
        "type": "string",
        "description": "The question to be added to the FAQ database",
    },
    "answer": {
        "type": "string",
        "description": "The answer to the question",
    },
}

# Function-tool definition describing `add_entry` to the LLM: both arguments
# are required and no extra properties are allowed.
add_entry_tool = {
    "type": "function",
    "name": "add_entry",
    "description": "Add an entry to the FAQ database",
    "parameters": {
        "type": "object",
        "properties": add_entry_properties,
        "required": ["question", "answer"],
        "additionalProperties": False,
    },
}


In [18]:
# Register the write tool on the existing registry. The runner was constructed
# with this same `agent_tools` object, so it is reused as-is for the next chat.
agent_tools.add_tool(add_entry, add_entry_tool)

In [20]:
# Interactive session with both tools registered; in the transcript the user
# asks a question and then asks for the answer to be saved back to the FAQ.
runner.run();

You: how do I do well in module 1?


You: save it back to FAQ


You: stop


Chat ended.


In [21]:
# Inspect the last document held by the index to confirm that the agent's
# `add_entry` call actually stored the new entry.
index.docs[-1]

{'question': 'How do I do well in Module 1?',
 'text': "1. **Understand the Objectives**: Make sure you know the main topics and objectives covered in Module 1.\n\n2. **Engage with the Material**: Don't just passively read or watch videos. Take notes, summarize concepts in your own words, and explore related topics.\n\n3. **Practice Regularly**: If there are exercises or practical tasks, complete them diligently. This will reinforce your understanding.\n\n4. **Utilize Resources**: Use any supplementary materials provided, such as readings, videos, or forums, to deepen your understanding.\n\n5. **Collaborate with Peers**: Discuss concepts and problems with classmates. Collaboration can enhance learning.\n\n6. **Seek Help When Needed**: Don't hesitate to reach out to instructors or teaching assistants if you're struggling with any concepts.\n\n7. **Stay Organized**: Keep track of deadlines, assignments, and exam schedules to manage your time effectively.",
 'section': 'user added',
 'cou

In [30]:
from typing import Any, Dict, List

class SearchTools:
    """FAQ lookup and insert operations bound to one index and one course.

    The public methods are registered as agent tools, and their docstrings are
    emitted verbatim as the tool descriptions. Edit those docstrings with the
    LLM as the audience in mind.
    """

    def __init__(self, index, course: str = 'data-engineering-zoomcamp'):
        """
        Args:
            index: Object providing `search(query=..., filter_dict=...,
                boost_dict=..., num_results=...)` and `append(doc)`.
            course (str): Course that searches are restricted to and that new
                entries are filed under. Defaults to the course this notebook
                works with, so existing `SearchTools(index)` calls are
                unaffected.
        """
        self.index = index
        # Single source of truth for the course name, shared by both tools.
        self.course = course
    
    def search(self, query: str) -> List[Dict[str, Any]]:
        """
        Search the index for documents related to the given query.
    
        Args:
            query (str): The search query string.
    
        Returns:
            List[Dict[str, Any]]: A list of search results, where each result is a 
            dictionary containing document fields and their corresponding values.
        """
        # Matches in the question weigh more than matches in the section name.
        boost = {'question': 3.0, 'section': 0.5}
    
        results = self.index.search(
            query=query,
            filter_dict={'course': self.course},
            boost_dict=boost,
            num_results=5,
        )
    
        return results

    def add_entry(self, question: str, answer: str) -> None:
        """
        Add a new question-answer entry to the index.
    
        Args:
            question (str): The question text to be added.
            answer (str): The corresponding answer text.
    
        Returns:
            None
        """
        # Filed under the same course that `search` filters on, so the new
        # entry is visible to later searches.
        doc = {
            'question': question,
            'text': answer,
            'section': 'user added',
            'course': self.course
        }
        self.index.append(doc)


In [31]:
# Start over with an empty registry for the class-based tools.
# NOTE(review): this only rebinds the name `agent_tools`; the `runner` created
# earlier was given the previous registry object, so a runner must be built
# with this new one for the class-based tools to be used.
agent_tools = Tools()

In [32]:
# Register the methods of a SearchTools instance in one call. No hand-written
# schemas are supplied here, unlike the earlier `add_tool(func, schema)` calls.
search_tools = SearchTools(index)
agent_tools.add_tools(search_tools)

In [33]:
# Show the generated tool definitions: each description is the method's
# docstring, and the parameters mirror the method's arguments.
agent_tools.get_tools()

[{'type': 'function',
  'name': 'add_entry',
  'description': 'Add a new question-answer entry to the index.\n\nArgs:\n    question (str): The question text to be added.\n    answer (str): The corresponding answer text.\n\nReturns:\n    None',
  'parameters': {'type': 'object',
   'properties': {'question': {'type': 'string',
     'description': 'question parameter'},
    'answer': {'type': 'string', 'description': 'answer parameter'}},
   'required': ['question', 'answer'],
   'additionalProperties': False}},
 {'type': 'function',
  'name': 'search',
  'description': 'Search the index for documents related to the given query.\n\nArgs:\n    query (str): The search query string.\n\nReturns:\n    List[Dict[str, Any]]: A list of search results, where each result is a \n    dictionary containing document fields and their corresponding values.',
  'parameters': {'type': 'object',
   'properties': {'query': {'type': 'string',
     'description': 'query parameter'}},
   'required': ['query'],