In [1]:
import requests 

# Download the course FAQ dump and flatten it into a single list of documents.
docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'
# requests applies no timeout by default; set one so a stalled connection
# cannot hang the cell forever.
docs_response = requests.get(docs_url, timeout=30)
# Fail fast on an HTTP error instead of trying to parse an error page as JSON.
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Every raw entry groups its documents under a course name. Copy that name
# onto each document so it is available as a field on the flat records.
# New dicts are built so that documents_raw itself is left unmodified.
documents = [
    {**doc, 'course': course['course']}
    for course in documents_raw
    for doc in course['documents']
]

In [2]:
from minsearch import AppendableIndex

# Build the FAQ index: "course" is registered as a keyword field, the
# remaining fields as text fields.
index = AppendableIndex(
    keyword_fields=["course"],
    text_fields=["question", "text", "section"],
)

# Index the flattened documents; the bare expression echoes the fitted index.
index.fit(documents)

<minsearch.append.AppendableIndex at 0x7a499b38f110>

In [3]:
def search(query):
    """Search the FAQ index for entries matching ``query``.

    The lookup is restricted to the 'data-engineering-zoomcamp' course,
    boosts the 'question' field (3.0) over 'section' (0.5), and asks the
    index for 5 results.

    Args:
        query: Search text to look up.

    Returns:
        Whatever ``index.search`` returns for these settings.
    """
    return index.search(
        query=query,
        filter_dict={'course': 'data-engineering-zoomcamp'},
        boost_dict={'question': 3.0, 'section': 0.5},
        num_results=5,
    )

In [4]:
# Function-tool schema describing `search`: it takes exactly one required
# string argument, "query", and no other properties are allowed.
search_tool = dict(
    type="function",
    name="search",
    description="Search the FAQ database",
    parameters=dict(
        type="object",
        properties=dict(
            query=dict(
                type="string",
                description="Search query text to look up in the course FAQ.",
            ),
        ),
        required=["query"],
        additionalProperties=False,
    ),
)

In [5]:
# Developer prompt for the agent. Written as explicit per-line pieces so the
# exact text (including the blank line between paragraphs) is visible.
instructions = (
    "You're a course teaching assistant. \n"
    "You're given a question from a course student and your task is to answer it.\n"
    "\n"
    "If you want to look up the answer, explain why before making the call"
)

In [6]:
# Sample student question, passed as the prompt to runner.loop() in a later cell.
question = "I just discovered the course. Can I still join it?"

In [8]:
from toyaikit.llm import OpenAIClient
from toyaikit.chat import IPythonChatInterface
from toyaikit.chat.runners import OpenAIResponsesRunner
from toyaikit.chat.runners import DisplayingRunnerCallback
from toyaikit.tools import Tools

In [9]:
# Tool registry handed to the runner; each entry pairs a Python callable
# with the schema that describes it.
agent_tools = Tools()
agent_tools.add_tool(search, search_tool)

In [10]:
# Chat front-end used by the runner (and by the display callback).
chat_interface = IPythonChatInterface()

# Assemble the agent: LLM client, developer prompt, registered tools, and
# the chat interface it talks through.
runner = OpenAIResponsesRunner(
    llm_client=OpenAIClient(),
    developer_prompt=instructions,
    tools=agent_tools,
    chat_interface=chat_interface,
)

In [11]:
# Callback bound to the same chat interface; passed to runner.loop() in a
# separate cell.
callback = DisplayingRunnerCallback(chat_interface)

In [13]:
# Start the interactive chat; in the recorded session, entering "stop" ends it.
# The call is the last expression, so the returned LoopResult is echoed as output.
runner.run()

You: i just discover the course , can i join now


You: stop


Chat ended.


LoopResult(new_messages=[{'role': 'developer', 'content': "You're a course teaching assistant. \nYou're given a question from a course student and your task is to answer it.\n\nIf you want to look up the answer, explain why before making the call"}, {'role': 'user', 'content': 'i just discover the course , can i join now'}, ResponseOutputMessage(id='msg_0fd40dd1922534500068f7ed57fc9c8198a3660486c9a015ca', content=[ResponseOutputText(annotations=[], text="This seems like a common question regarding course enrollment. To provide you with the most accurate information, I'll check the course FAQ to see if there are any specific guidelines or deadlines related to joining the course at this time. Let's look that up!", type='output_text', logprobs=[])], role='assistant', status='completed', type='message'), ResponseFunctionToolCall(arguments='{"query":"enrollment joining course late registration"}', call_id='call_arJHnkrXrrjgTsyM3LJKqzaS', name='search', type='function_call', id='fc_0fd40dd19

In [12]:
# Run the agent loop on the predefined question, reporting through `callback`.
results = runner.loop(prompt=question, callback=callback)

In [14]:
def add_entry(question, answer):
    """Append a new FAQ record to the index.

    The record is stored under the 'user added' section of the
    'data-engineering-zoomcamp' course.

    Args:
        question: Text stored in the record's 'question' field.
        answer: Text stored in the record's 'text' field.
    """
    index.append({
        'question': question,
        'text': answer,
        'section': 'user added',
        'course': 'data-engineering-zoomcamp',
    })

In [15]:
# Function-tool schema describing `add_entry`: two required string
# arguments, "question" and "answer", with no other properties allowed.
add_entry_tool = dict(
    type="function",
    name="add_entry",
    description="Add an entry to the FAQ database",
    parameters=dict(
        type="object",
        properties=dict(
            question=dict(
                type="string",
                description="The question to be added to the FAQ database",
            ),
            answer=dict(
                type="string",
                description="The answer to the question",
            ),
        ),
        required=["question", "answer"],
        additionalProperties=False,
    ),
)

In [16]:
# Register the write tool on the same registry object that was passed to the runner.
agent_tools.add_tool(add_entry, add_entry_tool)

In [17]:
# Start another interactive chat, now that add_entry is registered alongside search.
# As the last expression, the returned LoopResult is echoed as output.
runner.run()

You: how do we run kafka


You: how do i do well in module 1


You: save it in back to faq


You: stop


Chat ended.


LoopResult(new_messages=[{'role': 'developer', 'content': "You're a course teaching assistant. \nYou're given a question from a course student and your task is to answer it.\n\nIf you want to look up the answer, explain why before making the call"}, {'role': 'user', 'content': 'how do we run kafka'}, ResponseOutputMessage(id='msg_069b00a90a1648c20068f7f1013878819aa7c3496402448493', content=[ResponseOutputText(annotations=[], text="To provide a comprehensive answer on how to run Kafka, I'll look up specific instructions related to setting up and running Kafka, as the process can vary based on the environment (like local or cloud) and use case. \n\nLet me check the FAQ database for relevant information on this topic.", type='output_text', logprobs=[])], role='assistant', status='completed', type='message'), ResponseFunctionToolCall(arguments='{"query":"run kafka"}', call_id='call_eYX1fOVjlsW53uHsNFt1ZiGk', name='search', type='function_call', id='fc_069b00a90a1648c20068f7f1033454819abce8

In [18]:
# Inspect the last document held by the index, to check the entry the agent
# saved via add_entry.
index.docs[-1]

{'question': 'how do I do well in module 1',
 'text': "To excel in Module 1, follow these strategies: 1. **Understand the Basics:** Solidify your grasp of Docker and Terraform by familiarizing yourself with their commands and functionalities. 2. **Set Up Your Environment:** Ensure correct software versions and install necessary Python packages like psycopg2 and SQLAlchemy using: `pip install psycopg2 sqlalchemy`. 3. **Common Errors:** Be aware of common errors like ModuleNotFoundError and resolve them by checking for required modules. Verify your connection strings for database-related tasks. 4. **Hands-on Practice:** Engage actively with module exercises for practical understanding. 5. **Utilize Resources:** Refer to course materials, lecture notes, and additional recommended resources. 6. **Participation and Collaboration:** Engage in discussions and collaborate with peers for a better understanding of challenges. 7. **Stay Organized:** Keep your workspace tidy and document working c

In [19]:
# Show the tool schemas currently registered (search and add_entry).
agent_tools.get_tools()

[{'type': 'function',
  'name': 'search',
  'description': 'Search the FAQ database',
  'parameters': {'type': 'object',
   'properties': {'query': {'type': 'string',
     'description': 'Search query text to look up in the course FAQ.'}},
   'required': ['query'],
   'additionalProperties': False}},
 {'type': 'function',
  'name': 'add_entry',
  'description': 'Add an entry to the FAQ database',
  'parameters': {'type': 'object',
   'properties': {'question': {'type': 'string',
     'description': 'The question to be added to the FAQ database'},
    'answer': {'type': 'string', 'description': 'The answer to the question'}},
   'required': ['question', 'answer'],
   'additionalProperties': False}}]