In [5]:
from openai import OpenAI
# Shared API client used by the agent loop below. No arguments are passed, so
# credentials must come from the client's defaults — presumably the
# OPENAI_API_KEY environment variable; TODO confirm.
openai_client = OpenAI()

In [6]:
import requests 

docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'

# Fail fast: the timeout keeps the cell from hanging on a stalled connection,
# and raise_for_status() surfaces HTTP errors here instead of as a confusing
# JSON decode failure on the next line.
docs_response = requests.get(docs_url, timeout=30)
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Flatten the per-course groups into a single list, tagging every document
# with the name of the course it belongs to. Each document is copied so that
# `documents_raw` is left untouched and the cell can be re-run safely.
documents = [
    {**doc, 'course': course['course']}
    for course in documents_raw
    for doc in course['documents']
]

In [7]:
from minsearch import AppendableIndex

# Build the search index over the FAQ documents. `question`, `text` and
# `section` are registered as text fields and `course` as a keyword field;
# search() below filters on `course` and boosts `question` / `section`.
index = AppendableIndex(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

# Kept as the last expression of the cell: the value returned by fit()
# (an AppendableIndex) is echoed as the cell output.
index.fit(documents)

<minsearch.append.AppendableIndex at 0x7433336b64e0>

In [24]:
import json

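Traditional RAG runs a fixed pipeline (search, build a prompt, call the LLM once); the model never decides whether or when to search: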

```python
def rag(query):
    search_results = search(query)
    prompt = build_prompt(query, search_results)
    answer = llm(prompt)
    return answer
```

In [4]:
def search(query):
    """Query the FAQ index for `query`, limited to a single course.

    The lookup is delegated to the module-level `index`. Matches are filtered
    to the 'data-engineering-zoomcamp' course, the `question` field is boosted
    by 3.0 and the `section` field by 0.5, and `num_results=5` is requested.

    Args:
        query: Search text.

    Returns:
        Whatever `index.search` returns for these settings.
    """
    return index.search(
        query=query,
        filter_dict={'course': 'data-engineering-zoomcamp'},
        boost_dict={'question': 3.0, 'section': 0.5},
        num_results=5,
    )

In [38]:
def make_call(call):
    """Execute a function call requested by the model and wrap its result.

    Args:
        call: A function-call item exposing `name` (tool name), `arguments`
            (JSON-encoded keyword arguments) and `call_id`.

    Returns:
        A `function_call_output` dict carrying the same `call_id` and the
        JSON-serialized result, ready to be appended to the chat history.

    Raises:
        ValueError: If `call.name` is not a known tool.
    """
    f_name = call.name

    # Reject unknown tools before touching the arguments. ValueError is a real
    # built-in; the previous `Error` name was undefined and raised NameError.
    if f_name != 'search':
        raise ValueError(f'unknown function {f_name}')

    args = json.loads(call.arguments)
    result = search(**args)

    return {
        "type": "function_call_output",
        "call_id": call.call_id,
        "output": json.dumps(result),
    }

In [8]:
# Tool definition advertised to the model. `name` must match the function name
# that make_call() dispatches on, and the single `query` property mirrors the
# parameter of search().
search_tool = {
    "type": "function",
    "name": "search",
    "description": "Search the FAQ database",
    # JSON Schema describing the arguments the model may supply.
    "parameters": {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "Search query text to look up in the course FAQ."
            }
        },
        "required": ["query"],
        # No arguments other than `query` are declared.
        "additionalProperties": False
    }
}

In [13]:
# Tool list passed to responses.create() in the agent loop.
tools = [search_tool]

In [32]:
# Developer prompt sent as the first chat message. .strip() removes the
# leading/trailing newlines introduced by the triple-quoted layout.
instructions = """
You're a course teaching assistant. 
You're given a question from a course student and your task is to answer it.

If you want to look up the answer, explain why before making the call
""".strip()

In [46]:
# Student question used for this run of the agent.
question = 'I just discovered the course. Can I still join it?'

# Initial conversation: the developer instructions followed by the student's
# question. The agent loop appends model output and tool results to this list,
# so re-run this cell before re-running the loop to start from a clean history.
chat_messages = [
    {"role": "developer", "content": instructions},
    {"role": "user", "content": question}
]

In [47]:
# The "agent" loop: call the model, execute any function calls it requests,
# feed the results back, and repeat until it answers without calling a tool.
MAX_AGENT_TURNS = 10  # safety cap so a model that keeps calling tools cannot loop forever

for _turn in range(MAX_AGENT_TURNS):
    response = openai_client.responses.create(
        model='gpt-4o-mini',
        input=chat_messages,
        tools=tools
    )

    # Keep the model's own output items (messages and function calls) in the
    # history so the function outputs appended below follow their calls.
    chat_messages.extend(response.output)

    has_function_calls = False

    for item in response.output:
        if item.type == 'message':
            print('Assistant:')
            # A message may hold several content parts. Per the Responses API,
            # text parts expose `.text` and refusal parts expose `.refusal`.
            for part in item.content:
                print(getattr(part, 'text', None) or getattr(part, 'refusal', ''))
            print()

        elif item.type == 'function_call':
            has_function_calls = True
            print('function call')
            print('    ', item.name, item.arguments)
            print()

            function_call_output = make_call(item)
            chat_messages.append(function_call_output)

    # No tool was requested this turn, so the printed message is the final answer.
    if not has_function_calls:
        break
else:
    # Runs only when the turn budget is exhausted without reaching `break`.
    print(f'Stopped after {MAX_AGENT_TURNS} turns without a final answer.')

Assistant:
To provide an accurate response, I'll check the FAQ database for information regarding enrollment deadlines or policies for new students who want to join the course. This will help ensure you receive the most relevant and specific information. I'll look that up now.

function call
     search {"query":"join course enrollment deadlines"}

Assistant:
Yes, you can still join the course, even if it's already started. You have the option to submit homeworks and continue engaging with course materials. However, keep in mind that there will be deadlines for final projects, so it's best not to leave everything until the last minute.

For more information on specific deadlines and any updates, you can check the course's official documents or announcements.

