In [None]:
from openai import OpenAI
from minsearch import AppendableIndex

In [2]:
# Shared API client used by every `responses.create` call in this notebook.
# No key is passed here, so credentials presumably come from the environment
# (e.g. OPENAI_API_KEY) — TODO confirm for your setup.
openai_client = OpenAI()

In [3]:
import requests 

docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'

# Bound the wait and fail fast on HTTP errors: without these a stalled
# connection hangs the cell, and an error page only surfaces later as a
# confusing JSON/KeyError.
docs_response = requests.get(docs_url, timeout=30)
docs_response.raise_for_status()
documents_raw = docs_response.json()

# The payload is a list of courses, each with its own list of documents.
# Flatten it into one list and tag every document with the course it came
# from, so the course can be used as a filter at search time.
documents = []

for course in documents_raw:
    course_name = course['course']

    for doc in course['documents']:
        doc['course'] = course_name
        documents.append(doc)

In [4]:
"""
The difference between Index and AppendableIndex is that we can add new records to the appendable one,
while just Index can't grow after it's created.
"""

"\nThe difference between Index and AppendableIndex is that we can add new records to the appendable one,\nwhile just Index can't grow after it's created.\n"

In [5]:
# Build the in-memory search index over the flattened FAQ documents.
# `question`, `text` and `section` are registered as text fields and `course`
# as a keyword field; the `search` helper in this notebook filters on `course`
# and boosts `question` / `section`.
index = AppendableIndex(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

# Last expression of the cell, so the value returned by `fit` is displayed.
index.fit(documents)

<minsearch.append.AppendableIndex at 0x76cb8efbc710>

In [6]:
def search(query):
    """Look up FAQ entries matching ``query`` in the module-level ``index``.

    The lookup is restricted to the ``data-engineering-zoomcamp`` course,
    weights the ``question`` field by 3.0 and the ``section`` field by 0.5,
    and asks the index for 5 results.

    Args:
        query: Free-text search string.

    Returns:
        Whatever ``index.search`` returns for these settings.
    """
    return index.search(
        query=query,
        filter_dict={'course': 'data-engineering-zoomcamp'},
        boost_dict={'question': 3.0, 'section': 0.5},
        num_results=5,
    )

In [7]:
# Function calling: schema describing the `search` helper to the model.
# It is handed to the API through the `tools` argument of `responses.create`;
# when the model wants a lookup it replies with a `function_call` item whose
# `name` is "search" and whose `arguments` follow the schema below.
search_tool = {
    "type": "function",
    "name": "search",
    "description": "Search the FAQ database",
    "parameters": {
        "type": "object",
        "properties": {
            # Mirrors the single `query` parameter of `search(query)`.
            "query": {
                "type": "string",
                "description": "Search query text to look up in the course FAQ."
            }
        },
        "required": ["query"],
        # Disallow any argument other than `query`.
        "additionalProperties": False
    }
}

In [8]:
# Developer-role prompt that frames the model as a course teaching assistant.
instructions = """
You're a course teaching assistant. 
You're given a question from a course student and your task is to answer it.
""".strip()

# Tools the model is allowed to call; only the FAQ search is exposed.
tools = [search_tool]

question = 'I just discovered the course. Can I still join it?'

# Conversation history sent as `input`: the instructions plus the student's
# question. Later cells append further items to this same list.
chat_messages = [
    {"role": "developer", "content": instructions},
    {"role": "user", "content": question}
]

# First request: the model may either answer directly or ask for a `search`
# call via a function_call item in `response.output`.
response = openai_client.responses.create(
    model='gpt-4o-mini',
    input=chat_messages,
    tools=tools
)


In [9]:
# Inspect the first item the model produced for this request.
response.output[0]

ResponseFunctionToolCall(arguments='{"query":"Can I still join the course?"}', call_id='call_Jblr5chCO3On87ugZE68D5L2', name='search', type='function_call', id='fc_073ea002ad2b609a0068f7d73cd0bc819faefe5279023f6dad', status='completed')

In [10]:
# Processing function calls

In [11]:
# Try the search helper by hand with a hand-written query to see what it returns.
search_results = search(query="join course late")

In [12]:
# Take the first output item as the model's function call and record it in the
# history; the matching function_call_output is appended after it.
# NOTE(review): assumes output[0] is a function_call — true for the run shown
# above, but the model could also answer with a plain message.
# NOTE(review): re-running this cell appends the call again.
call = response.output[0]
chat_messages.append(call)

In [13]:
import json 

# Run the tool with the arguments the model actually requested (a JSON string
# on the function call) rather than a hand-written query, so the output we
# report back answers the request identified by `call_id`.
call_arguments = json.loads(call.arguments)
search_results = search(**call_arguments)
search_results_json = json.dumps(search_results)

# A tool result is sent back as a `function_call_output` item; `call_id` ties
# it to the function call already stored in `chat_messages`.
call_output = {
    "type": "function_call_output",
    "call_id": call.call_id,
    "output": search_results_json,
}

chat_messages.append(call_output)

In [14]:
#send results back to the agent 

In [15]:
# Second request: `chat_messages` now also contains the model's function call
# and our function_call_output, so the model can answer from the search results.
response = openai_client.responses.create(
    model='gpt-4o-mini',
    input=chat_messages,
    tools=tools
)

In [16]:
# Show the text of the model's reply to the second request.
response.output_text

"Yes, you can still join the course even if you've discovered it after the start date. You don't need to register to be eligible to submit homework. However, be mindful of deadlines for the final projects, so it's a good idea to keep up with the schedule. If you have any further questions, feel free to ask!"

In [17]:
#Adding explanations

In [18]:
# Same teaching-assistant prompt as before, extended with one extra line that
# asks the model to explain its reasoning before it requests a lookup.
instructions = """
You're a course teaching assistant. 
You're given a question from a course student and your task is to answer it.

If you want to look up the answer, explain why before making the call
""".strip()

In [19]:
# Repeat the first request from a fresh history, now using the extended
# `instructions`, to see the explanation the model gives before calling a tool.
tools = [search_tool]

question = 'I just discovered the course. Can I still join it?'

# Rebuilt from scratch so items appended during the earlier walkthrough are
# not carried over.
chat_messages = [
    {"role": "developer", "content": instructions},
    {"role": "user", "content": question}
]

response = openai_client.responses.create(
    model='gpt-4o-mini',
    input=chat_messages,
    tools=tools
)

In [20]:
# Show the explanation text from the first output item.
# NOTE(review): assumes output[0] is a message whose first content part has
# `.text`; this fails if the model returns the function call first.
response.output[0].content[0].text

"To provide an accurate answer to your question, I'll check the FAQ database for any information regarding late enrollment or joining the course after it has already started. This will ensure you receive the most relevant details about the enrollment process."

In [21]:
#Agentic Loop 

In [22]:
def make_call(call):
    """Run the function requested by the model and package its result.

    Args:
        call: A function-call item exposing ``name``, ``arguments`` (a JSON
            string of keyword arguments) and ``call_id``.

    Returns:
        A ``function_call_output`` dict carrying the same ``call_id`` and the
        JSON-encoded result, ready to be appended to the chat history.

    Raises:
        ValueError: If ``call.name`` is not a known function.
    """
    tool_name = call.name
    kwargs = json.loads(call.arguments)

    # Only `search` is wired up so far; additional tools (for example an
    # `add_entry` function) would be dispatched here as well.
    if tool_name != 'search':
        raise ValueError(f'unknown function {tool_name}')

    payload = json.dumps(search(**kwargs))

    return {
        "type": "function_call_output",
        "call_id": call.call_id,
        "output": payload,
    }


In [23]:
question = 'I just discovered the course. Can I still join it?'

# Start from a fresh history so nothing from earlier cells leaks into the loop.
chat_messages = [
    {"role": "developer", "content": instructions},
    {"role": "user", "content": question}
]

# Agentic loop: keep querying the model, executing every function call it
# makes, until a response arrives that contains no function calls.
while True:
    response = openai_client.responses.create(
        model='gpt-4o-mini',
        input=chat_messages,
        tools=tools
    )

    has_function_calls = False

    # Add response to chat history for LLM's "memory". This must happen before
    # the tool outputs are appended so each output follows its function call.
    chat_messages.extend(response.output)

    for entry in response.output:
        if entry.type == "function_call":
            print('Function call:')
            print(entry)
            result = make_call(entry)
            print('   ', 'Output:')
            print('   ', result['output'])
            chat_messages.append(result)
            has_function_calls = True
            print()

        elif entry.type == "message":
            print('Assistant:')
            # A message can hold several content parts, and a part may be a
            # refusal (which has no `.text`), so do not assume that
            # `content[0].text` exists.
            for part in entry.content:
                if part.type == "output_text":
                    print(part.text)
                elif part.type == "refusal":
                    print(part.refusal)
            print()

    if not has_function_calls:
        break

Assistant:
To provide you with the most accurate answer regarding course enrollment, I want to check the specific policies or FAQ related to joining the course, especially for latecomers. 

Let me look that up for you.

Function call:
ResponseFunctionToolCall(arguments='{"query":"Can I still join the course?"}', call_id='call_geSbwuBBAc0b9KrH8IX5Ao0B', name='search', type='function_call', id='fc_00d96629746b2dbf0068f7d7486bf0819ca23eff0a9aaaa579', status='completed')
    Output:
    [{"text": "Yes, even if you don't register, you're still eligible to submit the homeworks.\nBe aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.", "section": "General course-related questions", "question": "Course - Can I still join the course after the start date?", "course": "data-engineering-zoomcamp"}, {"text": "No, you can only get a certificate if you finish the course with a \u201clive\u201d cohort. We don't award certificat