In [1]:
import minsearch

In [2]:
import json

In [3]:
# Load the raw FAQ dump. JSON interchange files are UTF-8, so the encoding is
# pinned explicitly instead of relying on the platform's locale default
# (which is not UTF-8 on every OS and would garble or reject non-ASCII text).
with open('documents.json', 'rt', encoding='utf-8') as f_in:
    docs_raw = json.load(f_in)

In [4]:
# Flatten the per-course structure into a single list of FAQ entries, tagging
# each entry with the course it belongs to.
# Each entry is copied into a new dict rather than edited in place, so
# `docs_raw` stays exactly as loaded and this cell can be re-run safely.
documents = [
    {**doc, 'course': course_dict['course']}
    for course_dict in docs_raw
    for doc in course_dict['documents']
]

In [5]:
# Search index configuration:
#   - 'course' is declared as a keyword field,
#   - 'question', 'text' and 'section' are declared as text fields.
# NOTE(review): per minsearch, keyword fields are presumably for exact-match
# filtering and text fields for free-text scoring — confirm against its docs.
index = minsearch.Index(
    keyword_fields=['course'],
    text_fields=['question', 'text', 'section'],
)

In [6]:
# Build the index from the flattened FAQ entries.
# The cell output shows a minsearch.Index repr, so `fit` appears to return
# the index object itself.
index.fit(documents)

<minsearch.Index at 0x780b403ad4f0>

In [7]:
from openai import OpenAI

In [8]:
# Shared API client used by llm() below. No credentials are passed here;
# presumably the SDK picks the key up from the environment
# (e.g. OPENAI_API_KEY) — TODO confirm before running on a fresh machine.
client = OpenAI()

In [9]:
def search(query, course=None, num_results=5):
    """Look up the FAQ entries most relevant to ``query``.

    Args:
        query: Free-text user question.
        course: Optional course identifier. When given, only entries whose
            ``course`` keyword field equals this value are considered. When
            ``None`` (the default), all courses are searched.
        num_results: Maximum number of entries to return (default 5).

    Returns:
        Whatever ``index.search`` returns for the query (the matching
        documents, best match first per minsearch).
    """
    # Matches on the question are weighted up and matches on the section
    # title are weighted down; 'text' is left at the index's default weight.
    boost = {'question': 3.0, 'section': 0.5}

    # An empty filter means "no restriction", which keeps the default call
    # equivalent to searching without a filter.
    filter_dict = {} if course is None else {'course': course}

    return index.search(
        query=query,
        filter_dict=filter_dict,
        boost_dict=boost,
        num_results=num_results,
    )

In [15]:
def build_prompt(query, search_results):
    """Assemble the LLM prompt from the user question and retrieved entries.

    Args:
        query: The user's question; inserted verbatim after ``QUESTION:``.
        search_results: Iterable of dicts, each providing the keys
            ``'section'``, ``'question'`` and ``'text'``.

    Returns:
        The prompt string: instructions, the question, then one
        ``section / question / answer`` stanza per entry, with stanzas
        separated by a blank line. Leading and trailing whitespace is stripped.

    Raises:
        KeyError: If an entry lacks one of the required keys.
    """
    # The template is spelled out line by line so that the source file's
    # indentation never ends up inside the text sent to the model.
    prompt_template = (
        "You're a course teaching assistant. Answer the QUESTION based on the CONTEXT.\n"
        "Use only the facts from the CONTEXT when answering the QUESTION.\n"
        "\n"
        "QUESTION: {question}\n"
        "CONTEXT:\n"
        "{context}"
    )

    # One stanza per retrieved entry; every field uses the same "label: value"
    # shape so the model sees a uniform structure.
    context = "\n\n".join(
        f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}"
        for doc in search_results
    )

    # strip() drops the dangling newline after "CONTEXT:" when nothing was found.
    return prompt_template.format(question=query, context=context).strip()

In [16]:
def llm(prompt, model='gpt-4o-mini'):
    """Send ``prompt`` to the chat-completions API and return the reply text.

    Args:
        prompt: Full prompt text; sent as a single ``user`` message.
        model: Name of the chat model to use. Defaults to ``'gpt-4o-mini'``.

    Returns:
        The ``content`` of the first choice in the API response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{'role': 'user', 'content': prompt}],
    )
    # Only one completion is requested, so the first choice is the answer.
    return response.choices[0].message.content

In [25]:
# Example user question. Note that rag() below takes its own `query`
# parameter and does not read this global.
query = "the course just started, can I enroll?"

def rag(query):
    """Answer ``query`` end to end: retrieve, build the prompt, ask the model.

    The three stages run in order — ``search`` finds relevant entries,
    ``build_prompt`` turns them plus the question into a prompt, and ``llm``
    returns the model's reply, which is handed back unchanged.
    """
    return llm(build_prompt(query, search(query)))