In [1]:
import minsearch
import json
from openai import OpenAI

In [2]:
# Load the raw JSON file. The encoding is pinned to UTF-8 so the read does not
# depend on the platform's default locale encoding.
with open("./data/documents.json", "rt", encoding="utf-8") as f_in:
    docs_raw = json.load(f_in)

In [3]:
# Flatten the per-course entries of docs_raw into one list. Each document dict
# is tagged in place with the "course" value of the entry it came from, so the
# dicts inside docs_raw gain that key as well.
documents = []

for course_dict in docs_raw:
    course_docs = course_dict["documents"]
    for doc in course_docs:
        doc.update(course=course_dict["course"])
    documents.extend(course_docs)

In [4]:
# "question", "text" and "section" are registered as text fields; "course" is
# registered as a keyword field, which search() filters on via filter_dict.
index = minsearch.Index(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"],
)

In [5]:
# Sample question. NOTE(review): `q` is not referenced by any of the visible
# cells; the final cell passes the same text to rag() as a literal.
q = "The course has already started, can I still enroll?"

In [6]:
# Fit the index on the flattened documents; the call returns the Index object,
# which is what the cell output below displays.
index.fit(documents)

<minsearch.minsearch.Index at 0x1117f4eb0>

In [7]:
# OpenAI-compatible client pointed at a server on localhost:11434.
# NOTE(review): presumably a local Ollama instance, with "ollama" as a
# placeholder API key rather than a real secret -- confirm.
client = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")

In [8]:
def search(query, course="data-engineering-zoomcamp", num_results=5):
    """Search the module-level ``index`` for documents matching ``query``.

    Args:
        query: Query string forwarded to ``index.search``.
        course: Value the "course" field must match. Defaults to
            "data-engineering-zoomcamp", the value that used to be hard-coded.
        num_results: Number of results requested from the index. Defaults
            to 5, the value that used to be hard-coded.

    Returns:
        Whatever ``index.search`` returns. ``build_prompt`` iterates over it
        and reads the "section", "question" and "text" keys of each item.
    """
    # Matches in "question" are weighted up and matches in "section" down.
    # "text" is not listed, so it gets whatever default the index applies.
    boost = {"question": 3.0, "section": 0.5}

    return index.search(
        query=query,
        filter_dict={"course": course},
        boost_dict=boost,
        num_results=num_results,
    )

In [9]:
def build_prompt(query, search_results):
    """Render the LLM prompt for ``query`` from the retrieved documents.

    Args:
        query: The user's question; inserted after "QUESTION:".
        search_results: Iterable of dicts, each providing the "section",
            "question" and "text" keys.

    Returns:
        The filled-in prompt with leading and trailing whitespace stripped.

    Raises:
        KeyError: If a result lacks one of the three required keys.
    """
    template = """
You are a teaching assistant for a course. Answer the QUESTION, based on the CONTEXT.
Use only the facts from the CONTEXT, when answering the QUESTION.
Keep the answer succinct and to the point.

QUESTION: {question}

CONTEXT:
{context}
""".strip()

    # One entry per document, each ending in a blank line; the final strip()
    # removes the blank line left after the last entry.
    context_entries = [
        f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
        for doc in search_results
    ]
    filled = template.format(question=query, context="".join(context_entries))
    return filled.strip()

In [10]:
def llm(prompt, model='mistral'):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Text used as the content of the one user message.
        model: Model name passed to the chat-completions call. Defaults to
            'mistral', the value that used to be hard-coded.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{'role': 'user', 'content': prompt}],
    )
    return response.choices[0].message.content

In [11]:
def rag(query):
    """Answer ``query`` using the retrieve-then-generate pipeline.

    Calls ``search`` to fetch matching documents, ``build_prompt`` to render
    them together with the question, and ``llm`` to produce the answer, which
    is returned unchanged.
    """
    retrieved = search(query)
    return llm(build_prompt(query, retrieved))

In [12]:
# Run the full pipeline on a sample question and print the generated answer.
print(rag("The course has already started, can I still enroll?"))

 Based on the context provided, you cannot enroll in the course if it has already started on January 15th, 2024 at 17:00. However, you can still access the materials after the course finishes and work on the homework assignments without enrolling. If you want to follow along during or after the course, you need to install and set up all dependencies as mentioned before the course starts. Additionally, if you choose the self-paced mode, you can get support through the provided Slack channel.
