In [1]:
import minsearch
import json

from openai import OpenAI
import os

In [2]:
# Load the raw FAQ data. The encoding is pinned to UTF-8 so that decoding does
# not depend on the platform's default locale encoding (which would garble
# non-ASCII text on systems where the default is not UTF-8).
with open("documents.json", "rt", encoding="utf-8") as f_in:
    docs_raw = json.load(f_in)

In [3]:
# Flatten the per-course structure of docs_raw into a single list of FAQ
# entries, each tagged with the course it came from.
documents = []

for course_entry in docs_raw:
    faqs = course_entry["documents"]
    for faq in faqs:
        # The entry is tagged in place, so the dicts inside docs_raw gain the
        # "course" key as well.
        faq.update(course=course_entry["course"])
    documents.extend(faqs)

In [4]:
# Create the (not yet fitted) search index. "course" is the keyword field that
# search() filters on; "question", "text" and "section" are the text fields
# the query is matched against.
index = minsearch.Index(
    keyword_fields=["course"],
    text_fields=["question", "text", "section"],
)

In [5]:
# Fit the index on the flattened FAQ entries. The call's return value (the
# Index object) is what this cell displays.
index.fit(documents)

<minsearch.minsearch.Index at 0x75b660646ae0>

In [6]:
# Shared OpenAI client used by the chat-completion calls in this notebook.
# NOTE(review): no credentials are passed explicitly, so the SDK presumably
# reads the API key from the environment (e.g. OPENAI_API_KEY) — confirm.
client = OpenAI()

In [7]:
# One-off request with a placeholder prompt ("q"). `response` is not read by
# any later cell shown here, so this looks like a connectivity check.
response = client.chat.completions.create(
    messages=[{"role": "user", "content": "q"}],
    model="gpt-4o",
)

In [8]:
def search(query, course="data-engineering-zoomcamp", num_results=5):
    """Look up the FAQ entries most relevant to ``query`` in the global ``index``.

    Args:
        query: Free-text search string.
        course: Value the ``course`` keyword field must match. Defaults to
            ``"data-engineering-zoomcamp"`` (the previously hard-coded value).
            Pass ``None`` to search across every course.
        num_results: Maximum number of hits requested from the index
            (default 5, the previously hard-coded value).

    Returns:
        Whatever ``index.search`` returns; ``build_prompt`` iterates over it
        and reads the ``section``, ``question`` and ``text`` keys of each hit.
    """
    # Matches in the question field are weighted up and matches in the section
    # field are weighted down; "text" is not listed here and so presumably
    # keeps the index's default weight.
    boost = {"question": 3.0, "section": 0.5}

    # An empty filter leaves the search unrestricted.
    filter_dict = {} if course is None else {"course": course}

    return index.search(
        query=query,
        filter_dict=filter_dict,
        boost_dict=boost,
        num_results=num_results,
    )

In [9]:
def build_prompt(query, search_results):
    """Assemble the LLM prompt for ``query`` from the retrieved FAQ entries.

    Args:
        query: The user's question; substituted into the QUESTION slot.
        search_results: Iterable of mappings, each providing the keys
            ``section``, ``question`` and ``text``.

    Returns:
        The filled-in prompt string with leading and trailing whitespace
        removed.
    """
    template = """
    You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database. 
    Use only the facts from the CONTEXT when answering the QUESTION.
    
    QUESTION: {question}
    
    CONTEXT: {context}
    """.strip()

    # One text block per hit; each block ends with a blank line so consecutive
    # entries stay visually separated inside the CONTEXT section.
    entries = [
        f"section: {hit['section']}\nquestion: {hit['question']}\nanswer: {hit['text']}\n\n"
        for hit in search_results
    ]

    return template.format(question=query, context="".join(entries)).strip()

In [10]:
def llm(prompt, model="gpt-4o"):
    """Send ``prompt`` as a single user message and return the model's reply text.

    Args:
        prompt: Full prompt text; sent as the content of one ``user`` message.
        model: Name of the chat model to call. Defaults to ``"gpt-4o"`` (the
            previously hard-coded value).

    Returns:
        The message content of the first choice in the response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content

In [11]:
# Sample question used to exercise the pipeline.
query = 'how do i run kafka?'

def rag(query):
    """Answer ``query`` end to end: retrieve, build the prompt, ask the LLM.

    Args:
        query: The user's question.

    Returns:
        Whatever ``llm`` returns for the prompt built from the search hits.
    """
    # search -> build_prompt -> llm, evaluated inside-out.
    return llm(build_prompt(query, search(query)))

In [12]:
# Run the whole pipeline on the sample query; the returned answer string is
# displayed as this cell's output.
rag(query)

"To run Kafka in the terminal when working with Java Kafka projects, you need to execute the following command within the project directory:\n\n```bash\njava -cp build/libs/<jar_name>-1.0-SNAPSHOT.jar:out src/main/java/org/example/JsonProducer.java\n```\n\nIf you are working with a Python project, you should consider creating a virtual environment and running the Python files within it. Here are the steps to set up the virtual environment:\n\n1. Create a virtual environment and install the necessary packages:\n   ```bash\n   python -m venv env\n   source env/bin/activate\n   pip install -r ../requirements.txt\n   ```\n\n2. Activate the virtual environment each time you run your Python files:\n   ```bash\n   source env/bin/activate\n   ```\n\n3. Deactivate the virtual environment when you are done:\n   ```bash\n   deactivate\n   ```\n   \nNote: For Windows, the path to activate the virtual environment is slightly different (it's `env/Scripts/activate`)."