In [None]:
# One-time setup: uncomment and run these in a fresh environment.
# !wget https://raw.githubusercontent.com/alexeygrigorev/minsearch/main/minsearch.py
# pip install python-dotenv

In [2]:
import minsearch
import json

In [3]:
# Load the FAQ dump. The encoding is pinned to UTF-8 so that parsing does not
# depend on the platform's locale default (e.g. cp1252 on Windows).
with open('documents.json', 'rt', encoding='utf-8') as f_in:
    docs_raw = json.load(f_in)

# docs_raw[0]

In [4]:
# Flatten the per-course groups into one list, tagging every entry with the
# course it belongs to. Each entry is copied rather than modified in place, so
# docs_raw stays exactly as it was loaded and this cell is safe to re-run.
documents = [
    {**doc, 'course': course_dict['course']}
    for course_dict in docs_raw
    for doc in course_dict['documents']
]

# documents[0]

In [5]:
# `question`, `text` and `section` are registered as text fields;
# `course` is registered as a keyword field (used as a filter in the searches below).
index = minsearch.Index(
    keyword_fields=["course"],
    text_fields=["question", "text", "section"],
)

In [60]:
# Sample question reused by the search and LLM cells below
q = 'the course has already started, can I still enroll?'

In [61]:
# Fit the index on the flattened FAQ entries. The call returns an Index object,
# which is why the cell echoes its repr as output.
index.fit(documents)

<minsearch.Index at 0x7631e3adee30>

In [62]:
# Per-field values handed to the index as `boost_dict`
# (presumably relative scoring weights; confirm against minsearch).
boost = dict(question=3.0, section=0.5)

# Look up the sample question, restricted to a single course
results = index.search(
    query=q,
    boost_dict=boost,
    filter_dict={'course': 'data-engineering-zoomcamp'},
    num_results=5,
)

In [63]:
from openai import OpenAI

In [64]:
# No credentials are passed explicitly, so the client has to pick them up from
# its environment (per the OpenAI SDK docs: the OPENAI_API_KEY variable).
client = OpenAI()

In [66]:
# Baseline: send the bare question to the model, with no FAQ context attached
response = client.chat.completions.create(
    messages=[dict(role="user", content=q)],
    model='gpt-4o',
)

# Text of the first returned choice
response.choices[0].message.content

"It's possible you may still be able to enroll in the course, but it depends on the policies of the institution or the specific course you're interested in. Here are some steps you can take:\n\n1. **Check the Enrollment Deadline:** Look at the course details or contact the admissions office to find out the official deadline for enrollment.\n\n2. **Contact the Instructor:** Reach out to the course instructor directly to explain your situation and inquire if late enrollment is an option.\n\n3. **Advising Office:** Visit or contact your academic advisor or the department offering the course to discuss your options.\n\n4. **Online Portals:** If the course is offered online, there might be an option to enroll through the school's online platform. Check any notifications or messages related to course enrollment.\n\n5. **Special Permissions:** Sometimes, departments or instructors can give special permission for late enrollment, particularly if you have a valid reason and the course isn't too

In [52]:
def search(query, course='data-engineering-zoomcamp', num_results=5):
    """Query the module-level ``index`` for FAQ entries matching ``query``.

    Args:
        query: Free-text question to look up.
        course: Value the ``course`` field is filtered on. Defaults to the
            course that was previously hard-coded, so ``search(query)``
            behaves as before.
        num_results: Number of results requested from the index (default 5,
            the previously hard-coded value).

    Returns:
        Whatever ``index.search`` returns for these arguments.
    """
    # Per-field values passed through as `boost_dict`
    boost = {'question': 3.0, 'section': 0.5}

    return index.search(
        query=query,
        filter_dict={'course': course},
        boost_dict=boost,
        num_results=num_results,
    )

In [69]:
def build_prompt(query, search_results):
    """Assemble the LLM prompt from a question and the retrieved FAQ entries.

    Args:
        query: The user's question; inserted after ``QUESTION:``.
        search_results: Iterable of dicts, each with ``section``,
            ``question`` and ``text`` keys; rendered under ``CONTEXT:``.

    Returns:
        The prompt string with leading and trailing whitespace stripped.

    Raises:
        KeyError: If an entry lacks ``section``, ``question`` or ``text``.
    """
    # Built line by line so that no source indentation leaks into the prompt.
    # The stray "+" that used to trail the second instruction line is removed.
    prompt_template = (
        "You're a course teaching assistant. "
        "Answer the QUESTION based on the CONTEXT from the FAQ database.\n"
        "Use only the facts from the CONTEXT when answering the QUESTION.\n"
        "\n"
        "QUESTION: {question}\n"
        "\n"
        "CONTEXT:\n"
        "{context}"
    )

    # One entry per search hit, separated by a blank line
    context = "\n\n".join(
        f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}"
        for doc in search_results
    )

    return prompt_template.format(question=query, context=context).strip()

In [54]:
def llm(prompt, model='gpt-4o'):
    """Send ``prompt`` as a single user message and return the reply text.

    Uses the module-level ``client``.

    Args:
        prompt: Full prompt text, sent as the content of one ``user`` message.
        model: Model name passed to the chat completions call. Defaults to
            ``'gpt-4o'``, the value that was previously hard-coded, so
            ``llm(prompt)`` behaves as before.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )

    return response.choices[0].message.content

In [70]:
# Example question for the end-to-end pipeline
query = "How do I run kafka?"

def rag(query):
    """Answer ``query`` in three steps: search, build the prompt, call the LLM.

    The search hits for ``query`` are rendered into a prompt together with the
    question, and the LLM's reply to that prompt is returned.
    """
    retrieved = search(query)
    return llm(build_prompt(query, retrieved))

In [72]:
# print() is used so the newlines in the answer are rendered; a bare expression
# would show the escaped string repr instead (as in the earlier gpt-4o cell).
print(rag(query))

To run Kafka, you can follow the instructions below based on whether you're working with Java or Python:

### For Java:

Navigate to your project directory and run the following command:
```sh
java -cp build/libs/<jar_name>-1.0-SNAPSHOT.jar:out src/main/java/org/example/JsonProducer.java
```
Make sure to replace `<jar_name>` with the actual name of your jar file.

### For Python:

1. First, create and activate a virtual environment. In your terminal, run:
    ```sh
    python -m venv env
    source env/bin/activate   # MacOS/Linux
    # For Windows, use:
    # env\Scripts\activate
    ```

2. Install the necessary packages by running:
    ```sh
    pip install -r ../requirements.txt
    ```

3. To deactivate the virtual environment, run:
    ```sh
    deactivate
    ```

Ensure that your Kafka Docker images are up and running before executing these steps. These instructions should work on MacOS, Linux, and Windows (with slight path differences for activation).
