In [1]:
import minsearch
import json

# Read data

In [2]:
# The FAQ file is JSON whose answers contain non-ASCII characters (e.g. curly
# apostrophes), so pin the encoding instead of relying on the platform default.
with open('documents.json', 'r', encoding='utf-8') as f_in:
    raw_doc = json.load(f_in)

In [3]:
# Flatten the per-course FAQ lists into a single list, tagging every entry with
# the course it belongs to. Each entry is copied rather than edited in place so
# that `raw_doc` stays exactly as it was loaded from disk.
documents = []
for course_dict in raw_doc:
    course = course_dict['course']
    for doc in course_dict['documents']:
        documents.append({**doc, 'course': course})

# Create minsearch index

In [4]:
# "course" is registered as a keyword field; the question, answer text and
# section name are registered as text fields.
index = minsearch.Index(
    keyword_fields=["course"],
    text_fields=["question", "text", "section"],
)

In [5]:
# Fit the index on the flattened FAQ entries. The call is the cell's last
# expression, so its return value (an Index object) is displayed below.
index.fit(documents)

<minsearch.Index at 0x7f8a05d0a610>

In [6]:
# Sample question reused by the search and LLM demo cells that follow.
q = 'the course has already started, can I still enroll?'

In [7]:
# Per-field boost values handed to the index (presumably relative weights:
# "question" counts more than the default, "section" less -- confirm in minsearch).
boost = dict(question=3.0, section=0.5)

# Show the five best hits for the sample question.
index.search(num_results=5, boost_dict=boost, query=q)

[{'text': 'Yes, you can. You won’t be able to submit some of the homeworks, but you can still take part in the course.\nIn order to get a certificate, you need to submit 2 out of 3 course projects and review 3 peers’ Projects by the deadline. It means that if you join the course at the end of November and manage to work on two projects, you will still be eligible for a certificate.',
  'section': 'General course-related questions',
  'question': 'The course has already started. Can I still join it?',
  'course': 'machine-learning-zoomcamp'},
 {'text': "Yes, even if you don't register, you're still eligible to submit the homeworks.\nBe aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.",
  'section': 'General course-related questions',
  'question': 'Course - Can I still join the course after the start date?',
  'course': 'data-engineering-zoomcamp'},
 {'text': 'Yes, we will keep all the materials after the cour

In [8]:
def search(query):
    """Return the top 5 FAQ entries for `query` from the notebook-level `index`.

    Args:
        query: Free-text question to look up.

    Returns:
        The result of `index.search` for this query (in this notebook, a list
        of document dicts with 'text', 'section', 'question' and 'course').
    """
    boost = {'question': 3.0, 'section': 0.5}
    result = index.search(
        # Use the argument, not the global `q`: previously the parameter was
        # ignored and every call searched for whatever `q` currently held.
        query=query,
        boost_dict=boost,
        num_results=5
    )
    return result

# Use an LLM to generate the answer

### OpenAI

In [10]:
from openai import OpenAI

In [21]:
# OpenAI client shared by the cells below. No key is passed explicitly, so
# credentials presumably come from the environment (OPENAI_API_KEY) -- confirm.
clientO = OpenAI()

In [22]:
# Baseline: send the bare question to gpt-4o with no FAQ context attached.
response = clientO.chat.completions.create(
    model='gpt-4o',
    messages=[{'role': 'user', 'content': q}],
)
print(response.choices[0].message.content)

It depends on the specific course and the policies of the institution offering it. Some courses allow late enrollment within a certain timeframe, while others may have strict deadlines. Here are a few steps you can take:

1. **Check the Course Policy:** Review the course's official guidelines regarding late enrollment.

2. **Contact the Instructor:** Reach out to the course instructor or professor to discuss your situation. They might be able to make an exception or provide guidance on how to catch up on missed material.

3. **Consult the Registrar or Admissions Office:** They can provide official information on enrollment deadlines and any potential penalties or additional requirements for late enrollment.

4. **Review the Syllabus:** If you can access the syllabus, review it to understand how much material you've missed and what you'll need to do to catch up.

5. **Consider the Workload:** Be honest with yourself about your ability to catch up on missed work while keeping up with ong

In [19]:
from anthropic import Anthropic

### Anthropic

In [20]:
# Anthropic client shared by the cells below. No key is passed explicitly, so
# credentials presumably come from the environment (ANTHROPIC_API_KEY) -- confirm.
clientA = Anthropic()

In [28]:
# Baseline: send the bare question to Claude 3 Haiku with no FAQ context attached.
response = clientA.messages.create(
    model="claude-3-haiku-20240307",
    max_tokens=500,
    messages=[
        {"role": "user", "content": [{"type": "text", "text": q}]},
    ],
)
print(response.content[0].text)

Whether you can still enroll in a course that has already started depends on the policies and deadlines of the specific educational institution or program. Here are a few things to consider:

1. Check the enrollment/add deadline: Most institutions have a deadline, usually within the first week or two of the term, when students can still add new courses. This enrollment period is often referred to as the "add/drop" period.

2. Talk to an academic advisor: If the official add/drop period has passed, reach out to an academic advisor or the registrar's office. Explain your situation and see if they can make an exception or if there are any other options for you to still join the course.

3. Seek instructor permission: In some cases, you may be able to get permission directly from the course instructor to enroll late. The instructor may be willing to work with you to help you catch up on any missed material.

4. Consider auditing the course: If you can't officially enroll, you may be able t

# Putting it all together

In [31]:
# RAG prompt skeleton with {question} and {context} placeholders. It is built
# line by line so the trailing space after "CONTEXT:" stays visible.
prompt_template = "\n".join([
    "You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.",
    "Use only the facts from the CONTEXT when answering the QUESTION.",
    "",
    "QUESTION: {question}",
    "",
    "CONTEXT: ",
    "{context}",
])

In [34]:
def build_prompt(query, search_results):
    """Render the RAG prompt for `query` from the retrieved FAQ entries.

    Args:
        query: The user's question; substituted for the QUESTION slot.
        search_results: Iterable of dicts that each provide 'section',
            'question' and 'text'; rendered in order into the CONTEXT slot.

    Returns:
        The filled-in prompt with leading and trailing whitespace stripped.
    """
    template = (
        "You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.\n"
        "Use only the facts from the CONTEXT when answering the QUESTION.\n"
        "\n"
        "QUESTION: {question}\n"
        "\n"
        "CONTEXT: \n"
        "{context}"
    )

    # One block per retrieved entry, each followed by a blank line.
    entries = [
        f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
        for doc in search_results
    ]

    filled = template.format(question=query, context="".join(entries))
    return filled.strip()

In [35]:
def llm_openai(prompt):
    """Send `prompt` to gpt-4o as a single user message and return the reply text.

    Uses the notebook-level `clientO` and returns the message content of the
    first choice in the response.
    """
    user_message = {"role": "user", "content": prompt}
    completion = clientO.chat.completions.create(
        model='gpt-4o',
        messages=[user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [37]:
def llm_anth(prompt):
    """Send `prompt` to Claude 3 Haiku as a single user turn and return the reply text.

    Uses the notebook-level `clientA`, caps the reply at 500 tokens, and
    returns the text of the first content block in the response.
    """
    text_block = {"type": "text", "text": prompt}
    user_turn = {"role": "user", "content": [text_block]}
    reply = clientA.messages.create(
        model="claude-3-haiku-20240307",
        max_tokens=500,
        messages=[user_turn],
    )
    return reply.content[0].text

In [39]:
def rag_openai(query):
    """Answer `query` with the OpenAI model: retrieve FAQ entries, build the prompt, generate."""
    retrieved = search(query)
    return llm_openai(build_prompt(query, retrieved))

def rag_anth(query):
    """Answer `query` with the Anthropic model: retrieve FAQ entries, build the prompt, generate."""
    retrieved = search(query)
    return llm_anth(build_prompt(query, retrieved))

In [41]:
# New sample question for the end-to-end RAG calls below (rebinds the global `q`).
q = "how do I run kafka?"

In [42]:
# End-to-end answer from the OpenAI-backed pipeline; displayed as the cell output.
rag_openai(q)

'To run Kafka for Java in the terminal, navigate to your project directory and use the following command:\n\n```bash\njava -cp build/libs/<jar_name>-1.0-SNAPSHOT.jar:out src/main/java/org/example/JsonProducer.java\n```\n\nMake sure to replace `<jar_name>` with the actual name of your jar file.'

In [43]:
# End-to-end answer from the Anthropic-backed pipeline; displayed as the cell output.
rag_anth(q)

'Based on the context provided in the FAQ database, the answer to the question "how do I run kafka?" is:\n\nTo run Kafka, you can follow these steps:\n\n1. **Java Kafka**:\n   - In the project directory, run the following command to run the Java Kafka producer, consumer, KStreams, etc.:\n     ```\n     java -cp build/libs/<jar_name>-1.0-SNAPSHOT.jar:out src/main/java/org/example/JsonProducer.java\n     ```\n\n2. **Python Kafka**:\n   - Create a virtual environment and install the necessary packages:\n     ```\n     python -m venv env\n     source env/bin/activate\n     pip install -r ../requirements.txt\n     ```\n   - Activate the virtual environment before running the Python files:\n     ```\n     source env/bin/activate\n     ```\n   - To deactivate the virtual environment:\n     ```\n     deactivate\n     ```\n\nNote that the virtual environment should be created only to run the Python file, and Docker images should be up and running first.'

# Use Elasticsearch