In [1]:
!pip install minsearch



In [None]:
import minsearch
import json 

In [4]:
with open('documents.json', 'rt') as  f_in:
    docs_raw = json.load(f_in)

In [5]:
documents = []

for course_dict in docs_raw:
    for doc in course_dict['documents']:
        doc['course'] = course_dict['course']
        documents.append(doc)

In [7]:
documents[0]

{'text': "The purpose of this document is to capture frequently asked technical questions\nThe exact day and hour of the course will be 15th Jan 2024 at 17h00. The course will start with the first  “Office Hours'' live.1\nSubscribe to course public Google Calendar (it works from Desktop only).\nRegister before the course starts using this link.\nJoin the course Telegram channel with announcements.\nDon’t forget to register in DataTalks.Club's Slack and join the channel.",
 'section': 'General course-related questions',
 'question': 'Course - When will the course start?',
 'course': 'data-engineering-zoomcamp'}

In [9]:
index = minsearch.Index(
    text_fields=["questions", 'text', 'section'],
    keyword_fields=['course']
)

In [10]:
# SELECT * WHERE course = 'data-engineering-zoomcamp'

In [30]:
q = "The course has started already, Can I join the course?"

In [31]:
index.fit(documents)

<minsearch.minsearch.Index at 0x71b46acb9d60>

In [32]:
boost = {'question': 3.0, 'section': 0.5}


results = index.search(
    query=q,
    filter_dict={'course': 'data-engineering-zoomcamp'},
    boost_dict=boost,
)

In [33]:
results

[{'text': "The purpose of this document is to capture frequently asked technical questions\nThe exact day and hour of the course will be 15th Jan 2024 at 17h00. The course will start with the first  “Office Hours'' live.1\nSubscribe to course public Google Calendar (it works from Desktop only).\nRegister before the course starts using this link.\nJoin the course Telegram channel with announcements.\nDon’t forget to register in DataTalks.Club's Slack and join the channel.",
  'section': 'General course-related questions',
  'question': 'Course - When will the course start?',
  'course': 'data-engineering-zoomcamp'},
 {'text': "It's up to you which platform and environment you use for the course.\nGithub codespaces or GCP VM are just possible options, but you can do the entire course from your laptop.",
  'section': 'General course-related questions',
  'question': 'Environment - Do we really have to use GitHub codespaces? I already have PostgreSQL & Docker installed.',
  'course': 'data

In [34]:
import os
from openai import OpenAI
from dotenv import load_dotenv

base_url=os.environ.get("BASE_URL")
api_key=os.environ.get("API_KEY")

client = OpenAI(
    base_url=base_url,
    api_key=api_key,
)

In [35]:
print(base_url)

https://openrouter.ai/api/v1


In [None]:
responce = client.chat.completions.create(
    model="deepseek/deepseek-chat-v3-0324:free",
    messages=[{
        "role":"user",
        "content":q
    }]
)

In [42]:
responce.choices[0].message.content

'Whether you can join a course that has already started depends on the specific policies of the institution or platform offering it. Here’s what you can do:\n\n1. **Check the Course Guidelines**:  \n   - Review the course description or syllabus for late enrollment policies. Some courses allow joining after the start date with access to recorded materials.\n\n2. **Contact the Instructor/Administrator**:  \n   - Reach out to the instructor, course coordinator, or support team (via email or platform messaging) to ask if late enrollment is permitted. They may grant access or suggest alternatives (e.g., auditing, partial participation).\n\n3. **Self-Paced vs. Live Courses**:  \n   - If the course is **self-paced** (pre-recorded lectures, flexible deadlines), you can likely join anytime.  \n   - For **live/synchronous** courses, late entry might be trickier, but instructors may accommodate you if deadlines (e.g., assignments) are manageable.\n\n4. **Platform Rules**:  \n   - On platforms li

In [None]:
prompt_template = """
Your are a course teaching assistant. 
Answer the QUESTION based on the CONTEXT from the FAQ database. 
Use only the facts from the CONTEXT  when answering the QUESTION. 
If the CONTEXT doesn't containt the answer output None.

QUESTION:
{question}

CONTEXT:
{context}
""".strip()

In [None]:
print(prompt_template)

Your are a course teaching assistant. 
Answer the QUESTION based on the CONTEXT from the FAQ database. 
Use only the facts from the CONTEXT  when answering the QUESTION. 
If the CONTEXT doesn't containt the answer output None.

QUESTION:
{question}

CONTEXT:
{context}


In [None]:
context = ""

for doc in results:
    context = context + f"section: {doc['section']}\nquestion:{doc['question']}\nanswer: {doc['text']}\n\n"

In [39]:
prompt = prompt_template.format(question=q, context=context).strip()

In [40]:
print(prompt)

Your are a course teaching assistant. 
Answer the QUESTION based on the CONTEXT from the FAQ database. 
Use only the facts from the CONTEXT  when answering the QUESTION. 
If the CONTEXT doesn't containt the answer output None.

QUESTION:
The course has started already, Can I join the course?

CONTEXT:
section: General course-related questions
question:Course - When will the course start?
answer: The purpose of this document is to capture frequently asked technical questions
The exact day and hour of the course will be 15th Jan 2024 at 17h00. The course will start with the first  “Office Hours'' live.1
Subscribe to course public Google Calendar (it works from Desktop only).
Register before the course starts using this link.
Join the course Telegram channel with announcements.
Don’t forget to register in DataTalks.Club's Slack and join the channel.

section: General course-related questions
question:Environment - Do we really have to use GitHub codespaces? I already have PostgreSQL & Doc

In [None]:
responce = client.chat.completions.create(
    model="deepseek/deepseek-chat-v3-0324:free",
    messages=[{
        "role":"user",
        "content":prompt
    }]
)
print(responce.choices[0].message.content)

The CONTEXT does not explicitly state whether you can join the course after it has already started. However, it mentions that you can follow the course after it finishes in a self-paced mode (see the "Course - Can I follow the course after it finishes?" section). 

Since the course has a live component (e.g., "Office Hours" and peer reviews for certificates), joining after the start date may limit your ability to participate fully in these live activities. 

For a definitive answer, you would need to refer to additional information not provided in the CONTEXT. 

Output: None.
