In [1]:
!pip install minsearch



In [2]:
import minsearch
import json
import requests

In [3]:
docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'
docs_response = requests.get(docs_url)
documents_raw = docs_response.json()

In [4]:
documents = []

for course_dict in docs_raw:
    for doc in course_dict['documents']:
        doc['course'] = course_dict['course']
        documents.append(doc)

In [5]:
documents[0]

{'text': "The purpose of this document is to capture frequently asked technical questions\nThe exact day and hour of the course will be 15th Jan 2024 at 17h00. The course will start with the first  “Office Hours'' live.1\nSubscribe to course public Google Calendar (it works from Desktop only).\nRegister before the course starts using this link.\nJoin the course Telegram channel with announcements.\nDon’t forget to register in DataTalks.Club's Slack and join the channel.",
 'section': 'General course-related questions',
 'question': 'Course - When will the course start?',
 'course': 'data-engineering-zoomcamp'}

In [6]:
index = minsearch.Index(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)
index.fit(documents)

<minsearch.minsearch.Index at 0x7187f2777f20>

In [7]:
## example query
q = "the course has already started, can I still enroll?"

In [8]:
boost = {"question": 3.0, "section": 0.5} # means the question field is more importan than the text from section
filter_ = {"course": "data-engineering-zoomcamp"}
index.search(query=q, filter_dict=filter_, boost_dict=boost, num_results=5)

[{'text': "Yes, even if you don't register, you're still eligible to submit the homeworks.\nBe aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.",
  'section': 'General course-related questions',
  'question': 'Course - Can I still join the course after the start date?',
  'course': 'data-engineering-zoomcamp'},
 {'text': 'Yes, we will keep all the materials after the course finishes, so you can follow the course at your own pace after it finishes.\nYou can also continue looking at the homeworks and continue preparing for the next cohort. I guess you can also start working on your final capstone project.',
  'section': 'General course-related questions',
  'question': 'Course - Can I follow the course after it finishes?',
  'course': 'data-engineering-zoomcamp'},
 {'text': "The purpose of this document is to capture frequently asked technical questions\nThe exact day and hour of the course will be 15th Jan 202

In [9]:
from openai import OpenAI

In [13]:
client = OpenAI(base_url="https://api.deepseek.com/v1")

In [14]:
response = client.chat.completions.create(
    model="deepseek-chat",
    messages=[
        {"role": "user", "content": q}
    ]
)
response

ChatCompletion(id='59c38a7d-5984-42ec-9c1e-f12190a67eb1', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Of course! This is a very common situation. The short answer is: **Yes, it is often possible to enroll after a course has started, but it depends entirely on the specific policies of the institution and the instructor.**\n\nHere’s a breakdown of what you need to consider and do immediately.\n\n### Factors That Determine if You Can Enroll\n\n1.  **Institutional Policy:** Colleges, universities, and online platforms have official deadlines. These are often called "**Add/Drop Deadlines**" or "**Late Registration Periods**."\n    *   There is usually a period at the very beginning of the term (e.g., the first week or two) where you can add a course with minimal penalty.\n    *   After that, you may need special permission, often from the instructor or a dean.\n\n2.  **Instructor Permission:** This is the most critical factor after th

In [16]:
response.choices[0].message.content # generic context from deepseek

'Of course! This is a very common situation. The short answer is: **Yes, it is often possible to enroll after a course has started, but it depends entirely on the specific policies of the institution and the instructor.**\n\nHere’s a breakdown of what you need to consider and do immediately.\n\n### Factors That Determine if You Can Enroll\n\n1.  **Institutional Policy:** Colleges, universities, and online platforms have official deadlines. These are often called "**Add/Drop Deadlines**" or "**Late Registration Periods**."\n    *   There is usually a period at the very beginning of the term (e.g., the first week or two) where you can add a course with minimal penalty.\n    *   After that, you may need special permission, often from the instructor or a dean.\n\n2.  **Instructor Permission:** This is the most critical factor after the official deadline has passed. The instructor will decide if you can catch up.\n    *   They will consider: How much work have you missed? (Quizzes, assignme

In [24]:
prompt_template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT. Use only the facts from the CONTEXT when answearing the question.
if the CONTEXT doesn't contain teh answear, output NONE

QUESTION: {question}

CONTEXT: {context}
""".strip()

In [30]:
context = ""
for doc in index.search(query=q, filter_dict=filter_, boost_dict=boost, num_results=5):
    context = context + f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"

In [31]:
prompt = prompt_template.format(question=q, context=context).strip()

In [32]:
print(prompt)

You're a course teaching assistant. Answer the QUESTION based on the CONTEXT. Use only the facts from the CONTEXT when answearing the question.
if the CONTEXT doesn't contain teh answear, output NONE

QUESTION: the course has already started, can I still enroll?

CONTEXT: section: General course-related questions
question: Course - Can I still join the course after the start date?
answer: Yes, even if you don't register, you're still eligible to submit the homeworks.
Be aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.

section: General course-related questions
question: Course - Can I follow the course after it finishes?
answer: Yes, we will keep all the materials after the course finishes, so you can follow the course at your own pace after it finishes.
You can also continue looking at the homeworks and continue preparing for the next cohort. I guess you can also start working on your final capstone project.

In [33]:
response = client.chat.completions.create(
    model="deepseek-chat",
    messages=[
        {"role": "user", "content": prompt}
    ]
)
response

ChatCompletion(id='33ed3f4e-ce4b-437e-95b2-04b17a845233', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Yes, you can still enroll after the course has started. According to the context, you are still eligible to submit the homeworks even if you register after the start date. However, you should be aware of the deadlines for the final projects.', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None))], created=1759520861, model='deepseek-chat', object='chat.completion', service_tier=None, system_fingerprint='fp_ffc7281d48_prod0820_fp8_kvcache', usage=CompletionUsage(completion_tokens=49, prompt_tokens=525, total_tokens=574, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetails(audio_tokens=None, cached_tokens=0), prompt_cache_hit_tokens=0, prompt_cache_miss_tokens=525))

In [35]:
for res in response.choices:
    print(res.message.content)

Yes, you can still enroll after the course has started. According to the context, you are still eligible to submit the homeworks even if you register after the start date. However, you should be aware of the deadlines for the final projects.
