In [12]:
!pip install minsearch



In [1]:
import requests 

# Download the FAQ dataset: a JSON list of courses, each carrying its own list of documents.
docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
docs_response = requests.get(docs_url, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Flatten to one record per FAQ entry, tagging each with the course it belongs to.
# Each record is a copy, so documents_raw is left untouched and the cell is safe to re-run.
documents = [
    {**doc, 'course': course['course']}
    for course in documents_raw
    for doc in course['documents']
]

In [2]:
from minsearch import AppendableIndex

# Which document fields the index treats as free text and which as keywords.
TEXT_FIELDS = ["question", "text", "section"]
KEYWORD_FIELDS = ["course"]

index = AppendableIndex(text_fields=TEXT_FIELDS, keyword_fields=KEYWORD_FIELDS)

# Build the index over the flattened FAQ documents. fit() stays the last
# expression so the cell still displays its return value.
index.fit(documents)

<minsearch.append.AppendableIndex at 0x1c950e74770>

In [3]:
def search(query, course='data-engineering-zoomcamp', num_results=5):
    """Search the FAQ index for entries relevant to ``query``.

    Args:
        query: Free-text question to look up.
        course: Value the ``course`` keyword field must match. Defaults to
            ``'data-engineering-zoomcamp'``, the course used throughout this notebook.
        num_results: Number of results requested from the index. Defaults to 5.

    Returns:
        Whatever ``index.search`` returns for these arguments.
    """
    # Boost weights handed to the index: question 3.0, section 0.5
    # (no explicit weight is given for the text field).
    boost = {'question': 3.0, 'section': 0.5}

    return index.search(
        query=query,
        filter_dict={'course': course},
        boost_dict=boost,
        num_results=num_results,
        output_ids=True,
    )

In [4]:
question = 'Can I still join the course?'

In [5]:
# Prompt for the basic RAG flow; {question} and {context} are filled in by build_prompt.
prompt_template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

<QUESTION>
{question}
</QUESTION>

<CONTEXT>
{context}
</CONTEXT>
""".strip()

def build_prompt(query, search_results, template=prompt_template):
    """Render the prompt for ``query`` from the retrieved FAQ entries.

    Args:
        query: The user's question; substituted for ``{question}``.
        search_results: Iterable of dicts with ``section``, ``question`` and
            ``text`` keys; each becomes one entry of the ``{context}`` block.
        template: Format string with ``{question}`` and ``{context}``
            placeholders. The default is bound when this function is defined,
            so rebinding the global ``prompt_template`` in a later cell does
            not change the prompt this function produces.

    Returns:
        The formatted prompt with leading/trailing whitespace stripped.

    Raises:
        KeyError: If a search result lacks one of the expected keys.
    """
    # Each entry ends with a blank line; join avoids repeated string re-allocation.
    context = "".join(
        f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
        for doc in search_results
    )

    return template.format(question=query, context=context).strip()

In [6]:
search_results = search(question)


In [7]:
# NOTE: `buils_prompt` looks like a typo (built prompt); the name is kept as-is
# because later cells print it and pass it to llm().
buils_prompt = build_prompt(question, search_results)

In [8]:
print(buils_prompt)

You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

<QUESTION>
Can I still join the course?
</QUESTION>

<CONTEXT>
section: General course-related questions
question: Course - Can I still join the course after the start date?
answer: Yes, even if you don't register, you're still eligible to submit the homeworks.
Be aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.

section: General course-related questions
question: Certificate - Can I follow the course in a self-paced mode and get a certificate?
answer: No, you can only get a certificate if you finish the course with a “live” cohort. We don't award certificates for the self-paced mode. The reason is you need to peer-review capstone(s) after submitting a project. You can only peer-review projects at the time the course is running.

section: General

In [None]:
from openai import OpenAI
# Shared client used by llm(). No credentials are passed here, so the library must
# pick them up from its own configuration — presumably the OPENAI_API_KEY
# environment variable; confirm against the openai package docs.
client = OpenAI()

In [15]:
def llm(prompt, model='gpt-4o-mini'):
    """Send ``prompt`` as a single user message and return the model's reply text.

    Args:
        prompt: Full prompt text; sent as the content of one ``user`` message.
        model: Name of the chat model to call. Defaults to ``'gpt-4o-mini'``.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )
    # Only the first choice is read.
    return response.choices[0].message.content

In [16]:
answer = llm(buils_prompt)

In [17]:
print(answer)

Yes, you can still join the course even after the start date. You are eligible to submit homework without registering. However, keep in mind that there will be deadlines for turning in final projects, so it's advisable not to leave everything until the last minute.


In [18]:
def rag(query):
    """Answer ``query`` end to end: retrieve FAQ entries, build the prompt, ask the LLM.

    Returns the text produced by ``llm`` for the prompt built from the search results.
    """
    retrieved = search(query)
    return llm(build_prompt(query, retrieved))

In [19]:
rag('How do I patch KDE under FreeSBD')

"I'm sorry, but the context does not provide any information on how to patch KDE under FreeSBD. Please refer to additional resources or documentation for guidance."

## AGENTIC RAG

In [20]:
# Prompt for the agentic flow: it asks the model to reply with a JSON object whose
# "action" is either SEARCH or ANSWER. Literal braces are doubled ({{ }}) so that
# str.format() leaves them in place and fills only {question} and {context}.
# NOTE: this rebinds `prompt_template`, replacing the basic-RAG template that was
# assigned to the same name earlier in the notebook.
prompt_template = """
You're a course teaching assistant.

You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.
At the beginning the context is EMPTY.

<QUESTION>
{question}
</QUESTION>

<CONTEXT> 
{context}
</CONTEXT>

If CONTEXT is EMPTY, you can use our FAQ database.
In this case, use the following output template:

{{
"action": "SEARCH",
"reasoning": "<add your reasoning here>"
}}

If you can answer the QUESTION using CONTEXT, use this template:

{{
"action": "ANSWER",
"answer": "<your answer>",
"source": "CONTEXT"
}}

If the context doesn't contain the answer, use your own knowledge to answer the question

{{
"action": "ANSWER",
"answer": "<your answer>",
"source": "OWN_KNOWLEDGE"
}}
""".strip()

In [22]:
# Example question for the agentic prompt.
question = 'How can I run docker on Windows 10'
# The literal string "EMPTY" is the marker the agentic prompt tells the model to check for.
context = "EMPTY"

In [25]:
prompt = prompt_template.format(question=question, context=context)

In [26]:
answer_json = llm(prompt)

In [28]:
import json

In [29]:
# The agentic prompt asks for a JSON object; json.loads raises json.JSONDecodeError
# if the model's reply is not valid JSON.
answer_dict = json.loads(answer_json)

In [30]:
print(answer_dict)

{'action': 'ANSWER', 'answer': 'To run Docker on Windows 10, you need to install Docker Desktop. Here are the steps to do so:\n\n1. **Check your Windows version**: Ensure you are running Windows 10 Pro, Enterprise, or Education (version 15063 or later). Home editions can also use Docker with WSL 2.\n\n2. **Download Docker Desktop**: Go to the [official Docker website](https://www.docker.com/products/docker-desktop) and download Docker Desktop for Windows.\n\n3. **Install Docker Desktop**: Run the downloaded installer and follow the installation instructions. You may need to enable the WSL feature during installation.\n\n4. **Start Docker Desktop**: After installation, launch Docker Desktop. You might need to log in with your Docker Hub account.\n\n5. **Configure Docker settings**: You can access settings to adjust resources or set the WSL integration.\n\n6. **Verify installation**: Open a command prompt or PowerShell and run `docker --version` to confirm Docker is installed correctly. 