In [1]:
import minsearch
import json
import os
from groq import Groq

In [2]:
# HF_HOME is set for libraries that consult it (presumably the Hugging Face
# cache location — nothing in the visible notebook reads it directly).
os.environ['HF_HOME'] = 'run/cache/'

# SECURITY: the API key must never be written into the notebook. A key that has
# been committed is compromised and should be revoked and rotated.
# Export GROQ_API_KEY in the shell (or load it from an untracked file) before
# starting the kernel; this cell only reads it.
api_key = os.getenv('GROQ_API_KEY')

In [3]:
# Read the raw FAQ dump. The encoding is pinned to UTF-8 so the result does not
# depend on the platform's default locale encoding.
with open('documents.json', 'rt', encoding='utf-8') as f_in:
    docs_raw = json.load(f_in)

# Flatten the per-course structure into one list of entries, tagging each entry
# with the course it came from. Note: entries are tagged in place, so the dicts
# inside `docs_raw` gain the 'course' key as well.
documents = []

for course_dict in docs_raw:
    for doc in course_dict['documents']:
        doc['course'] = course_dict['course']
        documents.append(doc)

In [4]:
# Build the search index over the flattened FAQ entries.
# `course` is registered as a keyword field; `search` filters on it.
index = minsearch.Index(
    text_fields=[
        "question",
        "text",
        "section",
    ],
    keyword_fields=["course"],
)
# Kept as the last expression so the cell still displays the fitted index.
index.fit(documents)

<minsearch.Index at 0x7fb4c390e6c0>

In [5]:
# Groq API client; `llm` sends its chat-completion requests through this object.
client = Groq(api_key=api_key)

In [6]:
def search(query, course='data-engineering-zoomcamp', num_results=5):
    """Look up the FAQ entries that best match ``query`` in the module-level ``index``.

    Args:
        query: Free-text question to search for.
        course: Value the ``course`` keyword field is filtered on. Defaults to
            ``'data-engineering-zoomcamp'``, the value that used to be hard-coded.
        num_results: Number of results requested from the index. Defaults to 5,
            the value that used to be hard-coded.

    Returns:
        Whatever ``index.search`` returns; ``build_prompt`` iterates over it and
        reads the ``section``, ``question`` and ``text`` keys of each entry.
    """
    # Per-field weights handed to the index: matches in `question` count more,
    # matches in `section` count less. `text` gets no explicit entry.
    boost = {
        'question': 3.0,
        'section': 0.5,
    }

    return index.search(
        query=query,
        filter_dict={'course': course},
        boost_dict=boost,
        num_results=num_results,
    )

In [7]:
def build_prompt(query, search_results):
    """Render the LLM prompt for ``query`` from the retrieved FAQ entries.

    Args:
        query: The user's question; substituted for ``{question}``.
        search_results: Iterable of entries, each providing the ``section``,
            ``question`` and ``text`` keys.

    Returns:
        The filled-in template with surrounding whitespace stripped.
    """
    template = """
    You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database. 
    Only use the facts from the CONTEXT when answering the QUESTION.
    If the CONTEXT doesn't contain answer, output NONE. 
    Do not Quote the CONTEXT in the answer.

    QUESTION: {question}
    CONTEXT: {context}
    """.strip()

    # One chunk per entry, each terminated by a blank line.
    context = "".join(
        f"section:{entry['section']}\n question:{entry['question']}\n text:{entry['text']}\n\n"
        for entry in search_results
    )

    return template.format(question=query, context=context).strip()

In [8]:
def llm(prompt):
    """Send ``prompt`` to the Groq chat-completions API and return the reply text.

    The prompt is sent as the single user message. Earlier, the user message
    was read from a module-level ``query`` variable, which raised ``NameError``
    until some other cell defined it and sent a stale question whenever the
    caller asked something different from that variable's current value.

    Args:
        prompt: Full prompt text (instructions, question and context).

    Returns:
        The ``content`` of the first choice in the completion response.
    """
    response = client.chat.completions.create(
        # Everything the model needs is already inside `prompt`, so no separate
        # system message or second copy of the question is sent.
        messages=[
            {
                "role": "user",
                "content": prompt,
            },
        ],
        # The language model which will generate the completion.
        model="llama-3.3-70b-versatile",
        # Controls randomness: lower values give less random completions.
        temperature=0.5,
        # The maximum number of tokens to generate.
        max_tokens=1024,
        # Nucleus-sampling cutoff; 1 leaves the candidate pool unrestricted.
        top_p=1,
        # No custom stop sequence.
        stop=None,
        # Request the whole completion at once rather than streamed deltas.
        stream=False,
    )

    # Return (not print) the text of the first completion choice.
    return response.choices[0].message.content

In [12]:
# Sample question used to exercise the pipeline in the cells below.
query = "The course has already started, can I still enroll?"

def rag(query):
    """Answer ``query`` end to end.

    Retrieves matching entries with ``search``, renders them into a prompt with
    ``build_prompt``, and returns what ``llm`` produces for that prompt.
    """
    return llm(build_prompt(query, search(query)))

In [13]:
# Run the full pipeline on the sample question stored in `query`.
rag(query)

"Yes, you're still eligible to submit the homeworks even if you don't register after the start date, but be aware of the deadlines for turning in the final projects."

In [15]:
# Same pipeline, with the question passed inline instead of via `query`.
rag("How can I run kafka?")

'NONE'