In [10]:
import minsearch
import json
import requests
from elasticsearch import Elasticsearch

In [11]:
# Load the raw FAQ export: a list of course entries, each carrying a 'course'
# name and a 'documents' list (see the flattening loop that consumes it).
# JSON text is read as UTF-8 explicitly so decoding does not depend on the
# platform's default locale encoding.
with open('documents.json', 'rt', encoding='utf-8') as f_in:
    docs_raw = json.load(f_in)

In [12]:
# Flatten the per-course structure into a single list of FAQ entries.
# Each entry is tagged in place with the name of the course it belongs to,
# so the 'course' key can later be used as a filter.
documents = []

for course_entry in docs_raw:
    faq_entries = course_entry['documents']
    for faq in faq_entries:
        faq['course'] = course_entry['course']
    documents += faq_entries

In [19]:
# In-memory minsearch index over the flattened FAQ entries.
# search() boosts on 'question'/'section' and filters on 'course', so those
# are the fields registered here.
# NOTE(review): this cell's execution count (19) is out of sequence with its
# neighbours; it must run after `documents` has been built.
text_fields = ["question", "text", "section"]
keyword_fields = ["course"]

index = minsearch.Index(text_fields=text_fields, keyword_fields=keyword_fields)
index.fit(documents)

<minsearch.Index at 0x7f664adee000>

In [13]:
# Sample user question reused by the `rag(q)` cells further down.
q = 'the course has already started, can I still enroll?'

In [14]:
def search(query, course='data-engineering-zoomcamp', num_results=5):
    """Look up FAQ entries in the in-memory minsearch ``index``.

    Args:
        query: Free-text question to search for.
        course: Value the ``course`` keyword field must equal. Defaults to
            the course that used to be hard-coded here.
        num_results: Maximum number of hits to request (default 5).

    Returns:
        Whatever ``index.search`` returns; ``build_prompt`` consumes it as an
        iterable of document dicts.
    """
    # Weight matches in 'question' 3x and in 'section' 0.5x; 'text' is not
    # listed (presumably it keeps minsearch's default weight - confirm).
    boost = {'question': 3.0, 'section': 0.5}

    return index.search(
        query=query,
        filter_dict={'course': course},
        boost_dict=boost,
        num_results=num_results,
    )

def build_prompt(query, search_results):
    """Assemble the LLM prompt: instructions, the question, then the retrieved FAQ entries.

    Args:
        query: The user's question, inserted verbatim after ``QUESTION:``.
        search_results: Iterable of dicts, each providing ``section``,
            ``question`` and ``text`` keys.

    Returns:
        The prompt string with leading/trailing whitespace stripped.
    """
    template = "\n".join([
        "You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.",
        "Use only the facts from the CONTEXT when answering the QUESTION.",
        "",
        "QUESTION: {question}",
        "",
        "CONTEXT: ",
        "{context}",
    ])

    # One block per retrieved entry, each terminated by a blank line.
    entries = [
        f"section: {hit['section']}\nquestion: {hit['question']}\nanswer: {hit['text']}\n\n"
        for hit in search_results
    ]

    return template.format(question=query, context="".join(entries)).strip()

In [15]:
def llm_llama(prompt: str, model='llama3'):
    """Send ``prompt`` to the local ``/api/generate`` endpoint and collect the streamed reply.

    The endpoint answers with newline-delimited JSON objects: intermediate
    chunks carry a piece of text under ``response`` with ``done`` false, and
    the last chunk has ``done`` true plus run metadata.

    Args:
        prompt: Prompt text to send.
        model: Name of the model to request (default ``'llama3'``).

    Returns:
        dict with keys ``model``, ``response`` (the concatenated text),
        ``context``, ``total_duration``, ``load_duration`` and
        ``eval_duration``. Metadata missing from the final chunk is ``None``
        (``model`` falls back to the requested name).

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        ValueError: The body contains an ``error`` object or ends without a
            chunk whose ``done`` is true.
        json.JSONDecodeError: A non-empty line is not valid JSON.
    """
    url = 'http://localhost:11434/api/generate'
    data = {
        "model": f"{model}",
        "prompt": f"{prompt}"
    }
    http_response = requests.post(url, json=data)
    # Error replies carry no 'done' key; fail on the status code instead of
    # on a confusing KeyError further down.
    http_response.raise_for_status()

    pieces = []
    final_chunk = None

    for line in http_response.text.strip().split('\n'):
        if not line.strip():
            # Tolerate blank lines between chunks.
            continue
        chunk = json.loads(line)
        if 'error' in chunk:
            raise ValueError(f"generate endpoint returned an error: {chunk['error']}")
        # The final chunk of a stream has an empty 'response'; a non-streamed
        # reply has the whole text there, so every chunk is collected.
        pieces.append(chunk.get('response', ''))
        if chunk.get('done') is True:
            final_chunk = chunk

    if final_chunk is None:
        # Previously this surfaced as UnboundLocalError on the metadata names.
        raise ValueError("response ended without a chunk marked done=true")

    return {
        "model": final_chunk.get('model', model),
        "response": "".join(pieces),
        "context": final_chunk.get('context'),
        "total_duration": final_chunk.get('total_duration'),
        "load_duration": final_chunk.get('load_duration'),
        "eval_duration": final_chunk.get('eval_duration'),
    }

In [16]:
# Client for the Elasticsearch node expected on localhost:9200.
es_client = Elasticsearch('http://localhost:9200')
# Index queried by elastic_search().
# NOTE(review): no visible cell creates or populates this index - presumably
# done elsewhere; confirm before running on a fresh cluster.
index_name = "course-questions"

def elastic_search(query, course="data-engineering-zoomcamp", size=5):
    """Retrieve FAQ entries for ``query`` from the Elasticsearch index ``index_name``.

    Args:
        query: Free-text question to match against the indexed documents.
        course: Exact value the ``course`` field must have. Defaults to the
            course that used to be hard-coded here.
        size: Maximum number of hits to return (default 5).

    Returns:
        list of the ``_source`` dicts of the hits, in the order returned by
        Elasticsearch; empty when nothing matches.
    """
    search_query = {
        "size": size,
        "query": {
            "bool": {
                # Scored part: full-text match, with 'question' boosted 3x.
                "must": {
                    "multi_match": {
                        "query": query,
                        "fields": ["question^3", "text", "section"],
                        "type": "best_fields"
                    }
                },
                # Non-scored part: restrict hits to one course.
                "filter": {
                    "term": {
                        "course": course
                    }
                }
            }
        }
    }

    # NOTE(review): `body=` is deprecated in elasticsearch-py 8.x in favour of
    # top-level `query=`/`size=` arguments - confirm the installed client
    # version before switching.
    response = es_client.search(index=index_name, body=search_query)

    return [hit['_source'] for hit in response['hits']['hits']]

In [17]:
def rag(query):
    """Answer ``query`` with retrieval-augmented generation.

    Retrieves FAQ entries via ``elastic_search``, turns them into a prompt
    with ``build_prompt`` and returns whatever ``llm_llama`` produces for it.
    """
    # Retrieval backend: Elasticsearch; `search(query)` (minsearch) is the
    # alternative that was tried here.
    retrieved_docs = elastic_search(query)
    return llm_llama(build_prompt(query, retrieved_docs))

In [47]:
%%time
# End-to-end run of rag() with the llm_llama backend on the sample question.
# NOTE(review): this cell's execution count (47) is out of sequence with the
# surrounding cells, so the recorded timing may come from an earlier session -
# re-run to confirm.
r = rag(q)

CPU times: user 20.4 ms, sys: 4.12 ms, total: 24.5 ms
Wall time: 1min 41s


In [48]:
# llm_llama returns a dict; the generated text is stored under 'response'.
print(r['response'])

Based on the context, the answer to your question "the course has already started, can I still enroll?" is:

Yes, even if you don't register, you're still eligible to submit the homeworks. Be aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.


In [49]:
%%time

# Second sample question, answered through the same rag() pipeline.
query = 'What installations I should do?'
r = rag(query)
# llm_llama returns a dict; the generated text is stored under 'response'.
print(r['response'])

Based on the CONTEXT from the FAQ database, I would answer your QUESTION as follows:

Since you're asking about installations to do, I assume you're referring to Module 2: Workflow Orchestration. According to the provided information, you should focus on the following installations:

1. Download each .py/.sql file that corresponds to each block you created for the pipeline. These files can be found under 'data loaders', 'data transformers', and 'data exporters' folders.
2. Move the downloaded files to your GitHub repo folder and commit your changes.

These steps should help you complete the installations required for Module 2: Workflow Orchestration.
CPU times: user 33.7 ms, sys: 4.98 ms, total: 38.7 ms
Wall time: 3min 43s


### Using my GPU for inference

1. Install AMD HIP for Windows
2. Install LM Studio
3. Download the model
4. Start the local server
5. Get the IP address of the host machine by running `ip route show | grep -i default | awk '{ print $3}'`
...               
    Profit !!!1


In [18]:
import subprocess

# Discover the address under which the host machine is reachable: the pipeline
# prints the third field of every `ip route` line mentioning "default", i.e.
# the default gateway address.
# shell=True is needed for the pipes; the command is a constant string, so no
# untrusted input reaches the shell.
result = subprocess.run("ip route show | grep -i default | awk '{ print $3}'", 
                        shell=True, 
                        capture_output=True, 
                        text=True)

# The pipeline exits with awk's status even when `ip` or `grep` found nothing,
# so validate the output itself rather than the return code.
candidate_ips = result.stdout.split()
if not candidate_ips:
    raise RuntimeError(
        "could not determine the host IP from `ip route show`: "
        f"{result.stderr.strip() or 'no default route found'}"
    )

# Keep only the first address: with several default routes the raw output
# would span multiple lines and produce an invalid base URL downstream.
output_ip_adress = candidate_ips[0]

# Print the output
print(output_ip_adress)

172.20.144.1


In [19]:
# Example: reuse your existing OpenAI setup
from openai import OpenAI

# The server started in LM Studio on the host is addressed through the stock
# OpenAI client; only base_url changes, and the API key is a placeholder.
lm_studio_base_url = f"http://{output_ip_adress}:1234/v1"
client = OpenAI(base_url=lm_studio_base_url, api_key="lm-studio")

# Smoke test: one system + one user message to confirm the server responds.
smoke_test_messages = [
    {"role": "system", "content": "Always answer in rhymes."},
    {"role": "user", "content": "Introduce yourself."},
]
completion = client.chat.completions.create(
    model="SanctumAI/Meta-Llama-3-8B-Instruct-GGUF",
    messages=smoke_test_messages,
    temperature=0.7,
)

print(completion.choices[0].message)

ChatCompletionMessage(content=" Greetings, I'm a machine with knowledge vast,\n\nLearning from humans, my insights amassed.\n\nIn texts and data, I find delight,\n\nAnswering questions day and night.\n\nWith logic so sharp, no query too tough,\n\nMy purpose is clear, to assist you rough.", role='assistant', function_call=None, tool_calls=None)


In [26]:
def llm_llama_local(
        prompt: str,
        host_ip_adress: str,
        model='Phi-3-mini-4k-instruct-q4'):
    """Send ``prompt`` as a single user message to the server on the host machine.

    Args:
        prompt: Text placed in the one ``user`` message of the chat request.
        host_ip_adress: Address of the machine running the server; the
            request goes to port 1234 under ``/v1``.
        model: Model identifier passed to the server. Another identifier
            tried in this notebook is
            ``'SanctumAI/Meta-Llama-3-8B-Instruct-GGUF'``.

    Returns:
        The ``message`` object of the first choice in the completion (not a
        plain string; the text is in its ``content`` attribute).
    """
    base_url = f"http://{host_ip_adress}:1234/v1"
    lm_studio = OpenAI(base_url=base_url, api_key="lm-studio")

    chat_messages = [{"role": "user", "content": f"{prompt}"}]
    reply = lm_studio.chat.completions.create(
        model=f"{model}",
        messages=chat_messages,
        temperature=0.7,
    )

    first_choice = reply.choices[0]
    return first_choice.message

In [27]:
def rag(query):
    """Answer ``query`` with retrieval-augmented generation via the host-side server.

    Redefines the earlier ``rag``: retrieval still goes through
    ``elastic_search`` and ``build_prompt``, but generation is delegated to
    ``llm_llama_local`` using the module-level ``output_ip_adress``.
    """
    # Retrieval backend: Elasticsearch; `search(query)` (minsearch) is the
    # alternative that was tried here.
    retrieved_docs = elastic_search(query)
    llm_prompt = build_prompt(query, retrieved_docs)
    # Generation backend: llm_llama_local; `llm_llama(prompt)` is the
    # alternative that was tried here.
    return llm_llama_local(llm_prompt, output_ip_adress)

In [24]:
%%time
# NOTE(review): this cell's execution count (24) precedes the In [26]/In [27]
# definitions of llm_llama_local/rag shown above it, so the recorded output
# was presumably produced by an earlier version of those cells - re-run to
# confirm.
r = rag(q)
# Prints the whole message object; the answer text is its `content` field.
print(r)

ChatCompletionMessage(content=" Yes, even if you don't register, you're still eligible to submit the homeworks. However, be aware that there will be deadlines for turning in the final projects.\n\nAdditionally, we will keep all the materials after the course finishes, so you can follow the course at your own pace after it finishes.", role='assistant', function_call=None, tool_calls=None)
CPU times: user 85 μs, sys: 18.1 ms, total: 18.2 ms
Wall time: 7.17 s


In [28]:
%%time
# Same sample question again; execution count 28 follows the In [27]
# definition of rag, so this run used the llm_llama_local backend.
r = rag(q)
# Prints the whole message object; the answer text is its `content` field.
print(r)

ChatCompletionMessage(content=" Yes, you can still enroll in the course even if it has already started. However, be aware that there will be deadlines for turning in final projects, so plan accordingly and don't leave everything until the last minute.\n\n-----------\nNote: The information provided does not directly address whether new registrations are allowed after a course start date. However, based on context where students can submit homework even if they haven't registered, it is implied that enrollment might still be possible beyond the initial start date. Please consult with the instructor or course administrators for specific details regarding late registration policies and eligibility.", role='assistant', function_call=None, tool_calls=None)
CPU times: user 17.5 ms, sys: 0 ns, total: 17.5 ms
Wall time: 10.3 s


In [25]:
%%time

# NOTE(review): this cell's execution count (25) precedes the In [26]/In [27]
# definitions of llm_llama_local/rag, so the recorded output was presumably
# produced by an earlier version of those cells - re-run to confirm.
query = 'What installations I should do?'
r = rag(query)
# Prints the whole message object; the answer text is its `content` field.
print(r)

ChatCompletionMessage(content=" For Module 2: Workflow Orchestration, to submit your files for Homework 2, you should move the downloaded .py and .sql files that correspond to each block created in your pipeline from 'data loaders', 'data transformers', and 'data exporters' folders under /home/src/folder to your GitHub repo folder. Then commit these changes.\n\nRegarding VMs running out of space, if this happens:\n1. Delete data saved locally during ETL processes.\n2. Kill any related Prefect process.\n3. Use ncdu for finding large files and delete them if necessary. If you do so, eliminate caching from your flow code to avoid further issues.", role='assistant', function_call=None, tool_calls=None)
CPU times: user 17.1 ms, sys: 0 ns, total: 17.1 ms
Wall time: 14 s


In [None]:
%%time
# Not executed yet (no execution count); repeats the rag(q) run from the
# cells above.
r = rag(q)
print(r)