In [1]:
from openai import OpenAI
import json
import minsearch

In [2]:
# Load the raw FAQ dump from disk into docs_raw.
# JSON text is UTF-8 by specification, so the encoding is passed explicitly;
# without it open() decodes with the platform's locale encoding.
# NOTE(review): this absolute path is specific to one machine — consider a path
# relative to the notebook so the cell can run elsewhere.
with open('/Users/dmitrywer/Desktop/my_projects/LLM_Zoomcamp/01-intro/documents.json', 'r', encoding='utf-8') as file:
    docs_raw = json.load(file)

In [3]:
# Flatten the per-course FAQ lists into one list. Each entry is tagged in place
# with the name of the course it came from, so docs_raw's dicts gain a 'course' key too.
documents = []

for course_dict in docs_raw:
    course_name = course_dict['course']
    faq_entries = course_dict['documents']
    for doc in faq_entries:
        doc['course'] = course_name
    documents.extend(faq_entries)

In [4]:
# Declare the index layout: 'course' is a keyword field (used as a filter in
# search below), while 'question', 'text' and 'section' are text fields.
index = minsearch.Index(
    keyword_fields=['course'],
    text_fields=['question', 'text', 'section'],
)

In [5]:
# Fit the index on the flattened FAQ entries; the call's return value
# (the Index object itself) is what this cell displays.
index.fit(documents)

<minsearch.Index at 0x17fddbe30>

In [6]:
# Quick look at the fitted index: no course filter, no boosts, top two hits only.
index.search(
    query='Setting up dbt locally with Docker and Postgres?',
    num_results=2,
)

[{'text': 'This is not a FAQ but more of an advice if you want to set up dbt locally, I did it in the following way:\nI had the postgres instance from week 2 (year 2024) up (the docker-compose)\nmkdir dbt\nvi dbt/profiles.yml\nAnd here I attached this content (only the required fields) and replaced them with the proper values (for instance mine where in the .env file of the folder of week 2 docker stuff)\ncd dbt && git clone https://github.com/dbt-labs/dbt-starter-project\nmkdir project && cd project && mv dbt-starter-project/* .\nMake sure that you align the profile name in profiles.yml with the dbt_project.yml file\nAdd this line anywhere on the dbt_project.yml file:\nconfig-version: 2\ndocker run --network=mage-zoomcamp_default --mount type=bind,source=/<your-path>/dbt/project,target=/usr/app --mount type=bind,source=/<your-path>/profiles.yml,target=/root/.dbt/profiles.yml ghcr.io/dbt-labs/dbt-postgres ls\nIf you have trouble run\ndocker run --network=mage-zoomcamp_default --mount t

In [7]:
def search(question, course='data-engineering-zoomcamp', num_results=3):
    """Look up the FAQ entries that best match a question in the fitted index.

    Args:
        question: Free-text query forwarded to ``index.search``.
        course: Value the ``course`` keyword field must equal. Defaults to
            'data-engineering-zoomcamp', the value that used to be hard-coded.
        num_results: Maximum number of entries to request. Defaults to 3,
            the value that used to be hard-coded.

    Returns:
        Whatever ``index.search`` returns for the query (the outputs recorded
        in this notebook show a list of document dicts).
    """
    # Matches in 'question' are weighted 3.0 and matches in 'section' 0.5;
    # 'text' gets no explicit boost.
    boost = {'question': 3.0, 'section': 0.5}
    return index.search(
        query=question,
        filter_dict={'course': course},
        boost_dict=boost,
        num_results=num_results,
    )

In [8]:
# Sample query reused by the search() call in the next cell.
question = "Setting up dbt locally with Docker and Postgres"

In [9]:
# Run the boosted, course-filtered search for the sample question and display the hits.
search(question)

[{'text': 'This is not a FAQ but more of an advice if you want to set up dbt locally, I did it in the following way:\nI had the postgres instance from week 2 (year 2024) up (the docker-compose)\nmkdir dbt\nvi dbt/profiles.yml\nAnd here I attached this content (only the required fields) and replaced them with the proper values (for instance mine where in the .env file of the folder of week 2 docker stuff)\ncd dbt && git clone https://github.com/dbt-labs/dbt-starter-project\nmkdir project && cd project && mv dbt-starter-project/* .\nMake sure that you align the profile name in profiles.yml with the dbt_project.yml file\nAdd this line anywhere on the dbt_project.yml file:\nconfig-version: 2\ndocker run --network=mage-zoomcamp_default --mount type=bind,source=/<your-path>/dbt/project,target=/usr/app --mount type=bind,source=/<your-path>/profiles.yml,target=/root/.dbt/profiles.yml ghcr.io/dbt-labs/dbt-postgres ls\nIf you have trouble run\ndocker run --network=mage-zoomcamp_default --mount t

In [10]:
def build_prompt(question, search_results):
    """Render the LLM prompt: fixed instructions, the question, then the retrieved FAQ entries.

    Args:
        question: The user's question, substituted into the QUESTION slot.
        search_results: Iterable of dicts, each providing 'section',
            'question' and 'text' keys.

    Returns:
        The prompt string. Every entry contributes a three-line block followed
        by a blank line, so a non-empty context ends with two newlines; an
        empty ``search_results`` leaves the CONTEXT slot empty.

    Raises:
        KeyError: If an entry lacks one of the three keys.
    """
    # Spelled out line by line so the whitespace-only lines and the trailing
    # space after "CONTEXT:" are visible instead of hidden in a triple-quoted block.
    prompt_template = (
        "You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.\n"
        "Use only the facts from the CONTEXT when answering the QUESTION. If there is no answer, return NONE.\n"
        "    \n"
        "QUESTION: {question}\n"
        "    \n"
        "CONTEXT: \n"
        "{context}"
    )
    # The f-string is double-quoted because reusing the enclosing quote inside
    # a replacement field is a syntax error before Python 3.12.
    context = ''.join(
        f"section: {doc['section']}\nquestion: {doc['question']}\ncontext: {doc['text']}\n\n"
        for doc in search_results
    )
    return prompt_template.format(question=question, context=context)
    

In [11]:
def llm(prompt, model='phi3'):
    """Send one user message to the local OpenAI-compatible endpoint and return the reply text.

    Args:
        prompt: Full prompt text, sent as a single 'user' message.
        model: Name of the model to request. Defaults to 'phi3', the value
            that used to be hard-coded.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    # A new client is created on every call, pointed at localhost:11434.
    # NOTE(review): 'ollama' is presumably a placeholder key for the local
    # server rather than a real secret — confirm.
    client = OpenAI(
        base_url='http://localhost:11434/v1/',
        api_key='ollama',
    )
    response = client.chat.completions.create(
        model=model,
        messages=[{'role': 'user', 'content': prompt}],
    )
    return response.choices[0].message.content

In [12]:
def rag(query):
    """Answer a query end to end: retrieve FAQ entries, build the prompt, ask the model.

    Returns the model's reply with leading and trailing whitespace removed.
    """
    retrieved = search(query)
    answer = llm(build_prompt(query, retrieved))
    return answer.strip()

In [13]:
# End-to-end run of the pipeline: rag() retrieves context, builds the prompt and queries the model.
# NOTE(review): "evaludated" in the query looks like a typo for "evaluated" — confirm whether it is intentional.
rag('How the project would be evaludated?')

"The project will be evaluated through a peer review process where 3 randomly assigned students who also submitted their projects evaluate your work. Each reviewer should provide feedback in writing based on the reproducibility criterion within two days as mentioned during Zoomcamp Q&A sessions, which is crucial for assessing whether someone can re-run everything or not if it was impossible initially. A follow-up comment will be added to address any difficulties faced due to limited resources like internet access issues; however, in the absence of this resource, trying your best with what you have could suffice as great effort is valued even without perfect reproducibility. The final grade received from these peer reviews should reflect efforts towards ensuring replicability within project work constraints and will be determined by taking into consideration whether such steps were taken or not in the event that complete execution wasn't feasible for a given reviewer, as discussed durin