In [1]:
from openai import OpenAI

In [2]:
# OpenAI-compatible client pointed at a server on localhost:11434
# (presumably a local Ollama instance, given the placeholder key - confirm).
client = OpenAI(base_url='http://localhost:11434/v1/', api_key='ollama')

In [5]:
import json
import minsearch

from openai import OpenAI

In [6]:
import requests 

# Download the course FAQ JSON and flatten it into a single list of documents.
docs_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1'
# A timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() reports an HTTP error here instead of letting it show
# up as a JSON decoding failure on the next line.
docs_response = requests.get(docs_url, timeout=30)
docs_response.raise_for_status()
documents_raw = docs_response.json()

documents = []

for course in documents_raw:
    course_name = course['course']

    for doc in course['documents']:
        # Tag every entry with the course it came from before flattening.
        doc['course'] = course_name
        documents.append(doc)

In [3]:
def build_prompt(query, search_results):
    """Render the teaching-assistant prompt for a question and its FAQ hits.

    Args:
        query: The user's question; substituted into the QUESTION slot.
        search_results: Iterable of dicts, each providing the 'section',
            'question' and 'text' keys.

    Returns:
        The filled-in prompt with leading and trailing whitespace removed.
    """
    # Written line by line so the whitespace embedded in the template
    # (indentation and trailing spaces) is explicit rather than invisible.
    template = (
        "You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database. \n"
        "    Use only the facts from the CONTEXT when answering the QUESTION.\n"
        "    \n"
        "    QUESTION : {question}\n"
        "    \n"
        "    CONTEXT : {context}"
    )

    entries = [
        f"section: {hit['section']}\nquestion: {hit['question']}\nanswer: {hit['text']}\n\n"
        for hit in search_results
    ]

    filled = template.format(question=query, context="".join(entries))
    return filled.strip()

In [4]:
from elasticsearch import Elasticsearch, exceptions
from tqdm.auto import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
def es_create_index_if_not_exists(es_client, index_name='course-questions', docs=None):
    """Create the FAQ index and load documents into it, unless it already exists.

    Args:
        es_client: Elasticsearch client used for the create and index calls.
        index_name: Name of the index to create. Defaults to 'course-questions'.
        docs: Iterable of documents to index. When None, the module-level
            ``documents`` list is used.

    Returns:
        None. When the index already exists nothing is indexed.

    Raises:
        exceptions.RequestError: If index creation is rejected for any reason
            other than the index already existing.
    """
    index_settings = {
        "settings": {
            "number_of_shards": 1,
            "number_of_replicas": 0
        },
        "mappings": {
            "properties": {
                "text": {"type": "text"},
                "section": {"type": "text"},
                "question": {"type": "text"},
                "course": {"type": "keyword"}
            }
        }
    }

    # Guard only the create call: the "already exists" case is the one error
    # that should be tolerated, and it can only come from this request.
    try:
        es_client.indices.create(index=index_name, body=index_settings)
    except exceptions.RequestError as ex:
        if ex.error == 'resource_already_exists_exception':
            return  # Index already exists; skip indexing.
        raise  # Any other rejection is a real error - propagate unchanged.

    if docs is None:
        docs = documents

    for doc in tqdm(docs):
        es_client.index(index=index_name, document=doc)

In [6]:
def elastic_search(query, course="data-engineering-zoomcamp", size=5, index_name='course-questions'):
    """Search the FAQ index for entries matching `query` within one course.

    Uses the module-level ``es_client``.

    Args:
        query: Text matched against the 'question' (boosted x3), 'text' and
            'section' fields.
        course: Exact value the 'course' field must have. Defaults to
            "data-engineering-zoomcamp".
        size: Maximum number of hits requested. Defaults to 5.
        index_name: Index to search. Defaults to 'course-questions'.

    Returns:
        List with the '_source' of each hit, in the order returned by
        the search call.
    """
    search_query = {
        "size": size,
        "query": {
            "bool": {
                "must": {
                    "multi_match": {
                        "query": query,
                        "fields": ["question^3", "text", "section"],
                        "type": "best_fields"
                    }
                },
                # Restrict results to a single course via a term filter.
                "filter": {
                    "term": {
                        "course": course
                    }
                }
            }
        }
    }

    res = es_client.search(index=index_name, body=search_query)

    return [hit['_source'] for hit in res['hits']['hits']]

In [7]:
def llm(prompt):
    """Send `prompt` as a single user message to the 'phi3' model.

    Uses the module-level ``client``.

    Returns:
        The message content of the first choice in the response.
    """
    user_message = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        model='phi3',
        messages=[user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [8]:
def rag(query):
    """Answer `query`: search for matching entries, build the prompt, ask the LLM.

    Returns:
        Whatever ``llm`` returns for the prompt built from the search results.
    """
    return llm(build_prompt(query, elastic_search(query)))

In [9]:
# Connect to the Elasticsearch node on localhost:9200.
es_client = Elasticsearch(hosts='http://localhost:9200')
# The result is discarded; presumably called as a connectivity check - confirm.
es_client.info()
# Create and populate the index on first run.
es_create_index_if_not_exists(es_client)

In [10]:
# Sample question run through the search -> prompt -> LLM pipeline.
query = "I just discovered the course, can I still join it?"
rag(query)

'Based on your question about joining a course later, here\'s what I found from our FAQ: Yes, even if you do not register right away after the start date of "Course-Name", according to answer #2 in section General Course-Related Questions - You are still eligible to submit homeworks as they may have submissions deadlines and recommendations.\nHowever, keep track that there might be final projects with specific handover dates. To ensure smooth course participation beyond its official start date: you can always continue working on the homework assignments until their respective completion requirements are met (if any), ensuring timely project turn-in is key to maintain a good academic standing in your program of choice — and thus avoid potentially missing out on valuable learning opportunities that could aid with course success down the road.\nAdditionally, per answer #3 which also appears under \'General Course Related Questions\', post session concludes provisions allow one continued s