In [2]:
import requests
import groq
from elasticsearch import Elasticsearch
from tqdm.auto import tqdm
from groq import Groq

In [5]:
# Download the course FAQ file and flatten it into a single list of documents,
# tagging every document with the name of the course it belongs to.
docs_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1'
# The timeout keeps the cell from hanging indefinitely on a stalled connection.
docs_response = requests.get(docs_url, timeout=30)
# Stop here with an explicit HTTP error rather than a confusing JSON decode
# error on the next line when the download did not succeed.
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Build new dicts instead of mutating the parsed JSON, so `documents_raw`
# stays exactly as it was downloaded.
documents = [
    {**doc, 'course': course['course']}
    for course in documents_raw
    for doc in course['documents']
]

In [11]:
def llm(prompt, model="llama-3.3-70b-versatile"):
    """Send `prompt` as a single user message to a Groq chat model and return the reply text.

    Args:
        prompt: The full prompt text; sent as the only message, with role 'user'.
        model: Name of the model to query. Defaults to the model the notebook
            originally hard-coded, so existing `llm(prompt)` calls are unchanged.

    Returns:
        The `content` of the message in the first choice of the completion response.
    """
    # A fresh client is constructed, without arguments, on every call.
    # NOTE(review): Groq() presumably picks up the API key from the environment — confirm.
    client = Groq()
    response = client.chat.completions.create(
        model=model,
        messages=[{'role': 'user', 'content': prompt}],
    )
    return response.choices[0].message.content

In [32]:
# Create the client for the Elasticsearch node at http://localhost:9200.
# `es_client` is the module-level client the indexing and search code uses.
es_client = Elasticsearch("http://localhost:9200")
# Bare expression so the notebook shows the client's repr as the cell output.
es_client

<Elasticsearch(['http://localhost:9200'])>

In [35]:
# Name of the Elasticsearch index that stores the FAQ documents.
index_name = 'pkkk3_hw1'
# Index configuration: a single shard without replicas, full-text `text`
# fields for the searchable content and a `keyword` field for `course`,
# which the search function uses in an exact-match term filter.
index_setting = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "text": {"type": "text"},
            "section": {"type": "text"},
            "question": {"type": "text"},
            "course": {"type": "keyword"}
        }
    }
}

# Creating an index that already exists is rejected by Elasticsearch, and
# indexing the documents a second time would store duplicates (no explicit
# ids are given). Only create and populate the index when it is missing, so
# the cell can be re-run. To rebuild from scratch, delete the index first.
if not es_client.indices.exists(index=index_name):
    es_client.indices.create(index=index_name, body=index_setting)

    for doc in tqdm(documents):
        es_client.index(index=index_name, document=doc)

  0%|          | 0/948 [00:00<?, ?it/s]

In [51]:
def elastic_search(es_query, index_name, filtername=None):
    """Search an index for the documents that best match a question.

    Uses the module-level `es_client` to run a `multi_match` query over the
    `question` and `text` fields and returns at most five hits.

    Args:
        es_query: Free-text query string.
        index_name: Name of the index to search.
        filtername: Course name. When given, only documents whose `course`
            field equals it are returned. When None or an empty string, no
            course filter is applied (an empty-string term filter would only
            match documents whose course is literally empty).

    Returns:
        A `(result_docs, result_scores)` tuple of two parallel lists: the
        `_source` of every hit and its `_score`, in the order returned by
        Elasticsearch.
    """
    bool_query = {
        "must": {
            "multi_match": {
                "query": es_query,
                # `question^4` boosts matches in the question field by a
                # factor of 4 relative to the unboosted `text` field.
                "fields": ["question^4", "text"],
                "type": "best_fields"
            }
        }
    }
    if filtername:
        # Restrict the hits to a single course via an exact-match filter.
        bool_query["filter"] = {"term": {"course": filtername}}

    search_query = {
        "size": 5,  # maximum number of hits returned by Elasticsearch
        "query": {"bool": bool_query}
    }

    response = es_client.search(index=index_name, body=search_query)
    hits = response['hits']['hits']
    result_docs = [hit['_source'] for hit in hits]
    result_scores = [hit['_score'] for hit in hits]

    print('length of results:{0}'.format(len(result_scores)))
    return result_docs, result_scores

In [39]:
def rag_elastic(es_query, index_name, filtername=""):
    """Answer a question end to end: search, build the prompt, query the LLM.

    Args:
        es_query: The user's question; used both as the search query and as
            the question placed in the prompt.
        index_name: Name of the Elasticsearch index to search.
        filtername: Forwarded unchanged as the third argument of
            `elastic_search`. Defaults to the empty string, the same value the
            notebook passes when it searches without naming a course.

    Returns:
        Whatever `llm` returns for the prompt built from the search results.
    """
    # elastic_search takes three arguments; the scores are not needed here.
    search_results, _scores = elastic_search(es_query, index_name, filtername)
    # build_prompt must be defined in the notebook before this function is called.
    prompt = build_prompt(es_query, search_results)
    return llm(prompt)

In [41]:
# Search for the Kubernetes question, passing an empty string as the
# course-filter argument, and show the scores of the returned hits.
es_query = "How do execute a command on a Kubernetes pod?"
search_results, score_results = elastic_search(
    es_query,
    index_name,
    "",
)
print(score_results)

length of results:5
[44.50556, 35.433445, 33.70974, 33.2635, 32.589073]


In [45]:
# Search for the Docker copy question within the machine-learning-zoomcamp
# course, then show the question of the third hit and all hit scores.
es_query = "How do copy a file to a Docker container?"
search_results, score_results = elastic_search(
    es_query,
    index_name,
    'machine-learning-zoomcamp',
)
print(search_results[2]['question'])
print(score_results)

length of results:5
How do I copy files from a different folder into docker container’s working directory?
[73.38676, 66.688705, 59.812744, 53.622295, 43.647385]


In [65]:
# Retrieve the FAQ entries for the question, restricted to one course.
es_query="How do I execute a command in a running docker container?"
search_results, score_results = elastic_search(es_query, index_name,'machine-learning-zoomcamp')
print(score_results)

# Overall prompt layout: instructions, then the question, then the context.
prompt_template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database. 
Use only the facts from CONTEXT for answering QUESTION.
                    
QUESTION:{question}

CONTEXT:
{context}
""".strip()

# Layout of one retrieved FAQ entry inside the context.
context_template="""
Q: {question}
A: {text}
""".strip()


def build_prompt(query, search_results):
    """Assemble the LLM prompt for `query` from retrieved FAQ documents.

    The prompt assembly lives in a function so that `rag_elastic`, which
    calls `build_prompt(es_query, search_results)`, can reuse it.

    Args:
        query: The question inserted into the QUESTION slot of the prompt.
        search_results: Iterable of dicts, each with a `question` and a
            `text` key; every entry is rendered with `context_template`.

    Returns:
        `prompt_template` filled with the question and the rendered entries,
        with leading and trailing whitespace stripped.
    """
    # Every entry is followed by a blank line; the final strip() removes the
    # blank line left after the last entry.
    context = "".join(
        context_template.format(question=doc['question'], text=doc['text']).strip() + "\n\n"
        for doc in search_results
    )
    return prompt_template.format(question=query, context=context).strip()


prompt = build_prompt(es_query, search_results)
print(len(prompt))

length of results:5
[84.050095, 51.04628, 49.938507, 45.275463, 45.255775]
2705


In [66]:
# Send the previously assembled `prompt` to the LLM and print its reply.
answer=llm(prompt)
print(answer)

To execute a command in a running Docker container, you need to follow these steps:

1. Find the container-id by running the command `docker ps`.
2. Use the `docker exec` command to execute a command in the container. The basic syntax is `docker exec -it <container-id> bash`.

For example: 
`docker exec -it <container-id> bash`

This will allow you to execute commands inside the running Docker container.


In [67]:
import tiktoken

In [74]:
# Count the tokens of the assembled prompt with the tokenizer tiktoken
# associates with "gpt-4o".
encoding = tiktoken.encoding_for_model("gpt-4o")
print(len(encoding.encode(prompt)))
# NOTE(review): llm() queries "llama-3.3-70b-versatile", not gpt-4o, so this
# count is presumably only an approximation for that model — confirm.

620


In [75]:
# Look up the raw bytes behind token id 63842; the recorded output shows it
# is b"You're", the first word of the prompt template.
encoding.decode_single_token_bytes(63842)

b"You're"