### Install libraries

In [1]:
!pip install elasticsearch==8.4.3
!pip install requests
!pip install tqdm==4.66.4



## Connect to elasticsearch

In [2]:
from elasticsearch import Elasticsearch

def connect_to_es():
    for _ in range(10):  # Retry up to 10 times
        try:
            es = Elasticsearch("http://localhost:9200", basic_auth=('elastic', 'DkIedPPSCb'))
            if es.ping():
                print("Connected to Elasticsearch")
                return es
        except Exception as e:
            print(f"Connection failed, retrying... ({e})")
            time.sleep(10)
    raise Exception("Failed to connect to Elasticsearch after several retries")
    
es = connect_to_es()
# Get cluster information
cluster_info = es.info()

# Display cluster information
cluster_info

Connected to Elasticsearch


ObjectApiResponse({'name': 'bee69b10ca04', 'cluster_name': 'docker-cluster', 'cluster_uuid': 'igl-mFEfSEmOi0_-SS2pIg', 'version': {'number': '8.4.3', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '42f05b9372a9a4a470db3b52817899b99a76ee73', 'build_date': '2022-10-04T07:17:24.662462378Z', 'build_snapshot': False, 'lucene_version': '9.3.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})

## Download data to insert to elasticsearch

In [3]:
import requests 

def prepare_document(docs_url):
    docs_response = requests.get(docs_url)
    documents_raw = docs_response.json()

    documents = []

    for course in documents_raw:
        course_name = course['course']

        for doc in course['documents']:
            doc['course'] = course_name
            documents.append(doc)
            
    return documents

docs_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1'
documents = prepare_document(docs_url)

## Create index in elasticsearch

In [4]:
from tqdm.auto import tqdm
def create_index(documents):    
    index_settings = {
        "settings": {
            "number_of_shards": 1,
            "number_of_replicas": 0
        },
        "mappings": {
            "properties": {
                "text": {"type": "text"},
                "section": {"type": "text"},
                "question": {"type": "text"},
                "course": {"type": "keyword"} 
            }
        }
    }

    index_name = "course-questions"

    if not es.indices.exists(index=index_name):
        es.indices.create(index=index_name, body=index_settings)
        print("Index created")
    else:
        print("Index already exists")
        
    for doc in tqdm(documents):
        doc_id = doc['course']
        es.index(index=index_name, document=doc)
    
create_index(documents)

  es.indices.create(index=index_name, body=index_settings)


Index created


  0%|          | 0/948 [00:00<?, ?it/s]

 ### Q3 Searching

In [15]:
def retrieve_score(query, index_name, max_results=3):
    es = connect_to_es()
    search_query = {
        "size": max_results,
        "query": {
            "bool": {
                "must": [
                    {
                        "multi_match": {
                            "query": query,
                            "fields": ["question^4", "text"],
                            "type": "best_fields"
                        }
                    }
                ]
            }
        }
    }
    response = es.search(index=index_name, body=search_query)
    result_docs = [{"score": hit['_score'], "source": hit['_source']} for hit in response['hits']['hits']]
    return result_docs


index_name = "course-questions"
query = "How do I execute a command in a running docker container?"
result_docs = retrieve_score(query, index_name, max_results=3)

for doc in result_docs:
    print(f"Score: {doc['score']}")
    print(doc['source']['question'])

Connected to Elasticsearch
Score: 84.050095
How do I debug a docker container?
Score: 75.54128
PGCLI - running in a Docker container
Score: 72.08518
Running multiple services in a Docker container


  response = es.search(index=index_name, body=search_query)


 ### Filtering

In [16]:
def filter_documents(query, index_name, max_results=3):
    es = connect_to_es()
    search_query = {
        "size": max_results,
        "query": {
            "bool": {
                "must": [
                    {
                        "multi_match": {
                            "query": query,
                            "fields": ["question^4", "text"],
                            "type": "best_fields"
                        }
                    }
                ],
                "filter": [
                    {
                        "term": {
                            "course": "machine-learning-zoomcamp"
                        }
                    }
                ]
            }
        }
    }
    response = es.search(index=index_name, body=search_query)
    result_docs = [{"score": hit['_score'], "question": hit['_source']['question'], "text": hit['_source']['text']} for hit in response['hits']['hits']]
    return result_docs

index_name = "course-questions"
query = "How do I execute a command in a running docker container?"
res = filter_documents(query, index_name, max_results=3)

print(res)
if len(res) >= 2:
    top_quest = res[2]['question']
    print(f"Top question: {top_quest}")
else:
    print("No results found.")

Connected to Elasticsearch
[{'score': 84.050095, 'question': 'How do I debug a docker container?', 'text': 'Launch the container image in interactive mode and overriding the entrypoint, so that it starts a bash command.\ndocker run -it --entrypoint bash <image>\nIf the container is already running, execute a command in the specific container:\ndocker ps (find the container-id)\ndocker exec -it <container-id> bash\n(Marcos MJD)'}, {'score': 51.04628, 'question': 'How do I copy files from my local machine to docker container?', 'text': "You can copy files from your local machine into a Docker container using the docker cp command. Here's how to do it:\nTo copy a file or directory from your local machine into a running Docker container, you can use the `docker cp command`. The basic syntax is as follows:\ndocker cp /path/to/local/file_or_directory container_id:/path/in/container\nHrithik Kumar Advani"}, {'score': 49.938507, 'question': 'How do I copy files from a different folder into doc

  response = es.search(index=index_name, body=search_query)


## Q5. Building a prompt

In [26]:
def build_prompt_new(query, search_results):
    prompt_template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT: 
{context}
""".strip()

    context = ""
    
    for doc in search_results:
        context = context + f"Q: {doc['question']}\nA: {doc['text']}\n\n"
    
    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt

In [34]:
query = "How do I execute a command in a running docker container?"
search_results = retrieve_documents(query, index_name, max_results=3)
res = build_prompt_new(query,search_results)
print(res)
len(res)

Connected to Elasticsearch
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: How do I execute a command in a running docker container?

CONTEXT: 
Q: How do I debug a docker container?
A: Launch the container image in interactive mode and overriding the entrypoint, so that it starts a bash command.
docker run -it --entrypoint bash <image>
If the container is already running, execute a command in the specific container:
docker ps (find the container-id)
docker exec -it <container-id> bash
(Marcos MJD)

Q: How do I copy files from my local machine to docker container?
A: You can copy files from your local machine into a Docker container using the docker cp command. Here's how to do it:
To copy a file or directory from your local machine into a running Docker container, you can use the `docker cp command`. The basic syntax is as follows:
docker cp /path/to/local/fil

  response = es.search(index=index_name, body=search_query)


1463

## Q6. Tokens

In [30]:
!pip install tiktoken



In [40]:
import tiktoken
encoding = tiktoken.encoding_for_model("gpt-4o")
num_tokens = len(encoding.encode(res))
num_tokens

323