In [1]:
import requests 

docs_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1'
docs_response = requests.get(docs_url)
documents_raw = docs_response.json()

documents = []

for course in documents_raw:
    course_name = course['course']

    for doc in course['documents']:
        doc['course'] = course_name
        documents.append(doc)

In [3]:
len(documents)

948

In [5]:
from elasticsearch import Elasticsearch

es_client = Elasticsearch("http://localhost:9200")

index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0,
    },
    "mappings": {
        "properties": {
            "text": {"type": "text"},
            "section": {"type": "text"},
            "question": {"type": "text"},
            "course": {"type": "keyword"}
        }
    }
}

index_name = "homework-course"
es_client.indices.create(index=index_name, body=index_settings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'homework-course'})

In [6]:
from tqdm.auto import tqdm

for doc in tqdm(documents):
    es_client.index(index=index_name, document=doc)

100%|██████████| 948/948 [01:12<00:00, 13.08it/s]


In [14]:
search_query = {
    "size": 3,
    "query": {
        "bool": {
            "must": {
                "multi_match": {
                    "query": "How do I execute a command in a running docker container?",
                    "fields": ["question^4", "text"],
                    "type": "best_fields"
                }
            },
            "filter": {
                "term": {
                    "course": "machine-learning-zoomcamp"
                }
            }
        }
    } 
}

response = es_client.search(index=index_name, body=search_query)
response['hits']['hits']

[{'_index': 'homework-course',
  '_id': 'e1zQK5AB87iXdtukseRr',
  '_score': 84.050095,
  '_source': {'text': 'Launch the container image in interactive mode and overriding the entrypoint, so that it starts a bash command.\ndocker run -it --entrypoint bash <image>\nIf the container is already running, execute a command in the specific container:\ndocker ps (find the container-id)\ndocker exec -it <container-id> bash\n(Marcos MJD)',
   'section': '5. Deploying Machine Learning Models',
   'question': 'How do I debug a docker container?',
   'course': 'machine-learning-zoomcamp'}},
 {'_index': 'homework-course',
  '_id': 'mlzQK5AB87iXdtukvuRz',
  '_score': 51.04628,
  '_source': {'text': "You can copy files from your local machine into a Docker container using the docker cp command. Here's how to do it:\nTo copy a file or directory from your local machine into a running Docker container, you can use the `docker cp command`. The basic syntax is as follows:\ndocker cp /path/to/local/file_or

In [13]:
[hit['_score'] for hit in response['hits']['hits']]

[84.050095, 75.54128, 72.08518, 51.04628, 49.938507]

In [19]:
[print(f"{hit['_source']['question']}\n\n") for hit in response['hits']['hits']]

How do I debug a docker container?


How do I copy files from my local machine to docker container?


How do I copy files from a different folder into docker container’s working directory?




[None, None, None]

In [22]:
from openai import OpenAI
from dotenv import load_dotenv

load_dotenv()

client = OpenAI()

In [24]:
def elastic_search(index_name: str, query: str, filter_course: str, num_results: int=5):
    search_query = {
        "size": num_results,
        "query": {
            "bool": {
                "must": {
                    "multi_match": {
                        "query": query,
                        "fields": ["question^3", "text", "section"],
                        "type": "best_fields"
                    }
                },
                "filter": {
                    "term": {
                        "course": filter_course
                    }
                }
            }
        }
    }
    response = es_client.search(index=index_name, body=search_query)
    result_docs = [hit['_source'] for hit in response['hits']['hits']]
    return result_docs

In [48]:
def build_user_prompt(query: str, search_result: str):
    user_prompt_template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT:
{context}
""".strip()

    context_template = """
Q: {question}
A: {text}
""".strip()

    context= ""
    
    for doc in search_result:
        context += f"{context_template.format(question=doc['question'], text=doc['text'])}\n\n"    
    
    user_prompt = user_prompt_template.format(question=query, context=context).strip()
    return user_prompt

In [49]:
query="How do I execute a command in a running docker container?"
result_docs = elastic_search(index_name, query, filter_course="machine-learning-zoomcamp", num_results=3)
result = build_user_prompt(query, result_docs)
len(result)

1462

In [51]:
!pip install tiktoken

Collecting tiktoken
  Downloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Collecting regex>=2022.1.18 (from tiktoken)
  Downloading regex-2024.5.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.9/40.9 kB[0m [31m422.3 kB/s[0m eta [36m0:00:00[0m MB/s[0m eta [36m0:00:01[0m
Downloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m0m eta [36m0:00:01[0m
[?25hDownloading regex-2024.5.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (775 kB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m775.1/775.1 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m0m eta [36m0:00:01[0m
[?25hInstalling collected packages: regex, tiktoken
Su

In [52]:
import tiktoken

In [53]:
encoding = tiktoken.encoding_for_model("gpt-4o")

In [54]:
len(encoding.encode(result))

322

In [55]:
encoding.decode_single_token_bytes(63842)

b"You're"

In [56]:
def llm(user_prompt: str, model: str="gpt-3.5-turbo"):
    response = client.chat.completions.create(
        model=model,
        messages= [
            {"role": "user", "content": user_prompt},
        ]
    )
    return response.choices[0].message.content

In [57]:
answer = llm(result, model="gpt-4o")

In [59]:
print(answer)

To execute a command in a running Docker container, you can use the `docker exec` command. First, identify the container ID using `docker ps`. Then, execute the command within the container using the following syntax:

1. Find the container ID:
   ```
   docker ps
   ```

2. Execute the command in the container:
   ```
   docker exec -it <container-id> <command>
   ```

For example, if you want to start a bash session inside the container, you can run:
   ```
   docker exec -it <container-id> bash
   ```


In [61]:
len(encoding.encode(answer))

120

Coste de los tokens de entrada=0.322∗0.005=0.00161

Coste de los tokens de salida=0.12∗0.015=0.0018

Coste total=0.00161+0.0018=0.00341

El coste total de usar la API de OpenAI con 322 tokens de entrada y 120 tokens de salida es de $0.00341.