In [1]:
from openai import OpenAI
from dotenv import load_dotenv
import os

load_dotenv()

True

In [2]:
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

In [3]:
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "is it too late to join the course?"}]
)

In [4]:
response.choices[0].message.content

"It's hard to say without specific details about the course you're referring to, as different courses have different enrollment periods, deadlines, and policies. Here are a few steps you can take to find out if it's too late to join:\n\n1. **Check the Course Website**: Most courses will have information on their official website regarding enrollment deadlines and late registration policies.\n2. **Contact the Instructor or Institution**: Reach out to the course instructor or the admissions office. They can provide you with the most accurate and up-to-date information.\n3. **Review Your Email**: If you received any emails about the course, they might contain information about deadlines.\n4. **Look for Flexible Options**: Some courses offer rolling admissions, late enrollment periods, or even self-paced learning options that might allow you to join even after the official start date.\n\nIf it turns out that you have missed the deadline, consider asking if there is a waitlist or if the cou

In [5]:
import requests 

docs_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1'
docs_response = requests.get(docs_url)
documents_raw = docs_response.json()

documents = []

for course in documents_raw:
    course_name = course['course']

    for doc in course['documents']:
        doc['course'] = course_name
        documents.append(doc)

In [6]:
from elasticsearch import Elasticsearch
es_client = Elasticsearch("http://localhost:9200")

index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "text": {"type": "text"},
            "question": {"type": "text"},
            "course": {"type": "keyword"} 
        }
    }
}

index_name = "homework168"
es_client.indices.create(index=index_name, body=index_settings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'homework168'})

In [7]:
from tqdm.auto import tqdm

for doc in tqdm(documents):
    es_client.index(index=index_name, document=doc)

  from .autonotebook import tqdm as notebook_tqdm
100%|██████████| 948/948 [00:03<00:00, 284.74it/s]


In [8]:
query = "How do I execute a command in a running docker container?"

In [21]:
search_query = {
    "size": 3,
    "query": {
        "bool": {
            "must": {
                "multi_match": {
                    "query": query,
                    "fields": ["question^4", "text"],
                    "type": "best_fields"
                }
            },
            "filter": {
                "term": {
                    "course": "machine-learning-zoomcamp"
                }
            }
        }
    }
}

In [22]:
response = es_client.search(index=index_name, body=search_query)

In [23]:
response

ObjectApiResponse({'took': 4, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 345, 'relation': 'eq'}, 'max_score': 84.050095, 'hits': [{'_index': 'homework168', '_id': 'XnlLXJABolFaCK30H1pw', '_score': 84.050095, '_source': {'text': 'Launch the container image in interactive mode and overriding the entrypoint, so that it starts a bash command.\ndocker run -it --entrypoint bash <image>\nIf the container is already running, execute a command in the specific container:\ndocker ps (find the container-id)\ndocker exec -it <container-id> bash\n(Marcos MJD)', 'section': '5. Deploying Machine Learning Models', 'question': 'How do I debug a docker container?', 'course': 'machine-learning-zoomcamp'}}, {'_index': 'homework168', '_id': 'fXlLXJABolFaCK30H1rX', '_score': 51.04628, '_source': {'text': "You can copy files from your local machine into a Docker container using the docker cp command. Here's how to do it:\nTo copy a file

In [12]:
search_query = {
    "size": 3,
    "query": {
        "bool": {
            "must": {
                "multi_match": {
                    "query": query,
                    "fields": ["question^4", "text"],
                    "type": "best_fields"
                }
            },
            "filter": {
                "term": {
                    "course": "machine-learning-zoomcamp"
                }
            }
        }
    }
}


In [24]:
def build_promt(index_output):
    query = "How do I execute a command in a running docker container?"

    prompt_template = """
    You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
    Use only the facts from the CONTEXT when answering the QUESTION.
    
    QUESTION: {question}
    
    CONTEXT:
    {context}
    """.strip()
        
    context_template = """
    Q: {question}
    A: {text}
    """.strip()
    
    context = ""
    for hit in index_output["hits"]["hits"]:
        hit = hit["_source"]
        context += context_template.format(question=hit['question'],
                                          text=hit['text']) + "\n\n"
    prompt = prompt_template.format(question=query, context=context)
    return prompt.strip()

In [25]:
context = build_promt(es_client.search(index=index_name, body=search_query))
len(context)

1498

In [17]:
import tiktoken
encoding = tiktoken.encoding_for_model("gpt-4o")
len(encoding.encode(context))

331