In [None]:
!pip install groq elasticsearch tqdm

!wget https://raw.githubusercontent.com/alexeygrigorev/minsearch/main/minsearch.py

In [129]:
import minsearch, json
from groq import Groq
from tqdm.auto import tqdm

# Load Documents

In [15]:
# Read the raw FAQ dump. The encoding is pinned to UTF-8 so the result does
# not depend on the platform's default locale encoding.
with open('documents.json', 'rt', encoding='utf-8') as ft:
    doc_raw = json.load(ft)

# Flatten the per-course structure into one list of documents. Each entry of
# `doc_raw` carries a "course" name and a "documents" list; the course name is
# copied onto every document so it can be used as a filter later.
documents = []
for course_dict in doc_raw:
    course_name = course_dict["course"]
    for doc in course_dict["documents"]:
        # Note: this tags the dict in place, so the entries inside `doc_raw`
        # gain the 'course' key as well.
        doc['course'] = course_name
        documents.append(doc)

In [20]:
# In-memory minsearch index over the flattened FAQ documents:
# "question", "text" and "section" are registered as text fields, while
# "course" is registered as a keyword field (it is used as a filter in `search`).
Index = minsearch.Index(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"],
)
Index.fit(documents)

In [None]:
from elasticsearch import Elasticsearch

es_client = Elasticsearch("http://localhost:9200")
# Query the server once up front; presumably this surfaces an unreachable
# cluster here rather than in the middle of indexing.
es_client.info()

# Single-shard, no-replica index. The three free-text fields are mapped as
# `text`; `course` is mapped as `keyword` so it can be matched exactly in a
# term filter.
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "text": {"type": "text"},
            "section": {"type": "text"},
            "question": {"type": "text"},
            "course": {"type": "keyword"}
        }
    }
}

index_name = "course-questions"

# Create and populate the index only when it does not exist yet, so that
# re-running this cell neither fails on the already-existing index nor indexes
# every document a second time.
# (The original called `es.indices.create`, but the client is named `es_client`.)
if not es_client.indices.exists(index=index_name):
    response = es_client.indices.create(index=index_name, body=index_settings)
    for doc in tqdm(documents):
        es_client.index(index=index_name, document=doc)

In [137]:
def search_elastic(query, course="data-engineering-zoomcamp", size=5):
    """Retrieve FAQ documents for ``query`` from Elasticsearch.

    Runs a ``multi_match`` (``best_fields``) query over the "question", "text"
    and "section" fields, with "question" boosted via ``^3``, restricted by a
    term filter on the "course" field.

    Args:
        query: Free-text user question.
        course: Value the "course" field must equal. Defaults to the
            previously hard-coded "data-engineering-zoomcamp".
        size: Maximum number of hits to request. Defaults to 5.

    Returns:
        A list with the ``_source`` dict of every returned hit.

    Relies on the notebook-level ``es_client`` and ``index_name``.
    """
    search_query = {
        "size": size,
        "query": {
            "bool": {
                "must": {
                    "multi_match": {
                        "query": query,
                        "fields": ["question^3", "text", "section"],
                        "type": "best_fields"
                    }
                },
                "filter": {
                    "term": {
                        "course": course
                    }
                }
            }
        }
    }
    response = es_client.search(index=index_name, body=search_query)
    # Keep only the stored documents; scores and other hit metadata are dropped.
    return [hit['_source'] for hit in response['hits']['hits']]

def search(query, course='data-engineering-zoomcamp', num_results=5):
    """Retrieve FAQ documents for ``query`` from the in-memory minsearch index.

    Args:
        query: Free-text user question.
        course: Value the "course" keyword field is filtered on. Defaults to
            the previously hard-coded 'data-engineering-zoomcamp'.
        num_results: Number of results requested from the index. Defaults to 5.

    Returns:
        Whatever ``Index.search`` returns for these arguments.

    Relies on the notebook-level ``Index``.
    """
    # Boost values passed to the index: 3.0 for "question", 0.5 for "section".
    boost = {'question': 3.0, 'section': 0.5}
    return Index.search(
        query=query,
        filter_dict={'course': course},
        boost_dict=boost,
        num_results=num_results,
    )

def build_prompt(query, search_results):
    """Render the LLM prompt for ``query`` from the retrieved FAQ entries.

    Args:
        query: The user's question; substituted for ``{question}``.
        search_results: Iterable of dicts, each providing the keys
            'section', 'question' and 'text'.

    Returns:
        The instruction text followed by the question and one
        "section / question / answer" entry per search result. Every entry
        ends with a blank line, so a non-empty context ends in two newlines.
    """
    # Written out line by line so the exact whitespace that reaches the model
    # (the four-space indent on continuation lines, the trailing space after
    # the first sentence) is explicit rather than hidden in a stripped
    # triple-quoted block.
    template = (
        "You are a course teaching assistant. Answer the QUESTION on the CONTEXT. \n"
        "    Use only the facts from the CONTEXT when answering the QUESTIONS.\n"
        "    if the CONTEXT does not contain the answer, output NONE\n"
        "    \n"
        "    QUESTION: {question}\n"
        "    \n"
        "    CONTEXT :\n"
        "    {context}"
    )

    entries = [
        f"section : {doc['section']}\nquestion : {doc['question']}\nanswer:{doc['text']}\n\n"
        for doc in search_results
    ]
    return template.format(question=query, context="".join(entries))

def llm(promt, api_key=None, model="mixtral-8x7b-32768"):
    """Send ``promt`` to Groq as a single user message and return the reply text.

    Args:
        promt: Full prompt text, sent as the content of one "user" message.
        api_key: Groq API key. When left as ``None`` the Groq client is
            expected to fall back to the ``GROQ_API_KEY`` environment variable
            (see the Groq client documentation).
        model: Name of the chat model to use. Defaults to the previously
            hard-coded "mixtral-8x7b-32768".

    Returns:
        The message content of the first choice in the completion.
    """
    client = Groq(api_key=api_key)
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": promt,
            }
        ],
        model=model,
    )
    return chat_completion.choices[0].message.content

def rag(query, api_key):
    """Answer ``query`` using minsearch retrieval and print the LLM's reply.

    Chains ``search`` -> ``build_prompt`` -> ``llm``. The answer is printed,
    not returned: the function always returns ``None``.
    """
    retrieved = search(query)
    answer = llm(build_prompt(query, retrieved), api_key)
    print(answer)

def rag_elastic(query, api_key=None):
    """Answer ``query`` using Elasticsearch retrieval and print the LLM's reply.

    Chains ``search_elastic`` -> ``build_prompt`` -> ``llm``. The answer is
    printed, not returned: the function always returns ``None``.

    Args:
        query: Free-text user question.
        api_key: Groq API key forwarded to ``llm``. Optional so existing
            one-argument calls keep working; with ``None`` the Groq client is
            expected to read ``GROQ_API_KEY`` from the environment.
    """
    search_results = search_elastic(query)
    promt_question = build_prompt(query, search_results)
    # The original omitted the key here, which raised TypeError because
    # `llm` requires two arguments.
    answer = llm(promt_question, api_key)
    print(answer)

In [138]:
import os
from getpass import getpass

query = 'how i run kafka for begginer?'
# Keep the secret out of the notebook: read it from the environment, and only
# prompt for it (without echoing) when the variable is not set.
api_key = os.environ.get("GROQ_API_KEY") or getpass("Groq API key: ")
rag(query, api_key)


To run Kafka for a beginner, first create a virtual environment and install the necessary packages. Here are the steps:

1. Create a virtual environment:
   ```
   python -m venv env
   ```
2. Activate the virtual environment:
   - For MacOS, Linux:
     ```
     source env/bin/activate
     ```
   - For Windows:
     ```
     env/Scripts/activate
     ```
3. Install the required packages:
   ```
   pip install -r ../requirements.txt
   ```
4. If you are using Python, you can install the `confluent-kafka` and `fastavro` packages using:
   ```
   pip install confluent-kafka fastavro
   ```

If you encounter the error "ModuleNotFoundError: No module named 'kafka.vendor.six.moves'", you can fix it by using `kafka-python-ng` instead:
```
pip install kafka-python-ng
```

If you are using Docker, make sure all the images are up and running before creating the virtual environment.

For Java Kafka, you can run the producer/consumer/kstreams/etc in the terminal by running the following command 