## Step 1: Fetch data

In [1]:
import requests

In [2]:
docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'
# A timeout keeps the cell from hanging forever on a stalled connection.
docs_response = requests.get(docs_url, timeout=30)
# Fail here on an HTTP error instead of letting .json() choke on an error page.
docs_response.raise_for_status()
docs_raw = docs_response.json()

In [3]:
# Flatten the per-course structure into one list of FAQ entries, tagging each
# entry with the course it came from. A new dict is built per entry so the
# downloaded `docs_raw` structure is left untouched.
documents = [
    {**doc, 'course': course['course']}
    for course in docs_raw
    for doc in course['documents']
]

## Step 2: Prepare LLM client

In [4]:
import os

# Read the key from the environment rather than hard-coding it in the notebook;
# a missing GEMINI_API_KEY variable raises KeyError on this line.
gemini_api_key = os.environ['GEMINI_API_KEY']

In [5]:
from google import genai

# Shared Gemini client; llm_generate() sends every request through it.
gemini_client = genai.Client(api_key=gemini_api_key)

In [6]:
def llm_generate(prompt, model='gemini-2.0-flash'):
    """Send a prompt to Gemini and return the raw response object.

    Args:
        prompt: Content passed as ``contents`` to ``generate_content``.
        model: Name of the Gemini model to call. Defaults to the model this
            notebook was written against.

    Returns:
        Whatever ``gemini_client.models.generate_content`` returns, unmodified;
        callers extract the text themselves.
    """
    return gemini_client.models.generate_content(
        model=model,
        contents=prompt,
    )

## Step 3: Build prompt template

In [7]:
def build_prompt(query, context):
    """Fill the teaching-assistant prompt with a question and its FAQ context.

    Args:
        query: The user's question, substituted for ``{question}``.
        context: Retrieved FAQ text, substituted for ``{context}``.

    Returns:
        The formatted prompt with surrounding whitespace stripped.
    """
    template_rows = (
        "You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.",
        "Use only the facts from the CONTEXT when answering the QUESTION.",
        "",
        "QUESTION: {question}",
        "",
        "CONTEXT:",
        "{context}",
        "-----------------------------------------------------------------",
    )
    # Every row after the first is prefixed with four spaces: that is the
    # indentation the prompt has always carried, kept so the text sent to the
    # model stays byte-for-byte the same.
    template = "\n    ".join(template_rows)

    filled = template.format(question=query, context=context)
    return filled.strip()

## Step 4: Prepare vector DB (Qdrant)

#### Initialize Qdrant client

In [15]:
from qdrant_client import QdrantClient, models

In [9]:
# Client for the Qdrant server at localhost:6333.
# NOTE(review): assumes a Qdrant instance is already running there — confirm setup steps.
qd_client = QdrantClient('http://localhost:6333')

#### Choose model

In [10]:
from fastembed import TextEmbedding

In [11]:
# Vector size used both to shortlist embedding models and to size the
# collection's vectors, so the chosen model must produce vectors of this length.
EMBEDDING_DIMENSIONALITY = 512

In [12]:
import json

# Print every fastembed text model whose output size matches the target
# dimensionality. The loop variable is deliberately NOT called `models`: that
# name is the `qdrant_client.models` module, and rebinding it here would break
# every later `models.*` call on a fresh Restart & Run All.
for model_info in TextEmbedding.list_supported_models():
    if model_info['dim'] == EMBEDDING_DIMENSIONALITY:
        print(json.dumps(model_info, indent=2))

{
  "model": "BAAI/bge-small-zh-v1.5",
  "sources": {
    "hf": "Qdrant/bge-small-zh-v1.5",
    "url": "https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-zh-v1.5.tar.gz",
    "_deprecated_tar_struct": true
  },
  "model_file": "model_optimized.onnx",
  "description": "Text embeddings, Unimodal (text), Chinese, 512 input tokens truncation, Prefixes for queries/documents: not so necessary, 2023 year.",
  "license": "mit",
  "size_in_GB": 0.09,
  "additional_files": [],
  "dim": 512,
  "tasks": {}
}
{
  "model": "Qdrant/clip-ViT-B-32-text",
  "sources": {
    "hf": "Qdrant/clip-ViT-B-32-text",
    "url": null,
    "_deprecated_tar_struct": false
  },
  "model_file": "model.onnx",
  "description": "Text embeddings, Multimodal (text&image), English, 77 input tokens truncation, Prefixes for queries/documents: not necessary, 2021 year",
  "license": "mit",
  "size_in_GB": 0.25,
  "additional_files": [],
  "dim": 512,
  "tasks": {}
}
{
  "model": "jinaai/jina-embeddings-v2-small-e

In [13]:
# Embedding model name handed to models.Document for both indexing and querying.
# NOTE(review): it appears in the (truncated) 512-dim listing above — confirm it
# matches EMBEDDING_DIMENSIONALITY.
model_handle = 'jinaai/jina-embeddings-v2-small-en'

#### Create collection

collection_name = 'zoomcamp-faq'

# Cosine-distance vectors sized to match the chosen embedding model.
vector_settings = models.VectorParams(
    size=EMBEDDING_DIMENSIONALITY,
    distance=models.Distance.COSINE,
)

# Only create the collection when it is missing, so this cell can be re-run
# (e.g. Restart & Run All) against a server that already holds it.
# NOTE: an existing collection is reused as-is, even if its vector settings
# differ from `vector_settings`; delete it manually to recreate it.
if not qd_client.collection_exists(collection_name):
    qd_client.create_collection(
        collection_name=collection_name,
        vectors_config=vector_settings,
    )

#### Organize points

In [16]:
# One point per FAQ entry: the id is the entry's position in `documents`, the
# vector is a Document built from the question and answer text joined by a
# space, and the whole entry is stored as the payload.
points = [
    models.PointStruct(
        id=position,
        vector=models.Document(
            text=' '.join((faq['question'], faq['text'])),
            model=model_handle,
        ),
        payload=faq,
    )
    for position, faq in enumerate(documents)
]

#### Index the points into the collection

In [None]:
# Send all prepared points to the collection in a single upsert call.
# NOTE(review): the points carry models.Document objects rather than raw
# vectors, so embedding presumably happens as part of this call — confirm.
qd_client.upsert(
    collection_name=collection_name,
    points=points
)

#### Create vector search function

In [28]:
def vector_search(query, limit=5, course='data-engineering-zoomcamp'):
    """Retrieve the FAQ entries closest to ``query`` and format them as context.

    Args:
        query: Free-text question; wrapped in a ``models.Document`` so it is
            embedded with the same ``model_handle`` used for the indexed points.
        limit: Maximum number of points to request from Qdrant.
        course: Value the payload's ``course`` field must match. Pass ``None``
            to search across all courses.

    Returns:
        A string with one ``Question: ...\\nAnswer: ...`` block per hit, each
        followed by a blank line; an empty string when there are no hits.
    """
    query_filter = None
    if course is not None:
        # `must` is given as a list so further conditions can be appended later.
        query_filter = models.Filter(
            must=[
                models.FieldCondition(
                    key='course',
                    match=models.MatchValue(value=course),
                )
            ]
        )

    search_response = qd_client.query_points(
        collection_name=collection_name,
        query=models.Document(text=query, model=model_handle),
        limit=limit,
        query_filter=query_filter,
    )

    return ''.join(
        f"Question: {hit.payload['question']}\nAnswer: {hit.payload['text']}\n\n"
        for hit in search_response.points
    )

## Step 5: Create RAG function

In [29]:
def rag(query):
    """Answer ``query`` with retrieval-augmented generation.

    Retrieves context via ``vector_search``, builds the prompt with
    ``build_prompt``, sends it through ``llm_generate`` and returns the text of
    the first part of the first candidate in the response.
    """
    retrieved_context = vector_search(query)
    llm_response = llm_generate(build_prompt(query, retrieved_context))

    first_candidate = llm_response.candidates[0]
    return first_candidate.content.parts[0].text

In [38]:
# Sample question used to exercise the whole pipeline end to end.
query = "How to run kafka?"

In [39]:
# Runs retrieval plus generation; needs both the Qdrant server and the Gemini API.
answer = rag(query)

In [40]:
# print() renders the newlines and bullet list in the model's answer as plain text.
print(answer)

There are different ways to run Kafka depending on the context:

*   **To run producer/consumer/kstreams/etc in terminal (Java):** In the project directory, run: `java -cp build/libs/<jar_name>-1.0-SNAPSHOT.jar:out src/main/java/org/example/JsonProducer.java`
*   **If you encounter a Module "kafka" not found error when trying to run producer.py:** Create a virtual environment, activate it, and install the requirements.  Then run the python files in that environment.
*   **If you encounter kafka.errors.NoBrokersAvailable:**  Ensure your Kafka broker Docker container is working using `docker ps`. If it's not running, use `docker compose up -d` in the Docker Compose YAML file folder to start the instances.

