In [1]:
pip install -q "dlt[qdrant]" "qdrant-client[fastembed]"

In [2]:
import dlt
import requests

### Question 1. dlt Version

In [3]:
# Report the dlt release that the kernel actually imported.
print("The Version of dlt that was installed was {}".format(dlt.__version__))

The Version of dlt that was installed was 1.13.0


### dlt Resource

In [4]:
import dlt
import requests

@dlt.resource
def zoomcamp_data():
    """Yield every document from the workshop's ``documents.json``, tagged with its course.

    The downloaded JSON is iterated as a list of course entries; each entry
    provides a ``'course'`` name and a ``'documents'`` list. The course name is
    copied onto each document (under the ``'course'`` key) before it is yielded,
    so every yielded record carries the course it belongs to.

    Yields:
        dict: one document with an added ``'course'`` key.

    Raises:
        requests.HTTPError: if the server responds with a 4xx/5xx status.
        requests.Timeout: if the server does not respond within the timeout.
    """
    docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'
    # Without a timeout a stalled connection would hang the cell indefinitely.
    docs_response = requests.get(docs_url, timeout=30)
    # Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
    docs_response.raise_for_status()
    documents_raw = docs_response.json()

    for course in documents_raw:
        course_name = course['course']

        for doc in course['documents']:
            # Tag the document with its parent course before handing it to dlt.
            doc['course'] = course_name
            yield doc

### Question 2. dlt pipeline

In [5]:
from dlt.destinations import qdrant

# Point the Qdrant destination at the "db.qdrant" path (relative to the working directory).
qdrant_destination = qdrant(qd_path="db.qdrant")

In [6]:
# Configure a pipeline that loads into the Qdrant destination defined earlier.
pipeline = dlt.pipeline(
    dataset_name="zoomcamp_tagged_data",
    destination=qdrant_destination,
    pipeline_name="zoomcamp_pipeline",
)

# Run the resource through the pipeline (extract, normalize, load).
load_info = pipeline.run(zoomcamp_data())

# The trace reports each step, including the row count per normalized table.
print(pipeline.last_trace)

Run started at 2025-07-10 22:45:28.226990+00:00 and COMPLETED in 21.95 seconds with 4 steps.
Step extract COMPLETED in 0.50 seconds.

Load package 1752187538.587544 is EXTRACTED and NOT YET LOADED to the destination and contains no failed jobs

Step normalize COMPLETED in 0.11 seconds.
Normalized data for the following tables:
- zoomcamp_data: 948 row(s)

Load package 1752187538.587544 is NORMALIZED and NOT YET LOADED to the destination and contains no failed jobs

Step load COMPLETED in 11.16 seconds.
Pipeline zoomcamp_pipeline load step completed in 11.13 seconds
1 load package(s) were loaded to destination qdrant and into dataset zoomcamp_tagged_data
The qdrant destination used /content/db.qdrant location to store data
Load package 1752187538.587544 is LOADED and contains no failed jobs

Step run COMPLETED in 21.94 seconds.
Pipeline zoomcamp_pipeline load step completed in 11.13 seconds
1 load package(s) were loaded to destination qdrant and into dataset zoomcamp_tagged_data
The qdr

Based on the output above, we see that 948 rows were inserted into the collection.

### Question 3. Embeddings


In [8]:
from pprint import pprint
import json

# Load the metadata file from the local Qdrant folder.
with open("db.qdrant/meta.json") as meta_file:
    meta = json.load(meta_file)

# Show the parsed metadata in a readable, indented form.
pprint(meta)

{'aliases': {},
 'collections': {'zoomcamp_tagged_data': {'hnsw_config': None,
                                          'init_from': None,
                                          'on_disk_payload': None,
                                          'optimizers_config': None,
                                          'quantization_config': None,
                                          'replication_factor': None,
                                          'shard_number': None,
                                          'sharding_method': None,
                                          'sparse_vectors': None,
                                          'strict_mode_config': None,
                                          'vectors': {'fast-bge-small-en': {'datatype': None,
                                                                            'distance': 'Cosine',
                                                                            'hnsw_config': None,
                           

In [9]:
# Re-read the metadata so this cell does not depend on an earlier cell's state.
with open("db.qdrant/meta.json") as meta_file:
    meta = json.load(meta_file)

# Look up the "zoomcamp_tagged_data_zoomcamp_data" collection entry.
collection = meta["collections"]["zoomcamp_tagged_data_zoomcamp_data"]

# The first key of the collection's "vectors" mapping is reported as the embedding model.
embedding_model_name = list(collection["vectors"])[0]
print("Embedding model used:", embedding_model_name)

Embedding model used: fast-bge-small-en
