# Load JSON / JSONL into Qdrant

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/thierrypdamiba/qdrant-etl-cookbook/blob/main/notebooks/etl/json_to_qdrant.ipynb)

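Stream JSON Lines (JSONL) records into Qdrant in batches. Each record is stored as the point's payload, so its fields are available for payload filtering at query time. Nested objects are stored in the payload as-is, and the file is read line by line, so large files do not have to fit in memory.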

In [None]:
!pip install -q qdrant-client sentence-transformers

In [None]:
import json
import tempfile
from pathlib import Path
from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct, VectorParams, Distance
from sentence_transformers import SentenceTransformer

In [None]:
# ":memory:" selects qdrant-client's local in-process mode, so no Qdrant
# server has to be running for this notebook.
QDRANT_LOCATION = ":memory:"
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

client = QdrantClient(QDRANT_LOCATION)
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

In [None]:
# Create sample JSONL data: one JSON object per line, each with the text to
# embed ("text") and a metadata field ("topic") that ends up in the payload.
sample_records = [
    {"text": "Qdrant supports both dense and sparse vectors", "topic": "features"},
    {"text": "Collections can be created with custom distance metrics", "topic": "setup"},
    {"text": "Payload filtering narrows search results efficiently", "topic": "search"},
    {"text": "Snapshots provide backup and restore capabilities", "topic": "ops"},
    {"text": "HNSW index enables fast approximate nearest neighbor search", "topic": "algorithms"},
]

# NamedTemporaryFile creates and opens the file in one step. tempfile.mktemp is
# deprecated because it only returns a name, leaving a window in which another
# process can create that path first. delete=False keeps the file on disk after
# the `with` block so later cells can read it back.
with tempfile.NamedTemporaryFile(
    mode="w", suffix=".jsonl", encoding="utf-8", delete=False
) as f:
    f.writelines(json.dumps(record) + "\n" for record in sample_records)
    jsonl_path = Path(f.name)

print(f"Created {jsonl_path} with {len(sample_records)} records")

In [None]:
# Vector configuration for the collection: cosine distance over 384-dim vectors.
# NOTE(review): 384 is assumed to match the output size of the embedding model
# loaded earlier in the notebook — confirm if the model is changed.
vector_params = VectorParams(size=384, distance=Distance.COSINE)

client.create_collection(
    collection_name="json_collection",
    vectors_config=vector_params,
)

In [None]:
# Stream JSONL with batching
#
# The file is read line by line; records are buffered and then embedded and
# upserted BATCH_SIZE at a time, so only one batch is held in memory.
BATCH_SIZE = 100


def _upsert_batch(batch):
    """Embed and upsert one buffered batch; return the number of points written.

    `batch` is a list of (point_id, record) pairs. All texts are encoded in a
    single model call rather than one call per record.
    """
    texts = [record["text"] for _, record in batch]
    vectors = model.encode(texts).tolist()
    client.upsert(
        collection_name="json_collection",
        points=[
            PointStruct(id=point_id, vector=vector, payload=record)
            for (point_id, record), vector in zip(batch, vectors)
        ],
    )
    return len(batch)


pending = []
total = 0

# JSON Lines files are UTF-8 by convention; do not rely on the platform default.
with open(jsonl_path, encoding="utf-8") as f:
    for idx, line in enumerate(f):
        # Tolerate blank lines (e.g. a trailing newline at end of file) instead
        # of crashing in json.loads. The line index is still used as the point
        # id, so ids stay unique even when lines are skipped.
        if not line.strip():
            continue
        pending.append((idx, json.loads(line)))

        if len(pending) >= BATCH_SIZE:
            total += _upsert_batch(pending)
            pending = []

# Flush the final, partially filled batch.
if pending:
    total += _upsert_batch(pending)

print(f"Loaded {total} records")

In [None]:
# Search: embed the question with the same model used at load time and fetch
# the three nearest points.
query_vector = model.encode("How do I back up my data?").tolist()

# query_points is qdrant-client's current query API. The older client.search
# is deprecated in its favour (and dropped from newer releases), so it can
# fail with an unpinned `pip install qdrant-client`.
response = client.query_points(
    collection_name="json_collection",
    query=query_vector,
    limit=3,
)
results = response.points  # scored points

for hit in results:
    print(f"Score: {hit.score:.4f} | [{hit.payload['topic']}] {hit.payload['text']}")