In [2]:
import json
from pathlib import Path
from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue

company = "company__microsoft"
metadata_filters = {"method": "sequential"}  # dict for metadata matches

# Load global config
with open("../global.yaml", "r") as f:
    import yaml
    config = yaml.safe_load(f)

qdrant_url = config["QDRANT_URL"]

# Connect to Qdrant
client = QdrantClient(url=qdrant_url)

# Build filter from metadata_filters
filter_conditions = [FieldCondition(key=k, match=MatchValue(value=v)) for k, v in metadata_filters.items()]
scroll_filter = Filter(must=filter_conditions) if filter_conditions else None

# Scroll the collection to get up to 10 points
scroll_result = client.scroll(
    collection_name=company,
    limit=10,
    with_payload=True,
    with_vectors=False,
    scroll_filter=scroll_filter
)

# Display the text from payloads
for i, point in enumerate(scroll_result[0]):
    text = point.payload.get("text", "No text")
    print(f"Chunk {i+1}: {text[:200]}...")  # Show first 200 chars

In [3]:
# Check all collections in Qdrant
collections = client.get_collections()
print("Available collections:")
for collection in collections.collections:
    print(f"- {collection.name}")



Available collections:
- company__microsoft


In [4]:
# Show available fields in the collection
if collections.collections:
    collection_name = company  # or choose one
    try:
        sample_result = client.scroll(collection_name=collection_name, limit=1, with_payload=True)
        if sample_result[0]:
            payload = sample_result[0][0].payload
            print(f"\nAvailable fields in '{collection_name}':")
            for key in payload.keys():
                print(f"- {key}")
        else:
            print(f"\nNo data in '{collection_name}' to show fields.")
    except Exception as e:
        print(f"\nError retrieving fields: {e}")


No data in 'company__microsoft' to show fields.


In [None]:
# 1) get all strategy_hash values (GROUP BY)
facet_res = client.facet(
    collection_name=company,
    key="strategy_hash",
    limit=1000,
    exact=True,
)

hashes = [h.value for h in facet_res.hits]

# 2) for each hash, fetch ONE representative point and print all its payload keys/values
for h in hashes:
    flt = Filter(
        must=[FieldCondition(key="strategy_hash", match=MatchValue(value=h))]
    )

    points, _ = client.scroll(
        collection_name=company,
        scroll_filter=flt,
        limit=1,
        with_payload=True,
        with_vectors=False,  # do not fetch embeddings
    )

    if not points:
        continue

    payload = points[0].payload or {}
    # optional: remove the large text field from printing
    payload_no_text = {k: v for k, v in payload.items() if k not in ["text", "company", "chunk_idx"]}

    print(f"\nstrategy_hash={h} (example payload):")
    for k in sorted(payload_no_text.keys()):
        print(f"  {k}: {payload_no_text[k]}")

UnexpectedResponse: Unexpected Response: 400 (Bad Request)
Raw response content:
b'{"status":{"error":"Wrong input: No appropriate index for faceting: `strategy_hash`. Please create one to facet on this field. Check https://qdrant.tech/documentation/concepts/indexing/#payload-ind ...'

: 

# THE CELL BELOW DELETES FROM THE DATABASE

In [28]:
# Delete all points in the collection (namespace)
from qdrant_client.models import Filter

try:
    client.delete(collection_name=company, points_selector=Filter())
    print(f"All points in collection '{company}' have been deleted.")
except Exception as e:
    print(f"Error deleting points: {e}")

All points in collection 'company__microsoft' have been deleted.
