In [1]:
import json
from qdrant_client import QdrantClient, models
from fastembed import TextEmbedding, ImageEmbedding
from fastembed import SparseTextEmbedding
from qdrant_client.models import (
    Filter, FieldCondition, MatchValue, MatchAny,
    Prefetch, SparseVector, FusionQuery, Fusion,
)


dense_embedding_model = TextEmbedding(model_name="BAAI/bge-small-en-v1.5")
sparse_embedding_model = SparseTextEmbedding(model_name="prithivida/Splade_PP_en_v1")
image_embedding_model = ImageEmbedding(model_name="Qdrant/clip-ViT-B-32-vision")

def retrieve_context(
    client: QdrantClient,
    collection_name: str,
    query_text: str,
    tenant_id: str,
    image_path: str = None,
    source_type: str = None,
    tags: list[str] = None,
    customer_id: str = None,
    k_prefetch: int = 10,
    top_k: int = 5,
    fusion_method: Fusion = Fusion.RRF,
):
    """
    Retrieve the top-K most semantically similar points matching the given filters.
    """

    dense_vector = None
    sparse_vec = None
    prefetches = []

    if query_text:
        dense_vector = list(dense_embedding_model.embed([query_text]))[0]
        sparse_result = list(sparse_embedding_model.embed([query_text]))[0]
        sparse_vec = SparseVector(
            indices=sparse_result.indices,
            values=sparse_result.values,
        )

        prefetches.append(
            Prefetch(query=sparse_vec, using="sparse", limit=k_prefetch)
        )
        prefetches.append(
            Prefetch(
                query=dense_vector.tolist() if hasattr(dense_vector, "tolist") else dense_vector,
                using="dense",
                limit=k_prefetch,
            )
        )

    if image_path and collection_name == "orders":
        from fastembed import ImageEmbedding
        image_vec = list(image_embedding_model.embed([image_path]))[0]
        prefetches.append(
            Prefetch(query=image_vec, using="image", limit=k_prefetch)
        )

    must_clauses = []
    if tenant_id:
        must_clauses.append(FieldCondition(key="tenant_id", match=MatchValue(value=tenant_id)))
    if source_type:
        must_clauses.append(FieldCondition(key="source_type", match=MatchValue(value=source_type)))
    if customer_id:
        must_clauses.append(FieldCondition(key="customer_id", match=MatchValue(value=customer_id)))
    if tags:
        must_clauses.append(FieldCondition(key="tags", match=MatchAny(any=tags)))

    payload_filter = Filter(must=must_clauses)

    fusion_query = FusionQuery(fusion=fusion_method)

    results = client.query_points(
        collection_name=collection_name,
        prefetch=prefetches,
        query=fusion_query,
        query_filter=payload_filter,
        limit=top_k,
        with_payload=True
       
    )

    return [
        {
            "id": hit.id,
            'similarity_with_query': hit.score,
            "payload": hit.payload
        }
        for hit in results.points
    ]

def retrieve_customer_info(
    client: QdrantClient,
    tenant_id: str,
    customer_id: str,
):
    must_clauses = [
        FieldCondition(key="tenant_id", match=MatchValue(value=tenant_id))
    ]
    must_clauses.append(
        FieldCondition(key="source_type", match=MatchValue(value="crm"))
    )
    must_clauses.append(
        FieldCondition(key="customer_id", match=MatchValue(value=customer_id))
    )

    payload_filter = Filter(must=must_clauses)

    results, _ = client.scroll(
        collection_name="user_data",
        scroll_filter=payload_filter,
        limit=1,
    )

    if not results:
        return f"No customer information found for this tenant_id: {tenant_id} and customer_id: {customer_id}."
    
    return results[0].payload

def retrieve_customer_helpdesk_logs(client: QdrantClient, query: str, customer_id: str, tenant_id: str, top_k: int = 3, k_prefetch: int = 10) -> str:
    """
    Retrieves a comprehensive context for a user by fetching data from
    user_data (CRM, helpdesk) and knowledge_base collections.
    """

    helpdesk_records = retrieve_context(
        client=client,
        collection_name="user_data",
        query_text=query,
        tenant_id=tenant_id,
        source_type="helpdesk",
        customer_id=customer_id,
        top_k=top_k,
        k_prefetch = k_prefetch,
        fusion_method = Fusion.RRF,
    )

    if not helpdesk_records:
        return f"No relevant customer helpdesk ticket found for this tenant_id: {tenant_id} and customer_id: {customer_id} for this particular query."

    if helpdesk_records:
        sanitized_records = [{k: v for k, v in record.items() if not k == 'id'} for record in helpdesk_records]
        context = json.dumps(sanitized_records, indent=2)
        
    return context

def retrieve_related_knowledge_base(client: QdrantClient, query: str, tenant_id: str, source_type: str, tags: list=None, top_k: int = 3, k_prefetch: int = 10) -> str:
    related_kb = retrieve_context(
        client=client,
        collection_name="knowledge_base",
        query_text=query,
        tenant_id=tenant_id,
        source_type=source_type,
        tags=tags,
        top_k=top_k,
        k_prefetch = k_prefetch,
        fusion_method = Fusion.RRF,
    )

    if not related_kb:
        return f"No relevant knowledge base found for tenant_id: {tenant_id}, source_type: {source_type} with tags: {tags} for this particular query"
    
    sanitized_records = [{k: v for k, v in doc.items() if not k == 'id'} for doc in related_kb]
    context = json.dumps(sanitized_records, indent=2)
    return context

def retrieve_orders(
    client: QdrantClient,
    query_text: str,
    image_path: str,
    tenant_id: str,
    top_k: int = 5,
    k_prefetch: int = 10
):
    """
    Retrieve similar orders by multimodal query (text + image).
    """
    return retrieve_context(
        client=client,
        collection_name="orders",
        query_text=query_text,
        image_path=image_path,
        tenant_id=tenant_id,
        top_k=top_k,
        k_prefetch=k_prefetch,
        fusion_method=Fusion.RRF
    )

In [2]:
qdrant = QdrantClient(host="localhost", port=6333)

In [7]:
from qdrant_client.models import NamedVector

In [40]:
query_text = 'my product was drone'

image_vec = list(image_embedding_model.embed(['2.png']))[0]

prefetches = []
prefetches.append(
    Prefetch(query=image_vec, using="image", limit=10)
)

dense_vector = list(dense_embedding_model.embed([query_text]))[0]
sparse_result = list(sparse_embedding_model.embed([query_text]))[0]
sparse_vec = SparseVector(
    indices=sparse_result.indices,
    values=sparse_result.values,
)

# prefetches.append(
#     Prefetch(query=sparse_vec, using="sparse", limit=3)
# )
# prefetches.append(
#     Prefetch(
#         query=dense_vector.tolist() if hasattr(dense_vector, "tolist") else dense_vector,
#         using="dense",
#         limit=5,
#     )
# )
customer_id = 'CUST-047'
must_clauses = []
if customer_id:
    must_clauses.append(FieldCondition(key="customer_id", match=MatchValue(value=customer_id)))

payload_filter = Filter(must=must_clauses)

qdrant.query_points(
        collection_name='orders',
        prefetch=prefetches,
        query=FusionQuery(fusion=Fusion.RRF),
        limit=5,
        query_filter=payload_filter,
        with_payload=True
    )

QueryResponse(points=[ScoredPoint(id='b10c7a4c-7966-4f7e-82b8-e6cae29f877c', version=8, score=0.5, payload={'tenant_id': 'ecom', 'source_type': 'orders', 'order_id': 'ORD-0182', 'customer_id': 'CUST-047', 'order_date': '2025-07-08', 'product_name': 'Action Drone', 'product_category': 'Electronics', 'quantity': 1, 'unit_price': 650.0, 'total_amount': 650.0, 'payment_status': 'Paid', 'delivery_status': 'Shipped', 'payment_method': 'PayPal', 'shipping_address': 'San Francisco, USA', 'text_embeded': 'Product Name: Action Drone, Product Category: Electronics', 'image_embedded': 'data/ecom/images/ORD-0182.jpg'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id='d92b3023-30b7-4d5a-94b8-53b57e6cd9aa', version=8, score=0.33333334, payload={'tenant_id': 'ecom', 'source_type': 'orders', 'order_id': 'ORD-0132', 'customer_id': 'CUST-047', 'order_date': '2025-07-08', 'product_name': 'Drone', 'product_category': 'Electronics', 'quantity': 1, 'unit_price': 500.0, 'total_amount': 500.0, '

In [38]:
image_vec = list(image_embedding_model.embed(['2.png']))[0]

results = qdrant.search(
    collection_name="orders",
    query_vector=NamedVector(name='image', vector=image_vec),
    limit=5,
    with_payload=True
)

results

  results = qdrant.search(


[ScoredPoint(id='b10c7a4c-7966-4f7e-82b8-e6cae29f877c', version=8, score=0.9131622, payload={'tenant_id': 'ecom', 'source_type': 'orders', 'order_id': 'ORD-0182', 'customer_id': 'CUST-047', 'order_date': '2025-07-08', 'product_name': 'Action Drone', 'product_category': 'Electronics', 'quantity': 1, 'unit_price': 650.0, 'total_amount': 650.0, 'payment_status': 'Paid', 'delivery_status': 'Shipped', 'payment_method': 'PayPal', 'shipping_address': 'San Francisco, USA', 'text_embeded': 'Product Name: Action Drone, Product Category: Electronics', 'image_embedded': 'data/ecom/images/ORD-0182.jpg'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id='d92b3023-30b7-4d5a-94b8-53b57e6cd9aa', version=8, score=0.8850959, payload={'tenant_id': 'ecom', 'source_type': 'orders', 'order_id': 'ORD-0132', 'customer_id': 'CUST-047', 'order_date': '2025-07-08', 'product_name': 'Drone', 'product_category': 'Electronics', 'quantity': 1, 'unit_price': 500.0, 'total_amount': 500.0, 'payment_status'

In [4]:
retrieve_orders(
    client = qdrant,
    query_text = None,
    image_path='../data/ecom/images/ORD-0182.jpg',
    tenant_id='ecom'
)

[{'id': 'b10c7a4c-7966-4f7e-82b8-e6cae29f877c',
  'similarity_with_query': 0.5,
  'payload': {'tenant_id': 'ecom',
   'source_type': 'orders',
   'order_id': 'ORD-0182',
   'customer_id': 'CUST-047',
   'order_date': '2025-07-08',
   'product_name': 'Action Drone',
   'product_category': 'Electronics',
   'quantity': 1,
   'unit_price': 650.0,
   'total_amount': 650.0,
   'payment_status': 'Paid',
   'delivery_status': 'Shipped',
   'payment_method': 'PayPal',
   'shipping_address': 'San Francisco, USA',
   'text_embeded': 'Product Name: Action Drone, Product Category: Electronics',
   'image_embedded': 'data/ecom/images/ORD-0182.jpg'}},
 {'id': 'd92b3023-30b7-4d5a-94b8-53b57e6cd9aa',
  'similarity_with_query': 0.33333334,
  'payload': {'tenant_id': 'ecom',
   'source_type': 'orders',
   'order_id': 'ORD-0132',
   'customer_id': 'CUST-047',
   'order_date': '2025-07-08',
   'product_name': 'Drone',
   'product_category': 'Electronics',
   'quantity': 1,
   'unit_price': 500.0,
   'tota