In [4]:
!pip install sentence-transformers -q

In [None]:
!pip install qdrant_client -q

In [1]:
# Import necessary modules
from qdrant_client import QdrantClient, models
import json
import numpy as np
from sentence_transformers import SentenceTransformer

# Initialize client and model
# Qdrant client backed by the local ./qdrant_db path.
client = QdrantClient(path="./qdrant_db")
# trust_remote_code=True is passed through to the model loader for this repository.
model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True)
# Take the vector size from the loaded model so it cannot drift from the model
# actually in use; fall back to 768 if the model does not report a dimension.
text_embeddings_size = model.get_sentence_embedding_dimension() or 768

<All keys matched successfully>


In [2]:
# Load the JSON output file from the previous code
# The encoding is given explicitly so parsing does not depend on the platform's
# default locale encoding.
with open('data/transcripts_policies_output.json', 'r', encoding='utf-8') as f:
    user_requests_data = json.load(f)

# Generate embeddings for all user requests
# Each record must carry a 'user_request' key; a missing key raises KeyError here.
# NOTE(review): the nomic-embed-text-v1.5 model card appears to recommend task
# prefixes ("search_document: " / "search_query: ") - confirm whether the indexed
# texts and the search queries should be prefixed consistently.
user_request_texts = [item['user_request'] for item in user_requests_data]
user_request_embeddings = model.encode(user_request_texts, show_progress_bar=True).astype('float32')

# Name of the collection that holds the user-request vectors. Defined outside the
# try block so it is always bound for the code that follows.
collection_name = "user_requests"

try:
    # Start from a clean collection: drop any existing one, then create it again
    # with the configured vector size and cosine distance.
    if client.collection_exists(collection_name):
        client.delete_collection(collection_name=collection_name)
        print(f"Collection '{collection_name}' deleted successfully.")

    client.create_collection(
        collection_name=collection_name,
        vectors_config=models.VectorParams(
            size=text_embeddings_size,
            distance=models.Distance.COSINE
        ),
    )
    print(f"Collection '{collection_name}' created successfully.")

except Exception as e:
    print(f"An error occurred while setting up the collection: {e}")
    # Re-raise so the rest of the cell does not try to upload points into a
    # collection that is missing or was left in an unknown state.
    raise

# Record fields copied unchanged into each point's payload.
payload_fields = (
    "user_request",  # The text to embed
    "customer_id",
    "call_date",
    "transcript_id",
    "summary",
    "policy_applied",
    "policy_details",
    "action_taken",
)

# Prepare points for upload: one point per record, with the record's position in
# the list as its id and the matching embedding row as its vector.
points_to_upload = [
    models.PointStruct(
        id=point_id,
        vector=embedding.tolist(),
        payload={field: record[field] for field in payload_fields},
    )
    for point_id, (record, embedding) in enumerate(
        zip(user_requests_data, user_request_embeddings)
    )
]

# Upload to Qdrant
client.upsert(
    collection_name=collection_name,
    points=points_to_upload,
    wait=True,
)

print(f"Uploaded {len(points_to_upload)} user request embeddings to Qdrant.")



Batches:   0%|          | 0/9 [00:00<?, ?it/s]

Collection 'user_requests' created successfully.
Uploaded 276 user request embeddings to Qdrant.


In [3]:


# Updated search function
def search_user_requests(query, model, client, policy_filter=None, k=10,
                         collection_name="user_requests"):
    """
    Search user requests by vector similarity, with an optional policy filter.

    Args:
        query (str): The search query
        model: Embedding model; its ``encode(query, show_progress_bar=False)``
            result must provide ``.tolist()``
        client: Qdrant client; its ``query_points(...)`` method is called
        policy_filter (str): When truthy, only points whose ``policy_applied``
            payload field matches this value are returned. ``None`` or an
            empty string disables filtering.
        k (int): Number of results to return (passed as ``limit``)
        collection_name (str): Collection to query. Defaults to
            ``"user_requests"``, the name that was previously hard-coded.

    Returns:
        The ``points`` attribute of the ``query_points`` response.
    """
    query_embedding = model.encode(query, show_progress_bar=False).tolist()

    # Build a payload filter only when one was requested; None means unfiltered.
    query_filter = None
    if policy_filter:
        query_filter = models.Filter(
            must=[
                models.FieldCondition(
                    key="policy_applied",
                    match=models.MatchValue(value=policy_filter)
                )
            ]
        )

    response = client.query_points(
        collection_name=collection_name,
        query=query_embedding,
        query_filter=query_filter,
        limit=k,
        with_payload=True,
    )

    return response.points

In [4]:
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer


def parse_search_results(results):
    """Flatten Qdrant hits into plain dicts.

    Each returned dict holds the hit's ``score`` followed by the
    ``user_request``, ``customer_id``, ``policy_applied``, ``action_taken``
    and ``summary`` payload entries. A payload missing one of these keys
    raises ``KeyError``.
    """
    payload_keys = (
        'user_request',
        'customer_id',
        'policy_applied',
        'action_taken',
        'summary',
    )
    return [
        {'score': hit.score, **{key: hit.payload[key] for key in payload_keys}}
        for hit in results
    ]

# Usage: run one example query and flatten the hits for display
results = search_user_requests("What is return policy of hammer", model, client, k=5)
parsed = parse_search_results(results)

# Print results: one numbered entry per hit, each followed by a blank line
for rank, hit in enumerate(parsed, start=1):
    entry_lines = (
        f"{rank}. Score: {hit['score']:.3f}",
        f"   Request: {hit['user_request']}",
        f"   Policy: {hit['policy_applied']}",
        f"   Action: {hit['action_taken']}",
        f"   Summary: {hit['summary']}",
        "",  # trailing empty line separates entries
    )
    print(*entry_lines, sep="\n")

1. Score: 0.588
   Request: How can I return an item to my local Home Depot store?
   Policy: Order Cancellation
   Action: The agent submitted a cancellation request for the incorrect item and informed the customer about the next steps and potential outcomes.
   Summary: The customer, Laurie Ryan, called Home Depot to cancel an incorrect item from her order and replace it with the correct one. The agent explained that while the item could be canceled, it could not be replaced in the same order due to payment processing restrictions. The cancellation request was submitted, and the customer was informed about the possibility of the cancellation not being guaranteed.

2. Score: 0.582
   Request: I received part of my order and want to return items for a refund.
   Policy: Order Cancellation and Refund
   Action: Agent confirmed the cancellation of one item and advised the customer to contact the store for a refund.
   Summary: Customer placed an order online and received a cancellation e