In [None]:
import pandas as pd
import json
import time
import resource
import numpy as np

from langchain_qdrant import Qdrant

import qdrant_client
from qdrant_client import QdrantClient
from qdrant_client.models import (
    PointStruct, 
    VectorParams, 
    Distance, 
    SparseVector, 
    SparseVectorParams,
    SparseIndexParams,
    NamedVector,
    NamedSparseVector,
    SearchRequest
)

from sentence_transformers import SentenceTransformer


  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Configuration
url = "http://localhost:6333"
collection_name = "faq-hybrid"
distance = Distance.COSINE
dense_dimension = 1024

# Initialize models
# Dense embeddings model
dense_embeddings = SentenceTransformer(
    "jinaai/jina-embeddings-v3",
    trust_remote_code=True
)

# Sparse embeddings model 
sparse_model = SentenceTransformer(
    "intfloat/multilingual-e5-large", 
    trust_remote_code=True
)


In [4]:
# Load FAQ json.
with open("../data/full_faq.json", "r", encoding="utf-8") as file:
    faq_data = json.load(file)

faq_data[0]


{'question': 'Di mana ada lokasi Rumah Sakit Siloam?',
 'answer': 'Ada 40 Rumah Sakit modern yang terdiri dari 13 Rumah Sakit di Jabodetabek dan 27 rumah sakit yang tersebar di Jawa, Sumatera, Kalimantan, Sulawesi, serta Bali dan Nusa Tenggara.',
 'category': 'FAQ Website'}

In [46]:
dense_embeddings.encode('Di mana ada lokasi Rumah Sakit Siloam?')

array([ 0.0023278 , -0.04797845,  0.06509324, ..., -0.02864278,
       -0.04848163,  0.00240045], shape=(1024,), dtype=float32)

In [47]:
sparse_model.encode('Di mana ada lokasi Rumah Sakit Siloam?')

array([ 0.00825853, -0.00684039, -0.01008988, ..., -0.04223974,
       -0.02853844,  0.01105917], shape=(1024,), dtype=float32)

In [48]:
def moveEmbedding(faq_data, batch_size=100):
    """
    Load FAQ data from a JSON-like list of dictionaries and upsert both dense and sparse 
    embeddings into Qdrant, processing the data in batches.
    
    Each FAQ should have the following keys: 'question', 'answer', and 'category'.
    """
    client = qdrant_client.QdrantClient(url=url)
    
    # Create collection if it doesn't exist
    if not client.collection_exists(collection_name=collection_name):
        client.create_collection(
            collection_name=collection_name,
            vectors_config={
                "dense": VectorParams(size=dense_dimension, distance=distance),
            },
            sparse_vectors_config={
                "sparse": SparseVectorParams(
                    index=SparseIndexParams(on_disk=False)
                )
            }
        )
    
    total_batches = (len(faq_data) - 1) // batch_size + 1
    overall_start_time = time.time()
    
    for batch_num in range(total_batches):
        batch_start_time = time.time()
        start = batch_num * batch_size
        end = start + batch_size
        batch_faq = faq_data[start:end]
        
        texts = []
        ids = []
        payloads = []
        
        for i, faq in enumerate(batch_faq, start=start):
            # Use question for embedding
            text = faq['question']
            texts.append(text)
            ids.append(i)
            payloads.append({
                "page_content": text,
                "metadata": {
                    "question": faq['question'],
                    "answer": faq['answer'],
                    "category": faq['category']
                }
            })
        
        # Create dense embeddings
        dense_batch_embeddings = list(dense_embeddings.encode(texts))
        
        # Create sparse embeddings
        sparse_batch_embeddings = list(sparse_model.encode(texts))
        
        points = []
        for j, (dense_emb, sparse_emb) in enumerate(zip(dense_batch_embeddings, sparse_batch_embeddings)):
            # Get indices where the element is non-zero
            nonzero_indices = np.nonzero(sparse_emb)[0]
            # Get the corresponding non-zero values
            nonzero_values = sparse_emb[nonzero_indices]

            sparse_vector = SparseVector(
                indices=nonzero_indices.tolist(),
                values=nonzero_values.tolist()
            )

            points.append(
                PointStruct(
                    id=ids[j],
                    vector={
                        "dense": dense_emb,
                        "sparse": sparse_vector
                    },
                    payload=payloads[j]
                )
            )
        
        # Upsert the current batch of points into Qdrant
        client.upsert(
            collection_name=collection_name,
            points=points
        )
        
        batch_end_time = time.time()
        batch_elapsed = batch_end_time - batch_start_time
        print(f"Processed batch {batch_num+1}/{total_batches} in {batch_elapsed:.2f} seconds")
    
    overall_end_time = time.time()
    total_elapsed = overall_end_time - overall_start_time

    # Calculate peak memory usage (in MB)
    peak_memory = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024
    print(f"Added: {len(faq_data)} FAQs in {total_elapsed:.2f} seconds, Peak Memory: {peak_memory:.2f} MB")

In [13]:
moveEmbedding(faq_data)

Processed batch 1/4 in 19.87 seconds
Processed batch 2/4 in 13.09 seconds
Processed batch 3/4 in 10.91 seconds
Processed batch 4/4 in 5.86 seconds
Added: 349 FAQs in 49.73 seconds, Peak Memory: 5344.78 MB


In [14]:
# Initiate client.
client = QdrantClient("localhost", port=6333)

# Show collections.
collections = client.get_collections()
print(collections)


collections=[CollectionDescription(name='faq-hybrid')]


In [38]:
def hybrid_search(query, k=20, alpha=0.5):
    """
    Perform hybrid search using both dense and sparse embeddings.
    
    Parameters:
    - query: The search query text
    - k: Number of results to retrieve
    - alpha: Weight of dense vs sparse results (1.0 = only dense, 0.0 = only sparse)
    
    Returns:
    - Hybrid search results
    """
    client = QdrantClient(url=url)
    
    # Get dense embedding for query
    dense_query_vector = dense_embeddings.encode([query])[0]
    
    # Get sparse embedding for query
    sparse_query_vector = sparse_model.encode([query])[0]
    
    # Get indices where the element is non-zero
    nonzero_indices = np.nonzero(sparse_query_vector)[0]
    # Get the corresponding non-zero values
    nonzero_values = sparse_query_vector[nonzero_indices]
    
    # Prepare the sparse vector in Qdrant's format
    sparse_vector = SparseVector(
        indices=nonzero_indices.tolist(),
        values=nonzero_values.tolist()
    )
    
    # Perform search with both vectors
    search_results = client.search_batch(
        collection_name=collection_name,
        requests=[
            # Dense search
            SearchRequest(
                vector=NamedVector(
                    name="dense",
                    vector=dense_query_vector.tolist()
                ),
                limit=k,
                with_payload=True,
                score_threshold=0.1
            ),
            # Sparse search
            SearchRequest(
                vector=NamedSparseVector(
                    name="sparse",
                    vector=sparse_vector
                ),
                limit=k,
                with_payload=True,
                score_threshold=0.1
            )
        ]
    )
    
    # Combine results using reciprocal rank fusion
    dense_results = search_results[0]
    sparse_results = search_results[1]
    
    # Create rank lists for RRF
    dense_ranks = [(point.id, rank + 1) for rank, point in enumerate(dense_results)]
    sparse_ranks = [(point.id, rank + 1) for rank, point in enumerate(sparse_results)]
    
    # Perform RRF fusion
    fused_ranks = rrf([dense_ranks, sparse_ranks], alpha=alpha)
    
    # Create a lookup dict for all results
    all_results_dict = {}
    for result_list in search_results:
        for point in result_list:
            if point.id not in all_results_dict:
                all_results_dict[point.id] = point
    
    # Get the results in fused rank order
    fused_results = []
    for id_, score in fused_ranks:
        if id_ in all_results_dict:
            fused_results.append((all_results_dict[id_], score))
    
    return fused_results

def rrf(rank_lists, alpha=60, default_rank=1000):
    """
    Reciprocal Rank Fusion (RRF) for combining multiple ranked lists.
    
    Parameters:
    - rank_lists: A list of rank lists (each item is (id, rank))
    - alpha: The RRF parameter (larger = less influence from lower ranks)
    - default_rank: Rank to use for items not in a list
    
    Returns:
    - Combined ranked list with scores
    """
    # Gather all unique items
    all_items = set()
    for rank_list in rank_lists:
        all_items.update(item for item, _ in rank_list)
    
    # Create item to rank mapping for each list
    rank_maps = []
    for rank_list in rank_lists:
        rank_map = {item: rank for item, rank in rank_list}
        rank_maps.append(rank_map)
    
    # Calculate RRF scores
    rrf_scores = {}
    for item in all_items:
        score = 0
        for rank_map in rank_maps:
            rank = rank_map.get(item, default_rank)
            score += 1.0 / (alpha + rank)
        rrf_scores[item] = score
    
    # Sort by RRF score (descending)
    sorted_items = sorted(rrf_scores.items(), key=lambda x: x[1], reverse=True)
    return sorted_items

def test_hybrid_search(test_list, k=20):
    """
    Test the hybrid search on a list of test queries.
    
    Parameters:
    - test_list: List of test queries (each item is a dict with 'query' and 'expected')
    - k: Number of results to retrieve
    
    Returns:
    - Performance summary
    """
    total = len(test_list)
    hybrid_correct = 0
    
    client = QdrantClient(url=url)
    
    for test in test_list:
        query = test["query"]
        expected = test["expected"]
        
        print(f"Query: {query}")
        print(f"Expected: {expected}")
        
        # Run hybrid search
        hybrid_results = hybrid_search(query, k=k)
        
        # Check if expected result is in top results
        found_in_hybrid = False
        
        for point, _ in hybrid_results:
            if point.payload["metadata"]["question"] == expected:
                hybrid_correct += 1
                found_in_hybrid = True
                break
        
        print(f"Found in hybrid search: {found_in_hybrid}")
        print("-" * 40)
    
    print("Summary:")
    print(f"Total Queries: {total}")
    print(f"Hybrid Search Correct: {hybrid_correct}/{total} ({hybrid_correct/total*100:.2f}%)")
    
    return {
        "total": total,
        "hybrid_correct": hybrid_correct
    }

def compare_search_speed(test_queries, k=20):
    """
    Compare execution speed of different search methods using query_points.
    
    Parameters:
    - test_queries: List of test queries
    - k: Number of results to retrieve
    """
    total_queries = len(test_queries)
    total_time_dense = 0.0
    total_time_sparse = 0.0
    total_time_hybrid = 0.0

    client = QdrantClient(url=url)

    for test in test_queries:
        query = test["query"]
        print(f"Query: {query}")
        
        # Dense search timing
        start_time = time.time()
        dense_query_vector = dense_embeddings.encode([query])[0]
        dense_results = client.query_points(
            collection_name=collection_name,
            query=dense_query_vector.tolist(),  # List of floats
            using="dense",                     # Specify the dense field
            limit=k,
            with_payload=True
        )
        time_dense = time.time() - start_time
        total_time_dense += time_dense

        # Sparse search timing
        start_time = time.time()
        sparse_query_vector = sparse_model.encode([query])[0]
        nonzero_indices = np.nonzero(sparse_query_vector)[0]
        nonzero_values = sparse_query_vector[nonzero_indices]
        sparse_vector = SparseVector(
            indices=nonzero_indices.tolist(),
            values=nonzero_values.tolist()
        )
        sparse_results = client.query_points(
            collection_name=collection_name,
            query=sparse_vector,  # SparseVector instance
            using="sparse",       # Specify the sparse field
            limit=k,
            with_payload=True
        )
        time_sparse = time.time() - start_time
        total_time_sparse += time_sparse
        
        # Hybrid search timing
        start_time = time.time()
        hybrid_results = hybrid_search(query, k=k)
        time_hybrid = time.time() - start_time
        total_time_hybrid += time_hybrid
        
        print(f"Dense search time: {time_dense:.4f} seconds")
        print(f"Sparse search time: {time_sparse:.4f} seconds")
        print(f"Hybrid search time: {time_hybrid:.4f} seconds")
        print("-" * 40)

    # Calculate averages
    avg_time_dense = total_time_dense / total_queries
    avg_time_sparse = total_time_sparse / total_queries
    avg_time_hybrid = total_time_hybrid / total_queries

    print("Summary:")
    print(f"Total Queries: {total_queries}")
    print(f"Average dense search time: {avg_time_dense:.4f} seconds")
    print(f"Average sparse search time: {avg_time_sparse:.4f} seconds")
    print(f"Average hybrid search time: {avg_time_hybrid:.4f} seconds")


In [39]:
# Example test queries
test_queries = [
    {
        "query": "Dimana lokasi Rumah Sakit Siloam?",
        "expected": "Di mana ada lokasi Rumah Sakit Siloam?"
    },
    {
        "query": "Bagaimana cara membuat janji dengan dokter?",
        "expected": "Bagaimana cara membuat janji dengan dokter (appointment)?"
    },
    {
        "query": "Apa saja layanan medis yang tersedia?",
        "expected": "Apa saja Medical Services yang tersedia di rumah sakit?"
    }
]

# Run test on the hybrid search
test_results = test_hybrid_search(test_queries)

# Compare search speeds
compare_search_speed(test_queries)

# Example of simple search usage
def search_faq(query, k=5):
    """
    Simple function to search FAQ and return formatted results
    
    Parameters:
    - query: User question
    - k: Number of results to return
    
    Returns:
    - Formatted results with questions and answers
    """
    results = hybrid_search(query, k=k)
    
    print(f"Search results for: '{query}'")
    print("-" * 60)
    
    for i, (point, score) in enumerate(results, 1):
        question = point.payload["metadata"]["question"]
        answer = point.payload["metadata"]["answer"]
        category = point.payload["metadata"]["category"]
        
        print(f"{i}. Question: {question}")
        print(f"   Answer: {answer}")
        print(f"   Category: {category}")
        print(f"   Score: {score:.4f}")
        print()
    
    return results

# Example usage
search_faq("Biaya untuk medical check up")

Query: Dimana lokasi Rumah Sakit Siloam?
Expected: Di mana ada lokasi Rumah Sakit Siloam?


  search_results = client.search_batch(


Found in hybrid search: True
----------------------------------------
Query: Bagaimana cara membuat janji dengan dokter?
Expected: Bagaimana cara membuat janji dengan dokter (appointment)?
Found in hybrid search: False
----------------------------------------
Query: Apa saja layanan medis yang tersedia?
Expected: Apa saja Medical Services yang tersedia di rumah sakit?
Found in hybrid search: False
----------------------------------------
Summary:
Total Queries: 3
Hybrid Search Correct: 1/3 (33.33%)
Query: Dimana lokasi Rumah Sakit Siloam?
Dense search time: 0.2220 seconds
Sparse search time: 0.2087 seconds
Hybrid search time: 0.4214 seconds
----------------------------------------
Query: Bagaimana cara membuat janji dengan dokter?
Dense search time: 0.2140 seconds
Sparse search time: 0.2273 seconds
Hybrid search time: 0.4600 seconds
----------------------------------------
Query: Apa saja layanan medis yang tersedia?
Dense search time: 0.2075 seconds
Sparse search time: 0.2064 seconds


[(ScoredPoint(id=111, version=1, score=0.7029362, payload={'page_content': 'Bagaimana cara membuat pemesanan medical check-up?', 'metadata': {'question': 'Bagaimana cara membuat pemesanan medical check-up?', 'answer': "Anda dapat mengakses menu 'Pesan Check-Up', lalu pilih paket pemeriksaan dan juga rumah sakit tempat pemeriksaan. Setelah itu, Anda perlu memilih waktu untuk melakukan pemeriksaan dan melakukan pembayaran.", 'category': 'MySiloam Apps'}}, vector=None, shard_key=None, order_value=None),
  1.3333333333333333),
 (ScoredPoint(id=32, version=0, score=0.67284036, payload={'page_content': 'Apa saja metode pembayaran pada pendaftaran MCU / Medical Check Up?', 'metadata': {'question': 'Apa saja metode pembayaran pada pendaftaran MCU / Medical Check Up?', 'answer': 'Pembayaran pada pendaftaran MCU / Medical Check Up dapat dilakukan dengan transfer ke rekening Bank, Credit Card, dan pembayaran cash pada saat melakukan registrasi.', 'category': 'Medical Check Up'}}, vector=None, sha