In [36]:
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer
import socket
import sys
from sentence_transformers import SentenceTransformer
from qdrant_client.http import models
    

In [37]:
key = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.w2PnVQu7hvrSb-5u-CWEPxGXuUbkDy_IaElgNBHAnXU"

url = "https://bf57de15-f343-4041-aacd-6d8daccab983.europe-west3-0.gcp.cloud.qdrant.io"

client = QdrantClient(url, api_key=key)

In [38]:

def search_similar_books(query_text, url, api_key, model_name="all-MiniLM-L6-v2", 
                        collection_name="books", limit=5):
    """
    Search for books similar to the query text using Qdrant vector database.
    
    Parameters:
    - query_text: Text to search for similar books
    - url: Qdrant server URL
    - api_key: Qdrant API key
    - model_name: The sentence transformer model to use (should match what was used for embeddings)
    - collection_name: Name of the collection in Qdrant
    - limit: Maximum number of results to return
    
    Returns:
    - List of search results with title, summary and similarity score
    """
    try:
        # Initialize the client with a timeout
        client = QdrantClient(url, api_key=api_key, timeout=10)
        
        # Test connection by making a simple request
        try:
            client.get_collections()
            print("Successfully connected to Qdrant server.")
        except Exception as e:
            print(f"Error connecting to Qdrant server: {e}")
            print(f"Please verify your URL ({url}) and API key are correct.")
            return []
        
        # Load the model
        model = SentenceTransformer(model_name)
        
        # Encode the query text
        query_vector = model.encode(query_text).tolist()
        
        # Search for similar books
        search_results = client.search(
            collection_name=collection_name,
            query_vector=query_vector,
            limit=limit
        )
        
        return search_results
        
    except socket.gaierror as e:
        print(f"DNS resolution error: {e}")
        print("Unable to resolve the hostname in the provided URL.")
        print("Troubleshooting steps:")
        print("1. Check that the URL is correct")
        print("2. Verify your internet connection")
        print("3. Try using an IP address instead of hostname if possible")
        print("4. Check if your network allows access to this service")
        return []
    except Exception as e:
        print(f"Unexpected error: {e}")
        return []

def display_search_results(results):
    """
    Display the search results in a readable format.
    
    Parameters:
    - results: Search results from Qdrant
    """
    if not results:
        print("No results found.")
        return
        
    for i, result in enumerate(results):
        print(f"\n--- Result {i+1} (Similarity Score: {result.score:.4f}) ---")
        print(f"Title: {result.payload.get('title', 'Unknown')}")
        print(f"Summary: {result.payload.get('summary', 'No summary available')[:200]}...")


In [39]:
def hybrid_search_books(query_text, keywords=None, url=None, api_key=None, 
                        model_name="all-MiniLM-L6-v2", collection_name="books", 
                        limit=5, score_threshold=0.0, vector_weight=0.7, keyword_weight=0.3):
    """
    Perform hybrid search combining vector similarity with keyword matching.
    
    Parameters:
    - query_text: Main query text for semantic search
    - keywords: List of specific keywords to match in the summary or title
    - url: Qdrant server URL (if None, will use localhost)
    - api_key: Qdrant API key (if required)
    - model_name: Sentence transformer model to use
    - collection_name: Name of the collection in Qdrant
    - limit: Maximum number of results to return
    - score_threshold: Minimum similarity score (0-1) to include in results
    - vector_weight: Weight for vector search component (0-1)
    - keyword_weight: Weight for keyword matching component (0-1)
    
    Returns:
    - List of search results with title, summary and similarity score
    """

    try:
        # Initialize client with proper defaults
        if url is None:
            client = QdrantClient(host="localhost", port=6333, timeout=10)
        else:
            client = QdrantClient(url=url, api_key=api_key, timeout=10)
        
        # Test connection
        try:
            client.get_collections()
            print("Successfully connected to Qdrant server.")
        except Exception as e:
            print(f"Error connecting to Qdrant server: {e}")
            return []
        
        # Load the model
        model = SentenceTransformer(model_name)
        
        # Encode the query text
        query_vector = model.encode(query_text).tolist()
        
        # First perform a basic search without filters to ensure we get results
        basic_results = client.search(
            collection_name=collection_name,
            query_vector=query_vector,
            limit=limit * 2,  # Get more for hybrid ranking
            with_payload=True
        )
        
        print(f"Basic vector search found {len(basic_results)} results")
        
        # If no keywords provided or basic search returned nothing, return basic results
        if not keywords or not basic_results:
            return basic_results[:limit]
        
        # If keywords provided, perform keyword-based filtering on the results we got
        # This avoids using Filter which might be causing issues
        filtered_results = []
        
        # Convert single keyword to list if needed
        if isinstance(keywords, str):
            keywords = [keywords]
        
        keywords = [k.lower() for k in keywords]  # Lowercase for case-insensitive matching
        
        # Manual filtering and scoring approach
        for result in basic_results:
            title = result.payload.get('title', '').lower()
            summary = result.payload.get('summary', '').lower()
            combined_text = title + " " + summary
            
            # Check if any keyword matches
            matches = sum(1 for keyword in keywords if keyword in combined_text)
            
            if matches > 0:  # At least one keyword must match
                # Calculate keyword score (normalized by number of keywords)
                keyword_score = min(matches / len(keywords), 1.0)
                
                # Original vector similarity score
                vector_score = result.score
                
                # Calculate hybrid score
                hybrid_score = (vector_score * vector_weight) + (keyword_score * keyword_weight)
                
                # Update score
                result.score = hybrid_score
                filtered_results.append(result)
        
        # Sort by hybrid score and limit results
        filtered_results = sorted(filtered_results, key=lambda x: x.score, reverse=True)
        filtered_results = filtered_results[:limit]
        
        print(f"Hybrid search found {len(filtered_results)} results")
        return filtered_results
        
    except socket.gaierror as e:
        print(f"DNS resolution error: {e}")
        return []
    except Exception as e:
        print(f"Unexpected error: {e}")
        import traceback
        traceback.print_exc()
        return []

def display_hybrid_search_results(results):
    """
    Display the hybrid search results in a readable format.
    
    Parameters:
    - results: Search results from hybrid search
    """
    if not results:
        print("No results found.")
        return
        
    print(f"\nFound {len(results)} matching books:\n")
    
    for i, result in enumerate(results):
        print(f"\n===== Result {i+1} (Score: {result.score:.4f}) =====")
        print(f"Title: {result.payload.get('title', 'Unknown')}")
        
        # Get a shorter summary preview
        summary = result.payload.get('summary', 'No summary available')
        summary_preview = summary[:200] + "..." if len(summary) > 200 else summary
        print(f"Summary: {summary_preview}")

In [40]:
# Example usage
if __name__ == "__main__":
    # Configuration
    url = url  # Replace with your actual URL
    api_key = key  # Replace with your actual API key
    query = "book about dogs and magic and friendship"  # Replace with your query text
    
    # Print diagnostic info
    print(f"Python version: {sys.version}")
    print(f"Connecting to Qdrant at: {url}")
    
    # Search for similar books
    results = search_similar_books(query, url, api_key)
    
    # Display the results
    display_search_results(results)

Python version: 3.11.7 | packaged by Anaconda, Inc. | (main, Dec 15 2023, 18:05:47) [MSC v.1916 64 bit (AMD64)]
Connecting to Qdrant at: https://bf57de15-f343-4041-aacd-6d8daccab983.europe-west3-0.gcp.cloud.qdrant.io
Successfully connected to Qdrant server.

--- Result 1 (Similarity Score: 0.6531) ---
Title: If Dogs Could Swear
Summary: A hilarious, charming, and foul-mouthed look inside the minds of man's best friends
What is my dog thinking?That's the question that every dog owner asks when their beloved pet stares up at them with ...

--- Result 2 (Similarity Score: 0.5449) ---
Title: Graphic Classics, Volume 25: Canine Classics / Feline Classics
Summary: Canine/Feline Classics is a unique, two-in-one volume, with half stories for dog-lovers, and half for cat-lovers. Featured are "The Emissary" by Ray Bradbury, "Ancient Sorceries" by Algernon Blackwood...

--- Result 3 (Similarity Score: 0.5129) ---
Title: If My Dogs Were a Pair of Middle-Aged Men
Summary: From the Eisner Award-winn

  search_results = client.search(


In [41]:
# Example usage
if __name__ == "__main__":
    query = "book about dogs and magic and friendship"
    keywords = ["dog", "bird", "cat"]
    
    results = hybrid_search_books(
        query_text=query,
        keywords=keywords,
        api_key = key,  # Replace with your actual API key
        url=url,
        limit=5
    )

display_hybrid_search_results(results)

Successfully connected to Qdrant server.
Basic vector search found 10 results
Hybrid search found 5 results

Found 5 matching books:


===== Result 1 (Score: 0.6567) =====
Title: Bird Cat Dog
Summary: The first book in the Three-Story Books series tells the story of a bird, a cat, and a dog through clever wordless comics. Each page is a three-by-three grid, with each row of three containing the sto...

===== Result 2 (Score: 0.5815) =====
Title: Graphic Classics, Volume 25: Canine Classics / Feline Classics
Summary: Canine/Feline Classics is a unique, two-in-one volume, with half stories for dog-lovers, and half for cat-lovers. Featured are "The Emissary" by Ray Bradbury, "Ancient Sorceries" by Algernon Blackwood...

===== Result 3 (Score: 0.5590) =====
Title: If My Dogs Were a Pair of Middle-Aged Men
Summary: From the Eisner Award-winning creator of The Oatmealand #1 New York Timesbestselling authorof How to Tell If Your Cat Is Plotting to Kill Youcomes this charmingly absurd gift boo

  basic_results = client.search(
