### Import dependencies

In [1]:
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, PayloadSchemaType, PointStruct, SparseVectorParams, Document, Prefetch, FusionQuery
from qdrant_client import models

import pandas as pd
import openai
import cohere 

### Retrieval

In [2]:
# Client for the Qdrant instance served at http://localhost:6333.
# It is passed explicitly to retrieve_data() further down.
qdrant_client = QdrantClient(url="http://localhost:6333")


In [3]:
def get_embedding(text: str, model: str = "text-embedding-3-small") -> list[float]:
    """
    Generate an embedding vector for the given text using an OpenAI embedding model.

    Args:
        text (str): The text to embed. Must not be empty.
        model (str, optional): The OpenAI embedding model name.
            Defaults to "text-embedding-3-small".

    Returns:
        list[float]: The embedding vector representing the text.

    Raises:
        ValueError: If ``text`` is empty (or otherwise falsy).
    """
    # Fail fast on empty input instead of spending a network round trip on a
    # request the embeddings endpoint would reject anyway.
    if not text:
        raise ValueError("text must be a non-empty string")

    # The endpoint takes a batch of inputs: send a one-element batch and
    # unwrap the single embedding that comes back.
    response = openai.embeddings.create(input=[text], model=model)
    return response.data[0].embedding

In [5]:
def retrieve_data(
    query: str,
    qdrant_client: QdrantClient,
    k: int = 5,
    collection_name: str = "Amazon-items-collection-01-hybrid-search",
    prefetch_limit: int = 20,
) -> dict:
    """
    Retrieve product data from Qdrant using a hybrid (dense + BM25) search.

    Two candidate sets are prefetched -- one from the dense
    "text-embedding-3-small" vector and one from the sparse "bm25" vector --
    and merged with reciprocal rank fusion (RRF).

    Args:
        query (str): The user query or search string.
        qdrant_client (QdrantClient): An instantiated Qdrant client.
        k (int, optional): Number of fused results to return. Defaults to 5.
        collection_name (str, optional): Collection to search. Defaults to
            "Amazon-items-collection-01-hybrid-search".
        prefetch_limit (int, optional): Number of candidates fetched by each
            of the two prefetch branches before fusion. Defaults to 20. At
            most ``2 * prefetch_limit`` candidates reach the fusion step, so
            raise it together with ``k`` when asking for many results.

    Returns:
        dict: A dictionary of parallel lists, one entry per returned point:
            - retrieved_context_ids (list): ``payload["parent_asin"]`` values.
            - retrieved_context (list): ``payload["description"]`` values.
            - retrieved_context_ratings (list): ``payload["average_rating"]`` values.
            - similarity_scores (list[float]): Scores of the fused result.
              These come from the RRF fusion query, so they are rank-fusion
              scores rather than raw cosine similarities.

    Raises:
        KeyError: If a returned point's payload lacks one of the keys above.
    """
    # Dense branch needs the query as a vector; get_embedding's default model
    # matches the named vector used below.
    query_embedding = get_embedding(query)

    results = qdrant_client.query_points(
        collection_name=collection_name,
        prefetch=[
            # Dense (embedding) candidates.
            Prefetch(
                query=query_embedding,
                using="text-embedding-3-small",
                limit=prefetch_limit,
            ),
            # Sparse (BM25) candidates; the raw text is passed as a Document
            # tagged with the "qdrant/bm25" model.
            Prefetch(
                query=Document(text=query, model="qdrant/bm25"),
                using="bm25",
                limit=prefetch_limit,
            ),
        ],
        # Merge both candidate lists with reciprocal rank fusion.
        query=FusionQuery(fusion="rrf"),
        limit=k,
    )

    # Unpack the points into parallel lists for downstream use.
    points = results.points
    return {
        "retrieved_context_ids": [point.payload["parent_asin"] for point in points],
        "retrieved_context": [point.payload["description"] for point in points],
        "retrieved_context_ratings": [point.payload["average_rating"] for point in points],
        "similarity_scores": [point.score for point in points],
    }

In [10]:
# Example user query; reused for both the retrieval call and the rerank call below.
query = "Can I get some tablets?"

In [None]:
# Retrieve a larger candidate set (k=20) so we can see why re-ranking is needed.
results = retrieve_data(query=query, qdrant_client=qdrant_client, k=20)

In [7]:
# Inspect the raw retrieval output (IDs, descriptions, ratings, scores).
results



{'retrieved_context_ids': ['B09P29VXG1',
  'B0C78B1BTB',
  'B09F8TLBZL',
  'B09VP8SZSR',
  'B0C35RS6MS',
  'B0B44TGKRX',
  'B0BTCSVPQH',
  'B0C8PF45CL',
  'B09TFS9298',
  'B0C9QCRNBK',
  'B09MFHFHQM',
  'B0B6CBS5CJ',
  'B0C1RSJ3WC',
  'B0BVLDTNCB',
  'B09SPTJD8N',
  'B0BL3YQXK5',
  'B0B5X6FNPJ',
  'B0BQMRGDTK',
  'B0B2NNL811',
  'B0BHZ5G6LW'],
 'retrieved_context': ['SAMSUNG Galaxy Tab A8 10.5‚Äù 64GB Android Tablet, LCD Screen, Kids Content, Smart Switch, Long Lasting Battery, US Version, 2022, Silver, Amazon ExclusiveA SCREEN EVERYONE WILL LOVE: Whether your family is streaming or video chatting with friends, the Galaxy Tab A8 tablet brings out the best in every moment on a 10.5" LCD screen POWER AND STORAGE FOR ALL: Get the power, storage and speed your family needs with an upgraded chipset and plenty of room to keep files ‚Äî up to 128GB of storage; a long-lasting battery lets you go unplugged for hours to keep the family fun going CHARGE FAST, POWER FOR HOURS: Go for hours on a si

### Reranking
- Rerank the retrieved context list

In [8]:
# No API key is passed here, so the client has to pick up credentials from
# somewhere else -- presumably the CO_API_KEY environment variable; confirm
# it is set before running this cell.
cohere_client = cohere.ClientV2()


In [9]:
# Only the description texts are sent to the reranker; their list positions are
# what the rerank results' `index` values are looked up against later.
to_rerank = results["retrieved_context"]

In [11]:
# Score every retrieved description against the query with Cohere's reranker.
# top_n is tied to the number of candidates (rather than a hard-coded 20) so
# the whole list is still reordered if the retrieval size changes.
response = cohere_client.rerank(
    model="rerank-v3.5",
    query=query,
    documents=to_rerank,
    top_n=len(to_rerank),
)

In [12]:
# Inspect the raw rerank response.
response



In [13]:
# Map each rerank hit back to its description through the index it carries
# into `to_rerank`, keeping the order in which the reranker returned the hits.
# The loop variable is `hit` so it does not shadow the notebook-level `results` dict.
reranked_results = [to_rerank[hit.index] for hit in response.results]


In [14]:
# Descriptions in the original retrieval order, for comparison with the reranked list.
results["retrieved_context"]


['SAMSUNG Galaxy Tab A8 10.5‚Äù 64GB Android Tablet, LCD Screen, Kids Content, Smart Switch, Long Lasting Battery, US Version, 2022, Silver, Amazon ExclusiveA SCREEN EVERYONE WILL LOVE: Whether your family is streaming or video chatting with friends, the Galaxy Tab A8 tablet brings out the best in every moment on a 10.5" LCD screen POWER AND STORAGE FOR ALL: Get the power, storage and speed your family needs with an upgraded chipset and plenty of room to keep files ‚Äî up to 128GB of storage; a long-lasting battery lets you go unplugged for hours to keep the family fun going CHARGE FAST, POWER FOR HOURS: Go for hours on a single charge and back to 100% with the fast charging USB C port; Battery life may vary depending on network environment, usage patterns and other factors GALAXY ECOSYSTEM: Open up a new world of convenient possibilities with the Galaxy ecosystem experience ‚Äî your devices, including your phone, laptop computer, and tablet, all automatically talk to one another seaml

In [15]:
# The same descriptions in the order returned by the reranker.
reranked_results

['DUODUOGO 2 in 1 Tablet 10 Inch with Keyboard Mouse Stylus, 128GB Expand 64GB ROM 4GB RAM Android 10.0 Quad-Core HD IPS Screen 8MP Dual Camera GPS FM OTG Bluetooth 4G Dual SIM & WiFi, DGO-P610.1 Inch HD IPS Tablet - This tablet features a 1280*800 HD G+G IPS touch display, wide viewing angle, bright display, more vivid colors and top-notch picture quality. High Performance 2 in 1 Tablet - Equipped with 4GB RAM, 64GB ROM, you can expand the storage up to 128GB using a Micro SD card (not included). Perfect for daily study and entertainment use. Android 10.0 Tablet - An Android 10.0 tablet can watch videos like YouTube and Netflix. Use ZOOM, Skype, etc. to join the meeting. Also chat with friends on Facebook, WhatsApp. LONG BATTERY LIFE - 8000mAh built-in rechargeable battery and low-power CPU enhance your experience, with long runtime and standby time, you can enjoy up to 8 hours. PERFECT GIFT - The DUODUOGO P8 tablet is suitable for all ages, seniors can use it to watch TV, kids can us