In [5]:
import openai
import instructor
from pydantic import BaseModel, Field
from qdrant_client import QdrantClient

In [6]:
# Structured-output schema for the basic RAG pipeline: generate_answer passes this
# class as `response_model`, and rag_pipeline reads the `.answer` attribute.
# NOTE(review): documented with `#` comments instead of a class docstring because
# pydantic appears to fold class docstrings into the generated schema, which would
# change what is sent to the model — confirm before converting to a docstring.
class RAGGenerationResponse(BaseModel):
    answer: str = Field(description="The answer to the question")

In [7]:
# Shared Qdrant client pointed at http://localhost:6333; it is handed to
# rag_pipeline(...) in the cells below.
qdrant_client = QdrantClient(url="http://localhost:6333")

### RAG Pipeline

In [18]:
def get_embeddings(text, model="text-embedding-3-small"):
    """Request an embedding for ``text`` from the OpenAI embeddings endpoint.

    Args:
        text: Value forwarded unchanged as the ``input`` of the request.
        model: Embedding model name; defaults to "text-embedding-3-small".

    Returns:
        The ``embedding`` attribute of the first entry in the response's
        ``data`` list.
    """
    embedding_response = openai.embeddings.create(input=text, model=model)
    first_entry = embedding_response.data[0]
    return first_entry.embedding


def retrieve_data(query, qdrant_client, k=10, collection_name="amazon_items-collection-00"):
    """
    Retrieve k most similar items to the query from a Qdrant collection.

    Args:
        query: Text to search for; it is embedded with ``get_embeddings`` first.
        qdrant_client: Client object exposing ``query_points``.
        k: Maximum number of points to request (passed as ``limit``).
        collection_name: Collection to search. Defaults to the collection this
            notebook was written against, so existing callers are unaffected.

    Returns:
        A dict of four parallel lists, one entry per returned point:
        ``retrieved_context_ids`` (payload "parent_asin"),
        ``retrieved_context`` (payload "description"),
        ``similarity_scores`` (the point's ``score``) and
        ``retrieved_content_ratings`` (payload "average_rating").

    Raises:
        KeyError: If a returned point's payload lacks one of the keys above.
    """
    response = qdrant_client.query_points(
        collection_name=collection_name,
        query=get_embeddings(query),
        limit=k,
    )

    retrieved_context_ids = []
    retrieved_context = []  # product descriptions
    similarity_scores = []
    retrieved_content_ratings = []
    for point in response.points:
        payload = point.payload
        retrieved_context_ids.append(payload["parent_asin"])
        retrieved_context.append(payload["description"])
        similarity_scores.append(point.score)
        retrieved_content_ratings.append(payload["average_rating"])

    return {
        "retrieved_context_ids": retrieved_context_ids,
        "retrieved_context": retrieved_context,
        "similarity_scores": similarity_scores,
        "retrieved_content_ratings": retrieved_content_ratings,
    }

def format_retrieved_context(context_data):
    """Render the retrieved items as a bulleted text block, one line per item.

    Reads the "retrieved_context_ids", "retrieved_context" and
    "retrieved_content_ratings" lists from ``context_data`` and zips them, so
    the output stops at the shortest of the three lists.

    Returns:
        A string with one "- <id>, description: <text>, rating: <rating>" line
        per item, each terminated by a newline (empty string for no items).
    """
    rows = zip(
        context_data["retrieved_context_ids"],
        context_data["retrieved_context"],
        context_data["retrieved_content_ratings"],
    )
    return "".join(
        f"- {item_id}, description: {description}, rating: {rating}\n"
        for item_id, description, rating in rows
    )

def create_prompt(query, preprocessed_retrieved_context):
    """Assemble the shopping-assistant prompt for ``query``.

    ``preprocessed_retrieved_context`` is rendered with
    ``format_retrieved_context`` and placed under the "Context:" heading,
    after the fixed instructions and the question.

    Returns:
        The complete prompt as one string.
    """
    processed_context = format_retrieved_context(preprocessed_retrieved_context)

    # The template is returned verbatim, including its leading indentation.
    return f"""
    You are a helpful shopping assistant who can answer questions about the products in stock.
    You are given a question and a list of products with their descriptions.
    Your job is to answer the question based on the context.

    Instructions:
    - Answer the question based on the context only.
    - Never use the word "context" in your answer and refer to it as available products.
    - If you don't know the answer, say "I don't know".
    - If the question is not related to the context, say "I don't know".
    - The answer to the question should contain detailed information about the product and returned with detailed specifications in bullet points.

    Question: 
    {query}
    Context: 
    {processed_context}
    """

def generate_answer(prompt):
    """Send ``prompt`` to the chat model and return the parsed structured reply.

    A fresh instructor-wrapped OpenAI client is built on every call. The
    prompt goes out as a single system message to "gpt-4.1-mini" at
    temperature 0.5, with ``RAGGenerationResponse`` as the response model.

    Returns:
        The first element of the pair returned by ``create_with_completion``;
        the second element (the raw completion) is discarded.
    """
    structured_client = instructor.from_openai(openai.OpenAI())
    request_kwargs = {
        "model": "gpt-4.1-mini",
        "messages": [{"role": "system", "content": prompt}],
        "temperature": 0.5,
        "response_model": RAGGenerationResponse,
    }
    parsed, _raw_completion = structured_client.chat.completions.create_with_completion(
        **request_kwargs
    )
    return parsed

def rag_pipeline(query, qdrant_client, top_k=10):
    """Run retrieval, prompt construction and generation for one question.

    Args:
        query: The user's question.
        qdrant_client: Client forwarded to ``retrieve_data``.
        top_k: Number of items to retrieve (forwarded as ``k``).

    Returns:
        A dict with the "question", the generated "answer" text, and the
        "retrieved_context_ids", "retrieved_context" and "similarity_scores"
        lists produced by ``retrieve_data``.
    """
    retrieved = retrieve_data(query, qdrant_client, top_k)
    generated = generate_answer(create_prompt(query, retrieved))

    # Retrieval fields copied through to the caller, in output order.
    passthrough_keys = ("retrieved_context_ids", "retrieved_context", "similarity_scores")
    return {
        "question": query,
        "answer": generated.answer,
        **{key: retrieved[key] for key in passthrough_keys},
    }

In [19]:
# Run the basic pipeline end to end on a sample question (top_k left at its default of 10).
result = rag_pipeline("Can I get some earphones?", qdrant_client)

In [21]:
# Show the whole response dict (question, answer, retrieved ids/descriptions, scores).
result

{'question': 'Can I get some earphones?',
 'answer': 'Yes, you can get earphones from the available products. Here are some options:\n\n1. Wireless Earbuds (B0B9FTVL58):\n- Bluetooth 5.3\n- 37 hours playback time\n- LED power display\n- In-ear with deep bass\n- IPX7 waterproof\n- Ultra-light with charging case\n- Smart touch controls\n- Sport S23-vine earbuds\n- Rating: 4.2\n\n2. TELSOR Wireless Earbuds (B0C6K1GQCF):\n- Bluetooth 5.1\n- Touch control stereo sound\n- Noise cancelling microphone for calls\n- 30 hours playtime (6 hours per charge + charging case)\n- IPX7 waterproof\n- Ergonomic design\n- USB-C charging\n- Rating: 4.3\n\n3. Open Ear Headphones (B0CBMPG524):\n- Bluetooth 5.3\n- 60 hours playtime (11 hours single charge + charging case)\n- IPX7 waterproof\n- Open-ear design (does not enter ear canal)\n- Comfortable with earhooks\n- Premium stereo sound\n- Suitable for running, walking, workouts\n- Rating: 4.4\n\n4. Siniffo Bone Conduction Headphones (B0BNHVLF7G):\n- Bluetoot

In [20]:
# print() so the "\n" escapes in the answer render as real line breaks.
print(result["answer"])

Yes, you can get earphones from the available products. Here are some options:

1. Wireless Earbuds (B0B9FTVL58):
- Bluetooth 5.3
- 37 hours playback time
- LED power display
- In-ear with deep bass
- IPX7 waterproof
- Ultra-light with charging case
- Smart touch controls
- Sport S23-vine earbuds
- Rating: 4.2

2. TELSOR Wireless Earbuds (B0C6K1GQCF):
- Bluetooth 5.1
- Touch control stereo sound
- Noise cancelling microphone for calls
- 30 hours playtime (6 hours per charge + charging case)
- IPX7 waterproof
- Ergonomic design
- USB-C charging
- Rating: 4.3

3. Open Ear Headphones (B0CBMPG524):
- Bluetooth 5.3
- 60 hours playtime (11 hours single charge + charging case)
- IPX7 waterproof
- Open-ear design (does not enter ear canal)
- Comfortable with earhooks
- Premium stereo sound
- Suitable for running, walking, workouts
- Rating: 4.4

4. Siniffo Bone Conduction Headphones (B0BNHVLF7G):
- Bluetooth 5.3
- Bone conduction technology
- IP56 waterproof
- 8 hours battery life
- Quick char

### RAG Pipeline with grounding context

In [None]:
# One item the model reports having used for its answer; instances are collected in
# RAGGenerationResponseWithReferences.references.
# NOTE(review): documented with `#` comments instead of a class docstring because
# pydantic appears to fold class docstrings into the generated schema, which would
# change what is sent to the model — confirm before converting to a docstring.
class RAGUsedContext(BaseModel):
    id: str = Field(description="The id of the item used to answer the question")
    description: str = Field(description="Short description of the item used to answer the question")
    rating: float = Field(description="The rating of the item used to answer the question")


# Structured-output schema for the grounded pipeline: the answer text plus the list of
# items the model says it used. generate_answer below passes this class as `response_model`.
# NOTE(review): this cell shows no execution count ("In [None]") in the saved notebook even
# though later cells depend on it — make sure it runs before the pipeline cell.
class RAGGenerationResponseWithReferences(BaseModel):
    answer: str = Field(description="The answer to the question")
    references: list[RAGUsedContext] = Field(description="List of items used to answer the question")



In [25]:
# NOTE: re-declares get_embeddings with the same behavior as the definition in the
# earlier "RAG Pipeline" cell.
def get_embeddings(text, model="text-embedding-3-small"):
    """Embed ``text`` via the OpenAI embeddings endpoint.

    Args:
        text: Value sent as the request ``input``.
        model: Embedding model name; defaults to "text-embedding-3-small".

    Returns:
        ``response.data[0].embedding`` from the embeddings response.
    """
    request = {"model": model, "input": text}
    return openai.embeddings.create(**request).data[0].embedding


def retrieve_data(query, qdrant_client, k=10, collection_name="amazon_items-collection-00"):
    """
    Retrieve k most similar items to the query from a Qdrant collection.

    Args:
        query: Text to search for; it is embedded with ``get_embeddings`` first.
        qdrant_client: Client object exposing ``query_points``.
        k: Maximum number of points to request (passed as ``limit``).
        collection_name: Collection to search. Defaults to the collection this
            notebook was written against, so existing callers are unaffected.

    Returns:
        A dict of four parallel lists, one entry per returned point:
        ``retrieved_context_ids`` (payload "parent_asin"),
        ``retrieved_context`` (payload "description"),
        ``similarity_scores`` (the point's ``score``) and
        ``retrieved_content_ratings`` (payload "average_rating").

    Raises:
        KeyError: If a returned point's payload lacks one of the keys above.
    """
    response = qdrant_client.query_points(
        collection_name=collection_name,
        query=get_embeddings(query),
        limit=k,
    )

    retrieved_context_ids = []
    retrieved_context = []  # product descriptions
    similarity_scores = []
    retrieved_content_ratings = []
    for point in response.points:
        payload = point.payload
        retrieved_context_ids.append(payload["parent_asin"])
        retrieved_context.append(payload["description"])
        similarity_scores.append(point.score)
        retrieved_content_ratings.append(payload["average_rating"])

    return {
        "retrieved_context_ids": retrieved_context_ids,
        "retrieved_context": retrieved_context,
        "similarity_scores": similarity_scores,
        "retrieved_content_ratings": retrieved_content_ratings,
    }

def format_retrieved_context(context_data):
    """Turn the retrieved items into a bulleted, newline-terminated text block.

    The "retrieved_context_ids", "retrieved_context" and
    "retrieved_content_ratings" lists of ``context_data`` are zipped together,
    so iteration ends with the shortest list.

    Returns:
        One "- <id>, description: <text>, rating: <rating>" line per item,
        concatenated; an empty string when there are no items.
    """
    ids = context_data["retrieved_context_ids"]
    descriptions = context_data["retrieved_context"]
    ratings = context_data["retrieved_content_ratings"]

    lines = [
        f"- {item_id}, description: {description}, rating: {rating}\n"
        for item_id, description, rating in zip(ids, descriptions, ratings)
    ]
    return "".join(lines)

def create_prompt(query, preprocessed_retrieved_context):
    """Assemble the grounded shopping-assistant prompt for ``query``.

    On top of the answering instructions, the template asks the model to
    list the IDs of the items it used and to give a short description of each.
    ``preprocessed_retrieved_context`` is rendered with
    ``format_retrieved_context`` and placed under the "Context:" heading.

    Returns:
        The complete prompt as one string.
    """
    processed_context = format_retrieved_context(preprocessed_retrieved_context)

    # The template is returned verbatim, including its leading indentation.
    return f"""
    You are a helpful shopping assistant who can answer questions about the products in stock.
    You are given a question and a list of products with their descriptions.
    Your job is to answer the question based on the context.

    Instructions:
    - Answer the question based on the context only.
    - Never use the word "context" in your answer and refer to it as available products.
    - If you don't know the answer, say "I don't know".
    - If the question is not related to the context, say "I don't know".
    - As an output you need to provide:
        * The answer to the question based on provided context.
        * List of the IDs of the chunks that were used to answer the question. Only include the IDs of the chunks that were used to answer the question, no other chunks.
        * Short description (1-2 sentences) of the item based on the description provided in the context.
    - The short description should have the name of the item.
    - The answer to the question should contain detailed information about the product and returned with detailed specifications in bullet points.
    

    Question: 
    {query}
    Context: 
    {processed_context}
    """

def generate_answer(prompt):
    """Send ``prompt`` to the chat model and return the parsed grounded reply.

    A fresh instructor-wrapped OpenAI client is built on every call. The
    prompt goes out as a single system message to "gpt-4.1-mini" at
    temperature 0.5, with ``RAGGenerationResponseWithReferences`` as the
    response model.

    Returns:
        The first element of the pair returned by ``create_with_completion``;
        the second element (the raw completion) is discarded.
    """
    structured_client = instructor.from_openai(openai.OpenAI())
    request_kwargs = {
        "model": "gpt-4.1-mini",
        "messages": [{"role": "system", "content": prompt}],
        "temperature": 0.5,
        "response_model": RAGGenerationResponseWithReferences,
    }
    parsed, _raw_completion = structured_client.chat.completions.create_with_completion(
        **request_kwargs
    )
    return parsed

def rag_pipeline(query, qdrant_client, top_k=10):
    """Run retrieval, prompt construction and grounded generation for one question.

    Args:
        query: The user's question.
        qdrant_client: Client forwarded to ``retrieve_data``.
        top_k: Number of items to retrieve (forwarded as ``k``).

    Returns:
        A dict with the "question", the generated "answer" text, the
        "references" reported by the model, and the "retrieved_context_ids",
        "retrieved_context" and "similarity_scores" lists produced by
        ``retrieve_data``.
    """
    retrieved = retrieve_data(query, qdrant_client, top_k)
    generated = generate_answer(create_prompt(query, retrieved))

    # Retrieval fields copied through to the caller, in output order.
    passthrough_keys = ("retrieved_context_ids", "retrieved_context", "similarity_scores")
    return {
        "question": query,
        "answer": generated.answer,
        "references": generated.references,
        **{key: retrieved[key] for key in passthrough_keys},
    }

In [26]:
# Re-run the same sample question through the redefined pipeline, which now also returns references.
result = rag_pipeline("Can I get some earphones?", qdrant_client)

In [27]:
# Print the answer text first, then the list of reference objects returned by the model.
print(result["answer"])
print(result["references"])


Yes, you can get earphones from the available products. Here are some options with detailed specifications:

1. Wireless Earbuds (ID: B0B9FTVL58)
- Bluetooth 5.3
- 37 hours playback time
- LED power display
- In-ear design with deep bass
- IPX7 waterproof
- Ultra-light with charging case
- Smart touch controls
- Suitable for sports
- Rating: 4.2

2. TUNEAKE Kids Headphones (ID: B0C142QS8X)
- Over-ear design
- Volume limited to 94dB for hearing protection
- Foldable and adjustable for kids
- 3.5mm jack
- Compatible with smartphones, tablets, laptops
- No microphone
- Rating: 4.5

3. TELSOR Wireless Earbuds (ID: B0C6K1GQCF)
- Bluetooth 5.1
- Noise cancelling mic for calls
- 30 hours total playtime (6 hours per charge)
- IPX7 waterproof
- Touch controls
- Compatible with iPhone, Android, smart TVs, computers
- Rating: 4.3

4. Open Ear Headphones (ID: B0CBMPG524)
- Bluetooth 5.3
- 60 hours playtime
- IPX7 waterproof
- Open-ear design to hear surroundings
- Comfortable and secure fit with e