In this example, we use instructor to enrich the response with the actual data of each reference ID returned among the top-K results of the retrieval search in the Qdrant database. The code is basically the same as in the previous notebook, except that StructuredResponse now contains additional fields to better model the response we want.

In [None]:
import openai
import instructor
from qdrant_client import QdrantClient
from pydantic import BaseModel, Field

In [None]:
# Pydantic model for a single item referenced in the answer.
# Each instance pairs an item identifier with a short description of that item.
# NOTE(review): kept as `#` comments instead of a class docstring, since a Pydantic
# model's docstring would presumably end up in the schema shown to the LLM — confirm
# before converting these comments into a docstring.
class ReferencedItem(BaseModel):
    # Identifier of the referenced item (the parent ASIN, per the field description).
    id: str = Field(..., description="The unique identifier of the referenced item (parent ASIN).")
    # Short description of the item; the prompt asks the LLM to write it from the
    # longer description supplied in the context.
    description: str = Field(..., description="The short description of the referenced item.")

# Output schema (Pydantic) used by instructor to structure and validate the model's response.
# The Field descriptions below are part of the schema that guides the LLM, so they
# must describe the shopping-assistant task this notebook implements.
# NOTE: documented with `#` comments rather than a class docstring on purpose: Pydantic
# copies a model's docstring into the generated JSON schema, which would change what
# the LLM receives.
class StructuredResponse(BaseModel):
    # Natural-language answer to the user's question.
    answer: str = Field(
        ..., description="The answer to the user's question, based only on the available products."
    )
    # Only the items that were actually used to build the answer.
    references: list[ReferencedItem] = Field(..., description="A list of items used to answer the question.")

In [None]:
# Create the instructor client by wrapping a default-constructed OpenAI client.
# This wrapped client is the one used later with `response_model=...` to get
# Pydantic model instances back from chat completions.
# NOTE(review): `openai.OpenAI()` is built with no arguments, so credentials are
# presumably read from the environment — confirm before running.
client = instructor.from_openai(openai.OpenAI())

In [None]:
# Now let's define a sample RAG pipeline
def get_embedding(text, model="text-embedding-3-small"):
    """Return the embedding vector of ``text`` computed by an OpenAI embedding model.

    Args:
        text: The input passed to the embeddings endpoint.
        model: Name of the embedding model to use.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    embeddings_result = openai.embeddings.create(input=text, model=model)

    # A single input was sent, so only the first entry of the response is read.
    first_entry = embeddings_result.data[0]
    return first_entry.embedding


def retrieve_data(query, qdrant_client, k=5, collection_name="Amazon-items-collection-00"):
    """Embed ``query`` and fetch the ``k`` closest points from a Qdrant collection.

    Args:
        query: Text to embed and search with.
        qdrant_client: Client object exposing ``query_points`` (e.g. ``QdrantClient``).
        k: Maximum number of points to retrieve.
        collection_name: Name of the collection to search. Defaults to the
            collection this notebook was originally hard-wired to, so existing
            callers keep their behavior.

    Returns:
        A dict of four parallel lists, one entry per retrieved point:
        ``retrieved_context_ids`` (payload ``parent_asin``),
        ``retrieved_context`` (payload ``description``),
        ``retrieved_context_ratings`` (payload ``average_rating``) and
        ``similarity_scores`` (the point's ``score``).

    Raises:
        KeyError: If a point's payload lacks one of the payload keys above.
    """
    query_embedding = get_embedding(query)

    results = qdrant_client.query_points(
        collection_name=collection_name,
        query=query_embedding,
        limit=k,
    )
    points = results.points

    # Build each parallel list directly from the same sequence of points so
    # that index i in every list refers to the same retrieved item.
    return {
        "retrieved_context_ids": [point.payload["parent_asin"] for point in points],
        "retrieved_context": [point.payload["description"] for point in points],
        "retrieved_context_ratings": [point.payload["average_rating"] for point in points],
        "similarity_scores": [point.score for point in points],
    }


def process_context(context):
    """Format the retrieved items as a bulleted text block for the prompt.

    Args:
        context: Mapping with the parallel lists ``retrieved_context_ids``,
            ``retrieved_context`` and ``retrieved_context_ratings``.

    Returns:
        One line per item, each terminated by a newline, in the form
        ``- ID: <id>, rating: <rating>, description: <description>``.
        An empty string when there are no items.

    Raises:
        KeyError: If one of the three expected keys is missing from ``context``.
    """
    rows = zip(
        context["retrieved_context_ids"],
        context["retrieved_context"],
        context["retrieved_context_ratings"],
    )

    # Join once instead of growing a string with ``+=``, and avoid shadowing
    # the builtin ``id`` with the loop variable.
    return "".join(
        f"- ID: {item_id}, rating: {rating}, description: {chunk}\n"
        for item_id, chunk, rating in rows
    )


def build_prompt(preprocessed_context, question):
    """Render the shopping-assistant prompt for one question.

    The prompt explicitly tells the LLM how to structure its output. Instructor
    enforces the Pydantic schema on the response (validation and retries), but
    schema fields do not have to exist in the source data: the LLM can generate
    them from the context, provided the prompt says how. Here the short
    description of each referenced item is not stored in the source data, so
    the instructions ask the model to derive it from the description given in
    the context.

    Args:
        preprocessed_context: Text block listing the available products.
        question: The user's question.

    Returns:
        The full prompt text with both values substituted in.
    """
    # Plain template filled with str.format; the only braces in it are the two
    # placeholders, and substituted values are inserted verbatim.
    template = """
You are a shopping assistant that can answer questions about the products in stock.

You will be given a question and a list of context.

Instructions:
- You need to answer the question based on the provided context only.
- Never use word context and refer to it as the available products.
- As an output you need to provide:

* The answer to the question based on the provided context.
* The list of the IDs that were used to answer the question. Only return the ones used in the answer.
* Short description (1-2 sentences) of each item based on the description provided in the context.

- The short description should have the name of the item.
- The answer to the question should contain detailed information about the product and return with detailed specifications in bullet points.

Context:
{preprocessed_context}

Question:
{question}
"""

    return template.format(
        preprocessed_context=preprocessed_context,
        question=question,
    )


def generate_answer(prompt):
    """Send ``prompt`` to the chat model and return the structured response.

    The call goes through the instructor client (not the plain OpenAI client)
    with ``StructuredResponse`` as the response model.

    Args:
        prompt: Full prompt text, sent as a single system message.

    Returns:
        The ``StructuredResponse`` instance produced for the completion.
    """
    chat_messages = [{"role": "system", "content": prompt}]

    # create_with_completion yields a pair: the parsed model instance and the
    # original completion. Only the parsed instance is handed back.
    structured, _original_completion = client.chat.completions.create_with_completion(
        model="gpt-4.1-mini",
        messages=chat_messages,
        temperature=0,
        response_model=StructuredResponse,
    )
    return structured


def rag_pipeline(question, qdrant_client, top_k=5):
    """Run retrieval, prompt building and generation for one question.

    Args:
        question: The user's question.
        qdrant_client: Client passed through to ``retrieve_data``.
        top_k: Number of items to retrieve.

    Returns:
        A dict with the full structured response (``data_model``), its
        ``answer`` and ``references``, the original ``question``, and the
        retrieval outputs (IDs, descriptions, ratings, similarity scores).
    """
    retrieval = retrieve_data(question, qdrant_client, top_k)
    structured = generate_answer(
        build_prompt(process_context(retrieval), question)
    )

    return {
        # Full data model instance, kept for debugging/tracing.
        "data_model": structured,
        "answer": structured.answer,
        # Items the model reported as used in the answer.
        "references": structured.references,
        "question": question,
        # Retrieval outputs, copied through unchanged.
        "retrieved_context_ids": retrieval["retrieved_context_ids"],
        "retrieved_context": retrieval["retrieved_context"],
        "retrieved_context_ratings": retrieval["retrieved_context_ratings"],
        "similarity_scores": retrieval["similarity_scores"],
    }

In [None]:
# Create the Qdrant client, pointing at a local instance on port 6333.
# Make sure the Qdrant container is started before running this cell.
qdrant_client = QdrantClient(url="http://localhost:6333")

In [None]:
# Invoke the RAG pipeline on a sample question.
query = "What are some good products with high ratings for outdoor activities?"
# Increase top_k to 10 to check that the prompt makes the model return only the
# relevant items as references, not necessarily all 10 retrieved items.
output = rag_pipeline(query, qdrant_client, top_k=10)

In [None]:
# Display the full result dictionary returned by rag_pipeline.
output

In [None]:
# Print only the answer text from the pipeline result.
print(output["answer"])