In [1]:
import os, json, openai
from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue

from langsmith import Client

In [2]:
# Client for the Qdrant instance addressed as "localhost" on port 6333;
# every later cell that reads the collection goes through this object.
qdrant_client = QdrantClient("localhost", port=6333)

In [3]:
# Download a sample of the collection to use as context for question generation.
# `scroll` returns a (records, next_page_offset) tuple, so `all_points[0]` is the
# list of records that later cells iterate over.
all_points = qdrant_client.scroll(
    collection_name="amazon_items-collection-00",
    limit=100,
    offset=None,
    with_payload=True,
    with_vectors=False,  # embeddings are not needed here
)

# Preview only a few records instead of dumping all 100 into the cell output.
all_points[0][:3]

[Record(id=0, payload={'description': "1080P Full HD Dash Camera for Cars, Diamond Lark Dash Cam Front with 32G SD Card, 3‚ÄùLCD Screen, 170¬∞Wide Angle, Dashboard DashCam with Loop Recording, HDR, Night Vision, G-Sensor, Parking Monitor ['„Äê1080P FHD Front Dash Cam„ÄëDiamond Lark dash camera for cars features a 3-inch LCD display screen and 1080P video recording, which can clearly shoot scenes during driving and capture emergencies at any time. In addition, the dashcams for cars with a 170-degree wide-angle lens can minimize blind spots and make the shooting content more comprehensive', '„ÄêLoop Recording„ÄëThe car camera has a built-in loop recording function. When you enable this function, the old videos in the car security camera will be constantly overwritten with new videos to ensure uninterrupted video recording. Besides, the car dashcam can be set three kinds of video cycle shooting duration: 1 minute, 3 minutes and 5 minutes, which you can set it according to your needs', '„Ä

In [5]:
# Keep only what the prompt needs from each record: the parent ASIN (exposed
# as "id") and the product description.
complete_context = [
    {
        "id": record.payload["parent_asin"],
        "description": record.payload["description"],
    }
    for record in all_points[0]
]


In [6]:
# Preview the first few entries rather than rendering the whole context list.
complete_context[:3]

[{'id': 'B0BLH9LX4P',
  'description': "1080P Full HD Dash Camera for Cars, Diamond Lark Dash Cam Front with 32G SD Card, 3‚ÄùLCD Screen, 170¬∞Wide Angle, Dashboard DashCam with Loop Recording, HDR, Night Vision, G-Sensor, Parking Monitor ['„Äê1080P FHD Front Dash Cam„ÄëDiamond Lark dash camera for cars features a 3-inch LCD display screen and 1080P video recording, which can clearly shoot scenes during driving and capture emergencies at any time. In addition, the dashcams for cars with a 170-degree wide-angle lens can minimize blind spots and make the shooting content more comprehensive', '„ÄêLoop Recording„ÄëThe car camera has a built-in loop recording function. When you enable this function, the old videos in the car security camera will be constantly overwritten with new videos to ensure uninterrupted video recording. Besides, the car dashcam can be set three kinds of video cycle shooting duration: 1 minute, 3 minutes and 5 minutes, which you can set it according to your needs', '„

In [8]:
from pydantic import BaseModel, Field
from typing import List, Dict

# One generated evaluation item. This model is nested inside SyntheticQADataset,
# which is passed as the structured-output `response_format` of the LLM call, so
# the field names and `description` strings below are part of what the model is
# asked to produce.
# NOTE: documented with `#` comments on purpose -- pydantic includes a class
# docstring in the generated JSON schema, which would alter the request.
class SyntheticQA(BaseModel):
    # The user-style question to evaluate the RAG system with.
    question: str = Field(..., description="Suggested question from a potential user.")
    # IDs of the context items needed to answer; in this notebook they are the
    # "id" values (parent ASINs) supplied in the prompt context.
    chunk_ids: List[str] = Field(..., description="List of chunk IDs relevant to the answer.")
    # Reference answer, later stored as the example's ground truth.
    answer_example: str = Field(..., description="A grounded answer based strictly on the context.")
    # Model's justification for the selected chunk IDs.
    reasoning: str = Field(..., description="Why these chunks were chosen.")

# Top-level structured-output schema: a wrapper object holding the full list of
# generated SyntheticQA items. It is passed as `response_format` to the LLM call.
# NOTE: kept as `#` comments -- pydantic includes a class docstring in the
# generated JSON schema, which would alter the request.
class SyntheticQADataset(BaseModel):
    # All generated question/answer items, in the order the model returned them.
    questions: List[SyntheticQA] = Field(..., description="List of synthetic questions")


In [10]:
# System message: describes the RAG use case and fixes the composition of the
# generated dataset (10 multi-chunk + 15 single-chunk + 5 unanswerable = 30).
SYSTEM_PROMPT = """I am building a RAG application. I have a collection of product descriptions.
The RAG application will act as a shopping assistant that can answer questions about the stock of the products we have available.

Instructions:
- Come up with 30 questions total.
- 10 questions must require multiple chunks to answer (comparison or aggregate questions).
- 15 questions must be answerable by a single chunk.
- 5 questions must be unanswerable (where the answer should state information is missing).
- The questions should imitate a potential real user of this RAG system.
"""

# User message: embeds the sampled products as pretty-printed JSON. This is an
# f-string, so `complete_context` is serialized once, when this cell runs;
# re-run the cell after changing the context.
USER_PROMPT = f"""Here is the list of available products (context):
{json.dumps(complete_context, indent=2)}

Generate the dataset."""

In [16]:
# Generate the whole synthetic dataset in a single structured-output call; the
# reply is parsed into a SyntheticQADataset instance by the SDK.
response = openai.chat.completions.parse(
    model="gpt-5-mini",
    messages=[
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": USER_PROMPT}
    ],
    response_format=SyntheticQADataset,
)

# `parsed` is None when the model refuses to answer. Fail here with the refusal
# text instead of hitting an AttributeError in a later cell.
parsed_message = response.choices[0].message
if parsed_message.parsed is None:
    raise RuntimeError(
        f"Model returned no parsed dataset (refusal: {parsed_message.refusal!r})"
    )

synthetic_qa_dataset = parsed_message.parsed



In [19]:
# Name of the LangSmith dataset that will hold the synthetic questions.
dataset_name = "rag-evaluation-dataset"

# The API key is read from the environment (KeyError if it is not set).
langsmith_client = Client(api_key=os.environ["LANGSMITH_API_KEY"])

# Create the (initially empty) dataset; examples are added in a later cell.
dataset = langsmith_client.create_dataset(
    description="A dataset of synthetic questions for RAG evaluation",
    dataset_name=dataset_name,
)

In [22]:
# Spot-check the first generated item before uploading anything.
synthetic_qa_dataset.questions[0]

SyntheticQA(question='Does the Diamond Lark 1080P dash camera come with a memory card and what maximum card size does it support?', chunk_ids=['B0BLH9LX4P'], answer_example='The Diamond Lark 1080P dash camera includes a 32GB micro SD card (installed) and supports up to 64GB storage (do not install more than 64GB).', reasoning='The product description for B0BLH9LX4P explicitly states a 32GB SD card is included and notes the maximum supported card size of 64GB.')

In [25]:
def get_production_description(
    asin: str, collection_name: str = "amazon_items-collection-00"
) -> str:
    """Return the stored description of the product with the given parent ASIN.

    Args:
        asin: Value matched exactly against the ``parent_asin`` payload field.
        collection_name: Qdrant collection to search. Defaults to the
            collection used throughout this notebook.

    Returns:
        The ``description`` payload field of the first matching point.

    Raises:
        IndexError: If no point in the collection has this ``parent_asin``
            (for example, an ID the LLM made up).
    """
    # `scroll` returns (records, next_page_offset); only the records are needed.
    matches, _next_offset = qdrant_client.scroll(
        collection_name=collection_name,
        scroll_filter=Filter(
            must=[
                FieldCondition(key="parent_asin", match=MatchValue(value=asin)),
            ]
        ),
        with_payload=True,
        limit=1,  # only the first match is ever used
        with_vectors=False,
    )
    if not matches:
        # Same exception type an empty result produced before, but the message
        # now says which ASIN and collection were involved.
        raise IndexError(
            f"No product with parent_asin={asin!r} found in collection {collection_name!r}"
        )
    return matches[0].payload["description"]

In [26]:
# Upload every synthetic question as one LangSmith example. Besides the
# reference answer, each example stores the referenced chunk IDs and the
# product descriptions looked up for those IDs.
for question in synthetic_qa_dataset.questions:
    reference_descriptions = [
        get_production_description(asin) for asin in question.chunk_ids
    ]
    example_outputs = {
        "ground_truth": question.answer_example,
        "reference_chunks": question.chunk_ids,
        "reference_descriptions": reference_descriptions,
    }
    langsmith_client.create_example(
        dataset_id=dataset.id,
        inputs={"question": question.question},
        outputs=example_outputs,
    )