In [None]:
import json
import os
import re

import openai
from langsmith import Client
from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue

In [None]:
# Client for the Qdrant instance at http://localhost:6333 (the URL is hard-coded here).
qdrant_client = QdrantClient(url="http://localhost:6333")

### Download all data from Qdrant

In [None]:
# Request a single page of up to 100 points from the collection, with payloads
# and without vectors.
# Later cells read the points as `all_points[0]`, so despite its name this
# variable holds the whole scroll result rather than the sequence of points.
# NOTE(review): only one page is requested; if the collection holds more than
# `limit` points the remainder is not fetched — confirm this is intended.
all_points = qdrant_client.scroll(
    collection_name="Amazon-items-collection-00",
    limit=100,
    offset=None,
    with_payload=True,
    with_vectors=False
)

In [None]:
all_points

In [None]:
all_points[0][0].payload

In [None]:
# One {"id", "text"} record per fetched point: the id is the item's parent ASIN
# and the text is its description, both read from the point payload.
all_context = [
    {
        "id": point.payload["parent_asin"],
        "text": point.payload["description"],
    }
    for point in all_points[0]
]

In [None]:
all_context

### Render a prompt to generate a synthetic eval reference dataset

In [None]:
# JSON Schema for the reply we ask the model to produce: an array of objects,
# each holding a question, the IDs of the chunks that answer it, an example
# answer and the reasoning behind the chunk selection. The schema is embedded
# verbatim in SYSTEM_PROMPT below.
output_schema = {
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "question": {
                "type": "string",
                "description": "Suggested question.",
            },
            "chunk_ids": {
                "type": "array",
                "items": {
                    "type": "string",
                    "description": "ID of the chunk that could be used to answer the question.",
                },
            },
            "answer_example": {
                "type": "string",
                "description": "Suggested answer grounded in the context.",
            },
            "reasoning": {
                "type": "string",
                "description": "Reasoning why the question could be answered with the chunks.",
            },
        },
    },
}


# Instructions for the generator model. The requested question counts add up:
# 10 multi-chunk + 15 single-chunk + 5 unanswerable = 30.
# NOTE(review): the chunk count (50) is hard-coded in the text while the data
# is fetched with limit=100 — confirm it matches the real number of chunks.
SYSTEM_PROMPT = f"""
I am building a RAG application. I have a collection of 50 chunks of text.
The RAG application will act as a shopping assistant that can answer questions about the stock of the products we have available.
I will provide all of the available products to you with IDs of each chunk.
I want you to come up with 30 questions to which the answers could be grounded in the chunk context.
As an output I need you to provide me the list of questions and the IDs of the chunks that could be used to answer them.
Also, provide an example answer to the question given the context of the chunks.
Also, provide the reason why you chose the chunks to answer the questions.
Construct 10 questions that could use multiple chunks in the answer.
Construct 15 questions that could use single chunk in the answer.
Also, include 5 questions that can't be answered with the available chunks.

<OUTPUT JSON SCHEMA>
{json.dumps(output_schema, indent=2)}
</OUTPUT JSON SCHEMA>

I need to be able to parse the json output.
"""

# User message carrying the chunk data. `all_context` is interpolated with its
# default string form, so the model receives the Python repr of the list of
# {"id", "text"} dicts rather than JSON.
USER_PROMPT = f"""
Here is the list of chunks, each list element is a dictionary with id and text:
{all_context}
"""

In [None]:
print(USER_PROMPT)

In [None]:
print(SYSTEM_PROMPT)

In [None]:
# Ask the model for the synthetic evaluation set: the system prompt carries the
# instructions and output schema, the user prompt carries the chunks.
chat_messages = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {"role": "user", "content": USER_PROMPT},
]
response = openai.chat.completions.create(model="gpt-4.1", messages=chat_messages)

# Echo the raw reply so it can be inspected before parsing.
print(response.choices[0].message.content)

In [None]:
# Parse the model reply into a list of question records.
raw_reply = response.choices[0].message.content

# The reply may be wrapped in a Markdown code fence; strip the fence markers.
unfenced_reply = raw_reply.replace("```json", "").replace("```", "")

# The model sometimes inserts `//` comments (e.g. section headings) that are
# not valid JSON. Match string literals and comments in one pass so that a
# `//` inside a string value (such as a URL) is kept, while every comment
# outside a string is removed — not just one specific, previously seen line.
string_or_comment = re.compile(r'"(?:\\.|[^"\\])*"|//[^\n]*')
json_text = string_or_comment.sub(
    lambda match: match.group(0) if match.group(0).startswith('"') else "",
    unfenced_reply,
)

# Raises json.JSONDecodeError if the cleaned reply is still not valid JSON.
json_output = json.loads(json_text)

# Later cells index into and iterate over the result, so fail early with a
# clear message if the model returned something other than a JSON array.
if not isinstance(json_output, list):
    raise ValueError(
        f"Expected a JSON array of questions, got {type(json_output).__name__}"
    )

In [None]:
len(json_output)

In [None]:
json_output

In [None]:
# Spot check: look up the points whose payload field `parent_asin` equals one
# specific ASIN, to confirm that filtering on that field works.
asin_filter = Filter(
    must=[
        FieldCondition(key="parent_asin", match=MatchValue(value="B0BYYLJRHT")),
    ]
)
scroll_result = qdrant_client.scroll(
    collection_name="Amazon-items-collection-00",
    scroll_filter=asin_filter,
    limit=100,
    with_payload=True,
    with_vectors=False,
)
# The first element of the scroll result is the sequence of matching points.
points = scroll_result[0]

In [None]:
points[0].payload

In [None]:
def get_description(parent_asin: str) -> str:
    """Return the ``description`` payload field of the item with the given ASIN.

    Looks up points in the ``Amazon-items-collection-00`` collection whose
    ``parent_asin`` payload field equals ``parent_asin`` and returns the
    description of the first match.

    Args:
        parent_asin: ASIN to match against the ``parent_asin`` payload field.

    Returns:
        The value stored under ``"description"`` in the matched point's payload.

    Raises:
        IndexError: If no point matches ``parent_asin``. The exception type is
            the same one the bare ``[0]`` lookup used to raise, but the message
            now names the missing ASIN (useful when an ID was invented by the
            model rather than taken from the collection).
        KeyError: If the matched payload has no ``"description"`` field.
    """
    asin_filter = Filter(
        must=[
            FieldCondition(key="parent_asin", match=MatchValue(value=parent_asin)),
        ]
    )

    # Only the first match is used, so a single point is all we need to fetch.
    matches = qdrant_client.scroll(
        collection_name="Amazon-items-collection-00",
        scroll_filter=asin_filter,
        limit=1,
        with_payload=True,
        with_vectors=False,
    )[0]

    if not matches:
        raise IndexError(
            f"No item with parent_asin {parent_asin!r} found in collection "
            "'Amazon-items-collection-00'"
        )

    return matches[0].payload["description"]

### Create the eval dataset in LangSmith

In [None]:
# Remove the generated item at index 28 and keep it for inspection.
# NOTE(review): the index is hard-coded and `pop` mutates `json_output` in
# place, so (assuming `json_output` is a list) re-running this cell removes a
# different item each time and raises IndexError when fewer than 29 items
# remain — confirm which item was meant to be dropped and why.
removed_item = json_output.pop(28)

In [None]:
removed_item

In [None]:
# LangSmith client. The API key is read from the LANGSMITH_API_KEY environment
# variable; a KeyError is raised here if that variable is not set.
client = Client(api_key=os.environ["LANGSMITH_API_KEY"])

In [None]:
# Create the LangSmith dataset that will hold the generated examples.
# NOTE(review): re-running this cell presumably fails if a dataset with this
# name already exists — confirm, and delete the old dataset before a re-run.
dataset_name = "rag-evaluation-dataset"
dataset = client.create_dataset(
    dataset_name=dataset_name,
    description="Dataset for evaluating RAG pipeline"
)

In [None]:
# Upload one LangSmith example per generated question. Each example stores the
# reference answer, the IDs of the supporting chunks and the descriptions
# fetched for those IDs via get_description.
for item in json_output:
    chunk_ids = item["chunk_ids"]
    print(chunk_ids)

    reference_outputs = {
        "ground_truth": item["answer_example"],
        "reference_context_ids": chunk_ids,
        "reference_descriptions": [
            get_description(chunk_id) for chunk_id in chunk_ids
        ],
    }
    client.create_example(
        dataset_id=dataset.id,
        inputs={"question": item["question"]},
        outputs=reference_outputs,
    )