In [2]:
import openai, os
from qdrant_client import QdrantClient

from langsmith import Client
from qdrant_client import QdrantClient

from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings

from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper

### Download an example reference data point from LangSmith

from dotenv import load_dotenv
# Load environment variables from the .env file at the relative path ../../.env.
load_dotenv("../../.env")

# LangSmith client used by the cells below; indexing os.environ raises KeyError
# when LANGSMITH_API_KEY is not set.
client = Client(api_key=os.environ["LANGSMITH_API_KEY"])

In [8]:
# Look up the LangSmith dataset by name; later cells use `dataset.id`.
dataset = client.read_dataset(dataset_name="rag-evaluation-dataset")

In [9]:
dataset

Dataset(name='rag-evaluation-dataset', description='Dataset for evaluating RAG pipeline', data_type=<DataType.kv: 'kv'>, id=UUID('52318da2-5b67-4a12-929c-333ce5288d1f'), created_at=datetime.datetime(2026, 1, 19, 20, 44, 19, 529875, tzinfo=TzInfo(0)), modified_at=datetime.datetime(2026, 1, 19, 20, 44, 19, 529875, tzinfo=TzInfo(0)), example_count=40, session_count=0, last_session_start_time=None, inputs_schema=None, outputs_schema=None, transformations=None, metadata={'runtime': {'sdk': 'langsmith-py', 'library': 'langsmith', 'runtime': 'python', 'platform': 'macOS-26.2-arm64-arm-64bit', 'sdk_version': '0.6.2', 'runtime_version': '3.12.0', 'langchain_version': '1.2.3', 'py_implementation': 'CPython', 'langchain_core_version': '1.2.7'}})

In [10]:
# Peek at the `outputs` of the first example returned (up to 10 are requested, only index 0 is shown).
list(client.list_examples(dataset_id=dataset.id, limit=10))[0].outputs

{'ground_truth': 'The 80x100 high-powered monocular (B0CH6P8DYF) explicitly includes a phone mount adapter and tripod for smartphone photography. The 10x42 monocular (B0BGLRMPQD) is a compact handheld model that includes lens covers and a case but does not explicitly mention a phone adapter.',
 'reference_context_ids': ['B0BGLRMPQD', 'B0CH6P8DYF'],
 'reference_descriptions': ["Monocular Telescope, 10x42 Monoculars for Adults, Usogood Compact Portable Waterproof Monocular with Hand Strap, Lightweight Handheld Pocket Telescope for Bird Watching 【10x42 High Definition and Comfortable Viewing】This monoculars for adults with a 42mm objective lens provide 10x magnification, which ensures that you can easily magnify the object with a stable view when observing handheld. With a large field of view of 360ft/1000yards, you can clearly see the mountains 1200 yards away. 【22.5 mm Extra Large Eyepiece, More Detail, Clearer and Brighter】Equipped with an oversized 22.5 mm eyepiece, this handheld mono

In [11]:
# Peek at the `inputs` of the first example returned (up to 10 are requested, only index 0 is shown).
list(client.list_examples(dataset_id=dataset.id, limit=10))[0].inputs

{'question': 'Can the Monoculars (10x42) and the 80x100 Monocular be used with a smartphone to take photos?'}

In [13]:
# Fetch the first example exactly once so the question and its reference outputs
# are guaranteed to come from the same record, using a single API call instead
# of two independent listings.
reference_example = list(client.list_examples(dataset_id=dataset.id, limit=1))[0]
reference_input = reference_example.inputs
reference_output = reference_example.outputs

### RAG Pipeline

In [14]:
def get_embedding(text, model="text-embedding-3-small"):
    """Return the embedding vector for ``text``.

    Calls ``openai.embeddings.create`` with the given ``model`` and returns
    the ``embedding`` attribute of the first entry in the response's ``data``.

    Args:
        text: Input passed through unchanged as ``input``.
        model: Embedding model name.

    Returns:
        The ``embedding`` of ``response.data[0]``.
    """
    embedding_response = openai.embeddings.create(model=model, input=text)
    first_entry = embedding_response.data[0]
    return first_entry.embedding


def retrieve_data(query, qdrant_client, k=5, collection_name="Amazon-items-collection-00"):
    """Retrieve up to ``k`` points for ``query`` from a Qdrant collection.

    The query text is embedded with ``get_embedding`` and the resulting vector
    is passed to ``qdrant_client.query_points``.

    Args:
        query: Text to embed and search with.
        qdrant_client: Object exposing ``query_points(collection_name=, query=, limit=)``.
        k: Value passed as ``limit`` to ``query_points``.
        collection_name: Collection to search. Defaults to the collection name
            that was previously hard-coded, so existing callers are unaffected.

    Returns:
        A dict of four parallel lists, one entry per returned point:
        ``retrieved_context_ids`` (payload ``parent_asin``),
        ``retrieved_context`` (payload ``description``),
        ``retrieved_context_ratings`` (payload ``average_rating``) and
        ``similarity_scores`` (the point's ``score``).

    Raises:
        KeyError: If a returned point's payload lacks ``parent_asin``,
            ``description`` or ``average_rating``.
    """
    query_embedding = get_embedding(query)

    results = qdrant_client.query_points(
        collection_name=collection_name,
        query=query_embedding,
        limit=k,
    )

    # Materialize once so the four parallel lists are built from the same sequence.
    points = list(results.points)

    return {
        "retrieved_context_ids": [point.payload["parent_asin"] for point in points],
        "retrieved_context": [point.payload["description"] for point in points],
        "retrieved_context_ratings": [point.payload["average_rating"] for point in points],
        "similarity_scores": [point.score for point in points],
    }


def process_context(context):
    """Render retrieved items as text, one ``- ID: ...`` line per item.

    Args:
        context: Mapping with the parallel sequences ``retrieved_context_ids``,
            ``retrieved_context`` and ``retrieved_context_ratings``.

    Returns:
        A string in which every item contributes one newline-terminated line;
        the empty string when there are no items. As with ``zip``, extra
        entries in a longer sequence are ignored.
    """
    rows = zip(
        context["retrieved_context_ids"],
        context["retrieved_context"],
        context["retrieved_context_ratings"],
    )
    return "".join(
        f"- ID: {item_id}, rating: {item_rating}, description: {item_text}\n"
        for item_id, item_text, item_rating in rows
    )


def build_prompt(preprocessed_context, question):
    """Assemble the shopping-assistant prompt from context text and a question.

    Args:
        preprocessed_context: Text inserted under the ``Context:`` header.
        question: Text inserted under the ``Question:`` header.

    Returns:
        The full prompt string. It starts and ends with a newline because the
        template is a triple-quoted literal opened and closed on its own lines.
    """
    # The section header previously read "Instructtions:"; the typo is fixed
    # here because this text is sent verbatim to the model.
    prompt = f"""
You are a shopping assistant that can answer questions about the products in stock.

You will be given a question and a list of context.

Instructions:
- You need to answer the question based on the provided context only.
- Never use word context and refer to it as the available products.

Context:
{preprocessed_context}

Question:
{question}
"""

    return prompt


def generate_answer(prompt):
    """Send ``prompt`` to the chat-completions endpoint and return the reply text.

    The prompt is sent as a single ``system`` message to model ``gpt-5-nano``
    with ``reasoning_effort="minimal"``.

    Args:
        prompt: Full prompt text.

    Returns:
        ``message.content`` of the first choice in the response.
    """
    system_message = {"role": "system", "content": prompt}
    completion = openai.chat.completions.create(
        messages=[system_message],
        model="gpt-5-nano",
        reasoning_effort="minimal",
    )
    first_choice = completion.choices[0]
    return first_choice.message.content


def rag_pipeline(question, top_k=5, qdrant_client=None):
    """Run retrieval and generation for ``question`` and return the results.

    Steps: ``retrieve_data`` -> ``process_context`` -> ``build_prompt`` ->
    ``generate_answer``.

    Args:
        question: User question; used both for retrieval and in the prompt.
        top_k: Number of items to request from retrieval.
        qdrant_client: Optional client to reuse. When omitted, a client for
            ``http://localhost:6333`` is created for this call and closed as
            soon as retrieval finishes, so repeated calls (e.g. over a whole
            evaluation dataset) do not accumulate open connections. A client
            supplied by the caller is never closed here.

    Returns:
        A dict with ``answer``, ``question``, ``retrieved_context_ids``,
        ``retrieved_context`` and ``similarity_scores``. The ratings returned
        by ``retrieve_data`` are used in the prompt but not included here.
    """
    owns_client = qdrant_client is None
    if owns_client:
        qdrant_client = QdrantClient(url="http://localhost:6333")

    try:
        retrieved_context = retrieve_data(question, qdrant_client, top_k)
    finally:
        # Only release what this call created; the client is not needed
        # for the generation step.
        if owns_client:
            qdrant_client.close()

    preprocessed_context = process_context(retrieved_context)
    prompt = build_prompt(preprocessed_context, question)
    answer = generate_answer(prompt)

    final_result = {
        "answer": answer,
        "question": question,
        "retrieved_context_ids": retrieved_context["retrieved_context_ids"],
        "retrieved_context": retrieved_context["retrieved_context"],
        "similarity_scores": retrieved_context["similarity_scores"]
    }

    return final_result

In [15]:
# Ad-hoc end-to-end run of the pipeline; the returned dict is displayed as the cell output.
rag_pipeline("Can I get some charger?", top_k=5)

{'answer': 'Yes. Here are charger options in the available products:\n- B0BYYLJRHT: 3-pack Apple MFi certified iPhone/Apple device charging cables (Lightning, 3 ft each).\n- B0BFPZGYLD: 5 in 1 USB charging cable (Lightning, USB-C, Micro USB) for multiple devices; 10 ft long; charging only.\n- B09TNXY54Y: MUXA 6-pack colorful Nylon Lightning cables (several lengths available in sets; MFi certified).\n- B0BV6PWVCG: GREPHONE 2-pack USB-C to Lightning cables, 6 ft each; MFi certified.\n- B0BGDQLZD2: Mixblu replacement charging cables for Fitbit Inspire 3 (2-pack, 3.3 ft).\n\nTell me which type you want (e.g., Apple Lightning cables, multi-port USB/Lightning, or device-specific) and your preferred length and quantity, and I’ll help you choose.',
 'question': 'Can I get some charger?',
 'retrieved_context_ids': ['B0BYYLJRHT',
  'B0BFPZGYLD',
  'B09TNXY54Y',
  'B0BV6PWVCG',
  'B0BGDQLZD2'],
 'retrieved_context': ['iPhone Charger Cord Lightning Cables, Original 2022 Upgraded [3Pack 3ft] Apple 

### RAGAS metrics

In [16]:
from ragas.dataset_schema import SingleTurnSample 
from ragas.metrics import IDBasedContextPrecision, IDBasedContextRecall, Faithfulness, ResponseRelevancy

  from ragas.metrics import IDBasedContextPrecision, IDBasedContextRecall, Faithfulness, ResponseRelevancy
  from ragas.metrics import IDBasedContextPrecision, IDBasedContextRecall, Faithfulness, ResponseRelevancy
  from ragas.metrics import IDBasedContextPrecision, IDBasedContextRecall, Faithfulness, ResponseRelevancy
  from ragas.metrics import IDBasedContextPrecision, IDBasedContextRecall, Faithfulness, ResponseRelevancy


In [17]:
# Wrap the LangChain chat model and embedding model in the RAGAS wrapper classes.
# `ragas_llm` is passed to Faithfulness and ResponseRelevancy below;
# `ragas_embeddings` is passed to ResponseRelevancy.
ragas_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4.1-mini"))
ragas_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings(model="text-embedding-3-small"))

  ragas_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4.1-mini"))
  ragas_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings(model="text-embedding-3-small"))


In [18]:
reference_input

{'question': 'Can the Monoculars (10x42) and the 80x100 Monocular be used with a smartphone to take photos?'}

In [19]:
reference_output

{'ground_truth': 'The 80x100 high-powered monocular (B0CH6P8DYF) explicitly includes a phone mount adapter and tripod for smartphone photography. The 10x42 monocular (B0BGLRMPQD) is a compact handheld model that includes lens covers and a case but does not explicitly mention a phone adapter.',
 'reference_context_ids': ['B0BGLRMPQD', 'B0CH6P8DYF'],
 'reference_descriptions': ["Monocular Telescope, 10x42 Monoculars for Adults, Usogood Compact Portable Waterproof Monocular with Hand Strap, Lightweight Handheld Pocket Telescope for Bird Watching 【10x42 High Definition and Comfortable Viewing】This monoculars for adults with a 42mm objective lens provide 10x magnification, which ensures that you can easily magnify the object with a stable view when observing handheld. With a large field of view of 360ft/1000yards, you can clearly see the mountains 1200 yards away. 【22.5 mm Extra Large Eyepiece, More Detail, Clearer and Brighter】Equipped with an oversized 22.5 mm eyepiece, this handheld mono

In [21]:
# Run the pipeline on the reference question; `result` is the run scored by the metric cells below.
result = rag_pipeline(reference_input["question"])

In [22]:
result

{'answer': 'Yes. \n\n- The 10x42 Monocular (Usogood) includes a phone-friendly feature: it mentions a large eyepiece and accessories, and it notes you can observe and take photos with the device. It does not explicitly say smartphone photo capability, but the package includes accessories for mobile use, which implies compatibility for capturing images with a phone.\n\n- The 80x100 High Powered Monocular states that in the package you get a "Phone Hold" and with a phone adapter and a tripod you can take clearer and more beautiful photos outdoors. This clearly supports taking photos with a smartphone.',
 'question': 'Can the Monoculars (10x42) and the 80x100 Monocular be used with a smartphone to take photos?',
 'retrieved_context_ids': ['B0CH6P8DYF',
  'B0BGLRMPQD',
  'B0BG5L2YLC',
  'B0B2JJJFCD',
  'B0C4DBSWGW'],
 'retrieved_context': ['New 2023 80x100 High Powered Monoculars for Adults high Powered BAK-4 Prism and FMC Lens Monocular Telescope for Smartphone Monoculars for Bird Watchin

In [23]:
async def ragas_faithfulness(run, example):
    """Score a pipeline run with the RAGAS ``Faithfulness`` metric.

    Args:
        run: Mapping with ``question``, ``answer`` and ``retrieved_context``.
        example: Accepted for signature parity with the other metric
            functions; not read by this function.

    Returns:
        The value returned by ``Faithfulness.single_turn_ascore``.
    """
    sample_fields = {
        "user_input": run["question"],
        "response": run["answer"],
        "retrieved_contexts": run["retrieved_context"],
    }
    turn = SingleTurnSample(**sample_fields)
    metric = Faithfulness(llm=ragas_llm)
    score = await metric.single_turn_ascore(turn)
    return score

In [24]:
# `example` is not read by ragas_faithfulness, so an empty placeholder is passed.
await ragas_faithfulness(result, "")

0.6666666666666666

In [25]:
async def ragas_responce_relevancy(run, example):
    """Score a pipeline run with the RAGAS ``ResponseRelevancy`` metric.

    Args:
        run: Mapping with ``question``, ``answer`` and ``retrieved_context``.
        example: Accepted for signature parity with the other metric
            functions; not read by this function.

    Returns:
        The value returned by ``ResponseRelevancy.single_turn_ascore``.
    """
    sample_fields = {
        "user_input": run["question"],
        "response": run["answer"],
        "retrieved_contexts": run["retrieved_context"],
    }
    turn = SingleTurnSample(**sample_fields)
    metric = ResponseRelevancy(embeddings=ragas_embeddings, llm=ragas_llm)
    score = await metric.single_turn_ascore(turn)
    return score

In [26]:
# `example` is not read by ragas_responce_relevancy, so an empty placeholder is passed.
await ragas_responce_relevancy(result, "")

np.float64(0.9095437540979158)

In [27]:
async def ragas_context_precision_id_based(run, example):
    """Score a run with the RAGAS ``IDBasedContextPrecision`` metric.

    Args:
        run: Mapping with ``retrieved_context_ids``.
        example: Mapping with ``reference_context_ids``.

    Returns:
        The value returned by ``IDBasedContextPrecision.single_turn_ascore``.
    """
    id_fields = {
        "retrieved_context_ids": run["retrieved_context_ids"],
        "reference_context_ids": example["reference_context_ids"],
    }
    turn = SingleTurnSample(**id_fields)
    metric = IDBasedContextPrecision()
    score = await metric.single_turn_ascore(turn)
    return score

In [28]:
# Score the run's retrieved IDs against the reference IDs stored in the LangSmith example outputs.
await ragas_context_precision_id_based(result, reference_output)

0.4

In [29]:
async def ragas_context_recall_id_based(run, example):
    """Score a run with the RAGAS ``IDBasedContextRecall`` metric.

    Args:
        run: Mapping with ``retrieved_context_ids``.
        example: Mapping with ``reference_context_ids``.

    Returns:
        The value returned by ``IDBasedContextRecall.single_turn_ascore``.
    """
    id_fields = {
        "retrieved_context_ids": run["retrieved_context_ids"],
        "reference_context_ids": example["reference_context_ids"],
    }
    turn = SingleTurnSample(**id_fields)
    metric = IDBasedContextRecall()
    score = await metric.single_turn_ascore(turn)
    return score

In [30]:
# Score the run's retrieved IDs against the reference IDs stored in the LangSmith example outputs.
await ragas_context_recall_id_based(result, reference_output)

1.0