In [35]:
import os
import openai

from langsmith import Client
from qdrant_client import QdrantClient

from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings

from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper

### Download an example reference data point from LangSmith

In [7]:
# LangSmith client used for every dataset/example lookup below.
# NOTE(review): no arguments are passed, so credentials are presumably read
# from the environment — confirm the LangSmith env vars are set before running.
client = Client()

In [8]:
# Resolve the evaluation dataset by name; its `id` is used to list examples.
dataset = client.read_dataset(dataset_name="rag-evaluation-dataset")

In [9]:
dataset

Dataset(name='rag-evaluation-dataset', description='Dataset for evaluating RAG pipeline', data_type=<DataType.kv: 'kv'>, id=UUID('765c1fe4-ec16-47c9-808d-04c2d138ed40'), created_at=datetime.datetime(2026, 1, 21, 18, 54, 54, 450942, tzinfo=TzInfo(0)), modified_at=datetime.datetime(2026, 1, 21, 18, 54, 54, 450942, tzinfo=TzInfo(0)), example_count=211, session_count=0, last_session_start_time=None, inputs_schema=None, outputs_schema=None, transformations=None, metadata={'runtime': {'sdk': 'langsmith-py', 'library': 'langsmith', 'runtime': 'python', 'platform': 'macOS-15.6-arm64-arm-64bit', 'sdk_version': '0.6.4', 'runtime_version': '3.12.12', 'langchain_version': None, 'py_implementation': 'CPython', 'langchain_core_version': None}})

In [18]:
# Only the first example is displayed, so request a single record instead of
# downloading ten and discarding nine.
next(iter(client.list_examples(dataset_id=dataset.id, limit=1))).outputs

{'ground_truth': 'I cannot determine exact delivery dates or shipping ETA for a specific ZIP code from the provided product chunks, because they do not include shipping carriers, warehouse locations, or fulfillment timelines.',
 'reference_context_ids': [],
 'reference_description': []}

In [19]:
# Only the first example is displayed, so request a single record instead of
# downloading ten and discarding nine.
next(iter(client.list_examples(dataset_id=dataset.id, limit=1))).inputs

{'question': 'What is the exact delivery date if I order the ACEMAGICIAN mini PC to ZIP code 90210?'}

In [20]:
# Fetch the first example exactly once: one API round-trip instead of two, and
# the inputs/outputs are guaranteed to belong to the same record.
reference_example = next(iter(client.list_examples(dataset_id=dataset.id, limit=1)))
reference_input = reference_example.inputs
reference_output = reference_example.outputs

### RAG Pipeline

In [36]:
import openai
from qdrant_client import QdrantClient


def get_embedding(text, model="text-embedding-3-small"):
    """Embed ``text`` with the OpenAI embeddings endpoint.

    Args:
        text: Value forwarded as ``input`` to ``openai.embeddings.create``.
        model: Name of the embedding model to use.

    Returns:
        The ``embedding`` attribute of the first entry in the response's
        ``data`` list.
    """
    embedding_response = openai.embeddings.create(input=text, model=model)
    first_item = embedding_response.data[0]
    return first_item.embedding


def retrieve_data(query, qdrant_client, k=5, collection_name="Amazon-items-collection-00"):
    """Embed ``query`` and fetch the ``k`` nearest points from Qdrant.

    Args:
        query: Text to embed via ``get_embedding`` and search with.
        qdrant_client: Object exposing ``query_points(collection_name=..., query=..., limit=...)``.
        k: Maximum number of points to request.
        collection_name: Collection to search. Defaults to the collection this
            notebook was written against, so existing callers are unaffected.

    Returns:
        A dict of four parallel lists, one entry per returned point:
        ``retrieved_context_ids`` (payload ``parent_asin``),
        ``retrieved_context`` (payload ``description``),
        ``retrieved_context_ratings`` (payload ``average_rating``) and
        ``similarity_scores`` (the point's ``score``).

    Raises:
        KeyError: If a returned point's payload lacks one of the keys above.
    """
    query_embedding = get_embedding(query)

    results = qdrant_client.query_points(
        collection_name=collection_name,
        query=query_embedding,
        limit=k,
    )

    retrieved_context_ids = []
    retrieved_context = []
    similarity_scores = []
    retrieved_context_ratings = []

    for point in results.points:
        payload = point.payload
        retrieved_context_ids.append(payload["parent_asin"])
        retrieved_context.append(payload["description"])
        retrieved_context_ratings.append(payload["average_rating"])
        similarity_scores.append(point.score)

    return {
        "retrieved_context_ids": retrieved_context_ids,
        "retrieved_context": retrieved_context,
        "retrieved_context_ratings": retrieved_context_ratings,
        "similarity_scores": similarity_scores,
    }


def process_context(context):
    """Render the retrieved items as a bullet list, one line per item.

    Args:
        context: Mapping with the parallel lists ``retrieved_context_ids``,
            ``retrieved_context`` and ``retrieved_context_ratings``.

    Returns:
        A string with one ``- ID: ..., rating: ..., description: ...`` line per
        item, each terminated by a newline; empty when the lists are empty.
    """
    rows = zip(
        context["retrieved_context_ids"],
        context["retrieved_context"],
        context["retrieved_context_ratings"],
    )
    return "".join(
        f"- ID: {item_id}, rating: {item_rating}, description: {item_text}\n"
        for item_id, item_text, item_rating in rows
    )



def build_prompt(preprocessed_context, question):
    """Assemble the shopping-assistant prompt around the context and question.

    Args:
        preprocessed_context: Already formatted context text (for example the
            output of ``process_context``); inserted verbatim.
        question: The user's question; inserted verbatim.

    Returns:
        The full prompt string, including the template's leading newline and
        four-space indentation.
    """
    return f"""
    You are a shopping assistant that can answer questions about the products in stock.

    You will be given a question and a list of context.

    Instructions:
    - You need to answer the question based on the provided context only.
    - Never use word context and refer to it as the available products.

    Context:
    {preprocessed_context}

    Question:
    {question}
    """



def generate_answer(prompt):
    """Send ``prompt`` to the chat model and return the reply text.

    Args:
        prompt: Full prompt text; sent as a single ``system`` message.

    Returns:
        The ``content`` of the first choice's message in the completion.
    """
    chat_messages = [{"role": "system", "content": prompt}]
    completion = openai.chat.completions.create(
        model="gpt-5-nano",
        messages=chat_messages,
        reasoning_effort="minimal",
    )
    first_choice = completion.choices[0]
    return first_choice.message.content


def rag_pipeline(question, top_k=5, qdrant_client=None):
    """Run retrieval and generation for one question.

    Args:
        question: The user's question.
        top_k: Number of items to retrieve.
        qdrant_client: Optional client to reuse across calls (useful when
            evaluating many questions). When omitted, a client for
            ``http://localhost:6333`` is created for this call and closed once
            retrieval has finished.

    Returns:
        A dict with ``answer``, ``question``, ``retrieved_context_ids``,
        ``retrieved_context`` and ``similarity_scores``. The retrieved ratings
        are used in the prompt but are not part of the returned dict.
    """
    # Only close a client this function created; a caller-supplied client
    # stays under the caller's control.
    owns_client = qdrant_client is None
    if owns_client:
        qdrant_client = QdrantClient(url="http://localhost:6333")

    try:
        retrieved_context = retrieve_data(question, qdrant_client, top_k)
    finally:
        if owns_client:
            qdrant_client.close()

    preprocessed_context = process_context(retrieved_context)
    prompt = build_prompt(preprocessed_context, question)
    answer = generate_answer(prompt)

    return {
        "answer": answer,
        "question": question,
        "retrieved_context_ids": retrieved_context["retrieved_context_ids"],
        "retrieved_context": retrieved_context["retrieved_context"],
        "similarity_scores": retrieved_context["similarity_scores"],
    }



In [37]:
# Smoke test: run the whole pipeline on an ad-hoc question and inspect the
# returned dict (answer, retrieved IDs, retrieved descriptions, scores).
rag_pipeline("Can I get some charger?", top_k=5)

{'answer': 'Yes. Here are some charger options currently available:\n\n- B0BYYLJRHT: iPhone Charger Cord Lightning Cables, 3-pack, 3ft, MFi certified. Durable, fast charging and data transfer. Compatible with iPhone models and more.\n- B0BFPZGYLD: 5 in 1 USB C to Multi Charging Cable (6 cables in one), 10ft. Includes Lightning, USB-C, Micro USB. MFi certified for iPhone.\n- B09TNXY54Y: MUXA 6 Pack, colorful nylon Lightning cables in multiple lengths (3/3/6/6/10/10 ft). MFi certified.\n- B0BV6PWVCG: GREPHONE 2-pack USB C to Lightning Cable, 6 ft, MFi certified, 6 ft length.\n- B0BGDQLZD2: Mixblu Charger Cable Replacement for Fitbit Inspire 3 (2-pack, 3.3 ft).\n\nIf you tell me which device you’re charging (iPhone, Android, Fitbit, etc.) and your preferred length, I can narrow down a recommended option.',
 'question': 'Can I get some charger?',
 'retrieved_context_ids': ['B0BYYLJRHT',
  'B0BFPZGYLD',
  'B09TNXY54Y',
  'B0BV6PWVCG',
  'B0BGDQLZD2'],
 'retrieved_context': ['iPhone Charger 

### RAGAS metrics

In [38]:
from ragas.dataset_schema import SingleTurnSample
from ragas.metrics import IDBasedContextPrecision, IDBasedContextRecall, ResponseRelevancy
from ragas.metrics.collections import Faithfulness

  from ragas.metrics import IDBasedContextPrecision, IDBasedContextRecall, ResponseRelevancy
  from ragas.metrics import IDBasedContextPrecision, IDBasedContextRecall, ResponseRelevancy
  from ragas.metrics import IDBasedContextPrecision, IDBasedContextRecall, ResponseRelevancy


In [39]:
# Judge LLM and embedding model for the RAGAS evaluators defined below: the
# LangChain clients are wrapped in the RAGAS adapter classes.
ragas_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4.1-mini"))
ragas_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings(model="text-embedding-3-small"))

  ragas_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4.1-mini"))
  ragas_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings(model="text-embedding-3-small"))


In [40]:
reference_input

{'question': 'What is the exact delivery date if I order the ACEMAGICIAN mini PC to ZIP code 90210?'}

In [41]:
reference_output

{'ground_truth': 'I cannot determine exact delivery dates or shipping ETA for a specific ZIP code from the provided product chunks, because they do not include shipping carriers, warehouse locations, or fulfillment timelines.',
 'reference_context_ids': [],
 'reference_description': []}

In [42]:
# Run the pipeline once on the reference question; `result` is the `run`
# argument passed to every evaluator call below.
result = rag_pipeline(reference_input["question"])

In [44]:
async def ragas_faithfulness(run, example):
    """Score one pipeline run with the RAGAS Faithfulness metric.

    Args:
        run: Pipeline result dict; reads ``question``, ``answer`` and
            ``retrieved_context``.
        example: Reference example. Not read by this metric; kept so all
            evaluators share the ``(run, example)`` signature.

    Returns:
        The value returned by the metric's ``single_turn_ascore``.
    """
    # The `Faithfulness` imported at the top of the notebook comes from
    # `ragas.metrics.collections`. That API appears to require a modern
    # instructor-based LLM and to be scored through `ascore(...)`, so it does
    # not fit the `LangchainLLMWrapper` + `SingleTurnSample` +
    # `single_turn_ascore` flow used here and by the other three evaluators.
    # Use the legacy metric so all four evaluators share one flow.
    # NOTE(review): confirm against the installed ragas version.
    from ragas.metrics import Faithfulness as LegacyFaithfulness

    sample = SingleTurnSample(
        user_input=run["question"],
        response=run["answer"],
        retrieved_contexts=run["retrieved_context"],
    )
    scorer = LegacyFaithfulness(llm=ragas_llm)

    return await scorer.single_turn_ascore(sample)

In [None]:
# `example` is not read by this evaluator, so an empty placeholder is passed.
await ragas_faithfulness(result, "")

In [46]:
async def ragas_response_relevancy(run, example):
    """Score one pipeline run with the RAGAS ResponseRelevancy metric.

    Args:
        run: Pipeline result dict; reads ``question``, ``answer`` and
            ``retrieved_context``.
        example: Reference example. Not read by this metric.

    Returns:
        The value returned by the metric's ``single_turn_ascore``.
    """
    turn = SingleTurnSample(
        user_input=run["question"],
        response=run["answer"],
        retrieved_contexts=run["retrieved_context"],
    )
    relevancy_metric = ResponseRelevancy(llm=ragas_llm, embeddings=ragas_embeddings)
    relevancy_score = await relevancy_metric.single_turn_ascore(turn)
    return relevancy_score

In [47]:
# `example` is not read by this evaluator, so an empty placeholder is passed.
await ragas_response_relevancy(result, "")

np.float64(0.0)

In [48]:
async def ragas_context_precision_id_based(run, example):
    """Score retrieval with the RAGAS ID-based context precision metric.

    Args:
        run: Pipeline result dict; reads ``retrieved_context_ids``.
        example: Reference outputs; reads ``reference_context_ids``.

    Returns:
        The value returned by the metric's ``single_turn_ascore``.
    """
    retrieved_ids = run["retrieved_context_ids"]
    reference_ids = example["reference_context_ids"]
    id_sample = SingleTurnSample(
        retrieved_context_ids=retrieved_ids,
        reference_context_ids=reference_ids,
    )
    precision_metric = IDBasedContextPrecision()
    precision_score = await precision_metric.single_turn_ascore(id_sample)
    return precision_score

In [49]:
# The reference example lists no reference context IDs, so none of the
# retrieved IDs can match; the recorded run shows 0.0.
await ragas_context_precision_id_based(result, reference_output)

0.0

In [50]:
async def ragas_context_recall_id_based(run, example):
    """Score retrieval with the RAGAS ID-based context recall metric.

    Args:
        run: Pipeline result dict; reads ``retrieved_context_ids``.
        example: Reference outputs; reads ``reference_context_ids``.

    Returns:
        The value returned by the metric's ``single_turn_ascore``.
    """
    retrieved_ids = run["retrieved_context_ids"]
    reference_ids = example["reference_context_ids"]
    id_sample = SingleTurnSample(
        retrieved_context_ids=retrieved_ids,
        reference_context_ids=reference_ids,
    )
    recall_metric = IDBasedContextRecall()
    recall_score = await recall_metric.single_turn_ascore(id_sample)
    return recall_score

In [51]:
# The reference example lists no reference context IDs, so there is nothing to
# recall; the recorded run shows `nan` rather than a numeric score.
await ragas_context_recall_id_based(result, reference_output)



nan