In [1]:

import os
import openai

from langsmith import Client
from qdrant_client import QdrantClient

from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings

from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper

  from .autonotebook import tqdm as notebook_tqdm


### Download an example reference data point from LangSmith

In [2]:

client = Client()

In [3]:
# Look up the evaluation dataset by name through the LangSmith client.
dataset = client.read_dataset(dataset_name="rag-evaluation-dataset")

In [4]:
dataset

Dataset(name='rag-evaluation-dataset', description='Dataset for evaluating RAG pipeline', data_type=<DataType.kv: 'kv'>, id=UUID('53e55163-86dc-4558-bbfd-240c89318e5d'), created_at=datetime.datetime(2025, 10, 22, 11, 20, 49, 994710, tzinfo=datetime.timezone.utc), modified_at=datetime.datetime(2025, 10, 22, 11, 20, 49, 994710, tzinfo=datetime.timezone.utc), example_count=33, session_count=0, last_session_start_time=None, inputs_schema=None, outputs_schema=None, transformations=None, metadata={'runtime': {'sdk': 'langsmith-py', 'library': 'langsmith', 'runtime': 'python', 'platform': 'Windows-11-10.0.26200-SP0', 'sdk_version': '0.4.37', 'runtime_version': '3.12.10', 'langchain_version': None, 'py_implementation': 'CPython', 'langchain_core_version': '1.0.0'}})

In [5]:
list(client.list_examples(dataset_id=dataset.id, limit=15))[0].inputs

{'question': 'Is there a product available for purifying indoor air?'}

In [None]:
list(client.list_examples(dataset_id=dataset.id, limit=15))[0].outputs

{'ground_truth': 'No, we do not sell products for air purification at this time.',
 'reference_context_ids': [],
 'reference_descriptions': []}

In [7]:
list(client.list_examples(dataset_id=dataset.id, limit=15))[11].inputs

{'question': 'Which products could be used for upgrading or organizing a workspace for a desktop computer?'}

In [8]:
list(client.list_examples(dataset_id=dataset.id, limit=15))[11].outputs

{'ground_truth': 'For organizing your desktop workspace, you can use the Bextsrack Monitor Stand Riser (B09XCKYXR8), UPPERCASE GhostCover Keyboard Protector (B09LVX3XW2), ACEMAGICIAN Mini PC (B0C9XFF3CT), and SKYBASIC Wireless Digital Microscope (B0BG5L2YLC) for digital exploration.',
 'reference_context_ids': ['B09XCKYXR8',
  'B09LVX3XW2',
  'B0C9XFF3CT',
  'B0BG5L2YLC'],
 'reference_descriptions': ['Bextsrack Monitor Stand Riser, Metal Desktop Stand for Computer, Laptop, PC, Notebook, Printer, 2 Pack, Black üñ•Ô∏è MULTIPURPOSE COMPUTER RISER - Suitable for a PC monitor, computer, iMac, printer, laptop, TV and LCD display. Ideal addition to home, office, dorm or workspace. üíª HEALTH WORK - This monitor riser elevates your monitor to a suitable viewing height, helps you to form a healthy sitting position, highly relieving back pain and shoulder ache. üñ•Ô∏è PREVENTS OVERHEATING - Adopt unique perforated holes design, this monitor stand allows better air circulation to cool your dev

In [20]:
# Fetch the examples once and read both fields from the same record; the
# previous version issued two identical list_examples requests.
reference_example = list(client.list_examples(dataset_id=dataset.id, limit=14))[11]
reference_input = reference_example.inputs
reference_output = reference_example.outputs

### RAG Pipeline

In [9]:
# Qdrant endpoint: taken from the QDRANT_URL environment variable when it is
# set, otherwise the local default this notebook was written against.
qdrant_client = QdrantClient(url=os.environ.get("QDRANT_URL", "http://localhost:6333"))

In [12]:
def get_embedding(text, model="text-embedding-3-small"):
    """Return the embedding OpenAI produces for ``text``.

    Args:
        text: Input passed to the embeddings endpoint.
        model: Name of the embedding model to request.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    embedding_response = openai.embeddings.create(input=text, model=model)
    first_item = embedding_response.data[0]
    return first_item.embedding


def retrieve_data(query, qdrant_client, k=5, collection_name="Amazon-items-collection-00"):
    """Embed ``query`` and fetch up to ``k`` matching points from Qdrant.

    Args:
        query: Text that is embedded with ``get_embedding`` and searched for.
        qdrant_client: Client whose ``query_points`` method runs the search.
        k: Value passed as the ``limit`` of the search.
        collection_name: Collection to search. Defaults to the collection
            this notebook was written against, so existing callers are
            unaffected.

    Returns:
        A dict of three parallel lists with one entry per returned point:
        ``retrieved_context_ids`` (payload ``parent_asin``),
        ``retrieved_context`` (payload ``description``) and
        ``similarity_scores`` (the point's ``score``).

    Raises:
        KeyError: If a returned payload lacks ``parent_asin`` or
            ``description``.
    """
    results = qdrant_client.query_points(
        collection_name=collection_name,
        query=get_embedding(query),
        limit=k,
    )
    points = results.points

    return {
        "retrieved_context_ids": [point.payload["parent_asin"] for point in points],
        "retrieved_context": [point.payload["description"] for point in points],
        "similarity_scores": [point.score for point in points],
    }


def process_context(context):
    """Render retrieved items as a ``- <id>: <description>`` list.

    Args:
        context: Mapping with ``retrieved_context_ids`` and
            ``retrieved_context`` sequences, paired by position. If the
            sequences differ in length, the extra entries are ignored.

    Returns:
        One line per pair, each terminated by a newline; an empty string
        when there are no pairs.
    """
    pairs = zip(context["retrieved_context_ids"], context["retrieved_context"])
    # `item_id` avoids shadowing the builtin `id`; join builds the text in
    # one pass instead of repeated string concatenation.
    return "".join(f"- {item_id}: {chunk}\n" for item_id, chunk in pairs)


def build_prompt(preprocessed_context, question):
    """Fill the shopping-assistant prompt with the context and the question.

    Args:
        preprocessed_context: Text inserted under the ``Context:`` heading.
        question: Text inserted under the ``Question:`` heading.

    Returns:
        The complete prompt string.
    """
    template = """You are a shopping assistant that can answer questions about the products in stock.

You will be given a question and a list of context.

Instructions:
- You need to answer the question based on the provided context only.
- Never use word context and refer to it as the available products.

Context:
{preprocessed_context}

Question:
{question}
"""

    return template.format(
        preprocessed_context=preprocessed_context,
        question=question,
    )


def generate_answer(prompt):
    """Send ``prompt`` to the chat model and return the reply text.

    Args:
        prompt: Full prompt text, sent as the only message with role
            ``system``.

    Returns:
        The message content of the first choice in the completion.
    """
    system_message = {"role": "system", "content": prompt}
    completion = openai.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[system_message],
        temperature=0.5,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content




def rag_pipeline(question, qdrant_client, top_k=5):
    """Run retrieval, prompt construction and generation for ``question``.

    Args:
        question: Question text used for both retrieval and the prompt.
        qdrant_client: Client handed to ``retrieve_data``.
        top_k: Number of items requested from ``retrieve_data``.

    Returns:
        A dict with the generated ``answer``, the original ``question`` and
        the retrieval details: ``retrieved_context``,
        ``retrieved_context_ids`` and ``retrieved_context_similarity_scores``.
    """
    retrieval = retrieve_data(question, qdrant_client, top_k)
    prompt = build_prompt(process_context(retrieval), question)
    answer = generate_answer(prompt)

    return {
        "answer": answer,
        "question": question,
        "retrieved_context": retrieval["retrieved_context"],
        "retrieved_context_ids": retrieval["retrieved_context_ids"],
        "retrieved_context_similarity_scores": retrieval["similarity_scores"],
    }

In [13]:
rag_pipeline("Can i get a charger?", qdrant_client, 5)

{'answer': 'Yes, there are several chargers available for you to choose from:\n\n1. iPhone Charger Cord Lightning Cables (3 Pack, 3ft) - Apple MFi Certified, compatible with iPhone 13, 12, 11, XR, X, SE, and more. Black color with reinforced joint design for durability. (B0BYYLJRHT)\n\n2. 5 in 1 USB C to Multi Charging Cable (3M/10Ft) - Apple MFi Certified, includes Lightning, Type C, and Micro USB connectors for charging multiple devices simultaneously. (B0BFPZGYLD)\n\n3. Mixblu Charger Cable Replacement for Fitbit Inspire 3 (2 Pack, 3.3Ft) - Specifically designed for Fitbit Inspire 3. (B0BGDQLZD2)\n\n4. GREPHONE 2 Pack USB C to Lightning Cable (6 FT) - MFi Certified, fast charging compatible with various iPhone and iPad models, extra-long cable for convenience. (B0BV6PWVCG)\n\n5. MUXA 6 Pack Apple MFi Certified Lightning Cables - Various lengths (3, 6, 10 FT), colorful nylon cables compatible with multiple iPhone and iPad models. (B09TNXY54Y)\n\nPlease let me know if you want more de

### RAGAS Metrics

In [18]:
from ragas.dataset_schema import SingleTurnSample 
from ragas.metrics import IDBasedContextPrecision, IDBasedContextRecall, Faithfulness, ResponseRelevancy

# Model wrappers handed to the RAGAS metrics defined in the cells below.
ragas_llm = LangchainLLMWrapper(
    ChatOpenAI(model="gpt-4.1-mini"),
)
ragas_embeddings = LangchainEmbeddingsWrapper(
    OpenAIEmbeddings(model="text-embedding-3-small"),
)

  ragas_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4.1-mini"))
  ragas_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings(model="text-embedding-3-small"))


In [21]:
reference_input

{'question': 'Which products could be used for upgrading or organizing a workspace for a desktop computer?'}

In [22]:
reference_output

{'ground_truth': 'For organizing your desktop workspace, you can use the Bextsrack Monitor Stand Riser (B09XCKYXR8), UPPERCASE GhostCover Keyboard Protector (B09LVX3XW2), ACEMAGICIAN Mini PC (B0C9XFF3CT), and SKYBASIC Wireless Digital Microscope (B0BG5L2YLC) for digital exploration.',
 'reference_context_ids': ['B09XCKYXR8',
  'B09LVX3XW2',
  'B0C9XFF3CT',
  'B0BG5L2YLC'],
 'reference_descriptions': ['Bextsrack Monitor Stand Riser, Metal Desktop Stand for Computer, Laptop, PC, Notebook, Printer, 2 Pack, Black üñ•Ô∏è MULTIPURPOSE COMPUTER RISER - Suitable for a PC monitor, computer, iMac, printer, laptop, TV and LCD display. Ideal addition to home, office, dorm or workspace. üíª HEALTH WORK - This monitor riser elevates your monitor to a suitable viewing height, helps you to form a healthy sitting position, highly relieving back pain and shoulder ache. üñ•Ô∏è PREVENTS OVERHEATING - Adopt unique perforated holes design, this monitor stand allows better air circulation to cool your dev

In [23]:
result = rag_pipeline(reference_input["question"], qdrant_client)

In [24]:
result

{'answer': 'The products suitable for upgrading or organizing a workspace for a desktop computer are:\n\n1. B09XCKYXR8: Bextsrack Monitor Stand Riser - This metal desktop stand elevates your monitor to a suitable viewing height, helps form a healthy sitting posture, prevents overheating with perforated holes for air circulation, and maximizes desk space by allowing storage underneath.\n\n2. B09PYFMTBF: Wireless Keyboard and Mouse Combo - This wireless set reduces cable clutter on the desktop, providing a cleaner and more organized workspace with a compact and slim design.\n\n3. B0B1MPKCS8: Punoge Small USB Desk Fan - This portable and quiet desk fan can improve comfort in the workspace by providing adjustable airflow and colorful LED lighting, enhancing the overall environment.\n\nThese products help enhance ergonomics, reduce clutter, and improve comfort in a desktop computer workspace.',
 'question': 'Which products could be used for upgrading or organizing a workspace for a desktop 

In [25]:
async def ragas_faithfulness(run, example):
    """Run the RAGAS ``Faithfulness`` metric on one pipeline result.

    Args:
        run: Mapping with ``question``, ``answer`` and ``retrieved_context``
            keys.
        example: Not used by this metric.

    Returns:
        The score returned by ``single_turn_ascore`` for the sample.
    """
    turn = SingleTurnSample(
        user_input=run["question"],
        response=run["answer"],
        retrieved_contexts=run["retrieved_context"],
    )
    metric = Faithfulness(llm=ragas_llm)
    score = await metric.single_turn_ascore(turn)
    return score

In [26]:
await ragas_faithfulness(result, "")

1.0

In [27]:
async def ragas_responce_relevancy(run, example):
    """Run the RAGAS ``ResponseRelevancy`` metric on one pipeline result.

    Args:
        run: Mapping with ``question``, ``answer`` and ``retrieved_context``
            keys.
        example: Not used by this metric.

    Returns:
        The score returned by ``single_turn_ascore`` for the sample.
    """
    turn = SingleTurnSample(
        user_input=run["question"],
        response=run["answer"],
        retrieved_contexts=run["retrieved_context"],
    )
    metric = ResponseRelevancy(llm=ragas_llm, embeddings=ragas_embeddings)
    score = await metric.single_turn_ascore(turn)
    return score

In [28]:
await ragas_responce_relevancy(result, "")

np.float64(0.9530521065860919)

In [29]:
async def ragas_context_precision_id_based(run, example):
    """Run the RAGAS ``IDBasedContextPrecision`` metric on one result.

    Args:
        run: Mapping with a ``retrieved_context_ids`` key.
        example: Mapping with a ``reference_context_ids`` key.

    Returns:
        The score returned by ``single_turn_ascore`` for the sample.
    """
    turn = SingleTurnSample(
        retrieved_context_ids=run["retrieved_context_ids"],
        reference_context_ids=example["reference_context_ids"],
    )
    metric = IDBasedContextPrecision()
    score = await metric.single_turn_ascore(turn)
    return score

In [30]:
await ragas_context_precision_id_based(result, reference_output)

0.4

In [31]:
async def ragas_context_recall_id_based(run, example):
    """Run the RAGAS ``IDBasedContextRecall`` metric on one result.

    Args:
        run: Mapping with a ``retrieved_context_ids`` key.
        example: Mapping with a ``reference_context_ids`` key.

    Returns:
        The score returned by ``single_turn_ascore`` for the sample.
    """
    turn = SingleTurnSample(
        retrieved_context_ids=run["retrieved_context_ids"],
        reference_context_ids=example["reference_context_ids"],
    )
    metric = IDBasedContextRecall()
    score = await metric.single_turn_ascore(turn)
    return score

In [32]:
await ragas_context_recall_id_based(result, reference_output)

0.5

In [33]:
import ragas.metrics
dir(ragas.metrics)

['AgentGoalAccuracyWithReference',
 'AgentGoalAccuracyWithoutReference',
 'AnswerAccuracy',
 'AnswerCorrectness',
 'AnswerRelevancy',
 'AnswerSimilarity',
 'AspectCritic',
 'BaseMetric',
 'BleuScore',
 'ChrfScore',
 'ContextEntityRecall',
 'ContextPrecision',
 'ContextRecall',
 'ContextRelevance',
 'ContextUtilization',
 'DataCompyScore',
 'DiscreteMetric',
 'DistanceMeasure',
 'ExactMatch',
 'FactualCorrectness',
 'Faithfulness',
 'FaithfulnesswithHHEM',
 'IDBasedContextPrecision',
 'IDBasedContextRecall',
 'InstanceRubrics',
 'LLMContextPrecisionWithReference',
 'LLMContextPrecisionWithoutReference',
 'LLMContextRecall',
 'LLMMetric',
 'LLMSQLEquivalence',
 'Metric',
 'MetricOutputType',
 'MetricResult',
 'MetricType',
 'MetricWithEmbeddings',
 'MetricWithLLM',
 'MultiModalFaithfulness',
 'MultiModalRelevance',
 'MultiTurnMetric',
 'NoiseSensitivity',
 'NonLLMContextPrecisionWithReference',
 'NonLLMContextRecall',
 'NonLLMStringSimilarity',
 'NumericMetric',
 'RankingMetric',
 'Respo

In [34]:
import inspect, ragas.metrics as m
print(inspect.getmodule(m.IDBasedContextPrecision).__name__)
print(inspect.getmodule(m.IDBasedContextRecall).__name__)

ragas.metrics._context_precision
ragas.metrics._context_recall
