In [101]:
import os

import openai

from langsmith import Client

from qdrant_client import QdrantClient

from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings


### Download an example reference data point from LangSmith

In [3]:
# Initialize the LangSmith client used by the cells below to read the dataset and its examples.
# No arguments are passed — presumably credentials/endpoint come from the environment; confirm.
client = Client()

In [None]:
# Look up the evaluation dataset by name; its `id` is used below to list examples.
dataset = client.read_dataset(dataset_name="rag-eval-dataset")

In [10]:
# Peek at the inputs of the first example (up to 10 examples are requested, only index 0 is shown).
list(client.list_examples(dataset_id=dataset.id, limit=10))[0].inputs

{'question': 'Do you carry graphics cards or internal PC components like motherboards or RAM sticks?'}

In [23]:
# Peek at the outputs of the first example (up to 20 examples are requested, only index 0 is shown).
list(client.list_examples(dataset_id=dataset.id, limit=20))[0].outputs

{'ground_truth': 'There are currently no graphics cards or internal PC components such as motherboards or RAM sticks available.',
 'reference_context_ids': [],
 'reference_context_text': []}

In [41]:
# Pick the 36th example (index 35) as the reference data point.
# The examples are listed once and the same example object supplies both the
# inputs and the outputs, so the pair cannot come from two separate responses
# and the request is not issued twice.
reference_example = list(client.list_examples(dataset_id=dataset.id, limit=36))[35]
reference_input = reference_example.inputs
reference_output = reference_example.outputs

In [58]:
reference_input

{'question': 'Which earbud products are compatible with both iPhone and Android devices and offer waterproof protection?'}

In [91]:
reference_output

{'ground_truth': 'The TELSOR Wireless Earbuds (B0C6K1GQCF) are compatible with most Bluetooth-enabled iPhones and Android devices, offer IPX7 waterproof protection, and have a long playtime. Similarly, the Open Ear Headphones (B0CBMPG524) provide IPX7 waterproofing, comfort, and are suitable for use with various devices. The Wireless Earbuds (B0B9FTVL58) also offer Bluetooth 5.3 connectivity, are compatible with smartphones, and have IPX7 waterproofing.',
 'reference_context_ids': ['B0C6K1GQCF', 'B0CBMPG524', 'B0B9FTVL58'],
 'reference_context_text': ["TELSOR Wireless Earbuds for iPhone, Bluetooth Headphones Touch Control Stereo Sound Bluetooth Earbuds with Noise Cancelling Mic for Calls, 30H Playtime, IPX7 Waterproof Earbuds for Android, Black ‚ô¨„ÄêBluetooth„ÄëPair instantly with an uninterrupted and stable transmission with Bluetooth 5.1. AVRCP, HCP, HSP, and A2DP profiles are supported. The wireless earbuds are compatible with most Bluetooth enabled iPhones, Andriods, smart TVs, co

In [None]:
#### RAG Pipeline (copied from rag-retrieval-generation.py)

In [54]:
# Connect to the Qdrant instance at localhost:6333; this client is passed to the retrieval step below.
qdrant_client = QdrantClient(url="http://localhost:6333")

In [55]:
def get_embeddings(text, model="text-embedding-3-small"):
    """
    Embed `text` with the OpenAI embeddings endpoint.

    Args:
        text: Value passed as `input` to the embeddings request.
        model: Name of the embedding model to use.

    Returns:
        The `embedding` attribute of the first item in the response data.
    """
    embedding_response = openai.embeddings.create(input=text, model=model)
    first_item = embedding_response.data[0]
    return first_item.embedding


def retrieve_data(query, qdrant_client, k=5, collection_name="amazon_items-collection-00"):
    """
    Retrieve the k most similar items to the query from a Qdrant collection.

    Args:
        query: Text that is embedded with `get_embeddings` and used as the search vector.
        qdrant_client: Client object exposing `query_points`.
        k: Maximum number of points to request.
        collection_name: Collection to search. Defaults to the collection name
            that was previously hard-coded, so existing callers are unaffected.

    Returns:
        dict with three parallel lists, one entry per returned point:
            "retrieved_context_ids": the payload's "parent_asin"
            "retrieved_context":     the payload's "description"
            "similarity_scores":     the point's score
    """
    response = qdrant_client.query_points(
        collection_name=collection_name,
        query=get_embeddings(query),
        limit=k,
    )

    # Materialize once so the three lists are built from the same sequence.
    points = list(response.points)
    return {
        "retrieved_context_ids": [point.payload["parent_asin"] for point in points],
        "retrieved_context": [point.payload["description"] for point in points],
        "similarity_scores": [point.score for point in points],
    }

def format_retrieved_context(context_data):
    """
    Render retrieved items as a bulleted list, one "- <id>: <description>" line per item.

    Args:
        context_data: dict with parallel sequences under the keys
            "retrieved_context_ids" and "retrieved_context".

    Returns:
        A single string in which every item line ends with a newline;
        an empty string when there are no items.
    """
    # zip pairs each id with its description and stops at the shorter sequence.
    pairs = zip(context_data["retrieved_context_ids"], context_data["retrieved_context"])
    # `item_id` avoids shadowing the built-in `id`; join avoids repeated string concatenation.
    return "".join(f"- {item_id}: {description}\n" for item_id, description in pairs)

def create_prompt(query, preprocessed_retrieved_context):
    """
    Build the shopping-assistant prompt for a question and its retrieved products.

    Args:
        query: The user's question, inserted under "Question:".
        preprocessed_retrieved_context: dict accepted by `format_retrieved_context`;
            its formatted form is inserted under "Context:".

    Returns:
        The complete prompt string.
    """
    products_listing = format_retrieved_context(preprocessed_retrieved_context)

    return f"""
    You are a helpful shopping assistant who can answer questions about the products in stock.
    You are given a question and a list of products with their descriptions.
    Your job is to answer the question based on the context.

    Instructions:
    - Answer the question based on the context only.
    - Never use the word "context" in your answer and refer to it as available products.
    - If you don't know the answer, say "I don't know".
    - If the question is not related to the context, say "I don't know".

    Question: 
    {query}
    Context: 
    {products_listing}
    """

def generate_answer(prompt):
    """
    Send `prompt` as a single system message to the chat model and return the reply text.

    Args:
        prompt: Full prompt text, used as the content of the system message.

    Returns:
        The message content of the first choice in the completion response.
    """
    system_message = {"role": "system", "content": prompt}
    completion = openai.chat.completions.create(
        messages=[system_message],
        model="gpt-4.1-mini",
        temperature=0.5,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

def rag_pipeline(query, qdrant_client, top_k=5):
    """
    Run retrieval, prompt construction and answer generation for one question.

    Args:
        query: The user's question.
        qdrant_client: Client forwarded to `retrieve_data`.
        top_k: Number of items to retrieve.

    Returns:
        dict with "question", "answer", and the "retrieved_context_ids",
        "retrieved_context" and "similarity_scores" lists from retrieval.
    """
    retrieved = retrieve_data(query, qdrant_client, top_k)
    answer = generate_answer(create_prompt(query, retrieved))

    # Copy only these retrieval fields into the response, in this order.
    passthrough_keys = ("retrieved_context_ids", "retrieved_context", "similarity_scores")
    return {
        "question": query,
        "answer": answer,
        **{key: retrieved[key] for key in passthrough_keys},
    }

In [74]:
# Try the pipeline on an ad-hoc question and display the full response dict.
rag_pipeline("What is the price of the product with the highest rating?", qdrant_client, 5)

{'question': 'What is the price of the product with the highest rating?',
 'answer': "I don't know the price of the product with the highest rating based on the available products.",
 'retrieved_context_ids': ['B0BP9Z159S',
  'B0CH6P8DYF',
  'B0B9FTVL58',
  'B09XCKYXR8',
  'B0BNN776VN'],
 'retrieved_context': ["UseBean 240W USB C to USB C Cable 6.6ft (2 Pack),USB 3.2 Gen 2X2 20Gbps Data Transfer Type-C Cable PD Fast Charging,4K Video Monitor Cord,for Thunderbolt 3/4 MacBook Pro, iPad Pro,Galaxy S21/S20 „Äê240W USB C Cable„ÄëSupports PD3.1 fast charging protocol;Supports fast charge up to 240W(48V 5A) when paired up with a compatible cable and charger. Backward compatible with 200/170/140/100/60/18W power devices.(Note: Charging speed may be dependent on the host device's charging capability). „Äê20Gbps Data Transfer„ÄëOffers 20Gbps SuperSpeed data transfer ,transferring an HD movie in just seconds.(NOTE: The transfer speed depends on the speed supported by your device port.) „Äê4K@60Hz

In [75]:
# Run the RAG pipeline on the reference question; `result` is scored by the ragas cells below.
result = rag_pipeline(reference_input["question"], qdrant_client, top_k=5)

In [77]:
result

{'question': 'Which earbud products are compatible with both iPhone and Android devices and offer waterproof protection?',
 'answer': 'The following earbud products are compatible with both iPhone and Android devices and offer waterproof protection:\n\n1. TELSOR Wireless Earbuds (B0C6K1GQCF) - Compatible with most Bluetooth-enabled iPhones and Androids, featuring IPX7 waterproof rating.\n\n2. Open Ear Headphones Bluetooth 5.3 Earbuds (B0CBMPG524) - Compatible with Bluetooth devices including iPhone and Android, with IPX7 waterproof rating.\n\n3. Wireless Earbuds Bluetooth 5.3 Headphones (B0B9FTVL58) - These earbuds are Bluetooth 5.3 compatible and have an IPX7 waterproof rating.\n\nAll three provide waterproof protection and support Bluetooth connectivity with both iPhone and Android devices.',
 'retrieved_context_ids': ['B0C6K1GQCF',
  'B0CBMPG524',
  'B0B9FTVL58',
  'B0BS15TRJ3',
  'B0BNVKS9WH'],
 'retrieved_context': ["TELSOR Wireless Earbuds for iPhone, Bluetooth Headphones Touch C

In [78]:
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.dataset_schema import SingleTurnSample
from ragas.metrics import Faithfulness, IDBasedContextPrecision, IDBasedContextRecall, ResponseRelevancy

# Wrap the LangChain chat and embedding models so the ragas metrics below can use them.
llm = ChatOpenAI(model="gpt-4.1-mini")
ragas_llm = LangchainLLMWrapper(llm)
ragas_embeddings = LangchainEmbeddingsWrapper(
    OpenAIEmbeddings(model="text-embedding-3-small")
)

  ragas_llm = LangchainLLMWrapper(llm)
  ragas_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings(model="text-embedding-3-small"))


In [79]:
async def ragas_faithfulness(run, example):
    """
    Score the pipeline output with the ragas Faithfulness metric.

    Args:
        run: dict with 'question', 'answer' and 'retrieved_context' entries.
        example: Not read by this scorer; kept so all scorers share the
            same (run, example) signature.

    Returns:
        The value awaited from `Faithfulness.single_turn_ascore`.
    """
    question, answer, contexts = run['question'], run['answer'], run['retrieved_context']
    turn = SingleTurnSample(user_input=question, response=answer, retrieved_contexts=contexts)
    return await Faithfulness(llm=ragas_llm).single_turn_ascore(turn)

In [80]:
# `example` is not read by ragas_faithfulness, so an empty placeholder is passed.
await ragas_faithfulness(result, "")

0.5

In [81]:
result["question"]

'Which earbud products are compatible with both iPhone and Android devices and offer waterproof protection?'

In [83]:
print(result["answer"])

The following earbud products are compatible with both iPhone and Android devices and offer waterproof protection:

1. TELSOR Wireless Earbuds (B0C6K1GQCF) - Compatible with most Bluetooth-enabled iPhones and Androids, featuring IPX7 waterproof rating.

2. Open Ear Headphones Bluetooth 5.3 Earbuds (B0CBMPG524) - Compatible with Bluetooth devices including iPhone and Android, with IPX7 waterproof rating.

3. Wireless Earbuds Bluetooth 5.3 Headphones (B0B9FTVL58) - These earbuds are Bluetooth 5.3 compatible and have an IPX7 waterproof rating.

All three provide waterproof protection and support Bluetooth connectivity with both iPhone and Android devices.


In [85]:
result["retrieved_context"]

["TELSOR Wireless Earbuds for iPhone, Bluetooth Headphones Touch Control Stereo Sound Bluetooth Earbuds with Noise Cancelling Mic for Calls, 30H Playtime, IPX7 Waterproof Earbuds for Android, Black ‚ô¨„ÄêBluetooth„ÄëPair instantly with an uninterrupted and stable transmission with Bluetooth 5.1. AVRCP, HCP, HSP, and A2DP profiles are supported. The wireless earbuds are compatible with most Bluetooth enabled iPhones, Andriods, smart TVs, computers, etc. Each wireless earbuds will pair with each other when they are removed from the charging case. From here, enable Bluetooth on your chosen device and pair with the headphones. ‚ô¨„ÄêClear Call & Sound quality„ÄëEach wireless earbuds features a 10mm diameter speaker and 2 microphones to reduce ambient noise and transmit your voice for a clear call in any environment. For the music enjoyers, these wireless earbuds offer a deep bass for an immersive musical experience. ‚ô¨„ÄêLong Battery Life & Quick Charge„ÄëGet up to 6 hours of playtime on 

In [86]:
async def ragas_response_relevancy(run, example):
    """
    Score the pipeline output with the ragas ResponseRelevancy metric.

    Args:
        run: dict with 'question', 'answer' and 'retrieved_context' entries.
        example: Not read by this scorer; kept so all scorers share the
            same (run, example) signature.

    Returns:
        The value awaited from `ResponseRelevancy.single_turn_ascore`.
    """
    sample_fields = {
        "user_input": run['question'],
        "response": run['answer'],
        "retrieved_contexts": run['retrieved_context'],
    }
    turn = SingleTurnSample(**sample_fields)
    relevancy_metric = ResponseRelevancy(llm=ragas_llm, embeddings=ragas_embeddings)
    return await relevancy_metric.single_turn_ascore(turn)

In [87]:
# `example` is not read by ragas_response_relevancy, so an empty placeholder is passed.
await ragas_response_relevancy(result, "")

np.float64(0.9999995796547293)

In [94]:
async def ragas_id_based_context_precision(run, example):
    """
    Score retrieval with the ragas IDBasedContextPrecision metric.

    Args:
        run: dict with a 'retrieved_context_ids' entry.
        example: dict with a 'reference_context_ids' entry.

    Returns:
        The value awaited from `IDBasedContextPrecision.single_turn_ascore`.
    """
    retrieved_ids = run['retrieved_context_ids']
    expected_ids = example['reference_context_ids']
    id_sample = SingleTurnSample(
        retrieved_context_ids=retrieved_ids,
        reference_context_ids=expected_ids,
    )
    precision_metric = IDBasedContextPrecision()
    return await precision_metric.single_turn_ascore(id_sample)


In [95]:
# Score the retrieved IDs in `result` against the reference IDs of the selected example.
await ragas_id_based_context_precision(result, reference_output)

0.6

In [96]:
result["retrieved_context_ids"]

['B0C6K1GQCF', 'B0CBMPG524', 'B0B9FTVL58', 'B0BS15TRJ3', 'B0BNVKS9WH']

In [97]:
reference_output["reference_context_ids"]

['B0C6K1GQCF', 'B0CBMPG524', 'B0B9FTVL58']

In [98]:
async def ragas_id_based_context_recall(run, example):
    """
    Score retrieval with the ragas IDBasedContextRecall metric.

    Args:
        run: dict with a 'retrieved_context_ids' entry.
        example: dict with a 'reference_context_ids' entry.

    Returns:
        The value awaited from `IDBasedContextRecall.single_turn_ascore`.
    """
    retrieved_ids = run['retrieved_context_ids']
    expected_ids = example['reference_context_ids']
    id_sample = SingleTurnSample(
        retrieved_context_ids=retrieved_ids,
        reference_context_ids=expected_ids,
    )
    recall_metric = IDBasedContextRecall()
    return await recall_metric.single_turn_ascore(id_sample)


In [100]:
# Score the retrieved IDs in `result` against the reference IDs of the selected example.
await ragas_id_based_context_recall(result, reference_output)

1.0