In [1]:
import os
import openai

from langsmith import Client
from qdrant_client import QdrantClient

from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings

from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper

  from .autonotebook import tqdm as notebook_tqdm


### Download an example reference data point from LangSmith

In [2]:
# Instantiate the LangSmith client (no explicit arguments) used by the dataset cells below.
client = Client()

In [3]:
# Look up the evaluation dataset in LangSmith by its name.
dataset = client.read_dataset(dataset_name="rag-evaluation-dataset")

In [4]:
# Display the dataset record returned by read_dataset.
dataset

Dataset(name='rag-evaluation-dataset', description='Dataset for evaluating RAG pipeline', data_type=<DataType.kv: 'kv'>, id=UUID('334feebb-56b4-4fea-ba89-5f0088a9779f'), created_at=datetime.datetime(2025, 10, 13, 17, 59, 54, 478077, tzinfo=datetime.timezone.utc), modified_at=datetime.datetime(2025, 10, 13, 17, 59, 54, 478077, tzinfo=datetime.timezone.utc), example_count=32, session_count=0, last_session_start_time=None, inputs_schema=None, outputs_schema=None, transformations=None, metadata={'runtime': {'sdk': 'langsmith-py', 'library': 'langsmith', 'runtime': 'python', 'platform': 'macOS-13.7.8-x86_64-i386-64bit', 'sdk_version': '0.4.34', 'runtime_version': '3.12.7', 'langchain_version': '0.3.27', 'py_implementation': 'CPython', 'langchain_core_version': '0.3.79'}})

In [5]:
# Inputs of the example at index 0 among at most 10 examples listed for the dataset.
list(client.list_examples(dataset_id=dataset.id, limit=10))[0].inputs

{'question': 'Do you have dedicated books or e-readers in stock?'}

In [6]:
# Outputs of the example at index 0 of a fresh listing (at most 10 examples).
list(client.list_examples(dataset_id=dataset.id, limit=10))[0].outputs

{'ground_truth': 'No books or e-readers are currently available in inventory.',
 'reference_context_ids': [],
 'reference_descriptions': []}

In [7]:
# Inputs of the example at index 3 among at most 10 examples listed for the dataset.
list(client.list_examples(dataset_id=dataset.id, limit=10))[3].inputs

{'question': 'Are there any gaming consoles or game controllers available?'}

In [8]:
# Outputs of the example at index 3 of a fresh listing (at most 10 examples).
list(client.list_examples(dataset_id=dataset.id, limit=10))[3].outputs

{'ground_truth': 'Currently, we do not have any consoles or game controllers in stock.',
 'reference_context_ids': [],
 'reference_descriptions': []}

In [9]:
# Fetch the listing once and read both fields from the SAME example object.
# Issuing two separate list_examples calls and indexing each by position costs
# an extra round trip and relies on both responses coming back in the same order.
examples = list(client.list_examples(dataset_id=dataset.id, limit=10))
reference_example = examples[3]

reference_input = reference_example.inputs
reference_output = reference_example.outputs

### RAG Pipeline

In [10]:
# Client for the Qdrant instance at http://localhost:6333; passed to the retrieval functions below.
qdrant_client = QdrantClient(url="http://localhost:6333")

In [11]:
def get_embedding(text, model="text-embedding-3-small"):
    """Request an embedding for ``text`` from the OpenAI embeddings endpoint.

    Args:
        text: Value forwarded as the ``input`` of the embeddings request.
        model: Name of the embedding model to use.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    embedding_response = openai.embeddings.create(input=text, model=model)
    first_item = embedding_response.data[0]
    return first_item.embedding


def retrieve_data(query, qdrant_client, k=5):
    """Embed ``query`` and fetch the ``k`` closest points from Qdrant.

    Args:
        query: Text to embed with ``get_embedding``.
        qdrant_client: Client exposing ``query_points``.
        k: Maximum number of points to request.

    Returns:
        A dict with three parallel lists, in result order:
        ``retrieved_context_ids`` (payload ``parent_asin``),
        ``retrieved_context`` (payload ``description``) and
        ``similarity_scores`` (point ``score``).
    """
    query_vector = get_embedding(query)

    search_result = qdrant_client.query_points(
        collection_name="Amazon-items-collection-00",
        query=query_vector,
        limit=k,
    )

    # One (id, description, score) triple per returned point.
    rows = [
        (point.payload["parent_asin"], point.payload["description"], point.score)
        for point in search_result.points
    ]

    return {
        "retrieved_context_ids": [row[0] for row in rows],
        "retrieved_context": [row[1] for row in rows],
        "similarity_scores": [row[2] for row in rows],
    }


def process_context(context):
    """Render retrieved items as a bulleted text block.

    Args:
        context: Mapping with ``retrieved_context_ids`` and
            ``retrieved_context`` sequences, paired positionally.

    Returns:
        A string with one ``- <id>: <chunk>`` line per pair, each terminated
        by a newline; an empty string when there are no pairs.
    """
    pairs = zip(context["retrieved_context_ids"], context["retrieved_context"])
    return "".join(f"- {item_id}: {chunk}\n" for item_id, chunk in pairs)


def build_prompt(preprocessed_context, question):
    """Assemble the shopping-assistant prompt sent to the chat model.

    Args:
        preprocessed_context: Text block listing the available products,
            inserted under the ``Context:`` heading.
        question: The user's question, inserted under the ``Question:`` heading.

    Returns:
        The full prompt string (starts and ends with a newline).
    """
    # The "Instructions:" heading was previously misspelled in the emitted prompt.
    prompt = f"""
You are a shopping assistant that can answer questions about the products in stock.

You will be given a question and a list of context.

Instructions:
- You need to answer the question based on the provided context only.
- Never use word context and refer to it as the available products.

Context:
{preprocessed_context}

Question:
{question}
"""

    return prompt

def generate_answer(prompt):
    """Send ``prompt`` as a single system message and return the model's reply.

    Args:
        prompt: Full prompt text; used as the content of the system message.

    Returns:
        The message content of the first choice in the chat completion.
    """
    system_message = {"role": "system", "content": prompt}

    completion = openai.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[system_message],
        temperature=0.5,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content


def rag_pipeline(question, qdrant_client, top_k=5):
    """Run retrieval, prompt construction and generation for one question.

    Args:
        question: The user's question.
        qdrant_client: Client handed to ``retrieve_data``.
        top_k: Number of items to retrieve.

    Returns:
        A dict with ``answer``, ``question``, ``retrieved_context_ids``,
        ``retrieved_context`` and ``similarity_scores``.
    """
    retrieval = retrieve_data(question, qdrant_client, top_k)
    context_text = process_context(retrieval)
    answer = generate_answer(build_prompt(context_text, question))

    final_result = {"answer": answer, "question": question}
    # Copy the retrieval details through under the same keys.
    for key in ("retrieved_context_ids", "retrieved_context", "similarity_scores"):
        final_result[key] = retrieval[key]

    return final_result

In [12]:
# Run the pipeline end to end on an ad-hoc question and display the returned dict.
rag_pipeline("Can I get some charger?", qdrant_client, top_k=5)

{'answer': 'Yes, there are several chargers available:\n\n1. iPhone Charger Cord Lightning Cables (3-pack, 3ft) - Apple MFi Certified, compatible with many iPhone and iPad models. (B0BYYLJRHT)\n\n2. 5 in 1 USB C to Multi Charging Cable (10ft) - Apple MFi Certified, includes Lightning, Type C, and Micro USB connectors for charging multiple devices simultaneously. (B0BFPZGYLD)\n\n3. MUXA 6 Pack iPhone Charger Colorful Nylon Lightning Cable - Apple MFi Certified, various lengths (3/3/6/6/10/10 ft), compatible with many iPhone and iPad models. (B09TNXY54Y)\n\n4. GREPHONE 2 Pack USB C to Lightning Cable (6ft) - Apple MFi Certified, supports fast charging for iPhone and iPad. (B0BV6PWVCG)\n\n5. Mixblu Charger Cable Replacement for Fitbit Inspire 3 (2 Pack, 3.3ft) - specifically for Fitbit Inspire 3. (B0BGDQLZD2)\n\nPlease let me know if you want more details or a specific type of charger.',
 'question': 'Can I get some charger?',
 'retrieved_context_ids': ['B0BYYLJRHT',
  'B0BFPZGYLD',
  'B0

### RAGAS metrics

In [22]:
# RAGAS sample schema plus the four metric classes used by the evaluator functions below.
# NOTE(review): the recorded output of this cell is an ImportError for
# IDBasedContextPrecision, so the installed ragas may not export the ID-based
# metrics — confirm the installed ragas version. When this import fails, the cell
# stops here and Faithfulness, ResponseRelevancy, ragas_llm and ragas_embeddings
# are never bound for later cells.
from ragas.dataset_schema import SingleTurnSample 
from ragas.metrics import IDBasedContextPrecision, IDBasedContextRecall, Faithfulness, ResponseRelevancy

# LangChain chat and embedding models wrapped for use by the RAGAS scorers.
ragas_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4.1-mini"))
ragas_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings(model="text-embedding-3-small"))

ImportError: cannot import name 'IDBasedContextPrecision' from 'ragas.metrics' (/Users/reedallred/code/ai_bootcamp/.venv/lib/python3.12/site-packages/ragas/metrics/__init__.py)

In [15]:
# Display the reference example's inputs.
reference_input

{'question': 'Are there any gaming consoles or game controllers available?'}

In [16]:
# Display the reference example's outputs.
reference_output

{'ground_truth': 'Currently, we do not have any consoles or game controllers in stock.',
 'reference_context_ids': [],
 'reference_descriptions': []}

In [17]:
# Run the pipeline on the reference question (default top_k) to get a result to score.
result = rag_pipeline(reference_input["question"], qdrant_client)

In [18]:
# Display the pipeline result dict.
result

{'answer': 'There are no gaming consoles or game controllers available in the listed products. However, there are gaming-related products such as the MSI MAG342CQM curved gaming monitor and the RUSAM GA33 A TWS game earbuds.',
 'question': 'Are there any gaming consoles or game controllers available?',
 'retrieved_context_ids': ['B09PYFMTBF',
  'B0BY2ZBRZD',
  'B0CF1WM24K',
  'B09WCFC5D9',
  'B09Y39DSWR'],
 'retrieved_context': ['Wireless Keyboard and Mouse Combo, 2.4G Retro Computer Keyboard Mouse with Round Keys, Slim Quiet Keyboard Mouse with 2 in 1 Nano USB Receiver for for Windows, Laptop, PC, Notebook-Black Grey TRUE WIRELESS & PLUG AND PLAY: This wireless keyboard mouse set can keep you from the mess of the various cables on the desktop. The keyboard needs 2 AAA batteries and the mouse needs 1 AA battery. Just plug the receiver into the USB port and it quickly establishes a solid (up to 10m) connection with your computer SILENT CLICK & RETRO ROUND KEYCAP: Silent clicking and typ

In [19]:
async def ragas_faithfulness(run, example):
    """Compute the RAGAS ``Faithfulness`` score for one pipeline result.

    Args:
        run: Mapping with ``question``, ``answer`` and ``retrieved_context`` keys.
        example: Accepted but not used by this metric.

    Returns:
        The awaited result of the scorer's ``single_turn_ascore``.
    """
    turn = SingleTurnSample(
        user_input=run["question"],
        response=run["answer"],
        retrieved_contexts=run["retrieved_context"],
    )
    metric = Faithfulness(llm=ragas_llm)

    score = await metric.single_turn_ascore(turn)
    return score

In [20]:
# Score the pipeline result; ragas_faithfulness does not read its second argument, so "" is a placeholder.
await ragas_faithfulness(result, "")

NameError: name 'Faithfulness' is not defined

In [None]:
async def ragas_responce_relevancy(run, example):
    """Compute the RAGAS ``ResponseRelevancy`` score for one pipeline result.

    Args:
        run: Mapping with ``question``, ``answer`` and ``retrieved_context`` keys.
        example: Accepted but not used by this metric.

    Returns:
        The awaited result of the scorer's ``single_turn_ascore``.
    """
    turn = SingleTurnSample(
        user_input=run["question"],
        response=run["answer"],
        retrieved_contexts=run["retrieved_context"],
    )
    metric = ResponseRelevancy(llm=ragas_llm, embeddings=ragas_embeddings)

    score = await metric.single_turn_ascore(turn)
    return score

In [None]:
# Score the pipeline result; ragas_responce_relevancy does not read its second argument, so "" is a placeholder.
await ragas_responce_relevancy(result, "")

In [None]:
async def ragas_context_precision_id_based(run, example):
    """Compute the RAGAS ``IDBasedContextPrecision`` score from context IDs.

    Args:
        run: Mapping with a ``retrieved_context_ids`` key.
        example: Mapping with a ``reference_context_ids`` key.

    Returns:
        The awaited result of the scorer's ``single_turn_ascore``.
    """
    turn = SingleTurnSample(
        retrieved_context_ids=run["retrieved_context_ids"],
        reference_context_ids=example["reference_context_ids"],
    )
    metric = IDBasedContextPrecision()

    score = await metric.single_turn_ascore(turn)
    return score

In [None]:
# Compare the retrieved IDs in the pipeline result against the reference example's IDs.
await ragas_context_precision_id_based(result, reference_output)

In [None]:
async def ragas_context_recall_id_based(run, example):
    """Compute the RAGAS ``IDBasedContextRecall`` score from context IDs.

    Args:
        run: Mapping with a ``retrieved_context_ids`` key.
        example: Mapping with a ``reference_context_ids`` key.

    Returns:
        The awaited result of the scorer's ``single_turn_ascore``.
    """
    turn = SingleTurnSample(
        retrieved_context_ids=run["retrieved_context_ids"],
        reference_context_ids=example["reference_context_ids"],
    )
    metric = IDBasedContextRecall()

    score = await metric.single_turn_ascore(turn)
    return score

In [None]:
# Compare the retrieved IDs in the pipeline result against the reference example's IDs.
await ragas_context_recall_id_based(result, reference_output)