### Test a RAG based system with LLM being an Evaluator 🧪

In [1]:
from dotenv import load_dotenv
from langchain_ollama import ChatOllama

load = load_dotenv('./../.env')


llm = ChatOllama(
    base_url="http://localhost:11434",
    model = "qwen2.5:latest",
    temperature=0.5,
    max_tokens = 250
)

llm2 = ChatOllama(
    base_url="http://localhost:11434",
    model = "llama3.1:70b",
    temperature=0.5,
    max_tokens = 250
)

#### Embedding

In [2]:
from langchain_ollama import OllamaEmbeddings
from langchain_chroma import Chroma


embeddings = OllamaEmbeddings(model="llama3.2:latest")

In [3]:
from langchain.docstore.document import Document

docs = [
    Document(page_content="Playwright is a modern automation library for end-to-end testing. It supports multiple browsers like Chromium, Firefox, and WebKit."),
    Document(page_content="Selenium is a widely used open-source framework for web automation, supporting multiple programming languages and browsers."),
    Document(page_content="Cypress is a JavaScript-based testing tool primarily used for front-end testing. It runs in the browser and provides fast feedback loops."),
    Document(page_content="Playwright allows network interception, headless execution, and tracing for debugging complex web applications."),
    Document(page_content="Selenium WebDriver enables automated browser testing using various bindings such as Python, Java, and C#."),
    Document(page_content="Cypress has built-in support for retries, time-travel debugging, and automatic waiting, making it easy to test dynamic web pages."),
    Document(page_content="This document talks about REST API testing tools, which are unrelated to Playwright, Selenium, or Cypress.")
]

questions = [
    "What is Playwright and what browsers does it support?",
    "What is Selenium and what programming languages does it support?",
    "What is Cypress, and how is it different from Selenium?",
    "How does Playwright handle network interception and debugging?",
    "What are the key features of Selenium WebDriver?",
    "What are the main advantages of Cypress for testing?",
    "What are REST API testing tools, and how are they different from browser automation tools?"
]

vector_store = Chroma.from_documents(docs, embeddings)



#### Retrieval QA and Retriever

In [4]:
from langchain.chains import RetrievalQA

retriever = vector_store.as_retriever(search_kwargs={"k": 3})

qa_chain = RetrievalQA.from_chain_type(llm=llm,retriever=retriever)

query = "What playwright does?"

response = qa_chain.run(query)

retrieved_docs = retriever.get_relevant_documents(query)

response, retrieved_docs


  response = qa_chain.run(query)
  retrieved_docs = retriever.get_relevant_documents(query)


('Playwright allows network interception, headless execution, and tracing for debugging complex web applications. It also supports multiple browsers like Chromium, Firefox, and WebKit, making it a modern automation library for end-to-end testing.',
 [Document(id='ab2e3850-6f6e-469a-9a57-844d191e7ea4', metadata={}, page_content='Playwright allows network interception, headless execution, and tracing for debugging complex web applications.'),
  Document(id='38076b0c-0873-441d-aeb1-7af3bda303e3', metadata={}, page_content='Selenium WebDriver enables automated browser testing using various bindings such as Python, Java, and C#.'),
  Document(id='360f8bde-a412-446d-85ea-bf4291753a4c', metadata={}, page_content='Playwright is a modern automation library for end-to-end testing. It supports multiple browsers like Chromium, Firefox, and WebKit.')])

#### Creating MultishotSample Dataset for RAGAs 📈

In [5]:
dataset = []

for question, doc in zip(questions, docs): 
    relevant_docs = [doc.page_content for doc in retriever.get_relevant_documents(question)]
    response = qa_chain.run(question)
    
    dataset.append({
        
        "user_input": question,
        "retrieved_contexts": relevant_docs,
        "response": response,
        "reference": doc.page_content
        
    })
    
dataset


[{'user_input': 'What is Playwright and what browsers does it support?',
  'retrieved_contexts': ['Playwright is a modern automation library for end-to-end testing. It supports multiple browsers like Chromium, Firefox, and WebKit.',
   'Playwright allows network interception, headless execution, and tracing for debugging complex web applications.',
   'Selenium WebDriver enables automated browser testing using various bindings such as Python, Java, and C#.'],
  'response': 'Playwright is a modern automation library for end-to-end testing. It supports multiple browsers like Chromium, Firefox, and WebKit.',
  'reference': 'Playwright is a modern automation library for end-to-end testing. It supports multiple browsers like Chromium, Firefox, and WebKit.'},
 {'user_input': 'What is Selenium and what programming languages does it support?',
  'retrieved_contexts': ['Selenium WebDriver enables automated browser testing using various bindings such as Python, Java, and C#.',
   'Selenium is a 

#### RAGAs Evaluation of RAG data

In [6]:
from ragas import EvaluationDataset
evaluation_dataset = EvaluationDataset.from_list(dataset)

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from langchain_openai import ChatOpenAI

from ragas.metrics import LLMContextRecall, Faithfulness, FactualCorrectness, AnswerRelevancy

evaluator_openai_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o"))

result = evaluate(dataset=evaluation_dataset, 
                  metrics=[LLMContextRecall(), 
                           Faithfulness(), 
                           FactualCorrectness(),
                           AnswerRelevancy()
                           ], 
                  llm=evaluator_openai_llm)

result


Evaluating: 100%|██████████| 28/28 [01:01<00:00,  2.21s/it]


{'context_recall': 0.8571, 'faithfulness': 0.5725, 'factual_correctness': 0.5729, 'answer_relevancy': 0.9794}

In [8]:
result.to_pandas()

Unnamed: 0,user_input,retrieved_contexts,response,reference,context_recall,faithfulness,factual_correctness,answer_relevancy
0,What is Playwright and what browsers does it s...,[Playwright is a modern automation library for...,Playwright is a modern automation library for ...,Playwright is a modern automation library for ...,1.0,1.0,1.0,1.0
1,What is Selenium and what programming language...,[Selenium WebDriver enables automated browser ...,Selenium is a widely used open-source framewor...,Selenium is a widely used open-source framewor...,1.0,1.0,0.6,1.0
2,"What is Cypress, and how is it different from ...",[Selenium WebDriver enables automated browser ...,Cypress is a JavaScript-based end-to-end testi...,Cypress is a JavaScript-based testing tool pri...,0.0,0.076923,0.35,0.980625
3,How does Playwright handle network interceptio...,"[Playwright allows network interception, headl...","Playwright allows network interception, which ...","Playwright allows network interception, headle...",1.0,0.2,0.43,0.99159
4,What are the key features of Selenium WebDriver?,[Selenium WebDriver enables automated browser ...,Selenium WebDriver enables automated browser t...,Selenium WebDriver enables automated browser t...,1.0,1.0,0.75,0.917806
5,What are the main advantages of Cypress for te...,[Cypress is a JavaScript-based testing tool pr...,The main advantages of Cypress for testing inc...,"Cypress has built-in support for retries, time...",1.0,0.5,0.88,1.0
6,"What are REST API testing tools, and how are t...",[This document talks about REST API testing to...,The document does not provide specific informa...,This document talks about REST API testing too...,1.0,0.230769,0.0,0.965968
