# Evaluate a LangChain RAG application with RAGAS

## Install dependencies

In [1]:
! pip install -qU openai langchain transformers tiktoken  sentence-transformers qdrant-client
! pip install -qU ragas==0.0.22
! pip install faiss-cpu

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.4/227.4 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m807.5/807.5 kB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m156.5/156.5 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m214.4/214.4 kB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m19.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m256.9/256.9 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━

## Import libraries

In [174]:
from qdrant_client import models, QdrantClient
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.vectorstores.qdrant import Qdrant
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain import PromptTemplate
#
from tqdm.auto import tqdm
from uuid import uuid4
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util
import numpy as np
import pandas as pd
from time import time,sleep
import openai
import tiktoken
#
import os
import json
#
import io
from langchain.vectorstores.faiss import FAISS
from pathlib import Path
from langchain.document_loaders import TextLoader
from langchain.text_splitter import MarkdownTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.chains.llm import LLMChain
from datasets import Dataset
from ragas.langchain.evalchain import RagasEvaluatorChain
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_relevancy,
    context_recall,
    context_precision
)
from ragas import evaluate

In [4]:
import os

# Do not paste a real key into the notebook. Export OPENAI_API_KEY before
# starting Jupyter instead. `setdefault` keeps a key that is already present in
# the environment; a plain assignment would replace it with the empty
# placeholder below.
os.environ.setdefault("OPENAI_API_KEY", "")

## Set eval dataset

In [175]:
# Hand-written evaluation set for the QA system: every question is paired, by
# position, with its list of reference answers.
questions = [
    "What is SageMaker?",
    "What are all AWS regions where AWS SageMaker is available?",
    "How to check if an endpoint is KMS encrypted?",
    "What are SageMaker Geospatial capabilities?",
]
ground_truths = [
    ["SageMaker is a fully managed machine learning service provided by Amazon. It allows data scientists and developers to build, train, and deploy machine learning models in a production-ready environment. It also provides features like Jupyter notebook integration, Git repository association, and integration with tools such as Secrets Manager. If you need more specific information, please let me know."],
    ["AWS SageMaker is available in the following AWS Regions:- US East (Ohio)- US East (N. Virginia)- US West (N. California)- US West (Oregon)- Africa (Cape Town)- Asia Pacific (Hong Kong)- Asia Pacific (Mumbai)- Asia Pacific (Osaka)- Asia Pacific (Seoul)- Asia Pacific (Singapore)- Asia Pacific (Sydney)- Asia Pacific (Jakarta)- Asia Pacific (Tokyo)- Canada (Central)- China (Beijing)- China (Ningxia)- Europe (Frankfurt)- Europe (Ireland)- Europe (London)- Europe (Paris)- Europe (Stockholm)- Europe (Milan)- Middle East (Bahrain)- South America (São Paulo)- AWS GovCloud (US-West)"],
    ["To check if an endpoint is KMS encrypted, you can use the SAGEMAKER_ENDPOINT_CONFIGURATION_KMS_KEY_CONFIGURED rule. This rule checks whether the KmsKeyId is specified for the Amazon SageMaker endpoint configuration. If the KmsKeyId is not specified, the rule will be considered NON_COMPLIANT. You can use this rule periodically in all supported AWS regions, except China, Asia Pacific (Jakarta), Africa (Cape Town), Middle East (UAE), Asia Pacific (Hyderabad), Asia Pacific (Osaka), Asia Pacific (Melbourne), Europe (Milan), AWS GovCloud (US-East), Europe (Spain), China (Ningxia), and Europe (Zurich) Regions."],
    ["SageMaker Geospatial capabilities refer to the operations and functionalities provided by Amazon SageMaker for performing geospatial tasks. These capabilities allow users to perform operations on geospatial data and leverage machine learning algorithms for geospatial analysis"],
]

# Fail fast if the two lists drift apart while editing; otherwise a surplus
# ground truth would be dropped without any warning.
assert len(questions) == len(ground_truths), (
    f"{len(questions)} questions but {len(ground_truths)} ground truths"
)

# One record per question, in the shape the chains below consume:
# "query" holds the question text, "ground_truths" its list of reference answers.
examples = [
    {"query": question, "ground_truths": truths}
    for question, truths in zip(questions, ground_truths)
]


## Create evaluation functions

In [169]:
def create_vectorstore(chunk_size, chunk_overlap, markdown_dir='sagemaker_documentation'):
    """Build a FAISS vector store from the Markdown files in ``markdown_dir``.

    Every ``*.md`` file directly inside the folder is loaded with ``TextLoader``,
    split with ``MarkdownTextSplitter`` and embedded with ``OpenAIEmbeddings``.

    Args:
        chunk_size: ``chunk_size`` handed to ``MarkdownTextSplitter``.
        chunk_overlap: ``chunk_overlap`` handed to ``MarkdownTextSplitter``.
        markdown_dir: Folder searched (non-recursively) for ``.md`` files.
            Defaults to the folder this notebook has always used.

    Returns:
        The vector store returned by ``FAISS.from_documents``.

    Raises:
        FileNotFoundError: If no ``.md`` file is found in ``markdown_dir``.
    """
    # Sort the paths so the documents are loaded in the same order on every
    # run; the order produced by glob() depends on the file system.
    markdown_files = sorted(Path(markdown_dir).glob('*.md'))
    if not markdown_files:
        # glob() on a missing or empty folder yields nothing; report that
        # directly instead of failing later while building the index.
        raise FileNotFoundError(f"No .md files found in '{markdown_dir}'")

    documents = []
    for file in markdown_files:
        documents += TextLoader(str(file)).load()

    text_splitter = MarkdownTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    chunks = text_splitter.split_documents(documents)

    return FAISS.from_documents(chunks, OpenAIEmbeddings())

def create_chain(temperature, prompt):
    """Create the LLMChain that answers a question from a prompt.

    Args:
        temperature: ``temperature`` handed to ``ChatOpenAI``.
        prompt: Prompt object handed to ``LLMChain``.

    Returns:
        A non-verbose ``LLMChain`` backed by ``gpt-3.5-turbo-0613`` that
        stores its output under the ``"answer"`` key.
    """
    chat_model = ChatOpenAI(
        temperature=temperature,
        request_timeout=45,
        model_name="gpt-3.5-turbo-0613",
    )
    return LLMChain(
        prompt=prompt,
        llm=chat_model,
        verbose=False,
        output_key="answer",
    )

def format_docs(docs):
    """Render documents as one context string.

    Each document becomes its ``page_content`` wrapped in single quotes. When
    ``metadata`` has a truthy ``'source'`` entry, a second line labelled
    ``fuente`` (Spanish for "source") is appended. Documents are separated by
    a blank line.

    Args:
        docs: Iterable of objects exposing ``page_content`` and a dict-like
            ``metadata``.

    Returns:
        The joined string; empty when ``docs`` is empty.
    """
    def _render(doc):
        quoted = f"'{doc.page_content}'"
        source = doc.metadata.get('source', None)
        # A missing or empty source adds nothing to the entry.
        return f"{quoted}\n fuente: '{source}'" if source else quoted

    return '\n\n'.join(_render(doc) for doc in docs)

def run_predictions(examples, doc_store, question_chain, k=4):
    """Answer every example with ``question_chain`` using retrieved context.

    Args:
        examples: Iterable of dicts with a ``'query'`` and a
            ``'ground_truths'`` entry.
        doc_store: Object whose ``similarity_search(query, k=...)`` returns the
            documents to use as context.
        question_chain: Callable that takes a dict with ``'context'``,
            ``'new_question'`` and ``'ground_truths'`` and returns a mutable
            mapping.
        k: Number of documents requested from ``doc_store`` per question.
            Defaults to 4, the value that used to be hard-coded.

    Returns:
        A list with one chain result per example. In each result the
        ``'context'`` entry holds the retrieved documents rather than the
        formatted string that was sent to the chain.
    """
    predictions = []
    for example in examples:
        docs = doc_store.similarity_search(example['query'], k=k)

        chain_inputs = {
            "context": format_docs(docs),
            "new_question": example['query'],
            # Not used to build the answer; passed along so that it travels
            # with the result (create_dataset reads it back from there).
            # Presumably the chain echoes its inputs in its output - verify.
            "ground_truths": example['ground_truths'],
        }
        result = question_chain(chain_inputs)
        # Swap the formatted text for the raw documents so later steps can
        # read each document's page_content.
        result['context'] = docs
        predictions.append(result)

    return predictions

def create_dataset(predictions):
    """Turn chain results into a ``Dataset`` with the evaluation columns.

    Args:
        predictions: Sequence of mappings with ``'new_question'``,
            ``'answer'``, ``'context'`` (an iterable of objects exposing
            ``page_content``) and ``'ground_truths'``.

    Returns:
        ``Dataset.from_dict`` applied to the columns ``question``, ``answer``,
        ``contexts`` (one list of page texts per prediction) and
        ``ground_truths``.
    """
    # Only the text of each retrieved document is kept.
    contexts = [
        [page.page_content for page in prediction['context']]
        for prediction in predictions
    ]

    return Dataset.from_dict({
        'question': [prediction['new_question'] for prediction in predictions],
        'answer': [prediction['answer'] for prediction in predictions],
        'contexts': contexts,
        'ground_truths': [prediction['ground_truths'] for prediction in predictions],
    })

def evaluate_dataset(dataset, name):
    """Score ``dataset`` with ragas and label the rows with ``name``.

    The metrics evaluated are ``context_precision``, ``context_recall``,
    ``faithfulness`` and ``answer_relevancy``.

    Args:
        dataset: Dataset handed to ``evaluate``.
        name: Value written to the ``'chain'`` column of every row, so results
            of several chains can be told apart after concatenation.

    Returns:
        The DataFrame from ``score.to_pandas()`` with an added ``'chain'``
        column.
    """
    score = evaluate(dataset, metrics=[context_precision, context_recall, faithfulness, answer_relevancy])
    # Convert once; the result of the earlier duplicate call was discarded.
    df = score.to_pandas()
    df['chain'] = name
    return df

## RetrievalQA

In [5]:
# Build the in-memory Qdrant store that backs the RetrievalQA baseline.

# Folder that is searched for .md files
markdown_path = Path('sagemaker_documentation')

data = []
for file in markdown_path.glob('*.md'):
    loader = TextLoader(str(file))
    data += loader.load()

text_splitter = MarkdownTextSplitter(
    chunk_size=3000,
    chunk_overlap=200,
)
data = text_splitter.split_documents(data)

texts = [doc.page_content for doc in data]
metadatas = [doc.metadata for doc in data]

embeddings = OpenAIEmbeddings()

# The keyword is `collection_name`; the previous `collection=` is not a
# from_texts parameter, so the intended name was never applied.
doc_store = Qdrant.from_texts(texts,
                              metadatas=metadatas,
                              embedding=embeddings,
                              location=":memory:",
                              prefer_grpc=True,
                              collection_name="amazon_qa_search")

  warn_deprecated(


In [64]:
# Prompt for the RetrievalQA baseline. It has two placeholders: {context} and
# {question}; the question is wrapped in three backticks, as the first
# sentence of the prompt announces.
prompt_template = """Use the following pieces of context to answer the question enclosed within  3 backticks at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
Please provide an answer which is factually correct and based on the information retrieved from the vector store.
Please also mention any quotes supporting the answer if any present in the context supplied within two double quotes "" .

{context}

QUESTION:```{question}```
ANSWER:
"""
PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context","question"]
  )
# Keyword arguments for the chain that consumes PROMPT.
chain_type_kwargs = {"prompt": PROMPT}

In [65]:
# Baseline chain: a "stuff" RetrievalQA over doc_store that also returns the
# source documents, which the evaluation needs as contexts.
# NOTE(review): this retriever asks for 5 documents while run_predictions asks
# for 4, so the baseline and the custom chains see different amounts of context.
qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(
        model_name='gpt-3.5-turbo-16k',
        openai_api_key=os.environ["OPENAI_API_KEY"],
        temperature=0,
    ),
    chain_type="stuff",
    chain_type_kwargs={"prompt": PROMPT},
    retriever=doc_store.as_retriever(search_kwargs={"k": 5}),
    return_source_documents=True,
)

In [176]:
# Send every evaluation example through the RetrievalQA chain with a single
# batch() call. Each result is read below for its 'query', 'result',
# 'source_documents' and 'ground_truths' entries.
predictions = qa.batch(examples)

In [50]:
# Collect the RetrievalQA results column by column for Dataset.from_dict.
data_samples = {
    'question': [prediction['query'] for prediction in predictions],
    'answer': [prediction['result'] for prediction in predictions],
    'contexts': [prediction['source_documents'] for prediction in predictions],
    'ground_truths': [prediction['ground_truths'] for prediction in predictions],
}

# Keep only the text of every retrieved document.
all_context = [
    [page.page_content for page in source_documents]
    for source_documents in data_samples['contexts']
]

data_samples['contexts'] = all_context
dataset = Dataset.from_dict(data_samples)

In [54]:
# Score the RetrievalQA baseline; 'QAchain' is the label stored in the 'chain' column.
df = evaluate_dataset(dataset, 'QAchain')

evaluating with [context_precision]


100%|██████████| 1/1 [00:03<00:00,  3.07s/it]


evaluating with [context_recall]


100%|██████████| 1/1 [00:08<00:00,  8.43s/it]


evaluating with [faithfulness]


100%|██████████| 1/1 [00:24<00:00, 24.28s/it]


evaluating with [answer_relevancy]


100%|██████████| 1/1 [00:03<00:00,  3.93s/it]


Unnamed: 0,question,answer,contexts,ground_truths,context_precision,context_recall,faithfulness,answer_relevancy
0,What is SageMaker?,SageMaker is a fully managed service provided ...,"[# Working with Amazon SageMaker<a name=""examp...",[SageMaker is a fully managed machine learning...,1.0,0.75,1.0,1.0
1,What are all AWS regions where AWS SageMaker i...,The AWS regions where AWS SageMaker is availab...,"[# Working with Amazon SageMaker<a name=""examp...",[AWS SageMaker is available in the following A...,0.5,1.0,0.96,0.976951
2,How to check if an endpoint is KMS encrypted?,"To check if an endpoint is KMS encrypted, you ...","[`KmsKeyId` <a name=""cfn-sagemaker-endpointco...","[To check if an endpoint is KMS encrypted, you...",0.0,1.0,1.0,0.923478
3,What are SageMaker Geospatial capabilities?,SageMaker Geospatial capabilities are operatio...,[# SageMaker geospatial capabilities roles<a n...,[SageMaker Geospatial capabilities refer to th...,0.0,1.0,1.0,0.962918


## Customize chain 1

In [164]:
# Vector store for custom chain 1: 3000-character chunks, 200 overlap.
doc_store = create_vectorstore(chunk_size=3000, chunk_overlap=200)

In [165]:
# Prompt for custom chain 1.
# NOTE: a trailing backslash joins a line with the next one without inserting
# anything, so the second and third lines read "...when necessary,use the...".
prompt_template = """You are a virtual assistant for LOKA company, \
always respond with kindness and say hello when necessary,\
use the following "context" to answer the question. If it's not \
mentioned in the "context", politely respond that you don't know.

context:
"{context}"

question: "{new_question}"
answer:"""

QA_PROMPT = PromptTemplate(
    input_variables=["context", "new_question"],
    template=prompt_template,
)

# Answer the evaluation questions with the current doc_store, then score them.
# NOTE(review): temperature=1 presumably lets answers, and therefore scores,
# vary between runs - confirm this is intended for an evaluation.
question_chain = create_chain(prompt=QA_PROMPT, temperature=1)
predictions = run_predictions(examples, doc_store, question_chain)
dataset = create_dataset(predictions)
df_c1 = evaluate_dataset(dataset, 'Custom Chain1')

## Customize chain 2

In [127]:
# Vector store for custom chain 2: 1000-character chunks, 200 overlap.
doc_store = create_vectorstore(chunk_size=1000, chunk_overlap=200)

In [155]:
# Prompt for custom chain 2.
# NOTE: a trailing backslash joins a line with the next one without inserting
# anything, so the second and third lines read "...when necessary,use the...".
prompt_template = """You are a virtual assistant for LOKA company, \
always respond with kindness and say hello when necessary,\
use the following "context" to answer the question. If it's not \
mentioned in the "context", politely respond that you don't know.

context:
"{context}"

question: "{new_question}"
answer:"""

QA_PROMPT = PromptTemplate(
    input_variables=["context", "new_question"],
    template=prompt_template,
)

# Answer the evaluation questions with the current doc_store, then score them.
# NOTE(review): temperature=1 presumably lets answers, and therefore scores,
# vary between runs - confirm this is intended for an evaluation.
question_chain = create_chain(prompt=QA_PROMPT, temperature=1)
predictions = run_predictions(examples, doc_store, question_chain)
dataset = create_dataset(predictions)
df_c2 = evaluate_dataset(dataset, 'Custom Chain2')

## Customize chain 3

In [139]:
# Vector store for custom chain 3: 2000-character chunks, 200 overlap.
doc_store = create_vectorstore(chunk_size=2000, chunk_overlap=200)

In [156]:
# Prompt for custom chain 3.
# NOTE: a trailing backslash joins a line with the next one without inserting
# anything, so the second and third lines read "...when necessary,use the...".
prompt_template = """You are a virtual assistant for LOKA company, \
always respond with kindness and say hello when necessary,\
use the following "context" to answer the question. If it's not \
mentioned in the "context", politely respond that you don't know.

context:
"{context}"

question: "{new_question}"
answer:"""

QA_PROMPT = PromptTemplate(
    input_variables=["context", "new_question"],
    template=prompt_template,
)

# Answer the evaluation questions with the current doc_store, then score them.
# NOTE(review): temperature=1 presumably lets answers, and therefore scores,
# vary between runs - confirm this is intended for an evaluation.
question_chain = create_chain(prompt=QA_PROMPT, temperature=1)
predictions = run_predictions(examples, doc_store, question_chain)
dataset = create_dataset(predictions)
df_c3 = evaluate_dataset(dataset, 'Custom Chain3')

## Evaluate all proposals

In [172]:
# Stack the per-chain results into one frame. Each input frame is indexed from
# 0, so ignore_index=True is needed to give every row a unique label; the
# 'chain' column still identifies where a row came from.
df_all = pd.concat([df, df_c1, df_c2, df_c3], ignore_index=True)
df_all.head()

Unnamed: 0,question,answer,contexts,ground_truths,context_precision,context_recall,faithfulness,answer_relevancy,chain
0,What is SageMaker?,SageMaker is a fully managed service provided ...,"[# Working with Amazon SageMaker<a name=""examp...",[SageMaker is a fully managed machine learning...,1.0,0.75,1.0,1.0,QAchain
1,What are all AWS regions where AWS SageMaker i...,The AWS regions where AWS SageMaker is availab...,"[# Working with Amazon SageMaker<a name=""examp...",[AWS SageMaker is available in the following A...,0.5,1.0,0.96,0.976951,QAchain
2,How to check if an endpoint is KMS encrypted?,"To check if an endpoint is KMS encrypted, you ...","[`KmsKeyId` <a name=""cfn-sagemaker-endpointco...","[To check if an endpoint is KMS encrypted, you...",0.0,1.0,1.0,0.923478,QAchain
3,What are SageMaker Geospatial capabilities?,SageMaker Geospatial capabilities are operatio...,[# SageMaker geospatial capabilities roles<a n...,[SageMaker Geospatial capabilities refer to th...,0.0,1.0,1.0,0.962918,QAchain
0,What is SageMaker?,Hello! SageMaker is a fully managed service pr...,"[# Working with Amazon SageMaker<a name=""examp...",[SageMaker is a fully managed machine learning...,1.0,1.0,1.0,1.0,Custom Chain1


In [177]:
# Average every metric per chain. numeric_only=True restricts the mean to the
# numeric columns; df_all also holds text/list columns (question, answer,
# contexts, ground_truths) that cannot be averaged.
df_summary = df_all.groupby('chain').mean(numeric_only=True).reset_index()
df_summary

  df_summary = df_all.groupby('chain').mean().reset_index()


Unnamed: 0,chain,context_precision,context_recall,faithfulness,answer_relevancy
0,Custom Chain1,0.375,1.0,0.72,0.98344
1,Custom Chain2,0.541667,0.6875,1.0,0.992204
2,Custom Chain3,0.458333,0.9375,0.99,0.950675
3,QAchain,0.375,0.9375,0.99,0.965837
