# Evaluating a RAG Application with RAGAs

In [1]:
pip install -r requirements.txt

Defaulting to user installation because normal site-packages is not writeable
[31mERROR: Invalid requirement: '"unstructured[pdf]"' (from line 7 of requirements.txt)[0m[31m
[0mNote: you may need to restart the kernel to use updated packages.


In [2]:
import os
import openai
import ragas 
from dotenv import load_dotenv
import langchain
from langchain.document_loaders import DirectoryLoader

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")

In [4]:
# Load documents
from langchain.document_loaders import DirectoryLoader
loader = DirectoryLoader("docs/")
documents = loader.load()

## Splitter

In [5]:
from langchain.text_splitter import CharacterTextSplitter

# Splitter of documents:
# chunksize: Size of each fragment
# overlap: Overlap between fragments
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = text_splitter.split_documents(documents)
print("Número de chunks generados = ",len(chunks))

Created a chunk of size 717, which is longer than the specified 500
Created a chunk of size 571, which is longer than the specified 500
Created a chunk of size 583, which is longer than the specified 500
Created a chunk of size 670, which is longer than the specified 500
Created a chunk of size 532, which is longer than the specified 500
Created a chunk of size 2977, which is longer than the specified 500
Created a chunk of size 545, which is longer than the specified 500
Created a chunk of size 1246, which is longer than the specified 500
Created a chunk of size 1212, which is longer than the specified 500
Created a chunk of size 596, which is longer than the specified 500
Created a chunk of size 932, which is longer than the specified 500
Created a chunk of size 968, which is longer than the specified 500
Created a chunk of size 2710, which is longer than the specified 500
Created a chunk of size 2415, which is longer than the specified 500
Created a chunk of size 2728, which is long

Número de chunks generados =  4571


## Embeddings

In [6]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Weaviate
import weaviate
from weaviate.embedded import EmbeddedOptions

# Instancia para interactuar con la BBDD
client = weaviate.Client(
  embedded_options = EmbeddedOptions()
)

# Generar embeddings utilizando OpenAI y rellenar la BBBDD vectorial con esos embeddings
vectorstore = Weaviate.from_documents(
    client = client,
    documents = chunks,
    embedding = OpenAIEmbeddings(),
    by_text = False
)


            Consider upgrading to the new and improved v4 client instead!
            See here for usage: https://weaviate.io/developers/weaviate/client-libraries/python
            
{"action":"startup","default_vectorizer_module":"none","level":"info","msg":"the default vectorizer modules is set to \"none\", as a result all new schema classes without an explicit vectorizer setting, will use this vectorizer","time":"2024-02-15T10:04:27+01:00"}
{"action":"startup","auto_schema_enabled":true,"level":"info","msg":"auto schema enabled setting is set to \"true\"","time":"2024-02-15T10:04:27+01:00"}
{"level":"info","msg":"No resource limits set, weaviate will use all available memory and CPU. To limit resources, set LIMIT_RESOURCES=true","time":"2024-02-15T10:04:27+01:00"}
{"action":"grpc_startup","level":"info","msg":"grpc server listening at [::]:50060","time":"2024-02-15T10:04:27+01:00"}
{"action":"restapi_management","level":"info","msg":"Serving weaviate at http://127.0.0.1:8079","time"

Started /home/raquelcolorado/.cache/weaviate-embedded: process ID 328884


{"level":"info","msg":"Created shard langchain_1dc0699907044ef5bb060c123d41ef47_QAwBY16onmNP in 496.191µs","time":"2024-02-15T10:04:28+01:00"}
{"action":"hnsw_vector_cache_prefill","count":1000,"index_id":"main","level":"info","limit":1000000000000,"msg":"prefilled vector cache","time":"2024-02-15T10:04:28+01:00","took":45431}
{"level":"info","msg":"Completed loading shard langchain_6196a1f350e0474f9e1e6dd9608dd2da_nIml9bfknTmH in 6.341565ms","time":"2024-02-15T10:04:28+01:00"}
{"action":"hnsw_vector_cache_prefill","count":7000,"index_id":"main","level":"info","limit":1000000000000,"msg":"prefilled vector cache","time":"2024-02-15T10:04:28+01:00","took":57195733}
/home/raquelcolorado/.local/lib/python3.10/site-packages/pydantic/main.py:1024: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.6/migration/


## Retriever

In [7]:
# Retriever for semantic search in vectorial bbdd
# Parameters
#  - search_type (Optional[str]) – Defines the type of search that the Retriever should perform. Can be “similarity” (default), “mmr”, or “similarity_score_threshold”.
#  - search_kwargs (Optional[Dict]) – Keyword arguments to pass to the search function. Can include things like:
#      - k: Amount of documents to return (Default: 4) score_threshold: Minimum relevance threshold for similarity_score_threshold
#      - fetch_k: Amount of documents to pass to MMR algorithm (Default: 20) lambda_mult: Diversity of results returned by MMR; 1 for minimum diversity and 0 for maximum. (Default: 0.5)
#      - filter: Filter by document metadata

retriever = vectorstore.as_retriever()


## Prompt template

In [8]:
# Definir prompt template

from langchain.prompts import ChatPromptTemplate

template = """
Role and Goal: Acts as a financial analyst, synthesizing information from various reports and documents related to a company. Provides clear and concise responses to questions about the company's operations, number of stores or locations, divisions, and a summary of each division including its contribution to total sales.
Constraints: Should not make predictions or give financial advice. Focuses on analyzing and summarizing existing data about the company's structure and financial performance.
Guidelines: Responses should be factual, data-driven, and based on the provided reports and documents. Should provide details about the company's operations, including locations, divisions, and their financial significance.
Clarification: Offers detailed guidance on what additional data might be helpful when information is unclear or incomplete, suggesting specific reports or data types.
Sources: Provides clear citations for the source of its information, following the format 'Source: [Document Name]'.
Personalization: Maintains a consistent formal tone throughout the interaction, ensuring professionalism and adherence to the style of financial reporting. If you don't know the answer, just say: "I don't know the answer".
The format of the conversation is:
  Question: {question}
  Context: {context}
  Answer:
"""

prompt = ChatPromptTemplate.from_template(template)

In [9]:
from langchain.chat_models import ChatOpenAI

# Define LLM a utilizar
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

In [10]:
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser

# Setup RAG pipeline
# RunnablePassthrough() pasa la pregunta sin modificar.
rag_chain = (
    {"context": retriever,  "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [11]:
# Pairs of questions and ground_truths from which inference will be made to obtain context and final response
questions = ["What currency are the company’s figures in?",
             "When does the fiscal year ends?",
             "What does the company do?",
             "How many divisions does the company have, small summary of each one and weight of each division as a percentage of total sales?",
            ]
ground_truths = [["The figures provided in the company's reports are in US dollars"],
                ["The fiscal year for the company ends on the Saturday closest to January 31st each year, which may occur on a date following January 31st"],
                ["Academy Sports + Outdoors is a company that operates as a premier sports and outdoors retailer. They focus on providing gear and support for people to be active, stay healthy, and enjoy time with family and friends engaging in activities they love.\
                  Here are some key numbers and percentages from the company's financial highlights in 2022:\
                Net Sales: $6.4 billion\
                Comparable Sales: -6.4%\
                E-commerce Sales: 10.7% of total merchandise sales\
                Gross Margin: 34.6% of sales, generating $2.2 billion\
                E-commerce sales fulfilled through stores: over 75%\
                Pro Forma Adjusted Earnings Per Share - Diluted: $7.70"],
                ["Academy Sports + Outdoors operates with a product assortment strategy that focuses on key categories rather than distinct business divisions. The company's product mix is broadly categorized into outdoor, apparel, sports & recreation, and footwear. Each category contributes significantly to the company's net sales, showcasing the diverse range of products that cater to the varied interests and needs of its customers.\
                1. Outdoor (31% of 2022 Net Sales):\
                The outdoor category encompasses a wide range of products designed for outdoor activities and adventures. This includes camping gear, fishing equipment, hunting accessories, and more, catering to enthusiasts who enjoy spending time in nature and partaking in outdoor recreational activities.\
                2. Apparel (28% of 2022 Net Sales):\
                The apparel category offers a variety of clothing options suitable for sports, outdoor activities, and casual wear. This includes performance wear, sports team apparel, and everyday clothing items, ensuring customers can find the right attire for their active lifestyle or leisure needs.\
                3. Sports & Recreation (21% of 2022 Net Sales):\
                This category covers a broad spectrum of sports equipment, fitness gear, and recreational items. From team sports essentials to individual fitness equipment, this category is geared towards supporting a wide array of sports and recreational activities, promoting a healthy and active lifestyle.\
                4. Footwear (20% of 2022 Net Sales):\
                The footwear category includes a diverse selection of shoes for various purposes, including sports-specific shoes, outdoor boots, casual sneakers, and more. This range ensures that customers have the appropriate footwear for their sporting activities, outdoor adventures, or everyday use.\
                These categories collectively form the core of Academy Sports + Outdoors' product strategy, each contributing a substantial portion to the company's net sales. The balanced distribution of sales across these categories highlights the company's commitment to providing a comprehensive and diversified product assortment, catering to the broad preferences and needs of its customers."]
                ]

In [12]:
from datasets import Dataset

answers = []
contexts = []

# Inference
for query in questions:
  answers.append(rag_chain.invoke(query))
  contexts.append([docs.page_content for docs in retriever.get_relevant_documents(query)])

data = {
    "question": questions,
    "answer": answers,
    "contexts": contexts,
    "ground_truths": ground_truths
}

dataset = Dataset.from_dict(data)

/home/raquelcolorado/.local/lib/python3.10/site-packages/pydantic/main.py:1024: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.6/migration/
/home/raquelcolorado/.local/lib/python3.10/site-packages/pydantic/main.py:1024: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.6/migration/
/home/raquelcolorado/.local/lib/python3.10/site-packages/pydantic/main.py:1024: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.6/migration/
/home/raquelcolorado/.local/lib/python3.10/site-packages/pydantic/main.py:1024: PydanticDeprecatedSince20: The `dic

In [13]:
import pandas as pd
df = pd.DataFrame(dataset)
df

Unnamed: 0,question,answer,contexts,ground_truths
0,What currency are the company’s figures in?,The company's figures are in US dollars (USD)....,[104\n\n16. Selected Quarterly Financial Data ...,[The figures provided in the company's reports...
1,When does the fiscal year ends?,The fiscal year for the company ends on the Sa...,[Fiscal Year\n\nEnded\n\nWeeks\n\n2021\n\nJanu...,[The fiscal year for the company ends on the S...
2,What does the company do?,The company engages in sponsorship agreements ...,"[Marketing, We periodically enter into sponsor...",[Academy Sports + Outdoors is a company that o...
3,"How many divisions does the company have, smal...","I apologize, but I am unable to provide a resp...","[5,659,892\n\n34,764\n\n29,341\n\n4,829,897 (1...",[Academy Sports + Outdoors operates with a pro...


In [14]:
from ragas import evaluate

from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_recall,
    context_precision
)

result = evaluate(
    dataset = dataset,
    metrics=[
        context_precision,
        context_recall,
        faithfulness,
        answer_relevancy
    ],
)


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/16 [00:00<?, ?it/s]/home/raquelcolorado/.local/lib/python3.10/site-packages/pydantic/main.py:1024: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.6/migration/
/home/raquelcolorado/.local/lib/python3.10/site-packages/pydantic/main.py:1024: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.6/migration/
/home/raquelcolorado/.local/lib/python3.10/site-packages/pydantic/main.py:1024: PydanticDeprecatedSince20: The `dict` method is deprec

In [15]:
# Transform result into df
import pandas as pd

df = result.to_pandas()
df.drop("ground_truth", axis=1, inplace=True)

df

Unnamed: 0,question,answer,contexts,ground_truths,context_precision,context_recall,faithfulness,answer_relevancy
0,What currency are the company’s figures in?,The company's figures are in US dollars (USD)....,[104\n\n16. Selected Quarterly Financial Data ...,[The figures provided in the company's reports...,1.0,0.0,1.0,0.975004
1,When does the fiscal year ends?,The fiscal year for the company ends on the Sa...,[Fiscal Year\n\nEnded\n\nWeeks\n\n2021\n\nJanu...,[The fiscal year for the company ends on the S...,1.0,1.0,1.0,0.971705
2,What does the company do?,The company engages in sponsorship agreements ...,"[Marketing, We periodically enter into sponsor...",[Academy Sports + Outdoors is a company that o...,0.833333,0.666667,1.0,0.823788
3,"How many divisions does the company have, smal...","I apologize, but I am unable to provide a resp...","[5,659,892\n\n34,764\n\n29,341\n\n4,829,897 (1...",[Academy Sports + Outdoors operates with a pro...,1.0,1.0,,0.846143


In [16]:
# Exportar DataFrame a un archivo Excel
df.to_excel('evaluation.xlsx', index=False)