In [1]:
import nest_asyncio
import random

nest_asyncio.apply()
from dotenv import load_dotenv

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.core.prompts import PromptTemplate

from llama_index.core.evaluation import (
    DatasetGenerator,
    FaithfulnessEvaluator,
    RelevancyEvaluator
)
from llama_index.llms.openai import OpenAI

import openai
import time
import os
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

### Read docs

In [43]:
data_dir = "./data"
documents = SimpleDirectoryReader(data_dir).load_data()

DEBUG:llama_index.core.readers.file.base:> [SimpleDirectoryReader] Total files added: 4
> [SimpleDirectoryReader] Total files added: 4
> [SimpleDirectoryReader] Total files added: 4
DEBUG:fsspec.local:open file: /Users/thomaschang/Projects/RAG_Techniques/data/Understanding_Climate_Change.pdf
open file: /Users/thomaschang/Projects/RAG_Techniques/data/Understanding_Climate_Change.pdf
open file: /Users/thomaschang/Projects/RAG_Techniques/data/Understanding_Climate_Change.pdf
DEBUG:fsspec.local:open file: /Users/thomaschang/Projects/RAG_Techniques/data/nike_2023_annual_report.txt
open file: /Users/thomaschang/Projects/RAG_Techniques/data/nike_2023_annual_report.txt
open file: /Users/thomaschang/Projects/RAG_Techniques/data/nike_2023_annual_report.txt
DEBUG:fsspec.local:open file: /Users/thomaschang/Projects/RAG_Techniques/data/q_a.json
open file: /Users/thomaschang/Projects/RAG_Techniques/data/q_a.json
open file: /Users/thomaschang/Projects/RAG_Techniques/data/q_a.json


In [44]:
num_eval_questions = 25

eval_documents = documents[0:20]

print(documents[13])

Doc ID: ac937dea-3f3a-4a7b-a333-363029a0df0d
Text: Freshwater Ecosystems   Freshwater ecosystems, including rivers,
lakes, and wetlands, are affected by changes in  precipitation
patterns, temperature, and water flow. These changes can lead to
altered water  quality, habitat loss, and reduced biodiversity.
Freshwater species, inclu ding fish and  amphibians, are particularly
at risk.   Conservat...


It seems that `SimpleDirectoryReader` loads a pdf as multiple "documents". I should understand why that is.

In [45]:
# For my own knowledge to understand when this library calls an LLM
import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

In [47]:
data_generator = DatasetGenerator(eval_documents)
eval_questions = data_generator.generate_questions_from_nodes()
k_eval_questions = random.sample(eval_questions, num_eval_questions)
print(k_eval_questions)

DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: Understanding Climate Change  
Chapter 1: Intro...
> Adding chunk: Understanding Climate Change  
Chapter 1: Intro...
> Adding chunk: Understanding Climate Change  
Chapter 1: Intro...
> Adding chunk: Understanding Climate Change  
Chapter 1: Intro...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: Coal is the most carbon -intensive fossil fuel,...
> Adding chunk: Coal is the most carbon -intensive fossil fuel,...
> Adding chunk: Coal is the most carbon -intensive fossil fuel,...
> Adding chunk: Coal is the most carbon -intensive fossil fuel,...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: Ruminant animals, such as cows and sheep, produ...
> Adding chunk: Ruminant animals, such as cows and sheep, produ...
> Adding chunk: Ruminant animals, such as cows and sheep, produ...
> Adding chunk: Ruminant animals, such as cows and sheep, produ...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chu

  data_generator = DatasetGenerator(eval_documents)


DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'user', 'content': 'Context information is below.\n---------------------\npage_label: 18\nfile_name: Understanding_Climate_Change.pdf\nfile_path: /Users/thomaschang/Projects/RAG_Techniques/data/Understanding_Climate_Change.pdf\nfile_type: application/pdf\nfile_size: 206372\ncreation_date: 2024-10-03\nlast_modified_date: 2024-09-27\n\nVision for a Sustainable Future  \nHolistic Approach  \nAddressing climate change requires a holistic approach that integrates environmental, social, \nand economic dimensions. Sustainable development, circular economy, and ecological justice \nare key principles guiding this approach. Collaboration across sectors and scales is essential \nfor achieving a sustainable future.  \nInnovation and Creativity  \nInnovation and creativity are vital for developing new solutions to climate challenges. This \nincludes technolog

  return QueryResponseDataset(queries=queries, responses=responses_dict)


In [48]:
from llama_index.core import Settings

Settings.llm = OpenAI(model="gpt-4o-mini")

faithfulness_evaluator = FaithfulnessEvaluator()

relevancy_evaluator = RelevancyEvaluator()


The original notebook sets a custom prompt for the faithfulness evaluator. I will not do that here just to see what happens.

In [49]:
def evaluate_response_time_and_accuracy(chunk_size, eval_questions):
    total_response_time = 0
    total_faithfulness = 0
    total_relevancy = 0

    llm = OpenAI(model="gpt-4o-mini")

    vector_index = VectorStoreIndex.from_documents(eval_documents, llm=llm, chunk_size=chunk_size)
    query_engine = vector_index.as_query_engine()


    for question in eval_questions:
        start_time = time.time()
        response = query_engine.query(question)
        elapsed_time = time.time() - start_time

        faithfulness_result = faithfulness_evaluator.evaluate_response(response=response).passing
        relevancy_result = relevancy_evaluator.evaluate_response(query=question, response=response).passing

        total_response_time += elapsed_time
        total_faithfulness += faithfulness_result
        total_relevancy += relevancy_result

    average_response_time = total_response_time / num_eval_questions
    average_faithfulness = total_faithfulness / num_eval_questions
    average_relevancy = total_relevancy / num_eval_questions

    return average_response_time, average_faithfulness, average_relevancy


### Evaluate chunk sizes

In [50]:
chunk_sizes = [64, 256]

for chunk_size in chunk_sizes:
    avg_response_time, avg_faithfulness, avg_relevancy = evaluate_response_time_and_accuracy(chunk_size, k_eval_questions)
    print(f"Chunk size {chunk_size} - Average Response time: {avg_response_time:.2f}s, Average Faithfulness: {avg_faithfulness:.2f}, Average Relevancy: {avg_relevancy:.2f}")


DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: Understanding Climate Change  
Chapter 1: Intro...
> Adding chunk: Understanding Climate Change  
Chapter 1: Intro...
> Adding chunk: Understanding Climate Change  
Chapter 1: Intro...
> Adding chunk: Understanding Climate Change  
Chapter 1: Intro...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: Coal is the most carbon -intensive fossil fuel,...
> Adding chunk: Coal is the most carbon -intensive fossil fuel,...
> Adding chunk: Coal is the most carbon -intensive fossil fuel,...
> Adding chunk: Coal is the most carbon -intensive fossil fuel,...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: Ruminant animals, such as cows and sheep, produ...
> Adding chunk: Ruminant animals, such as cows and sheep, produ...
> Adding chunk: Ruminant animals, such as cows and sheep, produ...
> Adding chunk: Ruminant animals, such as cows and sheep, produ...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chu

KeyboardInterrupt: 