# Lesson 1: Advanced RAG Pipeline

In [4]:
# llama-index==v0.9.14.post3
# llama-index-readers-file==0.1.13
# langchain[openai]==0.0.343
# openai==1.3.5
# python-dotenv==1.0.1
# datasets==2.18.0
# sentence_transformers
# trulens==0.13.4
# trulens-eval==0.20.0
# https://github.dev/openai/openai-cookbook/blob/main/examples/evaluation/Evaluate_RAG_with_LlamaIndex.ipynb

In [5]:
from datasets import load_dataset
import pandas as pd

xsum_dataset = load_dataset(
    "xsum", version="1.2.0"
)
xsum_sample = xsum_dataset["train"].select(range(1000)).to_pandas()
xsum_sample.head(2)

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


Unnamed: 0,document,summary,id
0,"The full cost of damage in Newton Stewart, one...",Clean-up operations are continuing across the ...,35232142
1,A fire alarm went off at the Holiday Inn in Ho...,Two tourist buses have been destroyed by fire ...,40143035


In [6]:
xsum_sample["combined"] = (
    "Document: " + xsum_sample.document.str.strip() + "; Summary: " + xsum_sample.summary.str.strip()
)

In [7]:
!mkdir -p 'document/'
os.environ["TOKENIZERS_PARALLELISM"] = "false"
for i, document in enumerate(xsum_sample["combined"]):
    file_name = f'document/document_{i+1}.txt'  # Generate a unique filename for each document
    with open(file_name, 'w', encoding='utf-8') as file:
        file.write(document)  # Write each document to its own file

In [8]:
# conda activate llama-index

## RetrieverEvaluator

In [24]:
from llama_index import SimpleDirectoryReader

loader = SimpleDirectoryReader(input_dir="./document/")
documents = loader.load_data()

In [25]:
print(type(documents), "\n")
print(len(documents), "\n")
print(type(documents[0]))
print(documents[0])

<class 'list'> 

1000 

<class 'llama_index.schema.Document'>
Doc ID: 656cd4d1-5c4a-49f5-ad60-600629e6a8fd
Text: Document: The full cost of damage in Newton Stewart, one of the
areas worst affected, is still being assessed. Repair work is ongoing
in Hawick and many roads in Peeblesshire remain badly affected by
standing water. Trains on the west coast mainline face disruption due
to damage at the Lamington Viaduct. Many businesses and householders
were aff...


In [42]:
from dotenv import load_dotenv, find_dotenv

import os
import openai
from llama_index.node_parser import SimpleNodeParser

_ = load_dotenv(find_dotenv())
openai.api_key = os.getenv('OPENAI_API_KEY')

# Define an LLM
llm = OpenAI(model="gpt-4")

# Build index with a chunk_size of 512
node_parser = SimpleNodeParser.from_defaults(chunk_size=512)
nodes = node_parser.get_nodes_from_documents(documents)
vector_index = VectorStoreIndex(nodes)

In [28]:
# build a query engine
query_engine = index.as_query_engine()

In [29]:
response = query_engine.query(
    "I'm looking for the information of Harry Potter. What could you suggest to me?"
)
print(str(response))

You may want to explore the Harry Potter series of books written by J.K. Rowling, which have sold over 450 million copies worldwide since 1997. Additionally, there are eight film adaptations based on the books. Another interesting aspect to look into is "Harry Potter and the Cursed Child," a play that has received five-star reviews from critics and is considered a game-changing production.


In [30]:
response.source_nodes[0].get_text()[:1000]

'Ben Brantley in the New York Times writes: "Like the novels that preceded it, The Cursed Child is stuffed with arcana-filled plots that defy diagrams and baldly wrought sentimental life lessons, along with anguished dives into the earnest, tortured solipsism of adolescence.\n"By rights, such a combination should try the patience of any grown-up. But like Ms Rowling\'s books, the play vanquishes resistance."\nThe Harry Potter books have sold more than 450 million copies since 1997 and been adapted into eight films.\nThe script of Harry Potter and the Cursed Child is published this weekend.\nFollow us on Twitter @BBCNewsEnts, on Instagram at bbcnewsents, or email entertainment.news@bbc.co.uk.; Summary: Harry Potter and the Cursed Child has won five-star reviews from critics, with one describing it as "a game-changing production".\n\nDocument: There were celebrations in Europe after Germany surrendered.\nBut on the other side of the world in the Pacific Ocean, Japan was still fighting ag

In [31]:
from llama_index.evaluation import generate_question_context_pairs

qa_dataset = generate_question_context_pairs(
    nodes,
    llm=llm,
    num_questions_per_chunk=2
)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1625/1625 [1:25:29<00:00,  3.16s/it]


#### Retrieval Evaluation:

We are now prepared to conduct our retrieval evaluations. We will execute our `RetrieverEvaluator` using the evaluation dataset we have generated.

We first create the `Retriever` and then define two functions: `get_eval_results`, which operates our retriever on the dataset, and `display_results`, which presents the outcomes of the evaluation.

In [34]:
retriever = vector_index.as_retriever(similarity_top_k=2)

In [37]:
retriever_evaluator = RetrieverEvaluator.from_metric_names(
    ["mrr", "hit_rate"], retriever=retriever
)

In [38]:
# Evaluate
eval_results = await retriever_evaluator.aevaluate_dataset(qa_dataset)

In [43]:
def display_results(name, eval_results):
    """Display results from evaluate."""

    metric_dicts = []
    for eval_result in eval_results:
        metric_dict = eval_result.metric_vals_dict
        metric_dicts.append(metric_dict)

    full_df = pd.DataFrame(metric_dicts)

    hit_rate = full_df["hit_rate"].mean()
    mrr = full_df["mrr"].mean()

    metrics =  {"Retriever Name": [name], "Hit Rate": [hit_rate], "MRR": [mrr]}
    return metrics

In [44]:
display_results("OpenAI Embedding Retriever", eval_results)

{'Retriever Name': ['OpenAI Embedding Retriever'],
 'Hit Rate': [0.9273846153846154],
 'MRR': [0.8507692307692307]}

## Basic RAG pipeline

In [45]:
from llama_index import Document

document = Document(text="\n\n".join([doc.text for doc in documents]))

In [46]:
from llama_index import VectorStoreIndex
from llama_index import ServiceContext
from llama_index.llms import OpenAI

llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)
service_context = ServiceContext.from_defaults(
    llm=llm, embed_model="local:BAAI/bge-small-en-v1.5"
)
index = VectorStoreIndex.from_documents([document], service_context=service_context)

In [47]:
query_engine = index.as_query_engine()

In [48]:
response = query_engine.query(
    "I'm looking for the information of Harry Potter. What could you suggest to me?"
)
print(str(response))

You may want to explore the Harry Potter series of books written by J.K. Rowling, which have sold over 450 million copies worldwide since 1997. Additionally, there are eight film adaptations based on the books. Another interesting aspect to look into is the script of "Harry Potter and the Cursed Child," which has received five-star reviews from critics and is considered a game-changing production.


### Creating an Evaluation Dataset

In [49]:
eval_questions = []
with open('eval_questions.txt', 'r') as file:
    for line in file:
        # Remove newline character and convert to integer
        item = line.strip()
        print(item)
        eval_questions.append(item)

I'm looking for the information of Harry Potter. What could you suggest to me?


In [50]:
new_question = "What is the Harry Potter show for me?"
eval_questions.append(new_question)

In [51]:
print(eval_questions)

["I'm looking for the information of Harry Potter. What could you suggest to me?", 'What is the Harry Potter show for me?']


In [52]:
from trulens_eval import Tru
tru = Tru()

tru.reset_database()

🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `Tru` to prevent this.


In [53]:
from trulens_eval import (
    Feedback,
    TruLlama,
    OpenAI
)
from trulens_eval.feedback import Groundedness
import numpy as np

openai = OpenAI()

# Answer Relevance
qa_relevance = (
    Feedback(openai.relevance_with_cot_reasons, name="Answer Relevance")
    .on_input_output()
)

# Context Relevance
# on_input - pointer to user query
# on - pointer to retrieved contexts (intermediate results
# aggregate - aggregate score across all retrieved content
qs_relevance = (
    Feedback(openai.relevance_with_cot_reasons, name = "Context Relevance")
    .on_input()
    .on(TruLlama.select_source_nodes().node.text)
    .aggregate(np.mean)
)

# Groundedness
grounded = Groundedness(groundedness_provider=openai)
groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons, name="Groundedness")
        .on(TruLlama.select_source_nodes().node.text)
        .on_output()
        .aggregate(grounded.grounded_statements_aggregator)
)
feedbacks = [qa_relevance, qs_relevance, groundedness]

def get_prebuilt_trulens_recorder(query_engine, app_id):
    tru_recorder = TruLlama(
        query_engine,
        app_id=app_id,
        feedbacks=feedbacks
        )
    return tru_recorder

✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input response will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input source will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .


In [54]:
tru_recorder = get_prebuilt_trulens_recorder(query_engine, app_id="Direct Query Engine")

In [55]:
with tru_recorder as recording:
    for question in eval_questions:
        response = query_engine.query(question)

In [56]:
records, feedback = tru.get_records_and_feedback(app_ids=[])

In [57]:
records.head()

Unnamed: 0,app_id,app_json,type,record_id,input,output,tags,record_json,cost_json,perf_json,ts,Answer Relevance,Answer Relevance_calls,latency,total_tokens,total_cost
0,Direct Query Engine,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_29639e8d5d6741bbe95b76f7f970d133,"""I'm looking for the information of Harry Pott...","""You could explore the Harry Potter series of ...",-,"{""record_id"": ""record_hash_29639e8d5d6741bbe95...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2024-04-07T19:24:07.991170"", ""...",2024-04-07T19:24:20.952760,1.0,[{'args': {'prompt': 'I'm looking for the info...,12,2209,0.003352
1,Direct Query Engine,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_a9161ffafd75b8d3a389ceba6fabb9cc,"""What is the Harry Potter show for me?""","""The Harry Potter show is described as a \""gam...",-,"{""record_id"": ""record_hash_a9161ffafd75b8d3a38...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2024-04-07T19:24:21.115404"", ""...",2024-04-07T19:24:22.804194,,,1,2196,0.003324


In [58]:
# launches on http://localhost:8501/
tru.run_dashboard()

Starting dashboard ...


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

Dashboard started at http://192.168.1.156:8501 .


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>

## Advanced RAG pipeline

### 1. Sentence Window retrieval

In [59]:
from llama_index import ServiceContext, VectorStoreIndex, StorageContext
from llama_index.node_parser import SentenceWindowNodeParser
from llama_index.indices.postprocessor import MetadataReplacementPostProcessor
from llama_index.indices.postprocessor import SentenceTransformerRerank
from llama_index import load_index_from_storage

def build_sentence_window_index(
    document, llm, embed_model="local:BAAI/bge-small-en-v1.5", save_dir="sentence_index"
):
    # create the sentence window node parser w/ default settings
    node_parser = SentenceWindowNodeParser.from_defaults(
        window_size=3,
        window_metadata_key="window",
        original_text_metadata_key="original_text",
    )
    sentence_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
        node_parser=node_parser,
    )
    if not os.path.exists(save_dir):
        sentence_index = VectorStoreIndex.from_documents(
            [document], service_context=sentence_context
        )
        sentence_index.storage_context.persist(persist_dir=save_dir)
    else:
        sentence_index = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir),
            service_context=sentence_context,
        )

    return sentence_index

In [60]:
from llama_index.llms import OpenAI

llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)

In [61]:
sentence_index = build_sentence_window_index(
    document,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="sentence_index"
)

In [62]:
def get_sentence_window_query_engine(
    sentence_index,
    similarity_top_k=6,
    rerank_top_n=2,
):
    # define postprocessors
    postproc = MetadataReplacementPostProcessor(target_metadata_key="window")
    rerank = SentenceTransformerRerank(
        top_n=rerank_top_n, model="BAAI/bge-reranker-base"
    )

    sentence_window_engine = sentence_index.as_query_engine(
        similarity_top_k=similarity_top_k, node_postprocessors=[postproc, rerank]
    )
    return sentence_window_engine

sentence_window_engine = get_sentence_window_query_engine(sentence_index)

config.json:   0%|          | 0.00/799 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/443 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/279 [00:00<?, ?B/s]

In [63]:
window_response = sentence_window_engine.query(
    "I'm looking for the information of Harry Potter. What could you suggest to me?"
)
print(str(window_response))

python(14515) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


I would suggest looking into the Harry Potter books, which have sold over 450 million copies since 1997 and have been adapted into eight films. Additionally, you may want to explore the script of Harry Potter and the Cursed Child, which is being published this weekend and has received five-star reviews from critics.


In [64]:
from trulens_eval import Tru
tru = Tru()

tru.reset_database()

tru_recorder_sentence_window = get_prebuilt_trulens_recorder(
    sentence_window_engine,
    app_id = "Sentence Window Query Engine"
)

In [65]:
for question in eval_questions:
    with tru_recorder_sentence_window as recording:
        response = sentence_window_engine.query(question)
        print(question)
        print(str(response))

I'm looking for the information of Harry Potter. What could you suggest to me?
You might want to explore the Harry Potter books, which have sold over 450 million copies worldwide since 1997 and have been adapted into eight films. Additionally, the script of Harry Potter and the Cursed Child has been published and has received five-star reviews from critics, with one describing it as "a game-changing production."
What is the Harry Potter show for me?
The Harry Potter show is a production that has received five-star reviews from critics, with one reviewer describing it as "a game-changing production."


In [66]:
tru.get_leaderboard(app_ids=[])

Unnamed: 0_level_0,Context Relevance,Groundedness,Answer Relevance,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Sentence Window Query Engine,0.4,1.0,1.0,7.5,0.000699


In [67]:
# launches on http://localhost:8501/
tru.run_dashboard()

Starting dashboard ...
Config file already exists. Skipping writing process.
Credentials file already exists. Skipping writing process.
Dashboard already running at path:   Network URL: http://192.168.1.156:8501



<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>

### 2. Auto-merging retrieval

In [68]:
from llama_index.node_parser import HierarchicalNodeParser

from llama_index.node_parser import get_leaf_nodes
from llama_index import StorageContext
from llama_index.retrievers import AutoMergingRetriever
from llama_index.indices.postprocessor import SentenceTransformerRerank
from llama_index.query_engine import RetrieverQueryEngine

def build_automerging_index(
    documents,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="merging_index",
    chunk_sizes=None,
):
    chunk_sizes = chunk_sizes or [2048, 512, 128]
    node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)
    nodes = node_parser.get_nodes_from_documents(documents)
    leaf_nodes = get_leaf_nodes(nodes)
    merging_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
    )
    storage_context = StorageContext.from_defaults()
    storage_context.docstore.add_documents(nodes)

    if not os.path.exists(save_dir):
        automerging_index = VectorStoreIndex(
            leaf_nodes, storage_context=storage_context, service_context=merging_context
        )
        automerging_index.storage_context.persist(persist_dir=save_dir)
    else:
        automerging_index = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir),
            service_context=merging_context,
        )
    return automerging_index

In [None]:
automerging_index = build_automerging_index(
    documents,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="merging_index"
)

In [None]:
def get_automerging_query_engine(
    automerging_index,
    similarity_top_k=12,
    rerank_top_n=2,
):
    base_retriever = automerging_index.as_retriever(similarity_top_k=similarity_top_k)
    retriever = AutoMergingRetriever(
        base_retriever, automerging_index.storage_context, verbose=True
    )
    rerank = SentenceTransformerRerank(
        top_n=rerank_top_n, model="BAAI/bge-reranker-base"
    )
    auto_merging_engine = RetrieverQueryEngine.from_args(
        retriever, node_postprocessors=[rerank]
    )
    return auto_merging_engine

In [None]:
automerging_query_engine = get_automerging_query_engine(
    automerging_index,
)

In [None]:
auto_merging_response = automerging_query_engine.query(
     "I'm looking for the information of Harry Potter. What could you suggest to me?"
)
print(str(auto_merging_response))

In [None]:
tru.reset_database()

tru_recorder_automerging = get_prebuilt_trulens_recorder(automerging_query_engine,
                                                         app_id="Automerging Query Engine")

In [None]:
for question in eval_questions:
    with tru_recorder_automerging as recording:
        response = automerging_query_engine.query(question)
        print(question)
        print(response)

In [None]:
tru.get_leaderboard(app_ids=[])

In [None]:
# launches on http://localhost:8501/
tru.run_dashboard()