# Advanced RAG Pipeline

## Preparation Stage

In [None]:
# ignore the errors saying "incompatible"

# numpy==1.26.2 leads to an error on google-colab: module 'numpy.linalg.lapack_lite' has no attribute '_ilp64'
# ref: https://github.com/deeppavlov/DeepPavlov/issues/1460
%pip install \
"python-dotenv==1.0.0" \
"openai==1.3.5" \
"llama-index==0.9.8" \
"trulens-eval==0.18.1" \
"streamlit==1.28.2" \
"cohere==4.36" \
"langchain-community==0.2.4" \
"langchain-core==0.2.21" \
"torch==2.1.2" \
"sentence-transformers==2.2.2" \
"numpy==1.24.4" \
"nest-asyncio==1.5.8" \
"pypdf==3.16.1" \
"requests==2.31.0"

# generally try to install the latest version, however it's not recommended as this time may succeed but fail next time
# due to version changing
#%pip install python-dotenv openai llama-index trulens-eval numpy nest-asyncio

# google-colab requires specific version of ipython, and it's not recommended to install/upgrade which may cause
# the runtime to repeatedly crash or behave in unexpected ways

# cannot import name 'Groundedness' from 'trulens_eval.feedback'
# ref: https://github.com/truera/trulens/releases/tag/trulens-eval-0.29.0
'''
%pip install \
"python-dotenv==1.0.1" \
"openai==1.30.1" \
"llama-index==0.10.37" \
"llama-index-agent-openai==0.2.5" \
"llama-index-cli==0.1.12" \
"llama-index-core==0.10.37" \
"llama-index-embeddings-openai==0.1.9" \
"llama-index-indices-managed-llama-cloud==0.1.6" \
"llama-index-llms-openai==0.1.19" \
"llama-index-multi-modal-llms-openai==0.1.6" \
"llama-index-postprocessor-rankllm-rerank==0.1.3" \
"llama-index-program-openai==0.1.6" \
"llama-index-question-gen-openai==0.1.3" \
"llama-index-readers-file==0.1.22" \
"llama-index-readers-llama-parse==0.1.4" \
"llama-parse==0.4.3" \
"llamaindex-py-client==0.1.19" \
"trulens-eval==0.28.2" \
"psutil>=5.9.8"
'''

Collecting python-dotenv==1.0.0
  Downloading python_dotenv-1.0.0-py3-none-any.whl.metadata (21 kB)
Collecting openai==1.3.5
  Downloading openai-1.3.5-py3-none-any.whl.metadata (16 kB)
Collecting llama-index==0.9.8
  Downloading llama_index-0.9.8-py3-none-any.whl.metadata (8.2 kB)
Collecting trulens-eval==0.18.1
  Downloading trulens_eval-0.18.1-py3-none-any.whl.metadata (3.3 kB)
Collecting streamlit==1.28.2
  Downloading streamlit-1.28.2-py2.py3-none-any.whl.metadata (8.1 kB)
Collecting cohere==4.36
  Downloading cohere-4.36-py3-none-any.whl.metadata (5.3 kB)
Collecting langchain-community==0.2.4
  Downloading langchain_community-0.2.4-py3-none-any.whl.metadata (2.4 kB)
Collecting langchain-core==0.2.21
  Downloading langchain_core-0.2.21-py3-none-any.whl.metadata (6.0 kB)
Collecting torch==2.1.2
  Downloading torch-2.1.2-cp310-cp310-manylinux1_x86_64.whl.metadata (25 kB)
Collecting sentence-transformers==2.2.2
  Downloading sentence-transformers-2.2.2.tar.gz (85 kB)
[2K     [90m━━

'\n%pip install "python-dotenv==1.0.1" "openai==1.30.1" "llama-index==0.10.37" "llama-index-agent-openai==0.2.5" "llama-index-cli==0.1.12" "llama-index-core==0.10.37" "llama-index-embeddings-openai==0.1.9" "llama-index-indices-managed-llama-cloud==0.1.6" "llama-index-llms-openai==0.1.19" "llama-index-multi-modal-llms-openai==0.1.6" "llama-index-postprocessor-rankllm-rerank==0.1.3" "llama-index-program-openai==0.1.6" "llama-index-question-gen-openai==0.1.3" "llama-index-readers-file==0.1.22" "llama-index-readers-llama-parse==0.1.4" "llama-parse==0.4.3" "llamaindex-py-client==0.1.19" "trulens-eval==0.28.2" "psutil>=5.9.8"\n'

In [None]:
# list every package installed in the kernel's environment together with its version
%pip list

# alternative: show details for the main packages only
#%pip show python-dotenv openai llama-index trulens-eval numpy nest-asyncio

Package                          Version
-------------------------------- ---------------------
absl-py                          1.4.0
accelerate                       0.32.1
aiohttp                          3.9.5
aiosignal                        1.3.1
aiostream                        0.5.2
alabaster                        0.7.16
albucore                         0.0.12
albumentations                   1.4.11
alembic                          1.13.2
altair                           4.2.2
annotated-types                  0.7.0
anyio                            3.7.1
argon2-cffi                      23.1.0
argon2-cffi-bindings             21.2.0
array_record                     0.5.1
arviz                            0.18.0
asn1crypto                       1.5.1
astropy                          6.1.2
astropy-iers-data                0.2024.7.22.0.34.13
astunparse                       1.6.3
async-timeout                    4.0.3
atpublic                         4.1.0
attrs                   

In [None]:
%%writefile ./.env
OPENAI_API_KEY=sk-GUktlQ4YGz8DdZ1D5f02A83836444b4dAdC7D5F9BcFeFf6d
# replace https://api.openai.com/v1 with API transit address
OPENAI_BASE_URL=https://pro.aiskt.com/v1

# for using models @HuggingFace
HUGGINGFACE_API_KEY=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

Writing ./.env


In [None]:
import os
from dotenv import load_dotenv, find_dotenv

def get_openai_api_key():
    """Return the OPENAI_API_KEY environment variable.

    The .env file located by find_dotenv() is loaded first. The result is
    None when the variable is not set.
    """
    dotenv_path = find_dotenv()
    load_dotenv(dotenv_path)
    return os.environ.get("OPENAI_API_KEY")

def get_openai_base_url():
    """Return the OPENAI_BASE_URL environment variable.

    The .env file located by find_dotenv() is loaded first. The result is
    None when the variable is not set.
    """
    dotenv_path = find_dotenv()
    load_dotenv(dotenv_path)
    return os.environ.get("OPENAI_BASE_URL")

def get_hf_api_key():
    """Return the HUGGINGFACE_API_KEY environment variable.

    The .env file located by find_dotenv() is loaded first. The result is
    None when the variable is not set.
    """
    dotenv_path = find_dotenv()
    load_dotenv(dotenv_path)
    return os.environ.get("HUGGINGFACE_API_KEY")

# Set the API key and base URL on the `openai` module before any OpenAI client is created. Once configured here,
# calls made through the other frameworks used below (e.g. LlamaIndex and TruLens) inherit these settings and need
# no separate configuration.
import openai
openai.api_key = get_openai_api_key()
openai.base_url = get_openai_base_url()
#OPENAI_API_KEY = get_openai_api_key()
#OPENAI_BASE_URL = get_openai_base_url()

import numpy as np

# NOTE(review): presumably needed so async code can run inside the notebook's already-running event loop — confirm
import nest_asyncio
nest_asyncio.apply()

# silence every Python warning for the rest of the session
import warnings
warnings.filterwarnings('ignore')

## Naive RAG - Setup

In [None]:
from llama_index import SimpleDirectoryReader

# read the eBook PDF from the working directory; load_data() returns a list of Document objects
documents = SimpleDirectoryReader(
    input_files=["./eBook-How_to_Build_Your_Career_in_AI.pdf"]
).load_data()

In [None]:
# sanity-check what was loaded: container type, number of items, item type, and the first item
print(type(documents), "\n")
print(len(documents), "\n")
print(type(documents[0]), "\n")
print(documents[0])

<class 'list'> 

41 

<class 'llama_index.schema.Document'> 

Doc ID: 4b03c0c8-5008-4aed-a805-3dd32df47aeb
Text: PAGE 1Founder, DeepLearning.AICollected Insights from Andrew Ng
How to  Build Your Career in AIA Simple Guide


In [None]:
from llama_index import Document

# merge the text of every loaded Document into a single Document, separated by blank lines
document = Document(text="\n\n".join(page.text for page in documents))

In [None]:
from llama_index import VectorStoreIndex
from llama_index import ServiceContext
from llama_index.llms import OpenAI

# LLM handed to the service contexts and index builders below
llm = OpenAI(
    model="gpt-3.5-turbo",
    temperature=0.1,
)

# customize the service context to use a local embedding model from HuggingFace
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
)

# With from_documents, the document is split into chunks and parsed into node objects, which are stored in memory
# by default. VectorStoreIndex processes vectors in batches of 2048 nodes, so if memory is constrained,
# insert_batch_size can be lowered.
# ref: https://docs.llamaindex.ai/en/stable/module_guides/indexing/vector_store_index/
index = VectorStoreIndex.from_documents(
    [document],
    service_context=service_context,
    #insert_batch_size=512,
)

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

[nltk_data] Downloading package punkt to /tmp/llama_index...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [None]:
# baseline ("naive") RAG: a query engine created from the index with default settings
query_engine = index.as_query_engine()

# ask one sample question and print the generated answer
response = query_engine.query(
    "What are steps to take when finding projects to build your experience?"
)
print(str(response))

Develop a side hustle, ensure the project will help you grow technically, collaborate with good teammates, and consider if the project can be a stepping stone to larger projects.


## Naive RAG - Evaluation

In [None]:
# Load the evaluation questions, one question per line.
# Blank lines are skipped so that no empty query is sent to the query engines later on.
eval_questions = []
with open('eval_questions_lite.txt', 'r', encoding='utf-8') as file:
    for line in file:
        # strip the trailing newline and surrounding whitespace
        item = line.strip()
        if not item:
            continue

        print(item)
        eval_questions.append(item)

What are the keys to building a career in AI?
How can teamwork contribute to success in AI?


In [None]:
from trulens_eval import Tru

# create the TruLens handle and reset its database
# NOTE(review): presumably this discards records stored by earlier runs — confirm
tru = Tru()
tru.reset_database()

🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `Tru` to prevent this.


In [None]:
from trulens_eval.feedback.provider.openai import OpenAI
from trulens_eval import (
    Feedback,
    TruLlama,
    OpenAI,
)
from trulens_eval.feedback import Groundedness

# LLM-based feedback provider shared by the three feedback functions below.
# It is bound to `openai_provider` rather than `openai` so that the `openai` module configured earlier
# (api_key / base_url) is not replaced by a provider object in the kernel namespace.
openai_provider = OpenAI()

# Answer relevance: scores the generated answer against the user's question.
qa_relevance = (
    Feedback(
        openai_provider.relevance_with_cot_reasons,
        name="Answer Relevance",
    )
    # the input is the user's prompt and the output is the answer generated by the LLM
    .on_input_output()
)

# the contexts are the texts of the source nodes retrieved by the RAG pipeline
context_selection = TruLlama.select_source_nodes().node.text

# Context relevance: scores each retrieved context against the question, then averages the scores.
qs_relevance = (
    Feedback(
        openai_provider.relevance_with_cot_reasons,
        name="Context Relevance",
    )
    .on_input()
    .on(context_selection)
    .aggregate(np.mean)
)

grounded = Groundedness(groundedness_provider=openai_provider)
#grounded = Groundedness(groundedness_provider=openai_provider, summarize_provider=openai_provider)

# Groundedness: checks the generated answer (statement) against the retrieved contexts (source).
groundedness = (
    Feedback(
        grounded.groundedness_measure_with_cot_reasons,
        name="Groundedness",
    )
    .on(context_selection)
    .on_output()
    .aggregate(grounded.grounded_statements_aggregator)
)

# the default feedback set used by get_prebuilt_trulens_recorder
feedbacks = [qa_relevance, qs_relevance, groundedness]

def get_trulens_recorder(query_engine, feedbacks, app_id):
    """Wrap a query engine in a TruLlama recorder.

    Args:
        query_engine: the query engine to wrap.
        feedbacks: feedback functions passed to the recorder.
        app_id: identifier passed to the recorder as its app id.

    Returns:
        The TruLlama instance wrapping `query_engine`.
    """
    return TruLlama(query_engine, app_id=app_id, feedbacks=feedbacks)

def get_prebuilt_trulens_recorder(query_engine, app_id):
    """Wrap a query engine in a TruLlama recorder using the notebook's default feedbacks.

    This is a convenience wrapper around get_trulens_recorder. It reads the
    module-level `feedbacks` list at call time, so that list must be defined
    before this function is called.

    Args:
        query_engine: the query engine to wrap.
        app_id: identifier passed to the recorder as its app id.

    Returns:
        The TruLlama instance wrapping `query_engine`.
    """
    return get_trulens_recorder(query_engine, feedbacks=feedbacks, app_id=app_id)

✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input response will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input source will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .


In [None]:
# wrap the baseline query engine in a recorder that uses the default feedbacks
tru_recorder = get_prebuilt_trulens_recorder(
    query_engine,
    app_id="Direct Query Engine",
)

# run every evaluation question inside the recorder context; the answers themselves are not printed here
with tru_recorder as recording:
    for question in eval_questions:
        response = query_engine.query(question)

In [None]:
# fetch the recorded calls and the feedback results, then display the first rows
# NOTE(review): an empty app_ids list presumably selects every app — confirm
records, feedback = tru.get_records_and_feedback(app_ids=[])
records.head()

# dashboard will be launched by localtunnel service on google-colab
#tru.run_dashboard()

Unnamed: 0,app_id,app_json,type,record_id,input,output,tags,record_json,cost_json,perf_json,ts,Answer Relevance,Context Relevance,Answer Relevance_calls,Context Relevance_calls,latency,total_tokens,total_cost
0,Direct Query Engine,"{""app_id"": ""Direct Query Engine"", ""tags"": ""-"",...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_28dbb0f01132dd1a4500e83915e58ad9,"""What are the keys to building a career in AI?""","""Learning foundational technical skills, worki...",-,"{""record_id"": ""record_hash_28dbb0f01132dd1a450...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2024-07-28T08:51:49.922319"", ""...",2024-07-28T08:51:51.862664,1.0,1.0,[{'args': {'prompt': 'What are the keys to bui...,[{'args': {'prompt': 'What are the keys to bui...,1,2107,0.003206
1,Direct Query Engine,"{""app_id"": ""Direct Query Engine"", ""tags"": ""-"",...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_ebff3cdbab1cb3fb72758099e68f7d6d,"""How can teamwork contribute to success in AI?""","""Teamwork can contribute to success in AI by a...",-,"{""record_id"": ""record_hash_ebff3cdbab1cb3fb727...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2024-07-28T08:51:52.143025"", ""...",2024-07-28T08:51:53.721299,1.0,0.5,[{'args': {'prompt': 'How can teamwork contrib...,[{'args': {'prompt': 'How can teamwork contrib...,1,1693,0.002573


## Sentence-window Retrieval - Setup

In [None]:
from llama_index import ServiceContext, VectorStoreIndex, StorageContext
from llama_index.node_parser import SentenceWindowNodeParser
from llama_index.indices.postprocessor import MetadataReplacementPostProcessor
from llama_index.indices.postprocessor import SentenceTransformerRerank
from llama_index import load_index_from_storage

def build_sentence_window_index(
    document,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="sentence_index",
    window_size=3,
):
    """Build a sentence-window vector index for `document`, or load it from `save_dir`.

    If `save_dir` does not exist, the index is built from `document` and
    persisted there. If it exists, the persisted index is loaded instead and
    `document` and `window_size` play no part in the returned index, so delete
    the directory to force a rebuild after changing either.

    Args:
        document: the document to index (wrapped in a one-element list).
        llm: LLM passed to the service context.
        embed_model: embedding model passed to the service context.
        save_dir: directory the index is persisted to / loaded from.
        window_size: number of sentences kept on each side of a sentence in
            its "window" metadata (default 3, the previously hard-coded value).

    Returns:
        The built or loaded index.
    """
    # for each sentence, the parser stores `window_size` sentences before and after it in the node metadata
    node_parser = SentenceWindowNodeParser.from_defaults(
        window_size=window_size,
        window_metadata_key="window",
        original_text_metadata_key="original_text",
    )
    sentence_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
        node_parser=node_parser,
    )

    if not os.path.exists(save_dir):
        # first run: build the index and persist it for later runs
        sentence_index = VectorStoreIndex.from_documents(
            [document],
            service_context=sentence_context,
        )
        sentence_index.storage_context.persist(persist_dir=save_dir)
    else:
        # later runs: reuse whatever was persisted in save_dir
        sentence_index = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir),
            service_context=sentence_context,
        )

    return sentence_index

def get_sentence_window_query_engine(
    sentence_index,
    similarity_top_k=6,
    rerank_top_n=2,
):
    """Create a query engine over a sentence-window index.

    Two node post-processors are applied in order: a metadata replacement
    keyed on "window", then a SentenceTransformerRerank using the
    "BAAI/bge-reranker-base" model.

    Args:
        sentence_index: index to create the query engine from.
        similarity_top_k: passed to as_query_engine as `similarity_top_k`.
        rerank_top_n: passed to the re-ranker as `top_n`.

    Returns:
        The configured query engine.
    """
    # ref: https://docs.llamaindex.ai/en/stable/examples/node_postprocessor/SentenceTransformerRerank/
    postprocessors = [
        # replace with the sentence-window stored in the node metadata
        MetadataReplacementPostProcessor(target_metadata_key="window"),
        SentenceTransformerRerank(top_n=rerank_top_n, model="BAAI/bge-reranker-base"),
    ]
    return sentence_index.as_query_engine(
        similarity_top_k=similarity_top_k,
        node_postprocessors=postprocessors,
    )

In [None]:
# build the sentence-window index, or load it from "sentence_index" if that directory already exists
sentence_index = build_sentence_window_index(
    document,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="sentence_index",
)

# query engine with the default similarity_top_k / rerank_top_n
sentence_window_engine = get_sentence_window_query_engine(sentence_index)

config.json:   0%|          | 0.00/799 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/443 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/279 [00:00<?, ?B/s]

In [None]:
# ask a sample question through the sentence-window engine
window_response = sentence_window_engine.query(
    "How do I get started on a personal project in AI?"
)
print(str(window_response), "\n")

# for the first source node, compare the sentence-window content with the original sentence
window = window_response.source_nodes[0].node.metadata["window"]
sentence = window_response.source_nodes[0].node.metadata["original_text"]

print(f"Original sentence:\n{sentence}\n")
print(f"Sentence-window content:\n{window}")

To get started on a personal project in AI, you should first identify a project that aligns with your career goals and interests. Once you have chosen a project, focus on scoping it effectively by defining the problem statement, setting clear objectives, and outlining the steps needed to achieve them. Consider projects that are responsible, ethical, and beneficial to society. As you progress, aim to work on projects that increase in complexity and impact over time, building a portfolio that demonstrates your skill progression in AI. 

Original sentence:
In the previous chapter, 
I wrote about how to identify and scope AI projects. 

Sentence-window content:
I hope these steps will guide you in exploring it 
through project work — even if you don’t yet have deep expertise in that field.  AI won’t solve every 
problem, but as a community, let’s look for ways to make a positive impact wherever we can.Scoping Successful AI Projects CHAPTER 4
Step 3
Step 4
Step 5

PAGE 17Finding Projects th

## Sentence-window Retrieval - Evaluation

In [None]:
# NOTE(review): resetting here presumably drops the records of the previous engine, so the leaderboard
# below would only contain this engine — confirm that this is intended
tru.reset_database()

tru_recorder_sentence_window = get_prebuilt_trulens_recorder(
    sentence_window_engine,
    app_id = "Sentence-window Query Engine",
)

# run each evaluation question inside the recorder context and print the question with its answer
for question in eval_questions:
    with tru_recorder_sentence_window as recording:
        response = sentence_window_engine.query(question)
        print(question)
        print(str(response))

What are the keys to building a career in AI?
Learning foundational technical skills, working on projects, finding a job, and being part of a supportive community are the keys to building a career in AI.
How can teamwork contribute to success in AI?
Teammates play a crucial role in the success of AI projects. Working collaboratively with colleagues who are dedicated, continuously learning, and focused on building AI for the benefit of all can positively influence one's own work ethic and outcomes. The ability to work effectively in a team, leverage diverse perspectives, and collectively steer projects towards success is essential in the field of AI.


In [None]:
# fetch the leaderboard and display its first rows
# NOTE(review): an empty app_ids list presumably selects every app — confirm
leaderboard = tru.get_leaderboard(app_ids=[])
leaderboard.head()
#tru.run_dashboard()

Unnamed: 0_level_0,Groundedness,Context Relevance,Answer Relevance,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Sentence-window Query Engine,0.533333,0.7,1.0,8.0,0.000881


## Auto-merging Retrieval - Setup

In [None]:
from llama_index.node_parser import HierarchicalNodeParser
from llama_index.node_parser import get_leaf_nodes
from llama_index import StorageContext
from llama_index.retrievers import AutoMergingRetriever
from llama_index.indices.postprocessor import SentenceTransformerRerank
from llama_index.query_engine import RetrieverQueryEngine

def build_automerging_index(
    documents,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="merging_index",
    chunk_sizes=None,
):
    """Build a hierarchical (auto-merging) vector index, or load it from `save_dir`.

    If `save_dir` exists, the persisted index is loaded and `documents` and
    `chunk_sizes` are not used; delete the directory to force a rebuild.
    Otherwise the documents are parsed into a node hierarchy, every node is
    added to the docstore, only the leaf nodes are indexed, and the result is
    persisted to `save_dir`.

    Args:
        documents: list of documents to parse into hierarchical nodes.
        llm: LLM passed to the service context.
        embed_model: embedding model passed to the service context.
        save_dir: directory the index is persisted to / loaded from.
        chunk_sizes: chunk sizes for the hierarchical parser; defaults to
            [2048, 512, 128] when None or empty.

    Returns:
        The built or loaded index.
    """
    merging_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
    )

    if os.path.exists(save_dir):
        # nothing needs parsing when a persisted index is reused
        return load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir),
            service_context=merging_context,
        )

    # parsing and docstore population are only needed when building from scratch
    chunk_sizes = chunk_sizes or [2048, 512, 128]
    node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)
    nodes = node_parser.get_nodes_from_documents(documents)
    leaf_nodes = get_leaf_nodes(nodes)

    # the docstore receives all nodes, while only the leaf nodes are passed to the vector index
    storage_context = StorageContext.from_defaults()
    storage_context.docstore.add_documents(nodes)

    automerging_index = VectorStoreIndex(
        leaf_nodes,
        storage_context=storage_context,
        service_context=merging_context,
    )
    automerging_index.storage_context.persist(persist_dir=save_dir)
    return automerging_index

def get_automerging_query_engine(
    automerging_index,
    similarity_top_k=12,
    rerank_top_n=2,
):
    """Create a query engine that retrieves through an AutoMergingRetriever.

    The index's base retriever is wrapped in an AutoMergingRetriever (verbose
    output enabled) and the retrieved nodes are re-ranked with the
    "BAAI/bge-reranker-base" model.

    Args:
        automerging_index: index providing the base retriever and storage context.
        similarity_top_k: passed to as_retriever as `similarity_top_k`.
        rerank_top_n: passed to the re-ranker as `top_n`.

    Returns:
        The configured RetrieverQueryEngine.
    """
    # ref: https://docs.llamaindex.ai/en/stable/examples/retrievers/auto_merging_retriever/
    merging_retriever = AutoMergingRetriever(
        automerging_index.as_retriever(similarity_top_k=similarity_top_k),
        automerging_index.storage_context,
        verbose=True,
    )
    reranker = SentenceTransformerRerank(
        top_n=rerank_top_n,
        model="BAAI/bge-reranker-base",
    )
    return RetrieverQueryEngine.from_args(
        merging_retriever,
        node_postprocessors=[reranker],
    )

In [None]:
# build the auto-merging index from the list of loaded documents, or load it from "merging_index" if that
# directory already exists
automerging_index = build_automerging_index(
    documents,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="merging_index",
)

# query engine with the default similarity_top_k / rerank_top_n
automerging_query_engine = get_automerging_query_engine(
    automerging_index,
)

In [None]:
# ask a sample question through the auto-merging engine; the retriever prints its merge steps (verbose=True)
auto_merging_response = automerging_query_engine.query(
    "How do I build a portfolio of AI projects?"
)
print(str(auto_merging_response))

> Merging 1 nodes into parent node.
> Parent node id: 5c30fac0-9e59-4624-9076-cd8c2cad3c40.
> Parent node text: PAGE 21Building a Portfolio of 
Projects that Shows 
Skill Progression CHAPTER 6
PROJECTS

> Merging 1 nodes into parent node.
> Parent node id: 653fad11-17ff-4ee7-a3a6-3af6f3b4d3bc.
> Parent node text: PAGE 21Building a Portfolio of 
Projects that Shows 
Skill Progression CHAPTER 6
PROJECTS

Building a portfolio of AI projects involves showcasing a progression from simple to complex undertakings over time. It is important to be able to effectively communicate your thinking to others to demonstrate the value of your work and gain trust for larger projects. Identifying worthwhile ideas to work on and gaining experience in various industries through projects are key steps in building a strong portfolio in AI.


## Auto-merging Retrieval - Evaluation

In [None]:
# NOTE(review): resetting here presumably drops the records of the previous engine, so the leaderboard
# below would only contain this engine — confirm that this is intended
tru.reset_database()

tru_recorder_automerging = get_prebuilt_trulens_recorder(
    automerging_query_engine,
    app_id="Auto-merging Query Engine",
)

# run each evaluation question inside the recorder context and print the question with its answer
for question in eval_questions:
    with tru_recorder_automerging as recording:
        response = automerging_query_engine.query(question)
        print(question)
        print(str(response))

> Merging 2 nodes into parent node.
> Parent node id: 1b4a6c82-3796-4982-99cf-929446d27bbc.
> Parent node text: PAGE 3Table of 
ContentsIntroduction: Coding AI is the New Literacy.
Chapter 1: Three Steps to Ca...

> Merging 1 nodes into parent node.
> Parent node id: 37c3031e-ca1a-4195-938b-cfdf09a285df.
> Parent node text: PAGE 3Table of 
ContentsIntroduction: Coding AI is the New Literacy.
Chapter 1: Three Steps to Ca...

What are the keys to building a career in AI?
The keys to building a career in AI are learning foundational technical skills, working on projects to deepen skills and create impact, and finding a job, all while being part of a supportive community.
How can teamwork contribute to success in AI?
Teamwork can contribute to success in AI by enhancing the ability to collaborate effectively with others. Working in teams allows individuals to leverage diverse perspectives, share knowledge, and collectively tackle complex projects. This collaborative environment fosters inn

In [None]:
# fetch the leaderboard and display its first rows
# NOTE(review): an empty app_ids list presumably selects every app — confirm
leaderboard = tru.get_leaderboard(app_ids=[])
leaderboard.head()
#tru.run_dashboard()

Unnamed: 0_level_0,Groundedness,Context Relevance,Answer Relevance,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Auto-merging Query Engine,0.708333,0.675,1.0,9.0,0.000629
