# Lesson 1: Advanced RAG Pipeline

In [None]:
!pip install llama-index-core llama-index-utils-workflow  --quiet
!pip install llama-index-llms-openai-like --quiet
!pip install llama-index-embeddings-huggingface --quiet
!pip install llama-index --quiet
!pip install trulens trulens-eval trulens-providers-openai trulens-providers-litellm --quiet
!pip install trulens-apps-llamaindex --quiet

In [None]:
import os

In [None]:
from llama_index.core import SimpleDirectoryReader

# Read the e-book PDF with SimpleDirectoryReader and keep the loaded result in `documents`.
pdf_paths = ["./eBook-How-to-Build-a-Career-in-AI.pdf"]
documents = SimpleDirectoryReader(input_files=pdf_paths).load_data()

In [None]:
# Quick sanity check of what the reader returned: container type, item count,
# then the type and content of the first item.
for summary in (type(documents), len(documents)):
    print(summary, "\n")

first_document = documents[0]
print(type(first_document))
print(first_document)

## Basic RAG pipeline

In [None]:
from llama_index.core import Document

# Concatenate the text of every loaded document into a single Document,
# separating the pieces with a blank line.
merged_text = "\n\n".join(doc.text for doc in documents)
document = Document(text=merged_text)

In [None]:
import getpass
import os

# Connection settings for the OpenAI-compatible inference server.
INFERENCE_SERVER_URL = "http://localhost:8989"
MODEL_NAME = "ibm-granite/granite-3.3-2b-instruct"
# Credentials must not be committed with the notebook: take the key from the
# INFERENCE_API_KEY environment variable, or prompt for it interactively.
API_KEY = os.environ.get("INFERENCE_API_KEY") or getpass.getpass(
    "Inference server API key: "
)

from llama_index.llms.openai_like import OpenAILike

# LLM client that talks to the server's `/v1` endpoint.
llm = OpenAILike(
  model=MODEL_NAME,
  api_key=API_KEY,
  api_base= f"{INFERENCE_SERVER_URL}/v1",
  # NOTE(review): 1234 looks like a placeholder and differs from the 4096 assigned to
  # Settings.context_window later in the notebook — confirm the intended value.
  context_window=1234,
  is_chat_model=True,  # supports chat completions
  is_function_calling_model=True # supports tools/functions in the api
)


In [None]:
from llama_index.core import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter

# Embedding model with library defaults
# (alternative: HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")).
embed_model = HuggingFaceEmbedding()

# Apply the global LlamaIndex settings in a fixed order: LLM first, then the
# embedding model, the sentence splitter, and the size limits.
for setting_name, setting_value in (
    ("llm", llm),
    ("embed_model", embed_model),
    ("node_parser", SentenceSplitter(chunk_size=512, chunk_overlap=20)),
    ("num_output", 512),
    ("context_window", 4096),
):
    setattr(Settings, setting_name, setting_value)

In [None]:
from llama_index.core import VectorStoreIndex
# Build a vector index from the loaded `documents`.
index = VectorStoreIndex.from_documents(documents)

In [None]:
# Query engine with default options over the index; the evaluation cells below reuse it.
query_engine = index.as_query_engine()

In [None]:
# Smoke-test the basic pipeline with one question and print the answer text.
experience_question = "What are steps to take when finding projects to build your experience?"
response = query_engine.query(experience_question)
print(str(response))

## Evaluation setup using TruLens

In [None]:
# Load the evaluation questions, one per line, from `eval_questions.txt`.
eval_questions = []
with open('eval_questions.txt', 'r', encoding='utf-8') as file:
    for line in file:
        # Strip surrounding whitespace, including the trailing newline.
        item = line.strip()
        if not item:
            # A blank line would otherwise become an empty question.
            continue
        print(item)
        eval_questions.append(item)

In [None]:
# You can try your own question:
new_question = "What is the right AI job for me?"
# Guard the append so that re-running this cell does not add the question twice.
if new_question not in eval_questions:
    eval_questions.append(new_question)

In [None]:
# Show the final list of evaluation questions.
print(eval_questions)

In [None]:
from trulens_eval import Tru
# TruLens handle; later cells use it for records, the leaderboard and the dashboard.
tru = Tru()

# NOTE(review): presumably discards previously logged TruLens data — confirm before
# running against a database you want to keep.
tru.reset_database()

In [None]:
from trulens.providers.openai import OpenAI
from trulens_eval.feedback.provider.endpoint.openai import OpenAIClient
from trulens_eval.utils.pyschema import Class
import openai as oai

# Define the client class and client kwargs.
# Reuse API_KEY / INFERENCE_SERVER_URL from the LLM configuration cell so the
# credential and endpoint are defined in exactly one place.
client_cls = Class.of_class(oai.OpenAI)
client_kwargs = {
    "api_key": API_KEY,
    "base_url": f"{INFERENCE_SERVER_URL}/v1"
}

# Initialize the OpenAIClient with the custom base URL
client = OpenAIClient(client_cls=client_cls, client_kwargs=client_kwargs)

# Feedback provider that scores with the same model the RAG pipeline uses.
provider = OpenAI(model_engine=MODEL_NAME,
                  client=client,
)

1. Answer Relevance

In [None]:
from trulens_eval import Feedback

# Answer relevance: scored from the app's input and output.
answer_relevance = Feedback(provider.relevance_with_cot_reasons, name="Answer Relevance")
f_qa_relevance = answer_relevance.on_input_output()

2. Context Relevance

In [None]:
# from trulens_eval import TruLlama
from trulens.apps.llamaindex import TruLlama

# Selector for the text of each source node; the context-based feedbacks below use it.
source_nodes_selector = TruLlama.select_source_nodes()
context_selection = source_nodes_selector.node.text

In [None]:
import numpy as np

# Context relevance: score each selected context chunk against the question,
# then average the per-chunk scores.
# `context_relevance` replaces the deprecated legacy alias `qs_relevance`.
f_qs_relevance = (
    Feedback(provider.context_relevance,
             name="Context Relevance")
    .on_input()
    .on(context_selection)
    .aggregate(np.mean)
)

3. Groundedness

In [None]:
from trulens.core import TruSession

# Session object used below to store and fetch the ground-truth dataset.
session = TruSession()
# NOTE(review): a second reset after `tru.reset_database()` earlier in the notebook;
# presumably targets the same store — confirm.
session.reset_database()

In [None]:
import pandas as pd

# Candidate passages for the single example query, each with a hand-assigned expected score.
expected_chunks = [
    dict(
        text="AI is the simulation of human intelligence processes by machines, especially computer systems.",
        title="AI is not a bubble :(",
        expected_score=0.9,
    ),
    dict(
        text="AI is the evil overlod that's going to rule over all human beings.",
        title="AI should be feared",
        expected_score=0.4,
    ),
    dict(
        text="AI is the future of humanity.",
        title="AI is the future",
        expected_score=0.5,
    ),
]

# Column-oriented ground truth: one query with its id, expected answer and passages.
data = {
    "query": ["what is AI?"],
    "query_id": ["1"],
    "expected_response": ["Artificial Intelligence"],
    "expected_chunks": [expected_chunks],
}

# One-row DataFrame built from the columns above.
df = pd.DataFrame(data)

In [None]:
# Store the ground-truth DataFrame in the session under the name "test_dataset_ir".
session.add_ground_truth_to_dataset(
    dataset_name="test_dataset_ir",
    ground_truth_df=df,
    dataset_metadata={"domain": "Random IR dataset"},
)

In [None]:
# Read the "test_dataset_ir" ground truth back from the session.
ground_truth_df = session.get_ground_truth("test_dataset_ir")

In [None]:
from trulens_eval.feedback import GroundTruthAgreement

# Ground-truth agreement helper built from the stored ground truth and the LLM provider.
grounded = GroundTruthAgreement(ground_truth_df,provider=provider)

In [None]:
# Groundedness: is the answer supported by the retrieved context?
# The previous version called GroundTruthAgreement.agreement_measure with the context
# text in the prompt position, which compares against the ground-truth dataset rather
# than checking the answer against its sources. Use the provider's groundedness
# measure instead.
f_groundedness = (
    Feedback(provider.groundedness_measure_with_cot_reasons,
             name="Groundedness"
            )
    # Collect all selected context chunks into one source argument, so no
    # separate aggregation step is needed.
    .on(context_selection.collect())
    .on_output()
)

In [None]:
from trulens.apps.llamaindex import TruLlama

# The three feedback functions defined above, applied to the baseline engine.
baseline_feedbacks = [f_qa_relevance, f_qs_relevance, f_groundedness]

# Wrap the basic query engine in a TruLens recorder registered as "App_1".
tru_recorder = TruLlama(
    query_engine,
    app_id="App_1",
    feedbacks=baseline_feedbacks,
)

In [None]:
# Run every evaluation question through the basic query engine inside the recorder
# context. `response` ends up holding the answer to the last question.
with tru_recorder as recording:
    for question in eval_questions:
        response = query_engine.query(question)

In [None]:
# Fetch the logged records and feedback.
# NOTE(review): an empty `app_ids` list presumably means "all apps" — confirm.
records, feedback = tru.get_records_and_feedback(app_ids=[])

In [None]:
# Preview the first few logged records.
records.head()

In [None]:
# Start the TruLens dashboard for browsing the results recorded above.
# launches on http://localhost:8501/
tru.run_dashboard()

## Advanced RAG pipeline

### 1. Sentence Window retrieval

In [None]:
# NOTE(review): `query_engine` is the basic engine created earlier with
# `index.as_query_engine()`; no sentence-window engine is built in the cells shown
# here, so this query goes through the baseline pipeline.
window_response = query_engine.query(
    "how do I get started on a personal project in AI?"
)
print(str(window_response))

In [None]:
# Reset the TruLens database before recording this experiment.
tru.reset_database()

from trulens.apps.llamaindex import TruLlama

# Same three feedback functions, recorded under the sentence-window app id.
window_feedbacks = [f_qa_relevance, f_qs_relevance, f_groundedness]
tru_recorder = TruLlama(
    query_engine,
    app_id="1-Sentence Window retrieval",
    feedbacks=window_feedbacks,
)

In [None]:
# Run each evaluation question through `query_engine`, entering the recorder
# context once per question.
for question in eval_questions:
    with tru_recorder as recording:
        query_engine.query(question)

In [None]:
# Display the leaderboard for the recorded runs.
# NOTE(review): an empty `app_ids` list presumably means "all apps" — confirm.
tru.get_leaderboard(app_ids=[])

In [None]:
# Start the TruLens dashboard for this experiment's results.
# launches on http://localhost:8501/
tru.run_dashboard()

In [None]:
# import os
# os.environ["TENSORBOARD_PROXY_URL"]= os.environ["NB_PREFIX"]+"/proxy/6006/"

In [None]:
# %load_ext tensorboard

### 2. Auto-merging retrieval

In [None]:
from llama_index.core import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter

# Re-apply the same global configuration used for the basic RAG section.
# Embedding model with library defaults
# (alternative: HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")).
embed_model = HuggingFaceEmbedding()
sentence_splitter = SentenceSplitter(chunk_size=512, chunk_overlap=20)

# Models first, then the splitter, then the size limits.
Settings.llm = llm
Settings.embed_model = embed_model
Settings.node_parser = sentence_splitter
Settings.num_output = 512
Settings.context_window = 4096

In [None]:
from llama_index.core.node_parser import HierarchicalNodeParser
from llama_index.core.node_parser import get_leaf_nodes, get_root_nodes
from llama_index.core import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import StorageContext, load_index_from_storage

# Directory where the auto-merging index is persisted between runs.
# (The original assignment ended with a comma, which created an unused 1-tuple.)
save_dir = "./merging_index"

# Parse the documents with three chunk sizes and keep the leaf nodes for indexing.
chunk_sizes = [2048, 512, 128]
node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)
nodes = node_parser.get_nodes_from_documents(documents)
leaf_nodes = get_leaf_nodes(nodes)


# Global settings for this section: explicit embedding model and the hierarchical parser.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
# embed_model = HuggingFaceEmbedding()
Settings.llm = llm
Settings.embed_model = embed_model
Settings.node_parser = node_parser
Settings.num_output = 512
Settings.context_window = 4096


# The docstore receives every parsed node; only the leaf nodes go into the vector index.
storage_context = StorageContext.from_defaults()
storage_context.docstore.add_documents(nodes)

if not os.path.exists(save_dir):
    # First run: build the index from the leaf nodes and persist it.
    automerging_index = VectorStoreIndex(
        leaf_nodes,
        storage_context=storage_context,
    )
    automerging_index.storage_context.persist(persist_dir=save_dir)
else:
    # Later runs: reuse the persisted index. Delete `save_dir` to force a rebuild.
    automerging_index = load_index_from_storage(
        StorageContext.from_defaults(persist_dir=save_dir),
    )

In [None]:
from llama_index.core.retrievers import AutoMergingRetriever
from llama_index.core.postprocessor import MetadataReplacementPostProcessor
from llama_index.core.query_engine import RetrieverQueryEngine

# Post-processor keyed on the "window" metadata entry.
# NOTE(review): the nodes here come from HierarchicalNodeParser; confirm they carry
# "window" metadata, otherwise this post-processor may not be needed.
postproc = MetadataReplacementPostProcessor(target_metadata_key="window")

# Retrieve the top 12 nodes from the index, then let the auto-merging retriever
# work on them using the index's storage context.
base_retriever = automerging_index.as_retriever(similarity_top_k=12)
retriever = AutoMergingRetriever(
    base_retriever,
    automerging_index.storage_context,
    verbose=True,
)

# Query engine that combines the retriever with the post-processor.
auto_merging_engine = RetrieverQueryEngine.from_args(
    retriever,
    node_postprocessors=[postproc],
)


In [None]:
# Try the auto-merging engine on one question and print the answer text.
portfolio_question = "How do I build a portfolio of AI projects?"
auto_merging_response = auto_merging_engine.query(portfolio_question)
print(str(auto_merging_response))

In [None]:
# Reset the TruLens database before recording this experiment.
tru.reset_database()

from trulens.apps.llamaindex import TruLlama

# Same three feedback functions, recorded under the auto-merging app id.
auto_merging_feedbacks = [f_qa_relevance, f_qs_relevance, f_groundedness]
tru_recorder = TruLlama(
    auto_merging_engine,
    app_id="2-Auto-merging retrieval",
    feedbacks=auto_merging_feedbacks,
)

In [None]:
# Run each evaluation question through the auto-merging engine, entering the
# recorder context once per question.
for question in eval_questions:
    with tru_recorder as recording:
        auto_merging_engine.query(question)

In [None]:
# Display the leaderboard for the recorded runs.
# NOTE(review): an empty `app_ids` list presumably means "all apps" — confirm.
tru.get_leaderboard(app_ids=[])

In [None]:
# Start the TruLens dashboard for this experiment's results.
# launches on http://localhost:8501/
tru.run_dashboard()