In [73]:
import os
import openai
import warnings
# Silence all Python warnings for the rest of the session.
warnings.filterwarnings('ignore')

from dotenv import load_dotenv
# Load environment variables before reading the API key
# (presumably from a local .env file -- confirm).
load_dotenv()
# Raises KeyError if OPENAI_API_KEY is not set in the environment.
openai.api_key = os.environ['OPENAI_API_KEY']

import sys
# Make the parent directory importable (presumably where utils_Metrics lives).
sys.path.append("..")
from utils_Metrics import get_prebuilt_trulens_recorder

In [74]:
from llama_index import SimpleDirectoryReader

# Load the source PDF. Chunking is not done here; it happens later in the node parser.

documents=SimpleDirectoryReader(
    input_files=["../data/Machine Learning Engineering with Python-2023.pdf"]
).load_data()


In [75]:
# Sanity check of what the reader returned: container type, item count,
# and the type and contents of the first item.
print(type(documents), "\n")
print(len(documents), "\n")
print(type(documents[0]))
print(documents[0])

<class 'list'> 

463 

<class 'llama_index.schema.Document'>
Doc ID: 2649656e-14b5-49ad-8cc3-5da6a3a7d3cc
Text:


## Auto Merging Retrieval setup

In [76]:
from llama_index import Document

# Collapse all loaded documents into a single Document, with a blank line
# between the text of consecutive items.
document = Document(
    text="\n\n".join(page.text for page in documents)
)

In [77]:
from llama_index.node_parser import HierarchicalNodeParser

# Create the hierarchical node parser with three chunk-size levels
# (2048 -> 512 -> 128), passed explicitly here.
node_parser = HierarchicalNodeParser.from_defaults(
    chunk_sizes=[2048, 512, 128]
)

In [78]:
# Parse the merged document into nodes; both leaf nodes and their parents
# are looked up from this list in the cells below.
nodes = node_parser.get_nodes_from_documents([document])

In [79]:
from llama_index.node_parser import get_leaf_nodes

# Extract the leaf nodes and print the text of one sample (index 30 is arbitrary).
leaf_nodes = get_leaf_nodes(nodes)
print(leaf_nodes[30].text)

programming • 152
Functional programming • 154
Packaging your code  ��������������������������������������������������������������������������������������������������������  157
Why package? • 157
Selecting use cases for packaging • 158
Designing your package • 159
Building your package  ����������������������������������������������������������������������������������������������������  164
Managing your environment with Makefiles • 166
Getting all poetic with Poetry • 170
Testing, logging,


In [80]:
# Map node id -> node so a leaf's parent can be looked up directly.
nodes_by_id = {node.node_id: node for node in nodes}

# Print the parent of the same sample leaf (index 30) to compare the two texts.
parent_node = nodes_by_id[leaf_nodes[30].parent_node.node_id]
print(parent_node.text)

Table of Contents xii
Writing good Python  ��������������������������������������������������������������������������������������������������������  142
Recapping the basics • 143
Tips and tricks • 145
Adhering to standards • 149
Writing good PySpark • 151
Choosing a style  ���������������������������������������������������������������������������������������������������������������  151
Object-oriented programming • 152
Functional programming • 154
Packaging your code  ��������������������������������������������������������������������������������������������������������  157
Why package? • 157
Selecting use cases for packaging • 158
Designing your package • 159
Building your package  ����������������������������������������������������������������������������������������������������  164
Managing your environment with Makefiles • 166
Getting all poetic with Poetry • 170
Testing, logging, securing, and error handling  �����������������������������������������������������������������

### Building the index

In [81]:
from llama_index.llms import OpenAI

# LLM placed in the service context below: gpt-3.5-turbo at temperature 0.1.
llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)

In [82]:
from llama_index import ServiceContext
from llama_index.embeddings import HuggingFaceEmbedding


# Configure the embedding model (HuggingFace BAAI/bge-small-en-v1.5).
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Bundle the LLM, embedding model and hierarchical node parser into one
# service context, which is passed to the index below.
auto_merging_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
    node_parser=node_parser,
)

In [83]:
from llama_index import VectorStoreIndex, StorageContext

# Register every parsed node in the docstore (the full `nodes` list, not just
# the leaves).
storage_context = StorageContext.from_defaults()
storage_context.docstore.add_documents(nodes)

# Only the leaf nodes are passed to the vector index itself.
automerging_index = VectorStoreIndex(
    leaf_nodes, storage_context=storage_context, service_context=auto_merging_context
)

# Persist to ./merging_index. This cell has no existence check, so it rebuilds
# and re-persists the index every time it runs.
automerging_index.storage_context.persist(persist_dir="./merging_index")

In [84]:
# Optional: reuse a previously persisted index.
# If the index directory does not exist, build the index and persist it;
# otherwise load the existing index from disk.

import os
from llama_index import VectorStoreIndex, StorageContext, load_index_from_storage
from llama_index import load_index_from_storage

# Load-or-build: reuse the persisted index when ./merging_index is present,
# otherwise build it from the leaf nodes and persist it there.
if os.path.exists("./merging_index"):
    automerging_index = load_index_from_storage(
        StorageContext.from_defaults(persist_dir="./merging_index"),
        service_context=auto_merging_context,
    )
else:
    # The docstore receives the full node list; the vector index only the leaves.
    storage_context = StorageContext.from_defaults()
    storage_context.docstore.add_documents(nodes)

    automerging_index = VectorStoreIndex(
        leaf_nodes,
        storage_context=storage_context,
        service_context=auto_merging_context,
    )
    automerging_index.storage_context.persist(persist_dir="./merging_index")


### Defining the retriever and running the query engine

In [85]:
from llama_index.indices.postprocessor import SentenceTransformerRerank
from llama_index.retrievers import AutoMergingRetriever
from llama_index.query_engine import RetrieverQueryEngine

# Base retriever: fetch the 12 most similar nodes from the index.
automerging_retriever = automerging_index.as_retriever(
    similarity_top_k=12
)

# Wrap the base retriever in an AutoMergingRetriever, giving it the index's
# storage context. verbose=True prints each merge as it happens.
retriever = AutoMergingRetriever(
    automerging_retriever,
    automerging_index.storage_context,
    verbose=True
)

# Rerank the retrieved candidates and keep the top 6.
rerank = SentenceTransformerRerank(top_n=6, model="BAAI/bge-reranker-base")

# Build the engine on the AutoMergingRetriever wrapper. Passing the base
# retriever here (as this cell previously did) bypasses auto-merging and
# leaves `retriever` unused.
auto_merging_engine = RetrieverQueryEngine.from_args(
    retriever, node_postprocessors=[rerank]
)

In [86]:
# Run a sample question through the engine built in the previous cell.
auto_merging_response = auto_merging_engine.query(
    "How to publish in ECR ?"
)

In [87]:
from llama_index.response.notebook_utils import display_response

# Render the response in the notebook.
display_response(auto_merging_response)

**`Final Response:`** To publish in ECR, you need to navigate to the AWS Management Console and click on "Create Cluster" in the ECS section. Fill out the form with details about networking, infrastructure, monitoring, and tags for the resources you are about to create. Provide container details such as the URI for the image you want to use, which should be the URI for the image pushed to the ECR repository. After that, deploy the Docker image to the container registry using the command "docker push <YOUR_AWS_ID>.dkr.ecr.eu-west-1.amazonaws.com/basic-ml-microservice:latest". If successful, the Docker image will be pushed to the ECR repository.

## Putting it all Together

In [88]:
import os

from llama_index import (
    ServiceContext,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.node_parser import HierarchicalNodeParser
from llama_index.node_parser import get_leaf_nodes
from llama_index import StorageContext, load_index_from_storage
from llama_index.retrievers import AutoMergingRetriever
from llama_index.indices.postprocessor import SentenceTransformerRerank
from llama_index.query_engine import RetrieverQueryEngine


def build_automerging_index(
    documents,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="merging_index",
    chunk_sizes=None,
):
    """Build an auto-merging index, or load it when ``save_dir`` already exists.

    Args:
        documents: Documents to parse into hierarchical nodes. Only used when
            the index has to be built.
        llm: LLM placed in the ServiceContext.
        embed_model: Embedding model (instance or spec string) placed in the
            ServiceContext.
        save_dir: Directory the index is persisted to and loaded from.
        chunk_sizes: Chunk sizes handed to HierarchicalNodeParser; defaults to
            ``[2048, 512, 128]``. Only used when the index has to be built.

    Returns:
        The newly built or loaded index.

    Note:
        The existence of ``save_dir`` is the only cache check. When it exists,
        the persisted index is loaded as-is and ``documents`` / ``chunk_sizes``
        are ignored; use a different ``save_dir`` (or delete the directory) to
        force a rebuild with new settings.
    """
    merging_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
    )

    # Load path: return early so no parsing or docstore work is done for an
    # index that is read from disk anyway.
    if os.path.exists(save_dir):
        return load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir),
            service_context=merging_context,
        )

    # Build path: parse the hierarchy, register all nodes in the docstore,
    # and build the vector index from the leaf nodes only.
    chunk_sizes = chunk_sizes or [2048, 512, 128]
    node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)
    nodes = node_parser.get_nodes_from_documents(documents)
    leaf_nodes = get_leaf_nodes(nodes)

    storage_context = StorageContext.from_defaults()
    storage_context.docstore.add_documents(nodes)

    automerging_index = VectorStoreIndex(
        leaf_nodes, storage_context=storage_context, service_context=merging_context
    )
    automerging_index.storage_context.persist(persist_dir=save_dir)
    return automerging_index


def get_automerging_query_engine(
    automerging_index,
    similarity_top_k=12,
    rerank_top_n=6,
    rerank_model="BAAI/bge-reranker-base",
    verbose=True,
):
    """Create a query engine that retrieves with auto-merging and then reranks.

    Args:
        automerging_index: Index to retrieve from; its ``storage_context`` is
            handed to the AutoMergingRetriever.
        similarity_top_k: Number of nodes the base retriever fetches.
        rerank_top_n: Number of nodes kept by the reranker.
        rerank_model: Model name passed to SentenceTransformerRerank.
        verbose: Forwarded to AutoMergingRetriever; the default ``True`` keeps
            the previous behaviour of printing each merge.

    Returns:
        A RetrieverQueryEngine built on the AutoMergingRetriever with the
        reranker as its only node postprocessor.
    """
    base_retriever = automerging_index.as_retriever(similarity_top_k=similarity_top_k)
    retriever = AutoMergingRetriever(
        base_retriever, automerging_index.storage_context, verbose=verbose
    )
    rerank = SentenceTransformerRerank(top_n=rerank_top_n, model=rerank_model)
    # The wrapper (not the base retriever) must be passed here, otherwise
    # auto-merging is skipped.
    auto_merging_engine = RetrieverQueryEngine.from_args(
        retriever, node_postprocessors=[rerank]
    )
    return auto_merging_engine

In [89]:
from llama_index.llms import OpenAI

# save_dir points at ./merging_index, the directory persisted earlier in this
# notebook; when it exists, build_automerging_index loads it instead of
# rebuilding from [document].
index = build_automerging_index(
    [document],
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.1),
    save_dir="./merging_index",
)


In [90]:
# similarity_top_k=6 with rerank_top_n left at its default of 6: at most six
# nodes are retrieved, so the reranker can reorder them but not trim the list.
query_engine = get_automerging_query_engine(index, similarity_top_k=6)

## TruLens Evaluation

In [91]:
from trulens_eval import Tru

# Reset the TruLens database before evaluating.
# NOTE(review): presumably this discards previously recorded runs -- confirm
# before running against a database you want to keep.
Tru().reset_database()

### Two layers

In [92]:
# Two-level hierarchy (2048 and 512), stored under merging_index_0.
# If merging_index_0 already exists on disk it is loaded and chunk_sizes has
# no effect. embed_model is the instance created earlier in the notebook.
auto_merging_index_0 = build_automerging_index(
    documents,
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.1),
    embed_model=embed_model,
    save_dir="merging_index_0",
    chunk_sizes=[2048,512],
)

In [93]:
# Query engine over the two-level index: retrieve 12 nodes, keep 6 after reranking.
auto_merging_engine_0 = get_automerging_query_engine(
    auto_merging_index_0,
    similarity_top_k=12,
    rerank_top_n=6,
)

In [94]:
from utils_Metrics import get_prebuilt_trulens_recorder

# Recorder for the two-level engine; its runs are logged under app_id 'app_0'.
tru_recorder = get_prebuilt_trulens_recorder(
    auto_merging_engine_0,
    app_id ='app_0'
)

✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input statement will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input source will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .


In [95]:
# Read one evaluation question per line. Surrounding whitespace (including the
# trailing newline) is stripped, and blank lines are skipped so that no empty
# question is sent to the query engine.
with open('../eval_questions.txt', 'r') as file:
    eval_questions = [line.strip() for line in file if line.strip()]

In [96]:
def run_evals(eval_questions, tru_recorder, query_engine):
    """Send every evaluation question to the engine inside the recorder.

    Args:
        eval_questions: Iterable of question strings.
        tru_recorder: Context manager entered once per question, around the query.
        query_engine: Object exposing ``query(question)``.

    Returns:
        None. The query responses are not kept.
    """
    for prompt in eval_questions:
        # A fresh recording context is opened and closed for each question.
        with tru_recorder:
            query_engine.query(prompt)

In [97]:
# Run every evaluation question through the two-level engine under the app_0 recorder.
run_evals(eval_questions, tru_recorder, auto_merging_engine_0)

A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x2b9335c4b50 is calling an instrumented method <function BaseQueryEngine.query at 0x000002B923922B90>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x2b9335b4850) using this function.
A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x2b9335c4b50 is calling an instrumented method <function RetrieverQueryEngine.retrieve at 0x000002B9284F9480>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x2b9335b4850) using this function.
A new object of type <class 'llama_index.retrievers.auto_merging_retriever.AutoMergingRetriever'> at 0x2b933403760 is calling an instrumented method <function BaseRetriever.retrieve at 0x000002B923921F30>. The path of this call may be incorrect.
Guessing path of new object is app.retriever based on other obje

> Merging 1 nodes into parent node.
> Parent node id: 4122a381-1fda-4043-be1d-9f40eae1f5f7.
> Parent node text: Packaging Up 180
13. The output is shown in the following screenshot:
Figure 4.11: Output of succ...

> Merging 1 nodes into parent node.
> Parent node id: 0ca0b4f8-de7a-4fd9-87ba-977c4bbc7353.
> Parent node text: Chapter 4 183
There are many more features of Bandit but this shows how easy it is to get started...

> Merging 1 nodes into parent node.
> Parent node id: 38e7acd5-3de4-4d0b-a689-f0829ccaebf7.
> Parent node text: Chapter 2 55
Code version control
If you are going to write code for real systems, you are almost...

> Merging 1 nodes into parent node.
> Parent node id: 96fd291b-ee1d-4de3-b62b-a2d6bd5351b8.
> Parent node text: Chapter 2 45
Discover
Before you start working to build any solution, it is vitally important tha...

> Merging 1 nodes into parent node.
> Parent node id: faeed448-7a1b-4798-b219-05fca506a6fc.
> Parent node text: 4
Packaging Up
In previous chapt

A new object of type <class 'llama_index.response_synthesizers.compact_and_refine.CompactAndRefine'> at 0x2b9335c4fd0 is calling an instrumented method <function CompactAndRefine.get_response at 0x000002B924894160>. The path of this call may be incorrect.
Guessing path of new object is app._response_synthesizer based on other object (0x2b9335b6650) using this function.
A new object of type <class 'llama_index.response_synthesizers.compact_and_refine.CompactAndRefine'> at 0x2b9335c4fd0 is calling an instrumented method <function Refine.get_response at 0x000002B924F12950>. The path of this call may be incorrect.
Guessing path of new object is app._response_synthesizer based on other object (0x2b9335b6650) using this function.
A new object of type <class 'llama_index.llm_predictor.base.LLMPredictor'> at 0x2b952fb5980 is calling an instrumented method <function LLMPredictor.predict at 0x000002B921965360>. The path of this call may be incorrect.
Guessing path of new object is app._response_

In [98]:
from trulens_eval import Tru

# Show the leaderboard. An empty app_ids list is passed; the saved output
# still lists app_0, so it appears to act as "no filter" -- confirm.
Tru().get_leaderboard(app_ids=[])

Unnamed: 0_level_0,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1
app_0,21.0,0.004028


In [99]:
# Optional: uncomment to run the TruLens dashboard.
# Tru().run_dashboard()

### Three Layers test


In [100]:
# Three-level hierarchy (2048, 512 and 128), stored under merging_index_1.
# If merging_index_1 already exists on disk it is loaded and chunk_sizes has
# no effect.
auto_merging_index_1= build_automerging_index(
    documents,
    llm=OpenAI(model="gpt-3.5-turbo",temperature=0.1),
    embed_model=embed_model,
    save_dir="merging_index_1",
    chunk_sizes=[2048,512,128],
)

In [101]:
# Query engine over the three-level index, with the same retrieval settings
# as the two-level engine (retrieve 12, keep 6 after reranking).
auto_merging_engine_1 = get_automerging_query_engine(
    auto_merging_index_1,
    similarity_top_k=12,
    rerank_top_n=6
)

In [102]:
# Rebinds tru_recorder to a recorder for the three-level engine; its runs are
# logged under app_id 'app_1'. The app_0 recorder is no longer reachable by name.
tru_recorder= get_prebuilt_trulens_recorder(
    auto_merging_engine_1,
    app_id='app_1'
)

✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input statement will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input source will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .


In [103]:
# Run every evaluation question through the three-level engine under the app_1 recorder.
run_evals(eval_questions,tru_recorder,auto_merging_engine_1)

A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x2b931bb3280 is calling an instrumented method <function BaseQueryEngine.query at 0x000002B923922B90>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x2b9335b4850) using this function.
A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x2b931bb3280 is calling an instrumented method <function RetrieverQueryEngine.retrieve at 0x000002B9284F9480>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x2b9335b4850) using this function.
A new object of type <class 'llama_index.retrievers.auto_merging_retriever.AutoMergingRetriever'> at 0x2b95c779d50 is calling an instrumented method <function BaseRetriever.retrieve at 0x000002B923921F30>. The path of this call may be incorrect.
Guessing path of new object is app.retriever based on other obje

> Merging 3 nodes into parent node.
> Parent node id: 32986d54-d800-45b2-9839-ca0ec8892260.
> Parent node text: Packaging Up 180
13. The output is shown in the following screenshot:
Figure 4.11: Output of succ...

> Merging 1 nodes into parent node.
> Parent node id: bcc74dfd-9596-46d8-bed5-5c9321f36e8b.
> Parent node text: Packaging Up 180
13. The output is shown in the following screenshot:
Figure 4.11: Output of succ...



A new object of type <class 'llama_index.response_synthesizers.compact_and_refine.CompactAndRefine'> at 0x2b931bb17e0 is calling an instrumented method <function CompactAndRefine.get_response at 0x000002B924894160>. The path of this call may be incorrect.
Guessing path of new object is app._response_synthesizer based on other object (0x2b9335b6650) using this function.
A new object of type <class 'llama_index.response_synthesizers.compact_and_refine.CompactAndRefine'> at 0x2b931bb17e0 is calling an instrumented method <function Refine.get_response at 0x000002B924F12950>. The path of this call may be incorrect.
Guessing path of new object is app._response_synthesizer based on other object (0x2b9335b6650) using this function.
A new object of type <class 'llama_index.llm_predictor.base.LLMPredictor'> at 0x2b960898f40 is calling an instrumented method <function LLMPredictor.predict at 0x000002B921965360>. The path of this call may be incorrect.
Guessing path of new object is app._response_

In [43]:
from trulens_eval import Tru

# Show the leaderboard again after the three-level run.
# NOTE(review): this cell's execution count (43) is out of sequence with the
# cells above and its saved output lists only app_1, so the output may come
# from an earlier session -- re-run after both evaluations to compare
# app_0 and app_1.
Tru().get_leaderboard(app_ids=[])

Unnamed: 0_level_0,Groundedness,Answer Relevance,Context Relevance,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
app_1,0.875,0.95,0.566667,38.5,0.002901
