# Lesson 1: Advanced RAG Pipeline

In [1]:
import os

from dotenv import load_dotenv, find_dotenv
import openai

# Locate the nearest .env file and load its variables into the process environment.
load_dotenv(find_dotenv())
# Indexing os.environ (rather than .get) raises KeyError right here if the key is missing.
openai.api_key = os.environ["OPENAI_API_KEY"]

In [2]:
# Download the "Attention Is All You Need" paper (arXiv:1706.03762).
# Uncomment the line below if ./1706.03762.pdf is not already present.
# !wget https://export.arxiv.org/pdf/1706.03762.pdf

In [3]:
from llama_index import SimpleDirectoryReader, load_index_from_storage, StorageContext, ServiceContext, VectorStoreIndex, StorageContext
from llama_index.node_parser import HierarchicalNodeParser, get_leaf_nodes
from llama_index.retrievers import AutoMergingRetriever
from llama_index.indices.postprocessor import SentenceTransformerRerank
from llama_index.query_engine import RetrieverQueryEngine

In [4]:
# Read the PDF into a list of llama_index Document objects.
documents = SimpleDirectoryReader(
    input_files=["./1706.03762.pdf"],
).load_data()

# Quick sanity check: container type, number of documents, element type,
# and a text preview of one document (index 10).
print(f"{type(documents)} \n")
print(f"{len(documents)} \n")
print(type(documents[0]))
print(documents[10])

<class 'list'> 

15 

<class 'llama_index.schema.Document'>
Doc ID: 76632cf1-a6fc-48d3-9c76-e970251dc7c5
Text: [5]Kyunghyun Cho, Bart van Merrienboer, Caglar Gulcehre, Fethi
Bougares, Holger Schwenk, and Yoshua Bengio. Learning phrase
representations using rnn encoder-decoder for statistical machine
translation. CoRR , abs/1406.1078, 2014. [6]Francois Chollet.
Xception: Deep learning with depthwise separable convolutions. arXiv
preprint arXiv:1610.02357 ...


In [5]:
def build_automerging_index(
    documents,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="merging_index",
    chunk_sizes=None,
):
    """Build an auto-merging vector index, or load it from disk if already persisted.

    If ``save_dir`` already exists, the persisted index is loaded as-is and
    ``documents`` / ``chunk_sizes`` are ignored; delete the directory to force a
    rebuild after changing either of them. Otherwise the documents are parsed
    with ``HierarchicalNodeParser``, every node is stored in the docstore, only
    the leaf nodes are passed to ``VectorStoreIndex``, and the result is
    persisted to ``save_dir``.

    Args:
        documents: Documents to parse (only used when the index is built).
        llm: LLM handed to ``ServiceContext.from_defaults``.
        embed_model: Embedding model spec handed to ``ServiceContext.from_defaults``.
        save_dir: Directory the index is persisted to / loaded from.
        chunk_sizes: Chunk sizes for the hierarchical parser; a falsy value
            (``None`` or an empty list) selects ``[2048, 512, 128]``.

    Returns:
        The newly built ``VectorStoreIndex``, or whatever
        ``load_index_from_storage`` returns for the persisted directory.
    """
    # The service context is required on both paths (build and load).
    merging_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
    )

    if os.path.exists(save_dir):
        # Load path: nothing needs to be parsed, so return early instead of
        # chunking the documents and filling a docstore that is never used.
        return load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir),
            service_context=merging_context,
        )

    # Build path: parse the documents into nodes at each chunk size.
    chunk_sizes = chunk_sizes or [2048, 512, 128]
    node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)
    nodes = node_parser.get_nodes_from_documents(documents)
    leaf_nodes = get_leaf_nodes(nodes)

    # The docstore receives *all* nodes (not just leaves); the retriever built
    # on top of this index is handed the same storage context later.
    storage_context = StorageContext.from_defaults()
    storage_context.docstore.add_documents(nodes)

    automerging_index = VectorStoreIndex(
        leaf_nodes, storage_context=storage_context, service_context=merging_context
    )
    automerging_index.storage_context.persist(persist_dir=save_dir)
    return automerging_index

In [6]:
def get_automerging_query_engine(
    automerging_index,
    similarity_top_k=12,
    rerank_top_n=2,
    rerank_model="BAAI/bge-reranker-base",
    verbose=True,
):
    """Create a query engine that retrieves, auto-merges, and reranks nodes.

    The index's own retriever is wrapped in an ``AutoMergingRetriever`` that
    shares the index's storage context, and a ``SentenceTransformerRerank``
    post-processor is attached to the resulting ``RetrieverQueryEngine``.

    Args:
        automerging_index: Index providing ``as_retriever`` and ``storage_context``.
        similarity_top_k: Passed to ``as_retriever`` as ``similarity_top_k``.
        rerank_top_n: Passed to the reranker as ``top_n``.
        rerank_model: Model name passed to the reranker; the default keeps the
            previously hard-coded ``"BAAI/bge-reranker-base"``.
        verbose: Passed to ``AutoMergingRetriever``; the default ``True``
            keeps the previously hard-coded value.

    Returns:
        The engine produced by ``RetrieverQueryEngine.from_args``.
    """
    base_retriever = automerging_index.as_retriever(similarity_top_k=similarity_top_k)
    retriever = AutoMergingRetriever(
        base_retriever, automerging_index.storage_context, verbose=verbose
    )
    rerank = SentenceTransformerRerank(top_n=rerank_top_n, model=rerank_model)
    auto_merging_engine = RetrieverQueryEngine.from_args(
        retriever, node_postprocessors=[rerank]
    )
    return auto_merging_engine

In [7]:
from llama_index.llms import OpenAI

llm = OpenAI(temperature=0.1, model="gpt-3.5-turbo")

# Builds the index when ./merging_index does not exist yet; otherwise the
# persisted copy in that directory is loaded.
automerging_index = build_automerging_index(
    documents=documents,
    llm=llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="merging_index",
)

# Uses the function's default retrieval settings (similarity_top_k=12, rerank_top_n=2).
automerging_query_engine = get_automerging_query_engine(automerging_index)

In [8]:
# Run a sample question through the auto-merging engine.
auto_merging_response = automerging_query_engine.query("How a transformer is built?")
# print() applies str() to its argument, so no explicit conversion is needed.
print(auto_merging_response)

> Merging 3 nodes into parent node.
> Parent node id: f45cf1ea-7e86-4d21-8031-d85660a15ffb.
> Parent node text: 6 Results
6.1 Machine Translation
On the WMT 2014 English-to-German translation task, the big tra...

The Transformer model is built without using sequence-aligned RNNs or convolution. Instead, it relies entirely on self-attention to compute representations of its input and output. This allows for more parallelization and can achieve state-of-the-art translation quality. The model architecture of the Transformer is described in the following sections.
