### Setup

In [2]:
from dotenv import load_dotenv, find_dotenv
import os
import openai

_ = load_dotenv(find_dotenv())
os.environ["OPENAI_API_KEY"] = os.getenv('OPENAI_API_KEY')

In [3]:
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings

Settings.llm = OpenAI(model="gpt-3.5-turbo-1106", temperature=0.2)
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")

### Load Data

In [4]:
from datasets import load_dataset

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
import pandas as pd

In [6]:
xsum_dataset = load_dataset(
    "xsum", version="1.2.0"
)  

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


In [7]:
xsum_sample = xsum_dataset["train"].select(range(1000)).to_pandas()

# Combining 'document' and 'summary' columns
xsum_sample["combined"] = (
    "Document: " + xsum_sample.document.str.strip() + "; Summary: " + xsum_sample.summary.str.strip()
)
xsum_sample.head(2)

Unnamed: 0,document,summary,id,combined
0,"The full cost of damage in Newton Stewart, one...",Clean-up operations are continuing across the ...,35232142,Document: The full cost of damage in Newton St...
1,A fire alarm went off at the Holiday Inn in Ho...,Two tourist buses have been destroyed by fire ...,40143035,Document: A fire alarm went off at the Holiday...


In [8]:
xsum_sample["combined"] = (
    "Document: " + xsum_sample.document.str.strip() + "; Summary: " + xsum_sample.summary.str.strip()
)

In [9]:
!mkdir -p 'document/'
os.environ["TOKENIZERS_PARALLELISM"] = "false"
for i, document in enumerate(xsum_sample["combined"]):
    file_name = f'document/document_{i+1}.txt'  # Generate a unique filename for each document
    with open(file_name, 'w', encoding='utf-8') as file:
        file.write(document)  # Write each document to its own file

In [10]:
from llama_index.core import SimpleDirectoryReader

# load documents
loader = SimpleDirectoryReader(input_dir="./document/")
documents = loader.load_data()

In [11]:
print(type(documents), "\n")
print(len(documents), "\n")
print(type(documents[0]))
print(documents[0])

<class 'list'> 

1000 

<class 'llama_index.core.schema.Document'>
Doc ID: 65ac295f-76a3-43d3-b82a-a34736d95d90
Text: Document: The full cost of damage in Newton Stewart, one of the
areas worst affected, is still being assessed. Repair work is ongoing
in Hawick and many roads in Peeblesshire remain badly affected by
standing water. Trains on the west coast mainline face disruption due
to damage at the Lamington Viaduct. Many businesses and householders
were aff...


In [12]:
from llama_index.core.schema import MetadataMode

In [13]:
print(documents[0].get_content(metadata_mode=MetadataMode.ALL))

file_path: /Users/linghuang/Git/NLP/notebook/document/document_1.txt
file_name: document_1.txt
file_type: text/plain
file_size: 2470
creation_date: 2024-05-17
last_modified_date: 2024-05-17

Document: The full cost of damage in Newton Stewart, one of the areas worst affected, is still being assessed.
Repair work is ongoing in Hawick and many roads in Peeblesshire remain badly affected by standing water.
Trains on the west coast mainline face disruption due to damage at the Lamington Viaduct.
Many businesses and householders were affected by flooding in Newton Stewart after the River Cree overflowed into the town.
First Minister Nicola Sturgeon visited the area to inspect the damage.
The waters breached a retaining wall, flooding many commercial properties on Victoria Street - the main shopping thoroughfare.
Jeanette Tate, who owns the Cinnamon Cafe which was badly affected, said she could not fault the multi-agency response once the flood hit.
However, she said more preventative work c

In [14]:
print(documents[0].metadata)

{'file_path': '/Users/linghuang/Git/NLP/notebook/document/document_1.txt', 'file_name': 'document_1.txt', 'file_type': 'text/plain', 'file_size': 2470, 'creation_date': '2024-05-17', 'last_modified_date': '2024-05-17'}


In [15]:
text_template = "Content Metadata:\n{metadata_str}\n\nContent:\n{content}"

metadata_template = "{key}: {value},"
metadata_seperator= " "

for doc in documents:
    doc.text_template = text_template
    doc.metadata_template = metadata_template
    doc.metadata_seperator = metadata_seperator

In [16]:
print(documents[0].get_content(metadata_mode=MetadataMode.ALL))

Content Metadata:
file_path: /Users/linghuang/Git/NLP/notebook/document/document_1.txt, file_name: document_1.txt, file_type: text/plain, file_size: 2470, creation_date: 2024-05-17, last_modified_date: 2024-05-17,

Content:
Document: The full cost of damage in Newton Stewart, one of the areas worst affected, is still being assessed.
Repair work is ongoing in Hawick and many roads in Peeblesshire remain badly affected by standing water.
Trains on the west coast mainline face disruption due to damage at the Lamington Viaduct.
Many businesses and householders were affected by flooding in Newton Stewart after the River Cree overflowed into the town.
First Minister Nicola Sturgeon visited the area to inspect the damage.
The waters breached a retaining wall, flooding many commercial properties on Victoria Street - the main shopping thoroughfare.
Jeanette Tate, who owns the Cinnamon Cafe which was badly affected, said she could not fault the multi-agency response once the flood hit.
However, 

### Advanced Customization

In [17]:
documents[0].excluded_llm_metadata_keys = ["file_path"]
print(documents[0].get_content(metadata_mode=MetadataMode.LLM))

Content Metadata:
file_name: document_1.txt, file_type: text/plain, file_size: 2470, creation_date: 2024-05-17, last_modified_date: 2024-05-17,

Content:
Document: The full cost of damage in Newton Stewart, one of the areas worst affected, is still being assessed.
Repair work is ongoing in Hawick and many roads in Peeblesshire remain badly affected by standing water.
Trains on the west coast mainline face disruption due to damage at the Lamington Viaduct.
Many businesses and householders were affected by flooding in Newton Stewart after the River Cree overflowed into the town.
First Minister Nicola Sturgeon visited the area to inspect the damage.
The waters breached a retaining wall, flooding many commercial properties on Victoria Street - the main shopping thoroughfare.
Jeanette Tate, who owns the Cinnamon Cafe which was badly affected, said she could not fault the multi-agency response once the flood hit.
However, she said more preventative work could have been carried out to ensure 

https://docs.llamaindex.ai/en/stable/examples/node_postprocessor/MetadataReplacementDemo/

### Extract Nodes

In [18]:
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core.node_parser import SentenceSplitter

In [19]:
# create the sentence window node parser w/ default settings
node_parser = SentenceWindowNodeParser.from_defaults(
    window_size=3,
    window_metadata_key="window",
    original_text_metadata_key="original_text",
)

In [20]:
nodes = node_parser.get_nodes_from_documents(documents)

In [21]:
from llama_index.core import VectorStoreIndex

sentence_index = VectorStoreIndex(nodes)

### Query With MetadataReplacementPostProcessor

In [22]:
from llama_index.core.postprocessor import MetadataReplacementPostProcessor

In [23]:
query_engine = sentence_index.as_query_engine(
    similarity_top_k=2,
    # the target key defaults to `window` to match the node_parser's default
    node_postprocessors=[
        MetadataReplacementPostProcessor(target_metadata_key="window")
    ],
)

In [24]:
window_response = query_engine.query(
    "I'm looking for the information of Harry Potter. What could you suggest to me?"
)
print(window_response)

You may want to explore the Harry Potter series, particularly "Harry Potter and the Cursed Child," which has received five-star reviews from critics and is described as a "game-changing production." Additionally, the series has sold over 450 million copies since 1997 and has been adapted into eight films.


In [25]:
window_response.get_formatted_sources()

'> Source (Doc id: d7263704-7062-41d5-a864-442d5818b764): Ben Brantley in the New York Times writes: "Like the novels that preceded it, The Cursed Child is...\n\n> Source (Doc id: 2d983e05-6b98-4cfb-a965-7397c45f65e2): It tells the story of a band of rabbits in search of a new home after the destruction of their wa...'

In [26]:
window_response.metadata

{'d7263704-7062-41d5-a864-442d5818b764': {'window': 'Ben Brantley in the New York Times writes: "Like the novels that preceded it, The Cursed Child is stuffed with arcana-filled plots that defy diagrams and baldly wrought sentimental life lessons, along with anguished dives into the earnest, tortured solipsism of adolescence.\n "By rights, such a combination should try the patience of any grown-up.  But like Ms Rowling\'s books, the play vanquishes resistance."\n The Harry Potter books have sold more than 450 million copies since 1997 and been adapted into eight films.\n The script of Harry Potter and the Cursed Child is published this weekend.\n Follow us on Twitter @BBCNewsEnts, on Instagram at bbcnewsents, or email entertainment.news@bbc.co.uk. ; Summary: Harry Potter and the Cursed Child has won five-star reviews from critics, with one describing it as "a game-changing production".',
  'original_text': 'The Harry Potter books have sold more than 450 million copies since 1997 and be

In [27]:
window = window_response.source_nodes[0].node.metadata["window"]
sentence = window_response.source_nodes[0].node.metadata["original_text"]

print(f"Window: {window}")
print("------------------")
print(f"Original Sentence: {sentence}")

Window: Ben Brantley in the New York Times writes: "Like the novels that preceded it, The Cursed Child is stuffed with arcana-filled plots that defy diagrams and baldly wrought sentimental life lessons, along with anguished dives into the earnest, tortured solipsism of adolescence.
 "By rights, such a combination should try the patience of any grown-up.  But like Ms Rowling's books, the play vanquishes resistance."
 The Harry Potter books have sold more than 450 million copies since 1997 and been adapted into eight films.
 The script of Harry Potter and the Cursed Child is published this weekend.
 Follow us on Twitter @BBCNewsEnts, on Instagram at bbcnewsents, or email entertainment.news@bbc.co.uk. ; Summary: Harry Potter and the Cursed Child has won five-star reviews from critics, with one describing it as "a game-changing production".
------------------
Original Sentence: The Harry Potter books have sold more than 450 million copies since 1997 and been adapted into eight films.



In [28]:
for source_node in window_response.source_nodes:
    print(source_node.node.metadata["original_text"])
    print("--------")

The Harry Potter books have sold more than 450 million copies since 1997 and been adapted into eight films.

--------
"Before there was Harry Potter, there was Watership Down," said the BBC's drama commissioning editor Matthew Read.

--------


## Basic RAG

In [13]:
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex

# build VectorStoreIndex that takes care of chunking documents
# and encoding chunks to embeddings for future retrieval
index = VectorStoreIndex.from_documents(documents=documents)

# The QueryEngine class is equipped with the generator
# and facilitates the retrieval and generation steps
query_engine = index.as_query_engine()

# Use your Default RAG
response = query_engine.query( "I'm looking for the information of Harry Potter. What could you suggest to me?")

In [20]:
response.get_formatted_sources()

"> Source (Doc id: 32cbbe79-6445-4e21-a4d0-39e2953da25c): JK Rowling is going to make (another) fortune. The West End's ornate Palace Theatre, itself a lit...\n\n> Source (Doc id: 2b39b831-184b-4755-9ef5-f015b929ca23): Document: The play, written by Jack Thorne, is set 19 years after the seventh and final book in t..."

In [21]:
response.metadata

{'32cbbe79-6445-4e21-a4d0-39e2953da25c': {'file_path': '/Users/linghuang/Git/NLP/notebook/document/document_270.txt',
  'file_name': 'document_270.txt',
  'file_type': 'text/plain',
  'file_size': 5214,
  'creation_date': '2024-05-17',
  'last_modified_date': '2024-05-17'},
 '2b39b831-184b-4755-9ef5-f015b929ca23': {'file_path': '/Users/linghuang/Git/NLP/notebook/document/document_270.txt',
  'file_name': 'document_270.txt',
  'file_type': 'text/plain',
  'file_size': 5214,
  'creation_date': '2024-05-17',
  'last_modified_date': '2024-05-17'}}

In [22]:
response.response

'I would suggest looking into the play "Harry Potter and the Cursed Child," which is set 19 years after the seventh and final book in the series by JK Rowling. It has received positive reviews from critics and is presented in two parts, showcasing the stars of the wizarding saga as adults in their mid-30s as their own children head off to school. The play has been described as a "truly game-changing production" and offers a thrilling theatrical experience.'

In [23]:
response.source_nodes

[NodeWithScore(node=TextNode(id_='32cbbe79-6445-4e21-a4d0-39e2953da25c', embedding=None, metadata={'file_path': '/Users/linghuang/Git/NLP/notebook/document/document_270.txt', 'file_name': 'document_270.txt', 'file_type': 'text/plain', 'file_size': 5214, 'creation_date': '2024-05-17', 'last_modified_date': '2024-05-17'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='0d0bfb05-f5b3-47dd-a913-73260f885295', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': '/Users/linghuang/Git/NLP/notebook/document/document_270.txt', 'file_name': 'document_270.txt', 'file_type': 'text/plain', 'file_size': 5214, 'creation_date': '2024-05-17', 'last_modified_date': '2024-05-17'}, hash='510175da92c7481c404c1c32808be9

### Advanced RAG

#### RecursiveRetriever

In [95]:
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.schema import IndexNode

In [96]:
# build parent chunks via NodeParser
node_parser = SentenceSplitter(chunk_size=1024)
base_nodes = node_parser.get_nodes_from_documents(documents)

In [97]:
vector_index0 = VectorStoreIndex(base_nodes)
vector_query_engine0 = vector_index0.as_query_engine()

In [98]:
response = vector_query_engine0.query(
    "I'm looking for the information of Harry Potter. What could you suggest to me?"
)

In [99]:
response.response

'I would suggest looking into the play "Harry Potter and the Cursed Child" written by Jack Thorne, set 19 years after the seventh book in the series by JK Rowling. The play has received five-star reviews from critics and is described as a game-changing production. It is presented in two parts, showcasing the characters from the wizarding saga as adults in their mid-30s. The play has been praised for its thrilling stage craft, complex plot, strong performances, and stunning special effects. It offers a unique and original experience separate from both the books and the film adaptations.'

In [100]:
# define smaller child chunks
sub_chunk_sizes = [256, 512]
sub_node_parsers = [
    SentenceSplitter(chunk_size=c, chunk_overlap=20) for c in sub_chunk_sizes
]
all_nodes = []
for base_node in base_nodes:
    for n in sub_node_parsers:
        sub_nodes = n.get_nodes_from_documents([base_node])
        sub_inodes = [
            IndexNode.from_text_node(sn, base_node.node_id) for sn in sub_nodes
        ]
        all_nodes.extend(sub_inodes)
    # also add original node to node
    original_node = IndexNode.from_text_node(base_node, base_node.node_id)
    all_nodes.append(original_node)

In [101]:
# build RecursiveRetriever
recursive_retriever = RecursiveRetriever(
    "vector",
    retriever_dict={"vector": vector_retriever},
    node_dict=all_nodes_dict,
    verbose=True,
)

In [102]:
response_synthesizer = get_response_synthesizer(response_mode="compact")

query_engine = RetrieverQueryEngine.from_args(
    recursive_retriever, response_synthesizer=response_synthesizer
)

In [103]:
response = query_engine.query(
    "I'm looking for the information of Harry Potter. What could you suggest to me?"
)

[1;3;34mRetrieving with query id None: I'm looking for the information of Harry Potter. What could you suggest to me?
[0m[1;3;38;5;200mRetrieving text node: JK Rowling is going to make (another) fortune. The West End's ornate Palace Theatre, itself a little like Hogwarts, has a hit probably for years."
Variety describes it as "spellbinding", Matt Trueman writing that it is: "The Show That Lived Up to Expectations â€” and Then Some."
He says the relationship between Albus and Scorpius is "the friendship of two bullied boys bound together, and it's a beautiful, tender thing", adding: "The script by Jack Thorne recognizes that rejection breeds resentment, and outsiders stew into outcasts.
"No one's born a villain, nor sees themselves as such, and where the books gave us stock baddies, Cursed Child fleshes them out."
Chris Jones, in the Chicago Tribune, says that "heretical as this may sound", the play left him "quietly lamenting that the movies were ever made".
Ben Brantley in the New 

In [104]:
response.response

'I would suggest looking into the play "Harry Potter and the Cursed Child" written by Jack Thorne, set 19 years after the seventh book in the series by JK Rowling. The play has received positive reviews from critics and offers a new and original storyline featuring the characters as adults. It is presented in two parts and has been praised for its thrilling theatrical production and captivating storytelling.'

In [105]:
response.source_nodes[0].node.get_content()

'JK Rowling is going to make (another) fortune. The West End\'s ornate Palace Theatre, itself a little like Hogwarts, has a hit probably for years."\nVariety describes it as "spellbinding", Matt Trueman writing that it is: "The Show That Lived Up to Expectations â€” and Then Some."\nHe says the relationship between Albus and Scorpius is "the friendship of two bullied boys bound together, and it\'s a beautiful, tender thing", adding: "The script by Jack Thorne recognizes that rejection breeds resentment, and outsiders stew into outcasts.\n"No one\'s born a villain, nor sees themselves as such, and where the books gave us stock baddies, Cursed Child fleshes them out."\nChris Jones, in the Chicago Tribune, says that "heretical as this may sound", the play left him "quietly lamenting that the movies were ever made".\nBen Brantley in the New York Times writes: "Like the novels that preceded it, The Cursed Child is stuffed with arcana-filled plots that defy diagrams and baldly wrought sentim

In [106]:
# from llama_index.embeddings.openai import OpenAIEmbedding
# from llama_index.llms.openai import OpenAI
# from llama_index.core import Settings

# Settings.llm = OpenAI(model="gpt-3.5-turbo")
# Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")

In [107]:
# define a VectorStoreIndex with all of the nodes
# vector_index_chunk = VectorStoreIndex(all_nodes)
# vector_retriever = vector_index_chunk.as_retriever(similarity_top_k=2)

In [108]:
# create a vector store index for each folder
# from llama_index.core import VectorStoreIndex, StorageContext, load_index_from_storage
# try:
#     index = load_index_from_storage(StorageContext.from_defaults(persist_dir="document/index"))
# except:
#     index = VectorStoreIndex.from_documents(documents)
#     index.storage_context.persist(persist_dir="document/index")

In [109]:
# from llama_index.core.retrievers import RecursiveRetriever
# from llama_index.core.query_engine import RetrieverQueryEngine
# from llama_index.core import get_response_synthesizer

In [110]:
# build RecursiveRetriever
# recursive_retriever = RecursiveRetriever(
#     "vector",
#     retriever_dict={"vector": vector_retriever},
#     node_dict=all_nodes_dict,
#     verbose=True,
# )

In [111]:
# build RetrieverQueryEngine using recursive_retriever
# response_synthesizer = get_response_synthesizer(response_mode="compact")

# query_engine = RetrieverQueryEngine.from_args(
#     recursive_retriever, response_synthesizer=response_synthesizer
# )

In [112]:
# perform inference with advanced RAG (i.e. query engine)
# response = query_engine.query(
#     "I'm looking for the information of Harry Potter. What could you suggest to me?"
# )

### Custom Retrievers - Hybrid Search

In [85]:
from llama_index.core import Settings

nodes = Settings.node_parser.get_nodes_from_documents(documents)

In [86]:
from llama_index.core import StorageContext

# initialize storage context (by default it's in-memory)
storage_context = StorageContext.from_defaults()
storage_context.docstore.add_documents(nodes)

In [87]:
from llama_index.core import SimpleKeywordTableIndex, VectorStoreIndex

vector_index = VectorStoreIndex(nodes, storage_context=storage_context)
keyword_index = SimpleKeywordTableIndex(nodes, storage_context=storage_context)

In [88]:
# Define Custom Retriever
# import QueryBundle
from llama_index.core import QueryBundle

# import NodeWithScore
from llama_index.core.schema import NodeWithScore

# Retrievers
from llama_index.core.retrievers import (
    BaseRetriever,
    VectorIndexRetriever,
    KeywordTableSimpleRetriever,
)

from typing import List

In [89]:
class CustomRetriever(BaseRetriever):
    """Custom retriever that performs both semantic search and hybrid search."""

    def __init__(
        self,
        vector_retriever: VectorIndexRetriever,
        keyword_retriever: KeywordTableSimpleRetriever,
        mode: str = "AND",
    ) -> None:
        """Init params."""

        self._vector_retriever = vector_retriever
        self._keyword_retriever = keyword_retriever
        if mode not in ("AND", "OR"):
            raise ValueError("Invalid mode.")
        self._mode = mode
        super().__init__()

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Retrieve nodes given query."""

        vector_nodes = self._vector_retriever.retrieve(query_bundle)
        keyword_nodes = self._keyword_retriever.retrieve(query_bundle)

        vector_ids = {n.node.node_id for n in vector_nodes}
        keyword_ids = {n.node.node_id for n in keyword_nodes}

        combined_dict = {n.node.node_id: n for n in vector_nodes}
        combined_dict.update({n.node.node_id: n for n in keyword_nodes})

        if self._mode == "AND":
            retrieve_ids = vector_ids.intersection(keyword_ids)
        else:
            retrieve_ids = vector_ids.union(keyword_ids)

        retrieve_nodes = [combined_dict[rid] for rid in retrieve_ids]
        return retrieve_nodes

#### Plugin Retriever into Query Engine 

In [90]:
from llama_index.core import get_response_synthesizer
from llama_index.core.query_engine import RetrieverQueryEngine

# define custom retriever
vector_retriever = VectorIndexRetriever(index=vector_index, similarity_top_k=2)
keyword_retriever = KeywordTableSimpleRetriever(index=keyword_index)
custom_retriever = CustomRetriever(vector_retriever, keyword_retriever)

# define response synthesizer
response_synthesizer = get_response_synthesizer()

# assemble query engine
custom_query_engine = RetrieverQueryEngine(
    retriever=custom_retriever,
    response_synthesizer=response_synthesizer,
)

# vector query engine
vector_query_engine = RetrieverQueryEngine(
    retriever=vector_retriever,
    response_synthesizer=response_synthesizer,
)
# keyword query engine
keyword_query_engine = RetrieverQueryEngine(
    retriever=keyword_retriever,
    response_synthesizer=response_synthesizer,
)

In [91]:
response = custom_query_engine.query(
    "I'm looking for the information of Harry Potter. What could you suggest to me?"
)

In [93]:
response.response

'I would suggest looking into the play "Harry Potter and the Cursed Child", which has received positive reviews from critics. It delves into the relationships and complexities of characters like Albus and Scorpius, providing a fresh perspective on the Harry Potter universe.'

In [94]:
response.source_nodes

[NodeWithScore(node=TextNode(id_='542bc97f-5b05-4649-89c1-db27dae6e270', embedding=None, metadata={'file_path': '/Users/linghuang/Git/NLP/notebook/document/document_270.txt', 'file_name': 'document_270.txt', 'file_type': 'text/plain', 'file_size': 5214, 'creation_date': '2024-05-17', 'last_modified_date': '2024-05-17'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='d4d4a061-67a0-4e48-8538-cf53f2c0ca34', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': '/Users/linghuang/Git/NLP/notebook/document/document_270.txt', 'file_name': 'document_270.txt', 'file_type': 'text/plain', 'file_size': 5214, 'creation_date': '2024-05-17', 'last_modified_date': '2024-05-17'}, hash='510175da92c7481c404c1c32808be9

## Advanced techniques for Generation

In [68]:
import time
from llama_index.core import VectorStoreIndex
from llama_index.core.postprocessor import SentenceEmbeddingOptimizer

In [69]:
index = VectorStoreIndex.from_documents(documents=documents)

In [70]:
print("Without optimization")
start_time = time.time()
query_engine = index.as_query_engine()
response = query_engine.query( "I'm looking for the information of Harry Potter. What could you suggest to me?")
end_time = time.time()
print("Total time elapsed: {}".format(end_time - start_time))
print("Answer: {}".format(response))

Without optimization
Total time elapsed: 2.6994528770446777
Answer: I would suggest looking into the play "Harry Potter and the Cursed Child" written by Jack Thorne, set 19 years after the seventh book in the series by JK Rowling. The play has received five-star reviews from critics and is described as a game-changing production. It is presented in two parts, showcasing the characters as adults in their mid-30s with their own children heading off to school. The play has been praised for its thrilling stage craft, complex plot, strong performances, and magical imagery that will amaze fans of the Harry Potter series.


In [74]:
print("With optimization")
start_time = time.time()
query_engine = index.as_query_engine(
    node_postprocessors=[SentenceEmbeddingOptimizer(percentile_cutoff=0.7)]
)
response = query_engine.query( "I'm looking for the information of Harry Potter. What could you suggest to me?")
end_time = time.time()
print("Total time elapsed: {}".format(end_time - start_time))
print("Answer: {}".format(response))

With optimization
Total time elapsed: 3.065455198287964
Answer: You may want to explore the play "Harry Potter and the Cursed Child," which is set 19 years after the seventh book in the series by JK Rowling. It has received critical acclaim and features the adult characters from the wizarding saga as they send their own children off to school. The play is presented in two parts and has been described as a "truly game-changing production" with impressive performances and special effects.


In [80]:
print("Alternate optimization cutoff")
start_time = time.time()
query_engine = index.as_query_engine(
    node_postprocessors=[SentenceEmbeddingOptimizer(percentile_cutoff=0.1)]
)
response = query_engine.query( "I'm looking for the information of Harry Potter. What could you suggest to me?")
end_time = time.time()
print("Total time elapsed: {}".format(end_time - start_time))
print("Answer: {}".format(response))

Alternate optimization cutoff
Total time elapsed: 3.0654871463775635
Answer: You may find information about Harry Potter, his son Albus, his friend Scorpius Malfoy, and other characters in the play mentioned in the content. The play is praised for its strong performances, special effects, and moments of comedy, with critics highlighting the dynamics between the characters and the depth added to previously known characters from the Harry Potter series.


### Node Postprocessor

In [None]:
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.postprocessor.cohere_rerank import CohereRerank
from llama_index.core.data_structs import Node
from llama_index.core.schema import NodeWithScore

nodes = [
    NodeWithScore(node=Node(text="text1"), score=0.7),
    NodeWithScore(node=Node(text="text2"), score=0.8),
]

# similarity postprocessor: filter nodes below 0.75 similarity score
processor = SimilarityPostprocessor(similarity_cutoff=0.75)
filtered_nodes = processor.postprocess_nodes(nodes)

# cohere rerank: rerank nodes given query using trained model
reranker = CohereRerank(api_key="<COHERE_API_KEY>", top_n=2)
reranker.postprocess_nodes(nodes, query_str="<user_query>")

In [116]:
from llama_index.core.postprocessor import  LongLLMLinguaPostprocessor

ImportError: cannot import name 'LongLLMLinguaPostprocessor' from 'llama_index.core.postprocessor' (/Users/linghuang/miniconda3/envs/llama-index4/lib/python3.10/site-packages/llama_index/core/postprocessor/__init__.py)

In [114]:
from llama_index.core.postprocessor import LongLLMLinguaPostprocessor

ImportError: cannot import name 'LongLLMLinguaPostprocessor' from 'llama_index.core.postprocessor' (/Users/linghuang/miniconda3/envs/llama-index4/lib/python3.10/site-packages/llama_index/core/postprocessor/__init__.py)

In [81]:
### Define a Postprocessor object, here LongLLMLinguaPostprocessor
### Build QueryEngine that uses this Postprocessor on retrieved docs

# Define Postprocessor
node_postprocessor = LongLLMLinguaPostprocessor(
    instruction_str="Given the context, please answer the final question",
    target_token=300,
    rank_method="longllmlingua",
    additional_compress_kwargs={
        "condition_compare": True,
        "condition_in_question": "after",
        "context_budget": "+100",
        "reorder_context": "sort",  # enable document reorder
        "dynamic_context_compression_ratio": 0.4, # enable dynamic compression ratio
    },
)

NameError: name 'LongLLMLinguaPostprocessor' is not defined

In [None]:
from llama_index.core.postprocessor import LongContextReorder

postprocessor = LongContextReorder()

postprocessor.postprocess_nodes(nodes)

### Create Query Engine Tools

In [21]:
from llama_index.core.tools import QueryEngineTool

tool = QueryEngineTool.from_defaults(
    query_engine=index.as_query_engine(),
    name="Document Modules",
    description="Useful for answering questions about related BBC News."
)

In [22]:
# needed for notebooks
import nest_asyncio
nest_asyncio.apply()

from llama_index.core.query_engine import SubQuestionQueryEngine
from llama_index.core.response_synthesizers import get_response_synthesizer

query_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=[
        tool,
    ],
    # enable this for streaming
    # response_synthesizer=get_response_synthesizer(streaming=True),
    verbose=False
)

In [23]:
response = query_engine.query("I'm looking for the information of Harry Potter. What could you suggest to me?")
print(str(response))

You may want to explore the latest developments and critical reception of a recent play related to Harry Potter.


### Define Summary Index and Vector Index over Same Data

In [24]:
from llama_index.core import Settings
from llama_index.embeddings.openai import OpenAIEmbedding

# initialize settings (set chunk size)
Settings.chunk_size = 512
nodes = Settings.node_parser.get_nodes_from_documents(documents)

In [25]:
from llama_index.core import StorageContext

# initialize storage context (by default it's in-memory)
storage_context = StorageContext.from_defaults()
storage_context.docstore.add_documents(nodes)

In [26]:
from llama_index.core import SummaryIndex
from llama_index.core import VectorStoreIndex

summary_index = SummaryIndex(nodes, storage_context=storage_context)
vector_index = VectorStoreIndex(nodes, storage_context=storage_context)

### Define Query Engines and Set Metadata

In [27]:
# needed for notebooks
import nest_asyncio
nest_asyncio.apply()

list_query_engine = summary_index.as_query_engine(
    response_mode="tree_summarize",
    use_async=True,
)
vector_query_engine = vector_index.as_query_engine()

In [28]:
from llama_index.core.tools import QueryEngineTool


list_tool = QueryEngineTool.from_defaults(
    query_engine=list_query_engine,
    description=(
        "Useful for summarization questions related to Harry Potter"
    ),
)

vector_tool = QueryEngineTool.from_defaults(
    query_engine=vector_query_engine,
    description=(
        "Useful for summarization questions related to Harry Potter"
    ),
)

### Define Router Query Engine

#### PydanticSingleSelector

In [29]:
from llama_index.core.query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector, LLMMultiSelector

from llama_index.core.selectors import (
    PydanticMultiSelector,
    PydanticSingleSelector,
)


query_engine = RouterQueryEngine(
    selector=PydanticSingleSelector.from_defaults(),
    query_engine_tools=[
        list_tool,
        vector_tool,
    ],
)

In [30]:
response = query_engine.query("I'm looking for the information of Harry Potter. What could you suggest to me?")
print(str(response))

Retrying llama_index.llms.openai.base.OpenAI._achat in 0.26928458609039607 seconds as it raised RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo-1106 in organization org-LXJdh8mBCdaeYl79io2xzzqI on tokens per min (TPM): Limit 60000, Used 57149, Requested 17864. Please try again in 15.012s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}.
Retrying llama_index.llms.openai.base.OpenAI._achat in 0.32230253247243634 seconds as it raised RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo-1106 in organization org-LXJdh8mBCdaeYl79io2xzzqI on tokens per min (TPM): Limit 60000, Used 57039, Requested 18117. Please try again in 15.156s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}.
Retrying llama_index.llms.openai.base.OpenAI._achat in

RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo-1106 in organization org-LXJdh8mBCdaeYl79io2xzzqI on tokens per min (TPM): Limit 60000, Used 59571, Requested 18117. Please try again in 17.688s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}

In [20]:
query_engine = RouterQueryEngine(
    selector=LLMSingleSelector.from_defaults(),
    query_engine_tools=[
        list_tool,
        vector_tool,
    ],
)

In [21]:
response = query_engine.query("Harry Potter")
print(str(response))

  '|'.join(regex_opt_inner(list(group[1]), '')
  '|'.join(regex_opt_inner(list(group[1]), '')


RuntimeError: asyncio.run() cannot be called from a running event loop