#uncomment to install llama-index, make sure it is above 0.10.x
## TODO :
NVIDIARerank.get_available_models is currently hardcoded as dictionaly items

In [1]:
from llama_index.postprocessor.nvidia_rerank import NVIDIARerank

NVIDIARerank.get_available_models()

dict_items([('nvidia', ['nv-rerank-qa-mistral-4b:1']), ('nim', ['nv-rerank-qa-mistral-4b:1'])])

In [2]:
from llama_index.postprocessor.nvidia_rerank import NVIDIARerank
import os
# making the data directory to hold txt data
if os.path.exists('./data'):
    os.removedirs('./data')
os.makedirs("./data", exist_ok=True)


ranker = NVIDIARerank(top_n=5)

## establish the reranker class with the local nim, supplying a base_url

In [3]:
from llama_index.postprocessor.nvidia_rerank import NVIDIARerank
rerank=NVIDIARerank().mode(mode="nim", base_url="http://127.0.0.1:1976/v1")

## we can set top_n like this

In [4]:
## setting top_n since our examples dont have more than 5 
rerank.top_n=2
print(rerank.top_n)
rerank.max_batch_size = 32


2


## use wget to fetch toy data from GenerativeAIexamples 

I created a folder called **data** under my current directory inside docker container


In [5]:
!wget https://raw.githubusercontent.com/NVIDIA/GenerativeAIExamples/main/notebooks/toy_data/Sweden.txt -O ./data/Sweden.txt

--2024-04-24 13:07:02--  https://raw.githubusercontent.com/NVIDIA/GenerativeAIExamples/main/notebooks/toy_data/Sweden.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.108.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 123365 (120K) [text/plain]
Saving to: ‘./data/Sweden.txt’


2024-04-24 13:07:04 (311 KB/s) - ‘./data/Sweden.txt’ saved [123365/123365]



In [6]:
!wget https://raw.githubusercontent.com/NVIDIA/GenerativeAIExamples/main/notebooks/toy_data/Titanic_film.txt -O ./data/Titanic_film.txt

--2024-04-24 13:07:04--  https://raw.githubusercontent.com/NVIDIA/GenerativeAIExamples/main/notebooks/toy_data/Titanic_film.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 85063 (83K) [text/plain]
Saving to: ‘./data/Titanic_film.txt’


2024-04-24 13:07:05 (321 KB/s) - ‘./data/Titanic_film.txt’ saved [85063/85063]



In [7]:
!ls ./data

Sweden.txt  Titanic_film.txt


## load some toy data

In [8]:
from llama_index.core import Document
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter,SimpleFileNodeParser
# load documents
import os 

documents = SimpleDirectoryReader("./data").load_data()

# parse nodes
parser = SentenceSplitter(separator='\n', chunk_size=200, chunk_overlap=0)
nodes = parser.get_nodes_from_documents(documents)



In [9]:
nodes[0].get_content , nodes[1].get_content

(<bound method TextNode.get_content of TextNode(id_='71234f14-5991-4c34-bc13-7c8197705e0f', embedding=None, metadata={'file_path': '/workspace/APICatalog_reranker/llama_index/data/Sweden.txt', 'file_name': 'Sweden.txt', 'file_type': 'text/plain', 'file_size': 123365, 'creation_date': '2024-04-24', 'last_modified_date': '2024-04-24'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='86c8c2ed-787a-4428-bc68-50679e105d30', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': '/workspace/APICatalog_reranker/llama_index/data/Sweden.txt', 'file_name': 'Sweden.txt', 'file_type': 'text/plain', 'file_size': 123365, 'creation_date': '2024-04-24', 'last_modified_date': '2024-04-24'}, hash='d00e68df561e30e85c2d5

## validate with NVreranker works

In [10]:
from llama_index.postprocessor.nvidia_rerank import NVIDIARerank
query="tell me about Sweden's geography?"
rerank.postprocess_nodes(nodes, query_str=query )


[NodeWithScore(node=TextNode(id_='71234f14-5991-4c34-bc13-7c8197705e0f', embedding=None, metadata={'file_path': '/workspace/APICatalog_reranker/llama_index/data/Sweden.txt', 'file_name': 'Sweden.txt', 'file_type': 'text/plain', 'file_size': 123365, 'creation_date': '2024-04-24', 'last_modified_date': '2024-04-24'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='86c8c2ed-787a-4428-bc68-50679e105d30', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': '/workspace/APICatalog_reranker/llama_index/data/Sweden.txt', 'file_name': 'Sweden.txt', 'file_type': 'text/plain', 'file_size': 123365, 'creation_date': '2024-04-24', 'last_modified_date': '2024-04-24'}, hash='d00e68df561e30e85c2d50c9d30976353fc03e06

## switch to APICatalog , for that we need to set the NVIDIA_API_KEY

In [11]:
import getpass
import os

## API Key can be found by going to NVIDIA NGC -> AI Foundation Models -> (some model) -> Get API Code or similar.
## 10K free queries to any endpoint (which is a lot actually).

# del os.environ['NVIDIA_API_KEY']  ## delete key and reset
if os.environ.get("NVIDIA_API_KEY", "").startswith("nvapi-"):
    print("Valid NVIDIA_API_KEY already in environment. Delete to reset")
    nvapi_key=os.environ["NVIDIA_API_KEY"] 

else:
    nvapi_key = getpass.getpass("NVAPI Key (starts with nvapi-): ")
    assert nvapi_key.startswith("nvapi-"), f"{nvapi_key[:5]}... is not a valid key"
    os.environ["NVIDIA_API_KEY"] = nvapi_key


NVAPI Key (starts with nvapi-):  ······································································


In [12]:
from llama_index.postprocessor.nvidia_rerank import NVIDIARerank

my_key=os.environ["NVIDIA_API_KEY"]
rerank=NVIDIARerank().mode(mode="nvidia",api_key=my_key)

## query the reranker via API Catalog's reranker

In [13]:

rerank.postprocess_nodes(nodes, query_str=query )

[NodeWithScore(node=TextNode(id_='bb348d2e-f558-4c68-a0ee-11a78830942c', embedding=None, metadata={'file_path': '/workspace/APICatalog_reranker/llama_index/data/Sweden.txt', 'file_name': 'Sweden.txt', 'file_type': 'text/plain', 'file_size': 123365, 'creation_date': '2024-04-24', 'last_modified_date': '2024-04-24'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='86c8c2ed-787a-4428-bc68-50679e105d30', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': '/workspace/APICatalog_reranker/llama_index/data/Sweden.txt', 'file_name': 'Sweden.txt', 'file_type': 'text/plain', 'file_size': 123365, 'creation_date': '2024-04-24', 'last_modified_date': '2024-04-24'}, hash='d00e68df561e30e85c2d50c9d30976353fc03e06