## REBEL: RErank BEyond reLevance
This notebook walks you through setting up and running the REBEL method for RAG reranking.

In [None]:
# pip install llama_index from our fork, once the PR is merged this will just be a normal llama_index install
# NOTE, restart the session after installing then skip this cell
!git clone https://github.com/bvarjavand/llama_index.git
!cd llama_index && pip install --quiet -e .
!cd llama_index/llama-index-core && pip install --quiet -e .

In [2]:
import os
from getpass import getpass

# Never paste the key into the notebook: take it from the environment when it is
# already exported, otherwise prompt for it without echoing it into the output.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
if not OPENAI_API_KEY:
    OPENAI_API_KEY = getpass("OpenAI API key: ")
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# Optional: a different hosted endpoint. Export OPENAI_API_BASE before starting the
# kernel, or replace the default "" below. An empty value means "use the default".
OPENAI_API_BASE = os.environ.get("OPENAI_API_BASE", "")
if OPENAI_API_BASE:
    os.environ["OPENAI_API_BASE"] = OPENAI_API_BASE


In [4]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.postprocessor import LLMRerank
from llama_index.llms.openai import OpenAI
from IPython.display import Markdown, display
from llama_index.core import Settings
import nest_asyncio
# Patch the event loop so nested asyncio use works inside the notebook kernel.
nest_asyncio.apply()

# Assemble the LLM arguments once; the custom endpoint is only passed when configured.
llm_kwargs = {"temperature": 0, "api_key": OPENAI_API_KEY, "model": "gpt-4o"}
if OPENAI_API_BASE:
    llm_kwargs["api_base"] = OPENAI_API_BASE

# Global defaults used by llama_index for the rest of the notebook.
Settings.llm = OpenAI(**llm_kwargs)
Settings.chunk_size = 512

## Build RAG index

In [5]:
from pathlib import Path
import requests
from llama_index.embeddings.openai import OpenAIEmbedding

# Save documents from Wikipedia with these title(s):
wiki_titles = [
    "Vincent van Gogh",
]

# Directory the plain-text articles are written to; created once, up front.
data_path = Path("data_wiki")
data_path.mkdir(parents=True, exist_ok=True)

# Pull each article as plain text through the MediaWiki API.
for title in wiki_titles:
    http_response = requests.get(
        "https://en.wikipedia.org/w/api.php",
        params={
            "action": "query",
            "format": "json",
            "titles": title,
            "prop": "extracts",
            "explaintext": True,
        },
        # Identify the client; Wikimedia's User-Agent policy warns that generic
        # library agents may be rejected.
        headers={"User-Agent": "REBEL-rerank-example-notebook/1.0 (python-requests)"},
        # Without a timeout a stalled connection would hang the cell indefinitely.
        timeout=30,
    )
    # Fail here on an HTTP error instead of on a confusing JSON/KeyError below.
    http_response.raise_for_status()
    response = http_response.json()

    page = next(iter(response["query"]["pages"].values()))
    if "extract" not in page:
        raise ValueError(f"Wikipedia returned no text for title {title!r}")
    wiki_text = page["extract"]

    # Explicit UTF-8: article text contains non-ASCII characters and the platform
    # default encoding is not guaranteed to handle them.
    with open(data_path / f"{title}.txt", "w", encoding="utf-8") as fp:
        fp.write(wiki_text)

# Embedding model arguments; the custom endpoint is only passed when configured.
embedding_kwargs = {"model": "text-embedding-3-large", "api_key": OPENAI_API_KEY}
if OPENAI_API_BASE:
    embedding_kwargs["api_base"] = OPENAI_API_BASE
embed_model = OpenAIEmbedding(**embedding_kwargs)

# Read every file in the download directory into llama_index documents ...
documents = SimpleDirectoryReader("./data_wiki/").load_data()
# ... and build the vector index over them with the embedding model above.
index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)

## Define query string, retrieve nodes, and rerank them

In [7]:
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core import QueryBundle
from llama_index.core.postprocessor import REBELRerank

### You can define any query string you want here ###
query_str = "Which date did Paul Gauguin arrive in Arles?"
query_bundle = QueryBundle(query_str)

# Retriever over the vector index, asking for 50 candidates per query.
retriever = VectorIndexRetriever(index=index, similarity_top_k=50)
retrieved_nodes = retriever.retrieve(query_bundle)

# LLM used by the reranker; the custom endpoint is only passed when configured.
rerank_llm_kwargs = {"model": "gpt-4o"}
if OPENAI_API_BASE:
    rerank_llm_kwargs["api_base"] = OPENAI_API_BASE

# NOTE: `reranked` holds the reranker object itself; its output is `reranked_nodes`.
reranked = REBELRerank(llm=OpenAI(**rerank_llm_kwargs), top_n=2)
reranked_nodes = reranked.postprocess_nodes(retrieved_nodes, query_bundle)

In [8]:
import pandas as pd
import torch
from IPython.display import display, HTML

def pretty_print(df):
    """Render a DataFrame as an HTML table in the notebook output.

    Args:
        df: the pandas DataFrame to show.

    Returns:
        Whatever ``display`` returns for the rendered HTML object.
    """
    # Lift the column-width limit for this call only: with the pandas default,
    # long cell values (the node text) are cut off with "..." in the table.
    with pd.option_context("display.max_colwidth", None):
        table_html = df.to_html()
    # Turn the escaped two-character "\n" sequences in the generated HTML into
    # real line breaks so multi-line text stays readable.
    return display(HTML(table_html.replace("\\n", "<br>")))


def visualize_nodes(nodes) -> None:
    """Show the score and text of each node as a table.

    Args:
        nodes: iterable of objects exposing ``.score`` and ``.node.get_text()``.
    """
    rows = [
        {"Score": scored.score, "Text": scored.node.get_text()}
        for scored in nodes
    ]
    table = pd.DataFrame(rows)
    pretty_print(table)

### Top 2 nodes from initial retrieval

In [None]:
# Show the first two nodes exactly as the vector retriever returned them (before reranking).
visualize_nodes(retrieved_nodes[:2])

### Top 2 nodes from reranking

In [None]:
# Show the first two nodes returned by the reranker, for comparison with the retrieval order.
visualize_nodes(reranked_nodes[:2])