In [None]:
# Install or upgrade (-U) torch, transformers and sentence-transformers.
# %pip (rather than !pip) is used so the packages go into the running kernel's environment.
# NOTE(review): no versions are pinned, so a re-run may pull different releases — consider pinning.
%pip install -U torch transformers sentence-transformers

In [71]:
from typing import List
from sentence_transformers import CrossEncoder
import torch
import math

In [72]:
# Report whether PyTorch can see a CUDA device; the model-loading cell falls back to the CPU when it cannot.
print(torch.cuda.is_available())

True


# Testing bge-reranker-v2-m3

In [73]:
# Cross-encoder checkpoint used for reranking (568M parameters):
# https://huggingface.co/BAAI/bge-reranker-v2-m3
# The weights are downloaded automatically on the first run and cached afterwards.
model_name = "BAAI/bge-reranker-v2-m3"
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
reranker = CrossEncoder(
    model_name,
    device="cuda" if torch.cuda.is_available() else "cpu",
)

In [74]:
def rerank(query: str, docs: list[str], top_m: int = 5) -> list[tuple[str, float]]:
    """Score passages against a query with the cross-encoder and keep the best ones.

    Args:
        query: The search query every passage is scored against.
        docs: Candidate passages. Any iterable of strings is accepted; it is
            materialised once, so generators and other one-shot iterables work.
        top_m: Maximum number of results to return. Zero or a negative value
            returns an empty list; ``None`` is passed through to the slice and
            therefore returns every passage.

    Returns:
        A list of ``(passage, score)`` tuples sorted by descending score and
        truncated to ``top_m`` entries. Scores are returned exactly as
        ``reranker.predict`` produced them. An empty list is returned when
        there is nothing to rank.
    """
    # Materialise once: the passages are iterated twice below (pair building and zip).
    passages = list(docs)

    # Nothing to rank or nothing requested: return early instead of sending an
    # empty batch to the model, and avoid a negative top_m turning into a
    # "drop the last items" slice.
    if not passages or (top_m is not None and top_m <= 0):
        return []

    # The model expects a list of (query, passage) pairs.
    pairs = [(query, passage) for passage in passages]
    scores = reranker.predict(pairs)  # one relevance score per pair

    # Pair each passage with its score and order from most to least relevant.
    ranked = sorted(zip(passages, scores), key=lambda item: item[1], reverse=True)
    return ranked[:top_m]

In [75]:
# Example passages to rerank: a flat list of short one-sentence strings.
# Two entries mention Edison; the remaining ones cover unrelated topics and act as
# distractors for the light-bulb query used in the example call.
retrieved_docs = [
    "Thomas Edison invented the electric light bulb in 1879.",
    "Bananas are yellow and grow in tropical regions.",
    "Edison also founded General Electric.",
    "The Wright brothers invented the airplane.",
    "Albert Einstein developed the theory of relativity.",
    "The Great Wall of China is visible from space.",
    "Isaac Newton formulated the laws of motion and universal gravitation.",
    "The capital of France is Paris.",
    "The human body has 206 bones.",
    "The Pacific Ocean is the largest ocean on Earth.",
    "The Mona Lisa was painted by Leonardo da Vinci.",
    "Water boils at 100 degrees Celsius.",
    "Mount Everest is the highest mountain in the world.",
    "Shakespeare wrote many famous plays.",
    "The Amazon rainforest is the largest tropical rainforest.",
    "The speed of light is approximately 299,792 kilometers per second.",
    "The currency of Japan is the yen.",
    "The Eiffel Tower is located in Paris.",
    "The human brain is the most complex organ in the body.",
    "The Great Barrier Reef is the largest coral reef system.",
    "The first manned moon landing was in 1969.",
    "The Statue of Liberty was a gift from France to the United States.",
    "The Nile is the longest river in the world.",
    "The human heart pumps blood throughout the body.",
    "The Taj Mahal is located in India.",
    "The Sahara is the largest hot desert in the world.",
    "The first computer was invented in the 1940s.",
    "The human eye can distinguish about 10 million different colors.",
    "The Colosseum is an ancient amphitheater in Rome.",
    "The Great Depression began in 1929.",
    "The human skeleton provides structure and support to the body.",
    "The Leaning Tower of Pisa is famous for its tilt.",
    "The first successful airplane flight was in 1903.",
    "The human skin is the body's largest organ.",
    "The Golden Gate Bridge is located in San Francisco.",
    "The first telephone was invented by Alexander Graham Bell.",
]

In [76]:
# Keep the ten highest-scoring passages for a sample query;
# the bare name on the last line displays the result.
top_docs = rerank(
    query="Who invented the light bulb?",
    docs=retrieved_docs,
    top_m=10,
)
top_docs


[('Thomas Edison invented the electric light bulb in 1879.',
  np.float32(0.99463516)),
 ('Isaac Newton formulated the laws of motion and universal gravitation.',
  np.float32(0.004423858)),
 ('Edison also founded General Electric.', np.float32(0.0041850964)),
 ('Albert Einstein developed the theory of relativity.',
  np.float32(0.003697477)),
 ('The first telephone was invented by Alexander Graham Bell.',
  np.float32(0.0020467287)),
 ('The Mona Lisa was painted by Leonardo da Vinci.',
  np.float32(0.00086766673)),
 ('The Wright brothers invented the airplane.', np.float32(0.0002814206)),
 ('Mount Everest is the highest mountain in the world.',
  np.float32(8.0340506e-05)),
 ('The Eiffel Tower is located in Paris.', np.float32(6.339082e-05)),
 ('Shakespeare wrote many famous plays.', np.float32(4.156236e-05))]