# Rerankers

This is a short notebook showing how to use Answer.AI's `rerankers` library.

In [3]:
%pip install "rerankers[transformers]" torch

Collecting rerankers[transformers]
  Obtaining dependency information for rerankers[transformers] from https://files.pythonhosted.org/packages/24/77/a0ea5b36221c89fbb7be9bf28075a57e829d89a72cbee0e69945b4524a7b/rerankers-0.5.3-py3-none-any.whl.metadata
  Downloading rerankers-0.5.3-py3-none-any.whl.metadata (27 kB)
Downloading rerankers-0.5.3-py3-none-any.whl (37 kB)
Installing collected packages: rerankers
Successfully installed rerankers-0.5.3
Note: you may need to restart the kernel to use updated packages.


In [4]:
import time

import torch
from rerankers import Reranker

from utils import view

  from .autonotebook import tqdm as notebook_tqdm


## Initialising the Reranker

In [5]:
# Pick the best available accelerator instead of assuming Apple Silicon:
# MPS is tried first (so behaviour is unchanged on a Mac), then CUDA, and the
# CPU is the fallback so the notebook still runs everywhere.
if torch.backends.mps.is_available():
  device = 'mps'
elif torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'

# Cross-encoder reranker backed by the mixedbread large reranking model.
ranker = Reranker(
  model_name='mixedbread-ai/mxbai-rerank-large-v1',
  model_type='cross-encoder',
  device=device
)

Loading TransformerRanker model mixedbread-ai/mxbai-rerank-large-v1
No dtype set
Using dtype torch.float16
Loaded model mixedbread-ai/mxbai-rerank-large-v1
Using device mps.
Using dtype torch.float16.


In [6]:
%%time
sorted_rows = ranker.rank(
  query="I love you",
  docs=["I hate you", "I really like you", "You're not too bad"],
  doc_ids=[0,1,2]
)

CPU times: user 282 ms, sys: 184 ms, total: 466 ms
Wall time: 4.36 s


In [8]:
# Pretty-print the ranked results for the toy query; each Result carries the
# document, its score and its rank.
view(sorted_rows)

[1;35mRankedResults[0m[1m([0m
    [33mresults[0m=[1m[[0m
        [1;35mResult[0m[1m([0m[33mdocument[0m=[1;35mDocument[0m[1m([0m[33mtext[0m=[32m'I really like you'[0m, [33mdoc_id[0m=[1;36m1[0m, [33mmetadata[0m=[1m{[0m[1m}[0m[1m)[0m, [33mscore[0m=[1;36m-1.5400390625[0m, [33mrank[0m=[1;36m1[0m[1m)[0m,
        [1;35mResult[0m[1m([0m[33mdocument[0m=[1;35mDocument[0m[1m([0m[33mtext[0m=[32m"You[0m[32m're not too bad"[0m, [33mdoc_id[0m=[1;36m2[0m, [33mmetadata[0m=[1m{[0m[1m}[0m[1m)[0m, [33mscore[0m=[1;36m-2.8828125[0m, [33mrank[0m=[1;36m2[0m[1m)[0m,
        [1;35mResult[0m[1m([0m[33mdocument[0m=[1;35mDocument[0m[1m([0m[33mtext[0m=[32m'I hate you'[0m, [33mdoc_id[0m=[1;36m0[0m, [33mmetadata[0m=[1m{[0m[1m}[0m[1m)[0m, [33mscore[0m=[1;36m-4.30859375[0m, [33mrank[0m=[1;36m3[0m[1m)[0m
    [1m][0m,
    [33mquery[0m=[32m'I love you'[0m,
    [33mhas_scores[0m=[3;92mTrue[0m
[1m

## Reranking an Olympics dataset

In [9]:
import duckdb

# DuckDB file that holds the `olympics` table queried in the following cells.
OLYMPICS_DB = "olympics.duckdb"
con = duckdb.connect(OLYMPICS_DB)

In [12]:
# Build a ten-row preview of the stored passages, then display it.
preview = con.query("FROM olympics SELECT index, text").limit(10)
view(preview)

┌───────┬──────────────────────────────────────────────────────────────────────────────────────────────────────────
────┐
│ index │                                                     text                                                 
│
│ int64 │                                                   varchar                                                
│
├───────┼──────────────────────────────────────────────────────────────────────────────────────────────────────────
────┤
│     [1;36m0[0m │ The [1;36m2024[0m Olympics opened in Paris in spectacular style with thousands of athletes sailing along the River
…  │
│     [1;36m1[0m │ Swapping a stadium for a waterway for the first time to open the [32m"greatest show on Earth"[0m, the near 
four-h…  │
│     [1;36m2[0m │ Blue, white and red fireworks had raised the Tricolore above Austerlitz Bridge before [1;36m6[0m,[1;36m800[0m athletes from
…  │
│     [1;36m3[0m │ There were surprise performances through the ceremony, inclu

In [13]:
# `Search` comes from the project-local `search` module and is constructed
# around the open DuckDB connection.
# NOTE(review): its internals are not visible here; it is only used through
# `vector_search` in this notebook.
from search import Search
s = Search(con)

In [14]:
question = "What things went wrong?"
# Ask the vector search for up to ten matches, then materialise them as a
# list of tuples for the reranker.
hits = s.vector_search(question, limit=10)
rows = hits.fetchall()

In [15]:
# Inspect the raw vector-search hits before reranking. Each row appears to be
# (text, index, score) — TODO confirm the meaning of the third field.
view(rows)

[1m[[0m
    [1m([0m
        [32m'Given the miserable weather after what had been a sunny week in Paris until now, it seemed fitting that [0m
[32mthe storyline at the start of the ceremony was about the arrival of the Olympic flame in Paris not going according [0m
[32mto plan.'[0m,
        [1;36m15[0m,
        [1;36m0.5197256803512573[0m
    [1m)[0m,
    [1m([0m[32m'A lot of the time it was brilliantly frenetic and occasionally emotional. '[0m, [1;36m14[0m, [1;36m0.4570101201534271[0m[1m)[0m,
    [1m([0m
        [32m"At times it was bizarre - one moment Lady Gaga surrounded by pink and black feathers was singing in [0m
[32mFrench, the next Bangladesh's athletes were being introduced on their boat. "[0m,
        [1;36m13[0m,
        [1;36m0.45612600445747375[0m
    [1m)[0m,
    [1m([0m
        [32m"The torchbearer did not get the memo about it not being in the Stade de France, and then Zinedine Zidane's[0m
[32mmetro train broke down while he was

In [16]:
%%time
sorted_rows = ranker.rank(
  query=question,
  docs=[r[0] for r in rows],
  doc_ids=[r[1] for r in rows]
)

CPU times: user 175 ms, sys: 84.1 ms, total: 259 ms
Wall time: 786 ms


In [17]:
# Display the reranked passages for `question`, ordered by rank.
view(sorted_rows)

[1;35mRankedResults[0m[1m([0m
    [33mresults[0m=[1m[[0m
        [1;35mResult[0m[1m([0m
            [33mdocument[0m=[1;35mDocument[0m[1m([0m
                [33mtext[0m=[32m'The day had started with major disruption when the French train network was hit by arson [0m
[32mattacks and heavy rain in the evening put paid to the original plan by artistic director Thomas Jolly to use the [0m
[32mParisian sun to "make the water sparkle". '[0m,
                [33mdoc_id[0m=[1;36m4[0m,
                [33mmetadata[0m=[1m{[0m[1m}[0m
            [1m)[0m,
            [33mscore[0m=[1;36m0[0m[1;36m.0321044921875[0m,
            [33mrank[0m=[1;36m1[0m
        [1m)[0m,
        [1;35mResult[0m[1m([0m
            [33mdocument[0m=[1;35mDocument[0m[1m([0m
                [33mtext[0m=[32m"The[0m[32m torchbearer did not get the memo about it not being in the Stade de France, and then [0m
[32mZinedine Zidane's metro train broke down while he 