In [1]:
from beir import util, LoggingHandler

import logging
import pathlib, os

#### Logging setup: print debug information to stdout.
#### Root logger is configured at INFO level with timestamped messages and
#### the LoggingHandler imported from beir as its only handler.
log_settings = {
    "format": '%(asctime)s - %(message)s',   # "<timestamp> - <message>"
    "datefmt": '%Y-%m-%d %H:%M:%S',
    "level": logging.INFO,
    "handlers": [LoggingHandler()],
}
logging.basicConfig(**log_settings)
#### /logging setup

  from tqdm.autonotebook import tqdm


In [2]:
import pathlib, os
from beir import util

# Name of the BEIR dataset to fetch; it selects which zip archive is downloaded.
dataset = "scifact"

# Archive URL = fixed template + dataset name.
url_template = "https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{}.zip"
url = url_template.format(dataset)

# Datasets are stored under "<current working directory>/datasets".
out_dir = os.path.join(os.getcwd(), "datasets")

# download_and_unzip returns the location that is printed below.
data_path = util.download_and_unzip(url, out_dir)
print("Dataset downloaded here: {}".format(data_path))

Dataset downloaded here: /mnt/c/D_drive/UCSD/Quarters/Q3/DSC253-Adv_txt_mining/Project/slm4search/test/datasets/scifact


In [3]:
from beir.datasets.data_loader import GenericDataLoader

# Relative dataset location, resolved against the current working directory.
# NOTE(review): this replaces the absolute `data_path` returned by the download
# cell; the two only agree while the notebook runs from the same directory.
data_path = "datasets/scifact"

# Load the "test" split ("train" or "dev" are the other options).
loader = GenericDataLoader(data_path)
corpus, queries, qrels = loader.load(split="test")

2024-05-28 20:56:07 - Loading Corpus...


100%|██████████| 5183/5183 [00:00<00:00, 22708.84it/s]

2024-05-28 20:56:08 - Loaded 5183 TEST Documents.
2024-05-28 20:56:08 - Doc Example: {'text': 'Alterations of the architecture of cerebral white matter in the developing human brain can affect cortical development and result in functional disabilities. A line scan diffusion-weighted magnetic resonance imaging (MRI) sequence with diffusion tensor analysis was applied to measure the apparent diffusion coefficient, to calculate relative anisotropy, and to delineate three-dimensional fiber architecture in cerebral white matter in preterm (n = 17) and full-term infants (n = 7). To assess effects of prematurity on cerebral white matter development, early gestation preterm infants (n = 10) were studied a second time at term. In the central white matter the mean apparent diffusion coefficient at 28 wk was high, 1.8 microm2/ms, and decreased toward term to 1.2 microm2/ms. In the posterior limb of the internal capsule, the mean apparent diffusion coefficients at both times were similar (1.2 vers




In [4]:
from beir.retrieval.evaluation import EvaluateRetrieval
from beir.retrieval import models
from beir.retrieval.search.dense import DenseRetrievalExactSearch as DRES

#### Dense Retrieval using SBERT (Sentence-BERT) ####
#### Provide any pretrained sentence-transformers model
#### Complete list - https://www.sbert.net/docs/pretrained_models.html
#### NOTE: "distilbert-base-uncased" is a plain transformer checkpoint, not a
#### sentence-transformers model; when loaded by name the library reports that
#### it creates a new model with mean pooling, so scores from the bare base
#### model are only a baseline.

model_name = "distilbert-base-uncased"

#### Directory where a fine-tuned checkpoint of `model_name` for `dataset` is expected.
model_save_path = os.path.join(os.getcwd(), "output", "{}-v2-{}-bm25-hard-negs".format(model_name, dataset))

#### Prefer the fine-tuned checkpoint when it exists; otherwise fall back to the
#### base model so the cell still runs on a fresh checkout instead of failing
#### on a missing directory. The fallback is logged so the evaluated model is
#### never ambiguous.
if os.path.isdir(model_save_path):
    model_source = model_save_path
else:
    logging.warning(
        "Fine-tuned model not found at %s; falling back to base model '%s'.",
        model_save_path,
        model_name,
    )
    model_source = model_name

model = DRES(models.SentenceBERT(model_source), batch_size=128)
retriever = EvaluateRetrieval(model, score_function="cos_sim")

#### Retrieve dense results (format of results is identical to qrels)
results = retriever.retrieve(corpus, queries)

2024-05-28 20:56:11 - PyTorch version 2.3.0 available.
2024-05-28 20:56:11 - JAX version 0.4.23 available.
2024-05-28 20:56:12 - Loading faiss with AVX2 support.
2024-05-28 20:56:12 - Successfully loaded faiss with AVX2 support.
2024-05-28 20:56:14 - Use pytorch device_name: cuda
2024-05-28 20:56:14 - Load pretrained SentenceTransformer: distilbert-base-uncased
2024-05-28 20:56:14 - No sentence-transformers model found with name distilbert-base-uncased. Creating a new one with mean pooling.




2024-05-28 20:56:20 - Encoding Queries...


Batches: 100%|██████████| 3/3 [00:03<00:00,  1.27s/it]


2024-05-28 20:56:24 - Sorting Corpus by document length (Longest first)...
2024-05-28 20:56:24 - Scoring Function: Cosine Similarity (cos_sim)
2024-05-28 20:56:24 - Encoding Batch 1/1...


Batches: 100%|██████████| 41/41 [00:47<00:00,  1.15s/it]


In [5]:
#### Score the retrieved results against the relevance judgements (qrels)
#### at every cutoff configured on the retriever (NDCG@k, MAP@k, ...).

cutoffs = retriever.k_values
logging.info("Retriever evaluation for k in: {}".format(cutoffs))

# evaluate() yields four values, unpacked here as NDCG, MAP, recall and precision.
ndcg, _map, recall, precision = retriever.evaluate(qrels, results, cutoffs)

2024-05-28 20:57:53 - Retriever evaluation for k in: [1, 3, 5, 10, 100, 1000]
2024-05-28 20:57:53 - For evaluation, we ignore identical query and document ids (default), please explicitly set ``ignore_identical_ids=False`` to ignore this.
2024-05-28 20:57:53 - 

2024-05-28 20:57:53 - NDCG@1: 0.0433
2024-05-28 20:57:53 - NDCG@3: 0.0723
2024-05-28 20:57:53 - NDCG@5: 0.0780
2024-05-28 20:57:53 - NDCG@10: 0.0959
2024-05-28 20:57:53 - NDCG@100: 0.1452
2024-05-28 20:57:53 - NDCG@1000: 0.1977
2024-05-28 20:57:53 - 

2024-05-28 20:57:53 - MAP@1: 0.0392
2024-05-28 20:57:53 - MAP@3: 0.0628
2024-05-28 20:57:53 - MAP@5: 0.0659
2024-05-28 20:57:53 - MAP@10: 0.0736
2024-05-28 20:57:53 - MAP@100: 0.0817
2024-05-28 20:57:53 - MAP@1000: 0.0835
2024-05-28 20:57:53 - 

2024-05-28 20:57:53 - Recall@1: 0.0392
2024-05-28 20:57:53 - Recall@3: 0.0919
2024-05-28 20:57:53 - Recall@5: 0.1061
2024-05-28 20:57:53 - Recall@10: 0.1589
2024-05-28 20:57:53 - Recall@100: 0.4054
2024-05-28 20:57:53 - Recall@1000: 0.8204

In [None]:
import random

#### Print top-k documents retrieved for one randomly chosen query ####
top_k = 10

# Pick a random (query_id, {doc_id: score}) pair from the retrieval results.
query_id, ranking_scores = random.choice(list(results.items()))

# Order that query's documents by score, best first.
scores_sorted = sorted(ranking_scores.items(), key=lambda item: item[1], reverse=True)
logging.info("Query : %s\n" % queries[query_id])

# Iterate over a slice instead of indexing with range(top_k): a query with
# fewer than top_k retrieved documents would otherwise raise IndexError.
for rank, (doc_id, score) in enumerate(scores_sorted[:top_k], start=1):
    doc = corpus[doc_id]
    # Format: Rank x: ID [Title] Body
    logging.info("Rank %d: %s [%s] - %s\n" % (rank, doc_id, doc.get("title"), doc.get("text")))