# Train

In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]= "6"

In [8]:
max_seq_length = 512
model_name = "distilbert-base-uncased" 
dataset = "scifact"
dataset_path = f"../beir/datasets/{dataset}"

In [9]:

from sentence_transformers import losses, models, SentenceTransformer
from beir import util, LoggingHandler
from beir.datasets.data_loader import GenericDataLoader
from beir.retrieval.search.lexical import BM25Search as BM25
from beir.retrieval.evaluation import EvaluateRetrieval
from beir.retrieval.train import TrainRetriever
import pathlib, os, tqdm
import logging

#### Just some code to print debug information to stdout
logging.basicConfig(format='%(asctime)s - %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S',
                    level=logging.INFO,
                    handlers=[LoggingHandler()])
#### /print debug information to stdout

In [10]:
import pickle

with open(f"{dataset_path}/{dataset}_score_repLlama.pickle", 'rb') as f:
    results_dense = pickle.load(f)


# Evaluate-bm25

In [11]:
# Load BM25 scores
import pickle 

with open(f"{dataset_path}/{dataset}_bm25_scores.pickle", 'rb') as f:
    results_bm25 = pickle.load(f)

# Ensemble

In [12]:
def get_maxmin(results):
    max_score = -1
    min_score = 999999
    for q_id, q in results.items():
        for doc_id, score in q.items():
            max_score = max(score, max_score)
            min_score = min(score, min_score)

    return min_score, max_score

# Get range to normalize both
min_distilbert_score, max_distilbert_score = get_maxmin(results_dense)
min_bm25_score, max_bm25_score = get_maxmin(results_bm25)

min_distilbert_score, max_distilbert_score, min_bm25_score, max_bm25_score

(0.668408989906311, 0.9411094188690186, 0.5297587, 120.60852)

In [13]:
# Normalize
def normalize_results(results, min_score, max_score):
    for q_id, q in results.items():
        for doc_id, score in q.items():
            results[q_id][doc_id] = (score-min_score)/(max_score-min_score)

    return results

results = normalize_results(results_dense, min_distilbert_score, max_distilbert_score)
results_bm25 = normalize_results(results_bm25, min_bm25_score, max_bm25_score)
# results

In [14]:
# results_bm25

In [15]:
def ensemble_score(x,y):
    mu = 0.8
    return mu*x + (1-mu)*y

combined_result = {}

for q_id_1, q_1 in results.items():
        combined_result[q_id_1] = {}
        for doc_id_1, score_1 in q_1.items():
            
            score_2 = 0
            if results_bm25[q_id_1].get(doc_id_1,None)!=None:
                score_2 = results_bm25[q_id_1][doc_id_1]
                del results_bm25[q_id_1][doc_id_1] # So that same query-doc pair is not added to combined result twice
            
            combined_score = ensemble_score(score_1, score_2)
            combined_result[q_id_1][doc_id_1] = combined_score


# Now add remaining bm25 results in combined dict
for q_id_2, q_2 in results_bm25.items():
    for doc_id_2, score_2 in q_2.items():
         score_1 = 0
         combined_score = ensemble_score(score_1, score_2)
         combined_result[q_id_1][doc_id_1] = combined_score

In [16]:
from beir.retrieval.search.lexical import BM25Search as BM25
from beir.retrieval.evaluation import EvaluateRetrieval

## elasticsearch settings
hostname = "localhost" #localhost
index_name = dataset+'_1' # scifact
initialize = True # True - Delete existing index and re-index all documents from scratch 

model_bm25 = BM25(index_name=index_name, hostname=hostname, initialize=initialize)
retriever_bm25 = EvaluateRetrieval(model_bm25)

2024-06-02 22:58:07 - Activating Elasticsearch....
2024-06-02 22:58:07 - Elastic Search Credentials: {'hostname': 'localhost', 'index_name': 'scifact_1', 'keys': {'title': 'title', 'body': 'txt'}, 'timeout': 100, 'retry_on_timeout': True, 'maxsize': 24, 'number_of_shards': 'default', 'language': 'english'}
2024-06-02 22:58:07 - Deleting previous Elasticsearch-Index named - scifact_1
2024-06-02 22:58:07 - Unable to create Index in Elastic Search. Reason: ConnectionError(('Connection aborted.', BadStatusLine('ÿ\x00\x00\x00\x00\x00\x00\x00\x01\x7fe\x00tity\r\n'))) caused by: ProtocolError(('Connection aborted.', BadStatusLine('ÿ\x00\x00\x00\x00\x00\x00\x00\x01\x7fe\x00tity\r\n')))
2024-06-02 22:58:09 - Creating fresh Elasticsearch-Index named - scifact_1
2024-06-02 22:58:09 - Unable to create Index in Elastic Search. Reason: ConnectionError(('Connection aborted.', BadStatusLine('ÿ\x00\x00\x00\x00\x00\x00\x00\x01\x7f-\x00ength: 117\r\n'))) caused by: ProtocolError(('Connection aborted.', B

In [17]:

# Loading test set
corpus, queries, qrels = GenericDataLoader(dataset_path).load(split="test")

2024-06-02 22:58:55 - Loading Corpus...


100%|██████████| 5183/5183 [00:00<00:00, 138346.50it/s]

2024-06-02 22:58:55 - Loaded 5183 TEST Documents.
2024-06-02 22:58:55 - Doc Example: {'text': 'Alterations of the architecture of cerebral white matter in the developing human brain can affect cortical development and result in functional disabilities. A line scan diffusion-weighted magnetic resonance imaging (MRI) sequence with diffusion tensor analysis was applied to measure the apparent diffusion coefficient, to calculate relative anisotropy, and to delineate three-dimensional fiber architecture in cerebral white matter in preterm (n = 17) and full-term infants (n = 7). To assess effects of prematurity on cerebral white matter development, early gestation preterm infants (n = 10) were studied a second time at term. In the central white matter the mean apparent diffusion coefficient at 28 wk was high, 1.8 microm2/ms, and decreased toward term to 1.2 microm2/ms. In the posterior limb of the internal capsule, the mean apparent diffusion coefficients at both times were similar (1.2 vers




In [18]:
ndcg, _map, recall, precision = retriever_bm25.evaluate(qrels, combined_result, retriever_bm25.k_values)

2024-06-02 22:59:00 - For evaluation, we ignore identical query and document ids (default), please explicitly set ``ignore_identical_ids=False`` to ignore this.
2024-06-02 22:59:00 - 

2024-06-02 22:59:00 - NDCG@1: 0.6400
2024-06-02 22:59:00 - NDCG@3: 0.7214
2024-06-02 22:59:00 - NDCG@5: 0.7438
2024-06-02 22:59:00 - NDCG@10: 0.7647
2024-06-02 22:59:00 - NDCG@100: 0.7819
2024-06-02 22:59:00 - NDCG@1000: 0.7858
2024-06-02 22:59:00 - 

2024-06-02 22:59:00 - MAP@1: 0.6093
2024-06-02 22:59:00 - MAP@3: 0.6908
2024-06-02 22:59:00 - MAP@5: 0.7072
2024-06-02 22:59:00 - MAP@10: 0.7185
2024-06-02 22:59:00 - MAP@100: 0.7226
2024-06-02 22:59:00 - MAP@1000: 0.7227
2024-06-02 22:59:00 - 

2024-06-02 22:59:00 - Recall@1: 0.6093
2024-06-02 22:59:00 - Recall@3: 0.7797
2024-06-02 22:59:00 - Recall@5: 0.8359
2024-06-02 22:59:00 - Recall@10: 0.8937
2024-06-02 22:59:00 - Recall@100: 0.9700
2024-06-02 22:59:00 - Recall@1000: 1.0000
2024-06-02 22:59:00 - 

2024-06-02 22:59:00 - P@1: 0.6400
2024-06-02 22:59:00