In [1]:
import numpy as np
from kannolo import DensePlainHNSW

### Load Data

In [2]:
# Input locations for the MS MARCO DRAGON (dense) experiment.
# NumPy file holding the query array that is loaded further down.
query_path = "/data3/silvio/datasets_numpy/queries/dragon_queries.npy"
# Serialized kannolo index; the file name suggests efConstruction=200, M=32 and an
# inner-product metric -- TODO confirm against how the index was built.
index_path = "/data3/silvio/indexes/kannolo/kannolo_dragon_efc_200_m_32_metric_ip"

In [3]:
# Read both inputs from disk. The two loads are independent of each other.
# `queries` is indexed by row position when the queries are selected.
queries = np.load(query_path)
# `index` is the HNSW index that is searched later in the notebook.
index = DensePlainHNSW.load(index_path)

In [4]:
# Choose two queries by their row position in the loaded query array.
query_id_1, query_id_2 = 1500, 5000
# Pull out the corresponding rows; these are what gets passed to the index search.
query_1, query_2 = queries[query_id_1], queries[query_id_2]

### Search Queries

In [5]:
# Search-time settings, kept in one place so they are easy to tune:
# the result count `k` and the HNSW `efSearch` value.
k, efSearch = 10, 1000

In [6]:
# Search the index for both queries using the configured `k` and `efSearch`
# instead of repeating literal values, so that editing the parameter cell
# actually changes the search.
dists_1, ids_1 = index.search(query_1, k, efSearch)
dists_2, ids_2 = index.search(query_2, k, efSearch)

# Reshape the returned arrays to rows of `k` entries; row 0 is then the result
# list of the single query that was searched.
dists_1 = dists_1.reshape(-1, k)
ids_1 = ids_1.reshape(-1, k)
dists_2 = dists_2.reshape(-1, k)
ids_2 = ids_2.reshape(-1, k)

### Collect Results

In [7]:
import ir_datasets

In [8]:
# ir_datasets identifier of the collection to evaluate on; change it here only.
ir_dataset_string = "msmarco-passage/dev/small"
# Load the dataset through the identifier above (rather than a repeated literal)
# so the variable is the single source of truth.
dataset = ir_datasets.load(ir_dataset_string)

In [9]:
# Materialize the dataset's queries once and reuse the list for both lookups,
# instead of rebuilding the full list for every query.
# NOTE(review): this assumes the rows of the query array are in the same order
# as dataset.queries_iter() -- confirm against how the query file was produced.
dataset_queries = list(dataset.queries_iter())
query_passage_1 = dataset_queries[query_id_1].text
query_passage_2 = dataset_queries[query_id_2].text

In [10]:
# Sliceable view over the dataset's documents; each returned id is used below
# as an integer position into it.
documents_passages = dataset.docs_iter()[:]

# Row 0 of each id array holds the hits for the single query that was searched.
top_ids_1, top_ids_2 = ids_1[0], ids_2[0]

# Resolve every hit to the text of the passage at that position.
results_1 = [documents_passages[int(position)].text for position in top_ids_1]
results_2 = [documents_passages[int(position)].text for position in top_ids_2]

### Evaluation

In [11]:
import ir_measures
# Build the measure object from its string name; the evaluation output further
# down reports this measure under the label RR@10.
ir_measure = ir_measures.parse_measure("MRR@10")

In [12]:
# Map the positional query indices to the dataset's own query ids, which are
# what the relevance judgments are filtered by.
# The query list is materialized once and shared by both lookups instead of
# being rebuilt for each one.
dataset_queries = list(dataset.queries_iter())
real_query_id_1 = dataset_queries[query_id_1].query_id
real_query_id_2 = dataset_queries[query_id_2].query_id

In [13]:
# Convert the search output into ir_measures.ScoredDoc records, one per hit,
# keyed by the dataset's query id. Document ids are passed as strings.
# NOTE(review): ir_measures is expected to rank by descending score -- confirm
# that the values returned by index.search are "higher is better" for this
# inner-product index, otherwise the ranking seen by the metric is reversed.
results_for_metric_1 = [
    ir_measures.ScoredDoc(real_query_id_1, str(doc_id), score)
    for score, doc_id in zip(dists_1[0], ids_1[0])
]

results_for_metric_2 = [
    ir_measures.ScoredDoc(real_query_id_2, str(doc_id), score)
    for score, doc_id in zip(dists_2[0], ids_2[0])
]

In [14]:
# Relevance judgments of the dataset, narrowed to the two selected queries.
qrels = dataset.qrels
# Keep only the judgments whose query id matches each selected query.
qrel_1 = list(filter(lambda judgment: judgment.query_id == real_query_id_1, qrels))
qrel_2 = list(filter(lambda judgment: judgment.query_id == real_query_id_2, qrels))

In [15]:
# Evaluate the configured measure separately for each query (each call sees
# only that query's judgments and results) and print the resulting mapping.
metric_result_1 = ir_measures.calc_aggregate([ir_measure], qrel_1, results_for_metric_1)
print("Metric evaluation for query 1", metric_result_1)
metric_result_2 = ir_measures.calc_aggregate([ir_measure], qrel_2, results_for_metric_2)
print("Metric evaluation for query 2", metric_result_2)

Metric evaluation for query 1 {RR@10: 0.0}
Metric evaluation for query 2 {RR@10: 0.1}


### Display Results

In [16]:
# Show the text of the first selected query.
query_passage_1

'temperature in clearwater florida per month'

In [17]:
# Show the passage texts retrieved for the first query, in the order returned by the search.
results_1

['Clearwater: Annual Weather Averages. July is the hottest month in Clearwater with an average temperature of 28°C (83°F) and the coldest is January at 14°C (58°F) with the most daily sunshine hours at 11 in July.he wettest month is June with an average of 133.3mm of rain. The best month to swim in the sea is in August when the average sea temperature is 30°C (86°F).',
 'Clearwater: Annual Weather Averages July is the hottest month in Clearwater with an average temperature of 28 °C (83 °F) and the coldest is January at 14 °C (58 °F) with the most daily sunshine hours at 11 in July. The wettest month is June with an average of 133.3mm of rain. The best month to swim in the sea is in August when the average sea temperature is 30 °C (86 °F).',
 'Clearwater: Annual Weather Averages. July is the hottest month in Clearwater with an average temperature of 28°C (83°F) and the coldest is January at 14°C (58°F) with the most daily sunshine hours at 11 in July. The wettest month is June with an a

##### Dense representations fail to capture the relevance of every word in the query. "PER MONTH" means the query asks for information about each month of the year, but dense representations focus on the more general topic and return passages about annual average temperatures.

In [18]:
# Show the text of the second selected query.
query_passage_2

'definition of dignity for kids'

In [19]:
# Show the passage texts retrieved for the second query, in the order returned by the search.
results_2

['Definition of dignity for English Language Learners. : 1  a way of appearing or behaving that suggests seriousness and self-control. : 2  the quality of being worthy of honor or respect.',
 'Dignity is defined as the personal quality of being worthy of honor. An example of dignity is the respect paid to an elder member in the family. pl. -·ties. 1  the quality of being worthy of esteem or honor; worthiness. 2  high repute; honor.  the degree of worth, repute, or honor.',
 'Dignity means being respected for who you are and what you believe in. That’s something that can be done in both big and little ways. Saying please or lending a helping hand are ways of showing dignity. Sharing your toys with others or making a donation to a food bank are ways of showing dignity.',
 'Dignity is defined as the personal quality of being worthy of honor. An example of dignity is the respect paid to an elder member in the family. pl. -·ties. 1  the quality of being worthy of esteem or honor; worthiness

##### Dense representations capture the broader meaning of "definition for kids", unlike sparse representations, which get fooled by the word "kid" and match it even when the context is different.