# Dense Retrieval for Entity Linking
Goal: evaluate pre-trained dense retrieval (DR) models for zero-shot entity linking (EL) on text

## 1. Load Corpus

In [1]:
# load entities
import json

# Root directory of the experiment data. Kept as a plain string with a
# trailing slash because other cells build file paths from it by string
# concatenation.
data_path = '/ivi/ilps/personal/svakule/spoken_qa/'

# Decode explicitly as UTF-8 instead of relying on the locale default:
# JSON is UTF-8 by convention and the entity labels contain non-ASCII
# characters, so a non-UTF-8 locale would otherwise fail or mis-decode.
# `entities` is indexed by entity id further down to obtain a label
# (presumably a dict of id -> label; verify against the file).
with open(data_path + 'entities.json', 'r', encoding='utf-8') as fin:
    entities = json.load(fin)

print(len(entities), 'entities')

28497 entities


In [2]:
# a subset of questions from https://github.com/askplatypus/wikidata-simplequestions
from beir.datasets.data_loader import GenericDataLoader

dataset = 'WD18/'
split = 'valid'

# Every file of this dataset lives under the same directory prefix.
dataset_dir = data_path + dataset

# Queries: exactly one of the two assignments below should be active.
# query_path = dataset_dir + "%s_original_questions.jsonl" % split  # original text questions
query_path = dataset_dir + "%s_wav2vec2-base-960h.jsonl" % split  # questions transcribed from synthesized speech

# Relevance judgements for the chosen split and the entity corpus to search.
qrels_path = dataset_dir + "%s.tsv" % split
corpus_path = dataset_dir + "entities.jsonl"

# Build the loader first, then read corpus, queries and qrels in one call.
loader = GenericDataLoader(
    corpus_file=corpus_path,
    query_file=query_path,
    qrels_file=qrels_path,
)
corpus, queries, qrels = loader.load_custom()

## 2. Evaluate with BEIR

In [3]:
from beir.retrieval.search.dense import DenseRetrievalExactSearch as DRES
from beir.retrieval.evaluation import EvaluateRetrieval
from beir.retrieval import models

In [5]:
# TAS-B trained on original WD18 (local checkpoint), scored with dot product.
# NOTE: `results1` is assigned here and again by the following TAS-B cell,
# so whichever of the two cells ran last determines its content.
model_path = '/ivi/ilps/personal/svakule/msmarco/output/msmarco-distilbert-base-tas-b-WD18'
encoder = models.SentenceBERT(model_path)
model = DRES(encoder)
retriever = EvaluateRetrieval(model, score_function="dot")

results1 = retriever.retrieve(corpus, queries)
ndcg, _map, recall, precision = retriever.evaluate(
    qrels, results1, retriever.k_values
)

# Precision at rank 1 is the headline number reported for each model.
acc = precision['P@1']
print(acc)

HBox(children=(FloatProgress(value=0.0, description='Batches', max=8.0, style=ProgressStyle(description_width=…




HBox(children=(FloatProgress(value=0.0, description='Batches', max=223.0, style=ProgressStyle(description_widt…


0.42857


In [6]:
# TAS-B (published checkpoint), scored with dot product.
# Highest P@1 among the outputs recorded in this notebook; its rankings are
# stored in `results1`, which the error-analysis section reads.
encoder = models.SentenceBERT("msmarco-distilbert-base-tas-b")
model = DRES(encoder)
retriever = EvaluateRetrieval(model, score_function="dot")

results1 = retriever.retrieve(corpus, queries)
ndcg, _map, recall, precision = retriever.evaluate(
    qrels, results1, retriever.k_values
)

acc = precision['P@1']
print(acc)

HBox(children=(FloatProgress(value=0.0, description='Batches', max=8.0, style=ProgressStyle(description_width=…




HBox(children=(FloatProgress(value=0.0, description='Batches', max=223.0, style=ProgressStyle(description_widt…


0.43461


In [7]:
# DistilBERT v3, scored with cosine similarity.
encoder = models.SentenceBERT("msmarco-distilbert-base-v3")
model = DRES(encoder)
retriever = EvaluateRetrieval(model, score_function="cos_sim")

results = retriever.retrieve(corpus, queries)
ndcg, _map, recall, precision = retriever.evaluate(
    qrels, results, retriever.k_values
)

# Last expression of the cell: displayed as the cell output.
precision['P@1']

HBox(children=(FloatProgress(value=0.0, description='Batches', max=8.0, style=ProgressStyle(description_width=…




HBox(children=(FloatProgress(value=0.0, description='Batches', max=223.0, style=ProgressStyle(description_widt…




0.43159

In [8]:
# DistilBERT v3 dot-product variant, scored with dot product.
encoder = models.SentenceBERT("msmarco-distilbert-base-dot-prod-v3")
model = DRES(encoder)
retriever = EvaluateRetrieval(model, score_function="dot")

results = retriever.retrieve(corpus, queries)
ndcg, _map, recall, precision = retriever.evaluate(
    qrels, results, retriever.k_values
)

# Last expression of the cell: displayed as the cell output.
precision['P@1']

HBox(children=(FloatProgress(value=0.0, description='Batches', max=8.0, style=ProgressStyle(description_width=…




HBox(children=(FloatProgress(value=0.0, description='Batches', max=223.0, style=ProgressStyle(description_widt…




0.41449

In [9]:
# DistilBERT v2, scored with cosine similarity.
encoder = models.SentenceBERT("msmarco-distilbert-base-v2")
model = DRES(encoder)
retriever = EvaluateRetrieval(model, score_function="cos_sim")

results = retriever.retrieve(corpus, queries)
ndcg, _map, recall, precision = retriever.evaluate(
    qrels, results, retriever.k_values
)

# Last expression of the cell: displayed as the cell output.
precision['P@1']

HBox(children=(FloatProgress(value=0.0, description='Batches', max=8.0, style=ProgressStyle(description_width=…




HBox(children=(FloatProgress(value=0.0, description='Batches', max=223.0, style=ProgressStyle(description_widt…




0.39537

In [10]:
# ANCE (RoBERTa base), scored with dot product.
# The model id is reproduced exactly as it was run, including its spelling.
encoder = models.SentenceBERT("msmarco-roberta-base-ance-fristp")
model = DRES(encoder)
retriever = EvaluateRetrieval(model, score_function="dot")

results = retriever.retrieve(corpus, queries)
ndcg, _map, recall, precision = retriever.evaluate(
    qrels, results, retriever.k_values
)

# Last expression of the cell: displayed as the cell output.
precision['P@1']

HBox(children=(FloatProgress(value=0.0, description='Batches', max=8.0, style=ProgressStyle(description_width=…




HBox(children=(FloatProgress(value=0.0, description='Batches', max=223.0, style=ProgressStyle(description_widt…




0.41549

## 3. Error Analysis

In [11]:
import random

# Print the top-ranked candidates for randomly drawn queries whose rank-1
# hit is wrong, using the rankings stored in `results1`.
#
# A query counts as answered correctly (and is skipped) when its rank-1
# candidate either has the gold entity id or carries the same label text as
# the gold entity. Queries are drawn with replacement and without a fixed
# seed, so the same query can be printed more than once and the output
# differs between runs.
top_k = 10      # number of ranked candidates printed per failing query
n_times = 1000  # number of random draws

# Materialise the (query_id, {doc_id: score}) pairs once instead of
# rebuilding the list on every draw.
ranked_queries = list(results1.items())

for _ in range(n_times):
    query_id, ranking_scores = random.choice(ranked_queries)
    scores_sorted = sorted(ranking_scores.items(), key=lambda item: item[1], reverse=True)
    # Slicing copes with queries that have fewer than `top_k` candidates,
    # where indexing by rank would raise IndexError.
    top_hits = scores_sorted[:top_k]

    # Only the first gold entity listed for the query is considered.
    correct_id = list(qrels[query_id].keys())[0]
    correct_label = entities[correct_id]

    if top_hits:
        best_id = top_hits[0][0]
        if best_id == correct_id or corpus[best_id].get("text") == correct_label:
            continue  # rank-1 hit is right; nothing to inspect

    print("\nQuery : %s, %s" % (queries[query_id], correct_label))
    for rank, (doc_id, _score) in enumerate(top_hits):
        print("Rank %d: %s - %s" % (rank+1, doc_id, corpus[doc_id].get("text")))
        # Mark where the gold entity actually landed in the ranking.
        if doc_id == correct_id:
            print('Correct!')


Query : what language is here without you compose in, Here Without You
Rank 1: Q1485266 - In All Languages
Rank 2: Q1032020 - Here Without You
Correct!
Rank 3: Q6486678 - Language Says It All
Rank 4: Q36834 - composer
Rank 5: Q595310 - Song Without End
Rank 6: Q7062126 - Not Mathematics
Rank 7: Q1428637 - spoken word
Rank 8: Q7062305 - Not from There
Rank 9: Q2518658 - Never Breathe What You Can't See
Rank 10: Q12404154 - Nobody Nowhere

Query : what languages pompo coin, Pom Poko
Rank 1: Q36471 - Pompei
Rank 2: Q167206 - Pom Poko
Correct!
Rank 3: Q3708701 - Pompey
Rank 4: Q548990 - Poco
Rank 5: Q671458 - Pompano Beach
Rank 6: Q81345 - Positano
Rank 7: Q1485266 - In All Languages
Rank 8: Q1805538 - Lanny Poffo
Rank 9: Q6428599 - Komparu Zempō
Rank 10: Q6664417 - Pongsak Pongsuwan

Query : what types of muvi is played by the sinematic orchestra, The Cinematic Orchestra
Rank 1: Q531578 - Muzaffer Tema
Rank 2: Q1955333 - Musiq Soulchild
Rank 3: Q311181 - Miyavi
Rank 4: Q6933449 - Mukha
R

Rank 5: Q1767472 - House of Angels
Rank 6: Q105285 - House of York
Rank 7: Q1710013 - Joy of Living
Rank 8: Q58389 - House of Bourbon
Rank 9: Q2311068 - City of Joy
Rank 10: Q2022584 - Rave Un2 the Joy Fantastic

Query : what country does goharto take place, Taboo
Rank 1: Q1740603 - Khartoum
Rank 2: Q1963 - Khartoum
Rank 3: Q1011412 - Gojira
Rank 4: Q3816358 - Kongo
Rank 5: Q1754454 - Country
Rank 6: Q263366 - Gösta Bohman
Rank 7: Q5584335 - Gor
Rank 8: Q218861 - Gondar
Rank 9: Q1533479 - Gob
Rank 10: Q7752174 - The Moon of Gomrath

Query : what is bruce mac donald film director s nationality, Bruce McDonald
Rank 1: Q2526255 - film director
Rank 2: Q1249881 - Bruce McDonald
Correct!
Rank 3: Q7342447 - Robert Bruce
Rank 4: Q5216900 - Daniel D. Bruce
Rank 5: Q4978000 - Bruce McNall
Rank 6: Q2680 - Bruce Willis
Rank 7: Q7173009 - Peter Bruce
Rank 8: Q4978228 - Bruce Rogers
Rank 9: Q4978221 - Bruce Rogers
Rank 10: Q4977722 - Bruce Jackson

Query : what is the nationality of parkal mac dona


Query : which county is mnt vernenen, Mount Vernon
Rank 1: Q486229 - Hennepin County
Rank 2: Q188661 - Neamț County
Rank 3: Q1527 - Minnesota
Rank 4: Q191653 - Teleorman County
Rank 5: Q829991 - Jo Coenen
Rank 6: Q494460 - Kootenai County
Rank 7: Q1136502 - Minnesota River
Rank 8: Q280844 - McKenzie County
Rank 9: Q9970 - Velsen
Rank 10: Q2564998 - Mekonnen Welde Mikael

Query : what was vorosenese's profession, Vauro Senesi
Rank 1: Q316307 - Giovanni Battista Piranesi
Rank 2: Q2401406 - Arthur Voskanyan
Rank 3: Q42939 - Bronisław Komorowski
Rank 4: Q1600132 - Roch Voisine
Rank 5: Q1780278 - Marcello Giannini
Rank 6: Q186185 - Kliment Voroshilov
Rank 7: Q123465 - Paolo Agostino
Rank 8: Q121405 - Giovanni Paolo Colonna
Rank 9: Q3426 - Voronezh
Rank 10: Q2066992 - The Italian Job

Query : what asteroid group is eleven thousand one hundred and sixty one dibos at sua part of, 11161 Daibosatsu
Rank 1: Q3863 - asteroid
Rank 2: Q5285040 - Diyo Sibisi
Rank 3: Q2179 - asteroid belt
Rank 4: Q38

Correct!
Rank 4: Q7857806 - Twenty One Pilots
Rank 5: Q7721831 - The Celestial Omnibus
Rank 6: Q7633614 - Suddenly, Tammy!
Rank 7: Q190151 - Bruce Dickinson
Rank 8: Q752989 - Nine
Rank 9: Q709206 - 8460 Imainamahoe
Rank 10: Q993512 - Ev'rything's Coming Up Dusty

Query : what kind of music is as the attics known for, Aziatix
Rank 1: Q3503207 - Toys in the Attic
Rank 2: Q286080 - acoustic music
Rank 3: Q20502 - house music
Rank 4: Q1988431 - This Is Our Music
Rank 5: Q487965 - industrial music
Rank 6: Q2880232 - Music
Rank 7: Q7752757 - The Music Tapes
Rank 8: Q37073 - pop music
Rank 9: Q373342 - popular music
Rank 10: Q1196752 - traditional pop music

Query : is richard bluemanta male or female, Richard Blumenthal
Rank 1: Q6581097 - male
Rank 2: Q2015601 - The Boy in Blue
Rank 3: Q1493156 - Looking for Richard
Rank 4: Q982586 - Richard Williams
Rank 5: Q51506 - Richard Attenborough
Rank 6: Q339593 - Richard Long
Rank 7: Q976609 - Richard Marks
Rank 8: Q442233 - Richard H. Truly
Rank 9: