In [58]:
import os
import torch
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
from tqdm.auto import tqdm
import huggingface_hub as hf
from dotenv import load_dotenv
import matplotlib.pyplot as plt
from typing import List, Dict, Union, Tuple
from transformers import AutoTokenizer, AutoModel

# Show every column and allow long cell contents when DataFrames are displayed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', 512)

plt.style.use('seaborn-v0_8')
# load_dotenv() runs before the os.environ lookups below (HF_TOKEN, HF_HOME).
# NOTE(review): presumably both variables come from a local .env file — confirm.
load_dotenv()
# Authenticate against the Hugging Face Hub with the token taken from the environment
# (the token itself is never written in the notebook).
hf.login(os.environ["HF_TOKEN"])
# Hard-coded GPU selection for this machine.
# NOTE(review): this is set after `import torch`; it presumably still takes effect because
# CUDA has not been initialised yet at this point — confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "7"
# Raises KeyError if HF_HOME is not defined in the environment.
print("CUDA_VISIBLE_DEVICES:", os.environ["CUDA_VISIBLE_DEVICES"], "HF_HOME:", os.environ["HF_HOME"])

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/mohsenfayyaz/.cache/huggingface/token
Login successful
CUDA_VISIBLE_DEVICES: 7 HF_HOME: /local1/mohsenfayyaz/.hfcache/


In [59]:
# Pick exactly one retriever result file to analyse; the alternatives are kept for quick switching.
# DATASET = "re-docred_facebook--contriever-msmarco_7170.pkl"
DATASET = "re-docred_facebook--dragon-plus-query-encoder_7170.pkl"
# DATASET = "re-docred_OpenMatch--cocodr-base-msmarco_7170.pkl.gz"

# hf_hub_download returns the local path of the downloaded file, so read from that path
# instead of maintaining a second hand-written copy of it.
dataset_path = hf.hf_hub_download(repo_id="Retriever-Contextualization/datasets", filename=f"results/{DATASET}", repo_type="dataset", local_dir="hf/")
# SECURITY: unpickling executes arbitrary code embedded in the file; only load pickles
# from a repository you trust.
df_raw = pd.read_pickle(dataset_path)
print(df_raw.attrs)
df_raw.head(1)

{'model': 'facebook/dragon-plus-query-encoder', 'query_model': 'facebook/dragon-plus-query-encoder', 'context_model': 'facebook/dragon-plus-context-encoder', 'pooling': 'cls', 'dataset': 're-docred', 'corpus_size': 105925, 'eval': {'ndcg': {'NDCG@1': 0.47685, 'NDCG@3': 0.52523, 'NDCG@5': 0.53646, 'NDCG@10': 0.54955, 'NDCG@100': 0.58002, 'NDCG@1000': 0.59556}, 'map': {'MAP@1': 0.47685, 'MAP@3': 0.51341, 'MAP@5': 0.51959, 'MAP@10': 0.52496, 'MAP@100': 0.53058, 'MAP@1000': 0.53109}, 'recall': {'Recall@1': 0.47685, 'Recall@3': 0.55941, 'Recall@5': 0.58689, 'Recall@10': 0.62748, 'Recall@100': 0.77741, 'Recall@1000': 0.90349}, 'precision': {'P@1': 0.47685, 'P@3': 0.18647, 'P@5': 0.11738, 'P@10': 0.06275, 'P@100': 0.00777, 'P@1000': 0.0009}}}


Unnamed: 0,query_id,query,gold_docs,gold_docs_text,scores_stats,scores_gold,scores_1000,predicted_docs_text_10,id,title,vertexSet,labels,sents,split,label,label_idx,head_entity,tail_entity,head_entity_names,tail_entity_names,head_entity_longest_name,tail_entity_longest_name,head_entity_types,tail_entity_types,evidence_sent_ids,evidence_sents,head_entity_in_evidence,tail_entity_in_evidence,relation,relation_name,query_question,duplicate_titles_len,duplicate_titles,hit_rank,gold_doc,gold_doc_title,gold_doc_text,gold_doc_score,pred_doc,pred_doc_title,pred_doc_text,pred_doc_score,gold_doc_len,pred_doc_len,query_decompx_tokens,query_decompx_tokenizer_word_ids,query_decompx_cls_or_mean_pooled,query_decompx_tokens_dot_scores,query_decompx_decompx_last_layer_pooled,gold_doc_decompx_tokens,gold_doc_decompx_tokenizer_word_ids,gold_doc_decompx_cls_or_mean_pooled,gold_doc_decompx_tokens_dot_scores,gold_doc_decompx_decompx_last_layer_pooled,pred_doc_decompx_tokens,pred_doc_decompx_tokenizer_word_ids,pred_doc_decompx_cls_or_mean_pooled,pred_doc_decompx_tokens_dot_scores,pred_doc_decompx_decompx_last_layer_pooled
0,test0,When was Loud Tour published?,[Loud Tour],"{'Loud Tour': {'text': 'The Loud Tour was the fourth overall and third world concert tour by Barbadian recording artist Rihanna . Performing in over twenty countries in the Americas and Europe , the tour was launched in support of Rihanna 's fifth studio album Loud ( 2010 ) . Critics acclaimed the show for its liveliness and higher caliber of quality when compared to Rihanna 's previous tours . The Loud Tour was a large commercial success , experiencing demand for an extension of shows in the United Kin...","{'len': 1000, 'max': 390.3378601074219, 'min': 377.525390625, 'std': 1.243663421340353, 'mean': 378.77503692626954, 'median': 378.4281463623047}",{'Loud Tour': 390.3378601074219},"{'Loud Tour': 390.3378601074219, 'Loud'n'proud': 385.71905517578125, 'Poetry Bus Tour': 385.4292907714844, 'Live &amp; Loud': 384.18218994140625, 'The Loudest Engine': 384.0265808105469, 'Young Wild Things Tour': 383.8572998046875, 'Guitar Rock Tour': 383.77392578125, 'Live (Front Line Assembly album)': 383.2346496582031, 'The Devils of Loudun': 383.2079772949219, 'Perpetual Motion Roadshow': 383.1563415527344, 'Get Sleazy Tour': 383.128662109375, 'In Visible Silence': 383.0901794433594, 'Indestructible...","{'Loud Tour': {'text': 'The Loud Tour was the fourth overall and third world concert tour by Barbadian recording artist Rihanna . Performing in over twenty countries in the Americas and Europe , the tour was launched in support of Rihanna 's fifth studio album Loud ( 2010 ) . Critics acclaimed the show for its liveliness and higher caliber of quality when compared to Rihanna 's previous tours . 
The Loud Tour was a large commercial success , experiencing demand for an extension of shows in the United Kin...",test0,Loud Tour,"[[{'name': 'Loud', 'pos': [23, 24], 'sent_id': 1, 'type': 'MISC', 'global_pos': [41, 41], 'index': '0_0'}, {'name': 'Loud Tour', 'pos': [1, 3], 'sent_id': 6, 'type': 'MISC', 'global_pos': [128, 128], 'index': '0_1'}, {'name': 'Loud Tour', 'pos': [1, 3], 'sent_id': 0, 'type': 'MISC', 'global_pos': [1, 1], 'index': '0_2'}, {'name': 'Loud Tour', 'pos': [1, 3], 'sent_id': 3, 'type': 'MISC', 'global_pos': [67, 67], 'index': '0_3'}], [{'name': 'Barbadian', 'pos': [13, 14], 'sent_id': 0, 'type': 'LOC', 'global...","[{'r': 'P577', 'h': 0, 't': 6, 'evidence': [1]}, {'r': 'P175', 'h': 0, 't': 2, 'evidence': [0, 1]}, {'r': 'P131', 'h': 10, 't': 8, 'evidence': [4]}, {'r': 'P17', 'h': 8, 't': 7, 'evidence': [3, 4]}, {'r': 'P17', 'h': 10, 't': 7, 'evidence': [3, 4]}, {'h': 2, 't': 1, 'r': 'P27', 'evidence': []}, {'h': 8, 't': 5, 'r': 'P30', 'evidence': []}, {'h': 0, 't': 14, 'r': 'P577', 'evidence': []}, {'h': 2, 't': 0, 'r': 'P800', 'evidence': [0, 1]}, {'h': 8, 't': 7, 'r': 'P131', 'evidence': [3, 4]}, {'h': 10, 't': 7...","[[The, Loud, Tour, was, the, fourth, overall, and, third, world, concert, tour, by, Barbadian, recording, artist, Rihanna, .], [Performing, in, over, twenty, countries, in, the, Americas, and, Europe, ,, the, tour, was, launched, in, support, of, Rihanna, 's, fifth, studio, album, Loud, (, 2010, ), .], [Critics, acclaimed, the, show, for, its, liveliness, and, higher, caliber, of, quality, when, compared, to, Rihanna, 's, previous, tours, .], [The, Loud, Tour, was, a, large, commercial, success, ,, expe...",test,"{'r': 'P577', 'h': 0, 't': 6, 'evidence': [1]}",0,"[{'name': 'Loud', 'pos': [23, 24], 'sent_id': 1, 'type': 'MISC', 'global_pos': [41, 41], 'index': '0_0'}, {'name': 'Loud Tour', 'pos': [1, 3], 'sent_id': 6, 'type': 'MISC', 'global_pos': [128, 128], 'index': '0_1'}, {'name': 'Loud Tour', 'pos': [1, 3], 'sent_id': 0, 
'type': 'MISC', 'global_pos': [1, 1], 'index': '0_2'}, {'name': 'Loud Tour', 'pos': [1, 3], 'sent_id': 3, 'type': 'MISC', 'global_pos': [67, 67], 'index': '0_3'}]","[{'pos': [25, 26], 'type': 'TIME', 'sent_id': 1, 'name': '2010', 'global_pos': [43, 43], 'index': '6_0'}]","{Loud, Loud Tour}",{2010},Loud Tour,2010,{MISC},{TIME},[1],"[[Performing, in, over, twenty, countries, in, the, Americas, and, Europe, ,, the, tour, was, launched, in, support, of, Rihanna, 's, fifth, studio, album, Loud, (, 2010, ), .]]","[{'name': 'Loud', 'pos': [23, 24], 'sent_id': 1, 'type': 'MISC', 'global_pos': [41, 41], 'index': '0_0'}]","[{'pos': [25, 26], 'type': 'TIME', 'sent_id': 1, 'name': '2010', 'global_pos': [43, 43], 'index': '6_0'}]",P577,publication date,When was Loud Tour published?,0,{},1.0,"Loud Tour The Loud Tour was the fourth overall and third world concert tour by Barbadian recording artist Rihanna . Performing in over twenty countries in the Americas and Europe , the tour was launched in support of Rihanna 's fifth studio album Loud ( 2010 ) . Critics acclaimed the show for its liveliness and higher caliber of quality when compared to Rihanna 's previous tours . The Loud Tour was a large commercial success , experiencing demand for an extension of shows in the United Kingdom due to po...",Loud Tour,"The Loud Tour was the fourth overall and third world concert tour by Barbadian recording artist Rihanna . Performing in over twenty countries in the Americas and Europe , the tour was launched in support of Rihanna 's fifth studio album Loud ( 2010 ) . Critics acclaimed the show for its liveliness and higher caliber of quality when compared to Rihanna 's previous tours . The Loud Tour was a large commercial success , experiencing demand for an extension of shows in the United Kingdom due to popularity ....",390.33786,"Loud Tour The Loud Tour was the fourth overall and third world concert tour by Barbadian recording artist Rihanna . 
Performing in over twenty countries in the Americas and Europe , the tour was launched in support of Rihanna 's fifth studio album Loud ( 2010 ) . Critics acclaimed the show for its liveliness and higher caliber of quality when compared to Rihanna 's previous tours . The Loud Tour was a large commercial success , experiencing demand for an extension of shows in the United Kingdom due to po...",Loud Tour,"The Loud Tour was the fourth overall and third world concert tour by Barbadian recording artist Rihanna . Performing in over twenty countries in the Americas and Europe , the tour was launched in support of Rihanna 's fifth studio album Loud ( 2010 ) . Critics acclaimed the show for its liveliness and higher caliber of quality when compared to Rihanna 's previous tours . The Loud Tour was a large commercial success , experiencing demand for an extension of shows in the United Kingdom due to popularity ....",390.33786,142,142,"[[CLS], when, was, loud, tour, published, ?, [SEP]]","[None, 0, 1, 2, 3, 4, 4, None]","[-0.17805682, -0.3927267, 0.34883702, -0.38739026, -0.23735791, -0.19460969, 0.21865264, 0.068975255, -0.1592264, 0.18711175, -0.20565934, 0.003034133, -0.18440822, 0.40548998, -0.4549966, 0.51666415, 0.09620502, -0.1836627, -0.4205021, -0.010630409, 0.26190794, 0.0625493, 0.088519946, 0.32873702, 0.09472372, 0.26387376, -0.2193913, 0.08308564, -0.20197082, -0.044599533, 0.30508837, 0.055685997, -0.083300106, 0.21956952, -0.54765826, 0.59977865, -0.28916034, -0.2208723, -0.1483896, 0.061622858, 0.077223...","[2.2196622, 6.71451, 0.9866385, 58.316944, 37.08578, 4.3126516, 1.2738111, -1.2307678]","[[0.0026502553, 0.044497166, 0.009840142, -0.029498188, 0.047593728, 0.0005243204, 0.089234896, -0.058340102, -0.0002567456, -0.06561515, 0.012288873, -0.018892672, 0.0068592615, 0.031180702, 0.027442973, -0.06405719, 0.007814868, -0.030438174, 0.026202774, -0.02576437, -0.04084836, -0.04383885, -0.00077837193, 0.02230208, 0.047516573, 0.09106402, -0.034369394, 
-0.058045126, -0.036178418, 0.053643636, 0.03031061, 0.027056862, 0.013645681, -0.0111004235, 0.011399071, -0.03626854, -0.059553638, 0.02507680...","[[CLS], loud, tour, the, loud, tour, was, the, fourth, overall, and, third, world, concert, tour, by, bar, ##bad, ##ian, recording, artist, rihanna, ., performing, in, over, twenty, countries, in, the, americas, and, europe, ,, the, tour, was, launched, in, support, of, rihanna, ', s, fifth, studio, album, loud, (, 2010, ), ., critics, acclaimed, the, show, for, its, live, ##liness, and, higher, caliber, of, quality, when, compared, to, rihanna, ', s, previous, tours, ., the, loud, tour, was, a, large, ...","[None, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, ...]","[-0.7096514, -0.43747085, 2.078466, -0.8606712, 2.3640666, 0.67811525, 3.0262432, 0.6547275, -1.481939, -2.838817, -1.2552446, 1.0732918, -3.318883, 3.0607197, -0.41772836, 3.4470546, 3.6913419, 0.77499884, 1.0027949, -1.8230458, 0.37280822, -1.2724396, 2.8919466, 1.0510181, 1.1511157, 1.0471345, 2.2961628, -0.6315758, -3.7429647, 1.7633313, 2.213553, 0.48526624, 2.730598, -1.8187449, -2.2764158, 1.4145939, -2.7238777, -1.5529062, -0.89042926, -2.9624236, -1.3753296, -2.6471179, -1.9109141, 0.7354509, -...","[650.5565, 112.46794, 110.70713, 35.217003, 88.90661, 93.24184, 93.337906, 43.745255, 70.52942, 71.49595, 12.454085, 71.52813, 16.037241, 15.657524, 53.929047, 21.389343, 5.0486135, 39.502014, 38.76303, 1.5653663, 10.186192, 42.767452, 3.310997, 14.8039875, 25.571796, 4.447118, 19.376839, 8.570304, 29.799927, 15.695527, 8.623252, 22.522247, 1.3768623, 26.493742, 27.494938, 14.446539, 20.635506, 34.00284, 28.572052, 
71.82972, 22.956003, 10.642702, 9.421131, 8.399438, 1.4503288, 6.0951405, 1.9697564, 40.7...","[[-0.06098142, 0.030208647, 0.35368052, -0.15786159, 0.43346453, 0.0317666, 0.49806064, 0.11205646, -0.21001092, -0.54779494, -0.16660528, 0.20015034, -0.5065915, 0.43021473, -0.07829579, 0.51261973, 0.5002409, 0.08070024, 0.15372154, -0.20520967, -0.018419577, -0.22845978, 0.4219108, 0.2132149, 0.25129104, 0.19566487, 0.3559674, -0.1225869, -0.52057284, 0.39532727, 0.270124, 0.046868116, 0.425288, -0.26911294, -0.35279825, 0.22168818, -0.41953656, -0.20435122, -0.12168454, -0.41875803, -0.21550342, -0....","[[CLS], loud, tour, the, loud, tour, was, the, fourth, overall, and, third, world, concert, tour, by, bar, ##bad, ##ian, recording, artist, rihanna, ., performing, in, over, twenty, countries, in, the, americas, and, europe, ,, the, tour, was, launched, in, support, of, rihanna, ', s, fifth, studio, album, loud, (, 2010, ), ., critics, acclaimed, the, show, for, its, live, ##liness, and, higher, caliber, of, quality, when, compared, to, rihanna, ', s, previous, tours, ., the, loud, tour, was, a, large, ...","[None, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, ...]","[-0.7096514, -0.43747085, 2.078466, -0.8606712, 2.3640666, 0.67811525, 3.0262432, 0.6547275, -1.481939, -2.838817, -1.2552446, 1.0732918, -3.318883, 3.0607197, -0.41772836, 3.4470546, 3.6913419, 0.77499884, 1.0027949, -1.8230458, 0.37280822, -1.2724396, 2.8919466, 1.0510181, 1.1511157, 1.0471345, 2.2961628, -0.6315758, -3.7429647, 1.7633313, 2.213553, 0.48526624, 2.730598, -1.8187449, -2.2764158, 1.4145939, -2.7238777, -1.5529062, -0.89042926, -2.9624236, -1.3753296, 
-2.6471179, -1.9109141, 0.7354509, -...","[650.5565, 112.46794, 110.70713, 35.217003, 88.90661, 93.24184, 93.337906, 43.745255, 70.52942, 71.49595, 12.454085, 71.52813, 16.037241, 15.657524, 53.929047, 21.389343, 5.0486135, 39.502014, 38.76303, 1.5653663, 10.186192, 42.767452, 3.310997, 14.8039875, 25.571796, 4.447118, 19.376839, 8.570304, 29.799927, 15.695527, 8.623252, 22.522247, 1.3768623, 26.493742, 27.494938, 14.446539, 20.635506, 34.00284, 28.572052, 71.82972, 22.956003, 10.642702, 9.421131, 8.399438, 1.4503288, 6.0951405, 1.9697564, 40.7...","[[-0.06098142, 0.030208647, 0.35368052, -0.15786159, 0.43346453, 0.0317666, 0.49806064, 0.11205646, -0.21001092, -0.54779494, -0.16660528, 0.20015034, -0.5065915, 0.43021473, -0.07829579, 0.51261973, 0.5002409, 0.08070024, 0.15372154, -0.20520967, -0.018419577, -0.22845978, 0.4219108, 0.2132149, 0.25129104, 0.19566487, 0.3559674, -0.1225869, -0.52057284, 0.39532727, 0.270124, 0.046868116, 0.425288, -0.26911294, -0.35279825, 0.22168818, -0.41953656, -0.20435122, -0.12168454, -0.41875803, -0.21550342, -0...."


In [60]:
# Question templates keyed by relation ID (e.g. 'P577'). The `{head_entity}` placeholder is
# filled in by generate_query_question. Built once instead of on every call.
_RELATION_QUESTION_TEMPLATES = {
    'P6': 'Who is the head of government of {head_entity}?',                 # Invertible
    'P17': 'Which country is {head_entity} associated with?',
    'P19': 'Where was {head_entity} born?',
    'P20': 'Where did {head_entity} die?',
    'P22': 'Who is the father of {head_entity}?',
    'P25': 'Who is the mother of {head_entity}?',
    'P26': 'Who is the spouse of {head_entity}?',                            # Invertible
    'P27': 'Which country is {head_entity} a citizen of?',                   # Could have multiple answers
    'P30': 'Which continent is {head_entity} part of?',
    'P31': 'What is {head_entity} an instance of?',                          # Could have multiple answers
    'P35': 'Who is the head of state of {head_entity}?',                     # Invertible
    'P36': 'What is the capital of {head_entity}?',                          # Invertible
    'P37': 'What is the official language of {head_entity}?',
    'P39': 'What position did {head_entity} hold?',                          # Could have multiple answers
    'P40': 'Who are the children of {head_entity}?',                         # Could have multiple answers
    'P50': 'Who is the author of {head_entity}?',                            # Could have multiple answers (not likely in DocRED)
    'P54': 'Which sports team is {head_entity} a member of?',                # Could have multiple answers (not likely in DocRED)
    'P57': 'Who directed {head_entity}?',                                    # Could have multiple answers (not likely in DocRED)
    'P58': 'Who wrote the screenplay for {head_entity}?',                    # Could have multiple answers (not likely in DocRED)
    'P69': 'Where was {head_entity} educated?',                              # Could have multiple answers
    'P86': 'Who composed the music for {head_entity}?',
    'P102': 'Which political party is {head_entity} a member of?',           # Could have multiple answers (not likely in DocRED)
    'P108': 'Who is the employer of {head_entity}?',
    'P112': 'Who founded {head_entity}?',                                    # Could have multiple answers, Invertible
    'P118': 'Which league is {head_entity} part of?',
    'P123': 'Who is the publisher of {head_entity}?',
    'P127': 'Who owns {head_entity}?',                                       # Could have multiple answers
    'P131': 'Which administrative territorial entity is {head_entity} located in?', # Could have multiple answers
    'P136': 'What genre does {head_entity} belong to?',                      # Could have multiple answers (not likely in DocRED)
    'P137': 'Who operates {head_entity}?',
    'P140': 'What is the religion of {head_entity}?',                        # Could have multiple answers (not likely in DocRED)
    # 'P150': 'What administrative territorial entity is contained within {head_entity}?', # Extreme Many-to-many (DISCARD THIS TYPE)
    'P155': 'What precedes {head_entity}?',
    'P156': 'What follows {head_entity}?',
    'P159': 'Where is the headquarters of {head_entity} located?',
    'P161': 'Who is a cast member of {head_entity}?',
    'P162': 'Who produced {head_entity}?',                                   # Could have multiple answers
    'P166': 'What award did {head_entity} receive?',                         # Could have multiple answers (not likely in DocRED)
    'P170': 'Who created {head_entity}?',                                    # Could have multiple answers (not likely in DocRED)
    'P171': 'What is the parent taxon of {head_entity}?',                    # Could have multiple answers (not likely in DocRED)
    'P172': 'What is the ethnic group of {head_entity}?',                    # Could have multiple answers (not likely in DocRED)
    'P175': 'Who performed {head_entity}?',
    'P176': 'Who manufactured {head_entity}?',
    'P178': 'Who developed {head_entity}?',
    'P179': 'What series is {head_entity} part of?',
    'P190': 'What is the sister city of {head_entity}?',
    'P194': 'What is the legislative body of {head_entity}?',
    'P205': 'What country is the basin of {head_entity}?',
    'P206': 'Where is {head_entity} located in or next to a body of water?',
    'P241': 'Which military branch is {head_entity} part of?',
    'P264': 'Which record label is {head_entity} associated with?',
    'P272': 'Which production company produced {head_entity}?',
    'P276': 'Where is {head_entity} located?',
    'P279': 'What is {head_entity} a subclass of?',
    'P355': 'What is the subsidiary of {head_entity}?',                      # Could have multiple answers
    'P361': 'What is {head_entity} a part of?',
    'P364': 'What is the original language of {head_entity}?',
    'P400': 'What platform is {head_entity} available on?',                  # Could have multiple answers
    'P403': 'What is the mouth of the watercourse of {head_entity}?',
    'P449': 'What is the original network of {head_entity}?',
    'P463': 'Which organization is {head_entity} a member of?',
    'P488': 'Who is the chairperson of {head_entity}?',
    'P495': 'What is the country of origin of {head_entity}?',
    'P527': 'What are the components of {head_entity}?',                     # Could have multiple answers
    'P551': 'Where is the residence of {head_entity}?',
    'P569': 'When was {head_entity} born?',
    'P570': 'When did {head_entity} die?',
    'P571': 'When was {head_entity} founded?',
    'P576': 'When was {head_entity} dissolved or demolished?',
    'P577': 'When was {head_entity} published?',
    'P580': 'When did {head_entity} start?',
    'P582': 'When did {head_entity} end?',
    'P585': 'When did {head_entity} occur?',
    'P607': 'What conflict was {head_entity} part of?',                      # Could have multiple answers
    'P674': 'Who are the characters in {head_entity}?',                      # Could have multiple answers
    'P676': 'Who wrote the lyrics for {head_entity}?',                       # Could have multiple answers
    'P706': 'Where is {head_entity} located on a terrain feature?',
    # 'P710': 'Who participated in {head_entity}?',                            # Extreme Many-to-many (DISCARD THIS TYPE)
    'P737': 'Who influenced {head_entity}?',
    'P740': 'Where was {head_entity} formed?',
    'P749': 'What is the parent organization of {head_entity}?',
    'P800': 'What is a notable work of {head_entity}?',
    'P807': 'What is {head_entity} separated from?',
    'P840': 'Where does the narrative of {head_entity} take place?',
    'P937': 'Where did {head_entity} work?',
    'P1001': 'Which jurisdiction does {head_entity} apply to?',
    'P1056': 'What does {head_entity} produce?',
    'P1198': 'What is the unemployment rate of {head_entity}?',
    'P1336': 'What territory is claimed by {head_entity}?',
    'P1344': 'What was {head_entity} a participant of?',
    'P1365': 'What does {head_entity} replace?',
    'P1366': 'What replaced {head_entity}?',
    'P1376': 'What is {head_entity} the capital of?',                        # BEWARE OF THE DUPLICATION FOR FILTERING (FACT P36 is the same as this one) FIXED
    'P1412': 'What languages are spoken, written, or signed by {head_entity}?',    # Could have multiple answers
    'P1441': 'In what work does {head_entity} appear?',
    'P3373': 'Who is the sibling of {head_entity}?'                          # Could have multiple answers, Invertible (based on each sibling)
}


def generate_query_question(head_entity, tail_entity, relation_type):
    """Return the natural-language question asking for `relation_type` of `head_entity`.

    Args:
        head_entity: Name substituted into the question template.
        tail_entity: Unused; kept so existing call sites keep working.
        relation_type: Relation ID such as 'P19'. The discarded types 'P150' and
            'P710' are intentionally not supported.

    Returns:
        The question string, e.g. 'Where was X born?' for ('X', _, 'P19').

    Raises:
        KeyError: If `relation_type` has no question template.
    """
    # Only the requested template is formatted; values passed to str.format are not
    # re-parsed, so braces inside `head_entity` are copied through verbatim.
    return _RELATION_QUESTION_TEMPLATES[relation_type].format(head_entity=head_entity)

# Relation IDs grouped by the kind of entity they describe. Only "Human Entities" is
# active; the other groups are kept commented out for reference.
# Note: some IDs (e.g. "P140", "P172") also appear in commented-out groups, so enabling
# those groups would make the reverse lookup below keep only the last group listing them.
relation_clusters = {
    "Human Entities": [
        "P19", "P20", "P22", "P25", "P26", "P27", "P39", "P40", "P54",
        "P69", "P102", "P140", "P3373", "P569", "P570", "P737", "P172",
    ],
    # "Geographical/Political Entities": [
    #     "P6", "P30", "P36", "P140"
    # ],
    # "Organizations": [
    #     "P127", "P276", "P740", "P749", "P112"
    # ],
    # "Creative Works": [
    #     "P50", "P57", "P58", "P86", "P162", "P175", "P800", "P840", "P1441", "P364", "P449", "P123", "P400"
    # ],
    # "Events": [
    #     "P1344", "P607", "P585", "P1365", "P1366", "P585", "P37", "P585"
    # ],
    # "Objects/Artifacts": [
    #     "P170", "P176", "P178", "P179", "P400", "P1056", "P276"
    # ],
    # "Biological Entities": [
    #     "P171", "P172"
    # ]
}

# Reverse lookup: relation ID -> name of the cluster that lists it.
relation_clusters_relation_to_cluster = dict(
    (relation_id, cluster_name)
    for cluster_name, cluster_relations in relation_clusters.items()
    for relation_id in cluster_relations
)

In [61]:
# Preview, per cluster, the "<relation>:<question>" string for a placeholder head entity "X".
r_q = {}
for cluster, relations in relation_clusters.items():
    r_q[cluster] = [
        ":".join((relation, generate_query_question("X", "", relation)))
        for relation in relations
    ]
    print(cluster, r_q[cluster])
    print()
# Last expression of the cell: display the relation -> cluster lookup.
relation_clusters_relation_to_cluster

Human Entities ['P19:Where was X born?', 'P20:Where did X die?', 'P22:Who is the father of X?', 'P25:Who is the mother of X?', 'P26:Who is the spouse of X?', 'P27:Which country is X a citizen of?', 'P39:What position did X hold?', 'P40:Who are the children of X?', 'P54:Which sports team is X a member of?', 'P69:Where was X educated?', 'P102:Which political party is X a member of?', 'P140:What is the religion of X?', 'P3373:Who is the sibling of X?', 'P569:When was X born?', 'P570:When did X die?', 'P737:Who influenced X?', 'P172:What is the ethnic group of X?']



{'P19': 'Human Entities',
 'P20': 'Human Entities',
 'P22': 'Human Entities',
 'P25': 'Human Entities',
 'P26': 'Human Entities',
 'P27': 'Human Entities',
 'P39': 'Human Entities',
 'P40': 'Human Entities',
 'P54': 'Human Entities',
 'P69': 'Human Entities',
 'P102': 'Human Entities',
 'P140': 'Human Entities',
 'P3373': 'Human Entities',
 'P569': 'Human Entities',
 'P570': 'Human Entities',
 'P737': 'Human Entities',
 'P172': 'Human Entities'}

In [73]:
# Work on a copy so df_raw stays untouched, then narrow it down one criterion at a time.
df = df_raw.copy()
df = df.loc[df["evidence_sent_ids"].str.len().eq(1)]  # exactly one evidence sentence id
df = df.loc[df["evidence_sents"].str.len().eq(1)]  # exactly one evidence sentence
df = df.loc[df["head_entity_in_evidence"].str.len().ge(1)]  # at least one head mention inside the evidence
df = df.loc[df["head_entity_names"].str.len().eq(1)]  # every head mention uses the same name
print(len(df))

# Filter Repeated Labels (Only 1 h->t)
def not_repeated_label(label, labels):
    """Return True when exactly one entry of `labels` has the same 'h' and 't' as `label`.

    Zero matches and two-or-more matches both yield False.
    """
    same_pair_count = sum(
        1
        for candidate in labels
        if candidate['h'] == label['h'] and candidate['t'] == label['t']
    )
    return same_pair_count == 1
# Despite its name, "repeated_label" holds the result of not_repeated_label, i.e. it is
# True for rows whose head->tail pair occurs exactly once. The column name is kept as-is.
df = df.assign(
    repeated_label=df.apply(lambda row: not_repeated_label(row["label"], row["labels"]), axis=1)
)
df = df.loc[df["repeated_label"]]
print(len(df))

# df = df.sample(250, random_state=0)
# print(len(df))

def flatten(xss):
    """Concatenate the items of each inner iterable of `xss` into one list (one level deep)."""
    flat = []
    for inner in xss:
        flat.extend(inner)
    return flat

def generate_query_foil(r):
    """Build a foil question about the row's head entity using a different relation.

    The foil relation is drawn from the same cluster as the row's own relation, excluding
    that relation and any relation annotated in `r["labels"]` between the same head and
    tail as `r["label"]`.

    Args:
        r: A row exposing "head_entity_longest_name", "relation", "label" (with 'h'/'t')
           and "labels" (iterable of dicts with 'h', 't', 'r').

    Returns:
        `[question, foil_relation]`, or `[None, None]` when the row's relation belongs to
        no cluster or no candidate relation is left.
    """
    head_name = r["head_entity_longest_name"]
    relation = r["relation"]
    try:
        relation_cluster = relation_clusters_relation_to_cluster[relation]
        possible_relations = relation_clusters[relation_cluster].copy()
        possible_relations.remove(relation)
    except (KeyError, ValueError):
        # Only "relation is not part of any cluster" is an expected miss. Anything else
        # (e.g. a NameError because the cluster cell was not run) must surface instead of
        # silently producing empty foils.
        return [None, None]
    gold_label = r["label"]
    for label in r["labels"]:
        if label["h"] == gold_label["h"] and label["t"] == gold_label["t"] and label["r"] in possible_relations:
            possible_relations.remove(label["r"])
    if not possible_relations:
        # np.random.choice raises on an empty sequence; report "no foil" instead.
        return [None, None]
    # Re-seeding on every call makes the draw depend only on the candidate list, and it
    # also resets NumPy's global RNG as a side effect.
    # NOTE(review): with a fixed seed per call, rows with equally long candidate lists
    # presumably always get the candidate at the same position — confirm this is intended.
    np.random.seed(0)
    foil_relation = np.random.choice(possible_relations)
    return [generate_query_question(head_name, "", foil_relation), foil_relation]
    

df["query_orig"] = df["query"]
# Generate each row's foil once and split the [question, relation] pair into two columns,
# so both columns always come from the same draw and the work is not done twice.
foil_pairs = [generate_query_foil(row) for _, row in df.iterrows()]
df["query_foil"] = [pair[0] for pair in foil_pairs]
# All relation IDs annotated in the row's document.
df["relations_orig"] = df["labels"].apply(lambda x: set([r["r"] for r in x]))
df["relation_foil"] = [pair[1] for pair in foil_pairs]
# Whole document as one whitespace-joined string of its tokens.
df["sents_complete"] = df["sents"].apply(lambda x: " ".join(flatten(x)))

# Rows for which no foil could be generated carry None and are dropped here.
df = df.dropna(subset=["query_foil"])
print(len(df))

# (query column, document column) pairs.
sents_cols = []
sents_cols.append(("query_orig", "sents_complete"))
sents_cols.append(("query_foil", "sents_complete"))

df[["query_orig", "query_foil", "relations_orig", "relation_foil", "sents_complete"]].head(5)

2262
1519
386


Unnamed: 0,query_orig,query_foil,relations_orig,relation_foil,sents_complete
59,Who is the spouse of Ann Todd?,When was Ann Todd born?,"{P57, P26, P272, P800, P577, P112, P161}",P569,"The Sound Barrier ( known in the United States , as Breaking Through the Sound Barrier and Breaking the Sound Barrier ) is a 1952 British film directed by David Lean . It is a fictional story about attempts by aircraft designers and test pilots to break the sound barrier . It was David Lean 's third and final film with his wife Ann Todd , but it was his first for Alexander Korda 's London Films following the break - up of Cineguild . The Sound Barrier stars Ralph Richardson , Ann Todd , and Nigel Patric..."
137,When was Dora Acuña born?,Who is the sibling of Dora Acuña?,"{P1412, P570, P27, P569}",P3373,"This is a list of women writers who were born in Paraguay or whose writings are closely associated with that country . Dora Acuña ( 1903 – 1987 ) , poet , journalist , radio presenter Gladys Carmagnola ( born 1939 ) , acclaimed poet , works for adults and children Raquel Chaves ( born 1939 ) , poet , journalist , educator Susy Delgado ( born 1949 ) , poet , writes in Spanish and Guarani Renée Ferrer de Arréllaga ( born 1944 ) , poet , novelist Josefina Pla ( 1903 – 1999 ) , Spanish - born Paraguayan poe..."
138,When did Dora Acuña die?,Who is the sibling of Dora Acuña?,"{P1412, P570, P27, P569}",P3373,"This is a list of women writers who were born in Paraguay or whose writings are closely associated with that country . Dora Acuña ( 1903 – 1987 ) , poet , journalist , radio presenter Gladys Carmagnola ( born 1939 ) , acclaimed poet , works for adults and children Raquel Chaves ( born 1939 ) , poet , journalist , educator Susy Delgado ( born 1949 ) , poet , writes in Spanish and Guarani Renée Ferrer de Arréllaga ( born 1944 ) , poet , novelist Josefina Pla ( 1903 – 1999 ) , Spanish - born Paraguayan poe..."
139,When was Gladys Carmagnola born?,Who is the sibling of Gladys Carmagnola?,"{P1412, P570, P27, P569}",P3373,"This is a list of women writers who were born in Paraguay or whose writings are closely associated with that country . Dora Acuña ( 1903 – 1987 ) , poet , journalist , radio presenter Gladys Carmagnola ( born 1939 ) , acclaimed poet , works for adults and children Raquel Chaves ( born 1939 ) , poet , journalist , educator Susy Delgado ( born 1949 ) , poet , writes in Spanish and Guarani Renée Ferrer de Arréllaga ( born 1944 ) , poet , novelist Josefina Pla ( 1903 – 1999 ) , Spanish - born Paraguayan poe..."
141,When was Raquel Chaves born?,Who is the sibling of Raquel Chaves?,"{P1412, P570, P27, P569}",P3373,"This is a list of women writers who were born in Paraguay or whose writings are closely associated with that country . Dora Acuña ( 1903 – 1987 ) , poet , journalist , radio presenter Gladys Carmagnola ( born 1939 ) , acclaimed poet , works for adults and children Raquel Chaves ( born 1939 ) , poet , journalist , educator Susy Delgado ( born 1949 ) , poet , writes in Spanish and Guarani Renée Ferrer de Arréllaga ( born 1944 ) , poet , novelist Josefina Pla ( 1903 – 1999 ) , Spanish - born Paraguayan poe..."


In [74]:
class YourCustomDEModel:
    """Dual-encoder wrapper exposing ``encode_queries`` and ``encode_corpus``.

    Loads a query encoder and a context (document) encoder with ``AutoModel`` and
    converts lists of texts into NumPy embedding matrices using either CLS-token
    pooling or attention-masked mean pooling.

    NOTE(review): one tokenizer, loaded from ``q_model``, is used for both
    encoders; this presumes both checkpoints share a vocabulary - confirm for
    every configured (query, context) pair.
    """

    def __init__(self, q_model, doc_model, pooling, sep: str = " ", verbose=True, **kwargs):
        """Load the tokenizer and both encoders.

        Args:
            q_model: Checkpoint name/path for the tokenizer and the query encoder.
            doc_model: Checkpoint name/path for the context encoder.
            pooling: ``"cls"`` or ``"avg"``; anything else raises ``ValueError``
                when the first batch is encoded.
            sep: String inserted between a document's title and its text.
            verbose: When True, show progress bars and print the embedding shape.
            **kwargs: Accepted and ignored.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(q_model)
        self.query_encoder = AutoModel.from_pretrained(q_model)
        self.context_encoder = AutoModel.from_pretrained(doc_model)
        self.pooling = pooling
        self.sep = sep
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.verbose = verbose

    def encode_queries(self, queries: List[str], batch_size=32, **kwargs) -> np.ndarray:
        """Embed ``queries`` with the query encoder; returns one row per query."""
        return self.encode_in_batch(self.query_encoder, queries, batch_size)

    def encode_corpus(self, corpus: Union[List[Dict[str, str]], Dict[str, List[str]]], batch_size=32, **kwargs) -> np.ndarray:
        """Embed documents with the context encoder; returns one row per document.

        ``corpus`` is either a list of ``{"title": ..., "text": ...}`` records
        (``"title"`` optional) or a single dict of parallel lists keyed the same way.
        When a title is present it is prepended to the text, joined by ``self.sep``.
        """
        if isinstance(corpus, dict):
            # Column-oriented corpus: parallel lists under "text" (and optionally "title").
            has_title = "title" in corpus
            sentences = [
                self._join_title_text(corpus["title"][i] if has_title else None, corpus["text"][i])
                for i in range(len(corpus["text"]))
            ]
        else:
            sentences = [self._join_title_text(doc.get("title"), doc["text"]) if "title" in doc
                         else self._join_title_text(None, doc["text"])
                         for doc in corpus]
        return self.encode_in_batch(self.context_encoder, sentences, batch_size)

    def _join_title_text(self, title, text: str) -> str:
        """Return ``title + sep + text`` (or just ``text`` when ``title`` is None), stripped."""
        if title is None:
            return text.strip()
        return (title + self.sep + text).strip()

    def encode_in_batch(self, model, sentences: List[str], batch_size=32, **kwargs) -> np.ndarray:
        """Tokenize and encode ``sentences`` in batches with ``model``.

        Args:
            model: Encoder to run (moved to ``self.device`` and put in eval mode).
            sentences: Texts to embed; order is preserved.
            batch_size: Number of texts per forward pass.

        Returns:
            Array with one pooled embedding per input sentence.

        Raises:
            ValueError: If ``self.pooling`` is neither ``"avg"`` nor ``"cls"``.
        """
        model.to(self.device)
        # Inference only: make dropout/normalisation layers deterministic.
        model.eval()
        all_embeddings = []
        loader = torch.utils.data.DataLoader(sentences, batch_size=batch_size, shuffle=False)
        # Without no_grad every forward pass builds an autograd graph and keeps
        # activations alive, which wastes memory for pure inference.
        with torch.no_grad():
            for batch in tqdm(loader, disable=not self.verbose):
                inputs = self.tokenizer(batch, padding=True, truncation=True, return_tensors='pt', max_length=512)
                inputs = {key: val.to(self.device) for key, val in inputs.items()}
                outputs = model(**inputs)
                ### POOLING
                if self.pooling == "avg":
                    embeddings = self.mean_pooling(outputs[0], inputs['attention_mask'])
                elif self.pooling == "cls":
                    # First token of every sequence -> (batch, emb_dim).
                    embeddings = outputs.last_hidden_state[:, 0, :]
                else:
                    raise ValueError("Pooling method not supported")
                all_embeddings.extend(embeddings.detach().cpu().numpy())
        all_embeddings = np.array(all_embeddings)
        if self.verbose: print(all_embeddings.shape)
        return all_embeddings

    def mean_pooling(self, token_embeddings, mask):
        """Average token embeddings over the positions where ``mask`` is non-zero.

        Masked-out positions are zeroed before summing along dim 1, and the sum is
        divided by the number of unmasked positions in each row.
        """
        token_embeddings = token_embeddings.masked_fill(~mask[..., None].bool(), 0.)
        sentence_embeddings = token_embeddings.sum(dim=1) / mask.sum(dim=1)[..., None]
        return sentence_embeddings

In [75]:
### RUN MODELS AND COMPUTE DOT SCORES
def digitize_col(df_col, bins) -> pd.Series:
    """Bucket the values of ``df_col`` into intervals with ``pd.cut``.

    Args:
        df_col: Column of numeric values (e.g. a ``pd.Series``).
        bins: Bin specification forwarded unchanged to ``pd.cut``.

    Returns:
        The binned values as returned by ``pd.cut`` - a categorical ``pd.Series``
        when ``df_col`` is a Series (not a DataFrame).
    """
    return pd.cut(df_col, bins=bins)

# (query encoder checkpoint, context encoder checkpoint, pooling mode) per model.
cfgs = [
    ("facebook/dragon-plus-query-encoder", "facebook/dragon-plus-context-encoder", "cls"),
    ("facebook/dragon-roberta-query-encoder", "facebook/dragon-roberta-context-encoder", "cls"),
    ("facebook/contriever-msmarco", "facebook/contriever-msmarco", "avg"),
    ("facebook/contriever", "facebook/contriever", "avg"),
    ("OpenMatch/cocodr-base-msmarco", "OpenMatch/cocodr-base-msmarco", "cls"),
    ("Shitao/RetroMAE_MSMARCO_finetune", "Shitao/RetroMAE_MSMARCO_finetune", "cls"),

    # Disabled configurations:
    # ("Shitao/RetroMAE", "Shitao/RetroMAE", "cls"),
    # ("Shitao/RetroMAE_MSMARCO", "Shitao/RetroMAE_MSMARCO", "cls"),
]


def to_doc_format(sentences: list):
    """Wrap each raw string as a ``{"text": ...}`` record for ``encode_corpus``."""
    return [dict(text=s) for s in sentences]


# Names of the score columns added to ``df``, in creation order.
plot_col_dots = []
for query_model, context_model, POOLING in tqdm(cfgs):
    dpr = YourCustomDEModel(query_model, context_model, POOLING, verbose=False)
    for query_col, sent_col in tqdm(sents_cols, desc=f"{query_model}"):
        # Encode row-aligned queries and passages with their respective encoders.
        query_embds = dpr.encode_queries(df[query_col].to_list())
        embds = dpr.encode_corpus(to_doc_format(df[sent_col].to_list()))
        # Row-wise dot product: query i is scored against passage i only.
        query_tensor = torch.tensor(query_embds)
        passage_tensor = torch.tensor(embds)
        embds_dot = torch.einsum("bd,bd->b", query_tensor, passage_tensor).cpu().numpy()
        new_col = f"{query_model}_{query_col}{sent_col}_dot"
        df[new_col] = embds_dot
        plot_col_dots.append(new_col)

# Snapshot of the frame including every score column.
df_dot = df.copy()

  0%|          | 0/6 [00:00<?, ?it/s]

facebook/dragon-plus-query-encoder:   0%|          | 0/2 [00:00<?, ?it/s]



facebook/dragon-roberta-query-encoder:   0%|          | 0/2 [00:00<?, ?it/s]



facebook/contriever-msmarco:   0%|          | 0/2 [00:00<?, ?it/s]



facebook/contriever:   0%|          | 0/2 [00:00<?, ?it/s]

OpenMatch/cocodr-base-msmarco:   0%|          | 0/2 [00:00<?, ?it/s]

Shitao/RetroMAE_MSMARCO_finetune:   0%|          | 0/2 [00:00<?, ?it/s]

In [76]:
# Display the first row of the scored frame to check the "<model>_<query_col><sent_col>_dot" columns.
df_dot.head(1)

Unnamed: 0,query_id,query,gold_docs,gold_docs_text,scores_stats,scores_gold,scores_1000,predicted_docs_text_10,id,title,vertexSet,labels,sents,split,label,label_idx,head_entity,tail_entity,head_entity_names,tail_entity_names,head_entity_longest_name,tail_entity_longest_name,head_entity_types,tail_entity_types,evidence_sent_ids,evidence_sents,head_entity_in_evidence,tail_entity_in_evidence,relation,relation_name,query_question,duplicate_titles_len,duplicate_titles,hit_rank,gold_doc,gold_doc_title,gold_doc_text,gold_doc_score,pred_doc,pred_doc_title,pred_doc_text,pred_doc_score,gold_doc_len,pred_doc_len,query_decompx_tokens,query_decompx_tokenizer_word_ids,query_decompx_cls_or_mean_pooled,query_decompx_tokens_dot_scores,query_decompx_decompx_last_layer_pooled,gold_doc_decompx_tokens,gold_doc_decompx_tokenizer_word_ids,gold_doc_decompx_cls_or_mean_pooled,gold_doc_decompx_tokens_dot_scores,gold_doc_decompx_decompx_last_layer_pooled,pred_doc_decompx_tokens,pred_doc_decompx_tokenizer_word_ids,pred_doc_decompx_cls_or_mean_pooled,pred_doc_decompx_tokens_dot_scores,pred_doc_decompx_decompx_last_layer_pooled,repeated_label,query_orig,query_foil,relations_orig,relation_foil,sents_complete,facebook/dragon-plus-query-encoder_query_origsents_complete_dot,facebook/dragon-plus-query-encoder_query_foilsents_complete_dot,facebook/dragon-roberta-query-encoder_query_origsents_complete_dot,facebook/dragon-roberta-query-encoder_query_foilsents_complete_dot,facebook/contriever-msmarco_query_origsents_complete_dot,facebook/contriever-msmarco_query_foilsents_complete_dot,facebook/contriever_query_origsents_complete_dot,facebook/contriever_query_foilsents_complete_dot,OpenMatch/cocodr-base-msmarco_query_origsents_complete_dot,OpenMatch/cocodr-base-msmarco_query_foilsents_complete_dot,Shitao/RetroMAE_MSMARCO_finetune_query_origsents_complete_dot,Shitao/RetroMAE_MSMARCO_finetune_query_foilsents_complete_dot
59,test285,Who is the spouse of Ann Todd?,[The Sound Barrier],"{'The Sound Barrier': {'text': 'The Sound Barrier ( known in the United States , as Breaking Through the Sound Barrier and Breaking the Sound Barrier ) is a 1952 British film directed by David Lean . It is a fictional story about attempts by aircraft designers and test pilots to break the sound barrier . It was David Lean 's third and final film with his wife Ann Todd , but it was his first for Alexander Korda 's London Films following the break - up of Cineguild . The Sound Barrier stars Ralph Richards...","{'len': 1000, 'max': 380.66021728515625, 'min': 365.82958984375, 'std': 1.2860670766013418, 'mean': 367.10054888916017, 'median': 366.7718505859375}",{'The Sound Barrier': 369.1648864746094},"{'Mary Todd Lincoln': 380.66021728515625, 'Levi Todd': 375.3865661621094, 'Todd Manning and Blair Cramer': 374.31134033203125, 'The Nerds': 373.2972106933594, 'Anita Thigpen Perry': 372.78009033203125, 'Joy Todd': 372.6305236816406, 'Alfred Todd (politician)': 372.354736328125, 'Hanne Tott': 372.315185546875, 'Anne Hearst': 372.2498779296875, 'Ann Thwaite': 371.7024230957031, 'Debbie Dingell': 371.66107177734375, 'Kristin Nelson': 371.5616149902344, 'Dolley Madison': 371.5113220214844, 'Anne, Princess R...","{'Mary Todd Lincoln': {'text': 'Mary Ann Todd Lincoln ( December 13 , 1818 - July 16 , 1882 ) was the wife of the 16th President of the United States , Abraham Lincoln , and as such the First Lady of the United States from 1861 to 1865 . She dropped the name Ann after her younger sister , Ann Todd ( Clark ) , was born , and did not use the name Todd after marrying . Mary was a member of a large , wealthy Kentucky family , and was well educated . 
After finishing school during her teens , she moved to Spr...",test285,The Sound Barrier,"[[{'type': 'MISC', 'pos': [11, 16], 'name': 'Breaking Through the Sound Barrier', 'sent_id': 0, 'global_pos': [11, 11], 'index': '0_0'}, {'type': 'MISC', 'pos': [0, 3], 'name': 'The Sound Barrier', 'sent_id': 4, 'global_pos': [99, 99], 'index': '0_1'}, {'name': 'Breaking the Sound Barrier', 'pos': [17, 21], 'sent_id': 0, 'type': 'MISC', 'global_pos': [17, 17], 'index': '0_2'}, {'type': 'MISC', 'pos': [0, 3], 'name': 'The Sound Barrier', 'sent_id': 0, 'global_pos': [0, 0], 'index': '0_3'}, {'type': 'MISC...","[{'r': 'P26', 'h': 4, 't': 5, 'evidence': [2]}, {'r': 'P26', 'h': 5, 't': 4, 'evidence': [2]}, {'r': 'P577', 'h': 0, 't': 2, 'evidence': [0]}, {'r': 'P57', 'h': 0, 't': 4, 'evidence': [0, 2, 4]}, {'r': 'P161', 'h': 0, 't': 5, 'evidence': [3]}, {'r': 'P161', 'h': 0, 't': 9, 'evidence': [3]}, {'r': 'P161', 'h': 0, 't': 10, 'evidence': [3]}, {'r': 'P57', 'h': 11, 't': 4, 'evidence': [5]}, {'r': 'P577', 'h': 11, 't': 12, 'evidence': [5]}, {'h': 7, 't': 6, 'r': 'P112', 'evidence': []}, {'h': 0, 't': 7, 'r': ...","[[The, Sound, Barrier, (, known, in, the, United, States, ,, as, Breaking, Through, the, Sound, Barrier, and, Breaking, the, Sound, Barrier, ), is, a, 1952, British, film, directed, by, David, Lean, .], [It, is, a, fictional, story, about, attempts, by, aircraft, designers, and, test, pilots, to, break, the, sound, barrier, .], [It, was, David, Lean, 's, third, and, final, film, with, his, wife, Ann, Todd, ,, but, it, was, his, first, for, Alexander, Korda, 's, London, Films, following, the, break, -, u...",test,"{'r': 'P26', 'h': 5, 't': 4, 'evidence': [2]}",1,"[{'name': 'Ann Todd', 'pos': [12, 14], 'sent_id': 2, 'type': 'PER', 'global_pos': [63, 63], 'index': '5_0'}, {'name': 'Ann Todd', 'pos': [7, 9], 'sent_id': 3, 'type': 'PER', 'global_pos': [92, 92], 'index': '5_1'}]","[{'name': 'David Lean', 'pos': [29, 31], 'sent_id': 0, 'type': 'PER', 'global_pos': [29, 29], 
'index': '4_0'}, {'type': 'PER', 'pos': [2, 4], 'name': 'David Lean', 'sent_id': 2, 'global_pos': [53, 53], 'index': '4_1'}, {'name': 'Lean', 'pos': [24, 25], 'sent_id': 4, 'type': 'PER', 'global_pos': [123, 123], 'index': '4_2'}, {'name': 'Lean', 'pos': [15, 16], 'sent_id': 5, 'type': 'PER', 'global_pos': [142, 142], 'index': '4_3'}]",{Ann Todd},"{David Lean, Lean}",Ann Todd,David Lean,{PER},{PER},[2],"[[It, was, David, Lean, 's, third, and, final, film, with, his, wife, Ann, Todd, ,, but, it, was, his, first, for, Alexander, Korda, 's, London, Films, following, the, break, -, up, of, Cineguild, .]]","[{'name': 'Ann Todd', 'pos': [12, 14], 'sent_id': 2, 'type': 'PER', 'global_pos': [63, 63], 'index': '5_0'}]","[{'type': 'PER', 'pos': [2, 4], 'name': 'David Lean', 'sent_id': 2, 'global_pos': [53, 53], 'index': '4_1'}]",P26,spouse,Who is the spouse of Ann Todd?,0,{},63.0,"The Sound Barrier The Sound Barrier ( known in the United States , as Breaking Through the Sound Barrier and Breaking the Sound Barrier ) is a 1952 British film directed by David Lean . It is a fictional story about attempts by aircraft designers and test pilots to break the sound barrier . It was David Lean 's third and final film with his wife Ann Todd , but it was his first for Alexander Korda 's London Films following the break - up of Cineguild . The Sound Barrier stars Ralph Richardson , Ann Todd ...",The Sound Barrier,"The Sound Barrier ( known in the United States , as Breaking Through the Sound Barrier and Breaking the Sound Barrier ) is a 1952 British film directed by David Lean . It is a fictional story about attempts by aircraft designers and test pilots to break the sound barrier . It was David Lean 's third and final film with his wife Ann Todd , but it was his first for Alexander Korda 's London Films following the break - up of Cineguild . 
The Sound Barrier stars Ralph Richardson , Ann Todd , and Nigel Patric...",369.164886,"Mary Todd Lincoln Mary Ann Todd Lincoln ( December 13 , 1818 - July 16 , 1882 ) was the wife of the 16th President of the United States , Abraham Lincoln , and as such the First Lady of the United States from 1861 to 1865 . She dropped the name Ann after her younger sister , Ann Todd ( Clark ) , was born , and did not use the name Todd after marrying . Mary was a member of a large , wealthy Kentucky family , and was well educated . After finishing school during her teens , she moved to Springfield , Ill...",Mary Todd Lincoln,"Mary Ann Todd Lincoln ( December 13 , 1818 - July 16 , 1882 ) was the wife of the 16th President of the United States , Abraham Lincoln , and as such the First Lady of the United States from 1861 to 1865 . She dropped the name Ann after her younger sister , Ann Todd ( Clark ) , was born , and did not use the name Todd after marrying . Mary was a member of a large , wealthy Kentucky family , and was well educated . 
After finishing school during her teens , she moved to Springfield , Illinois , where she ...",380.660217,161,241,"[[CLS], who, is, the, spouse, of, ann, todd, ?, [SEP]]","[None, 0, 1, 2, 3, 4, 5, 6, 6, None]","[-0.7147644, 0.32439718, 0.27682266, -0.39572704, -0.24833876, 0.6614727, -0.28332344, 0.13498431, -0.11509671, 0.115517445, 0.1608924, 0.48882008, 0.2081737, 0.3809503, -0.108406834, 0.36989212, 0.34214664, 0.15104607, -0.4066022, -0.1265414, 0.25561506, -0.40294933, -0.07236835, 0.261314, 0.5062571, -0.13665405, -0.037357192, -0.22267435, 0.058903802, 0.08302943, -0.09201495, 0.07204732, 0.28683144, -0.0054022246, 0.10854377, -0.122966826, -0.5642583, 0.2150462, -0.08464487, -0.34552395, -0.77458245, ...","[1.8846651, 1.0534244, -1.8747488, -1.0567846, 41.499516, -2.2349393, 30.285675, 35.64513, -0.046091586, -1.7083305]","[[0.044094715, -0.0036681755, -0.100443624, -0.06235094, 0.08152962, -0.14829059, 0.087456666, -0.07547034, -0.02290219, -0.06959176, -0.104356006, -0.0051139956, -0.019521283, 0.10759473, 0.0033398983, -0.07245315, 0.09716659, -0.13249972, 0.14716974, -0.014947569, -0.06442768, 0.053538945, 0.09087158, -0.026809232, -0.084038615, 0.10634507, -0.044397607, 0.024588771, -0.13069418, 0.111428134, 0.05648788, 0.14020804, 0.031704374, 0.016943466, -0.06981457, 0.031797133, -0.007895657, 0.053965867, -0.0218...","[[CLS], the, sound, barrier, the, sound, barrier, (, known, in, the, united, states, ,, as, breaking, through, the, sound, barrier, and, breaking, the, sound, barrier, ), is, a, 1952, british, film, directed, by, david, lean, ., it, is, a, fictional, story, about, attempts, by, aircraft, designers, and, test, pilots, to, break, the, sound, barrier, ., it, was, david, lean, ', s, third, and, final, film, with, his, wife, ann, todd, ,, but, it, was, his, first, for, alexander, ko, ##rda, ', s, london, fil...","[None, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 76, 77, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 86, 86, 86, 87, 88, 89, 90, 91, 92, ...]","[-0.4058786, 0.43196732, 2.224159, -0.72314984, 2.85136, 0.5488908, 2.4664748, 0.30164462, -0.9024467, -3.3246112, -1.3384386, 1.3894011, -2.9361, 2.8507893, -0.17996317, 3.5336087, 3.624661, 0.7020855, 0.88560635, -1.7811216, -0.2390055, -1.7214316, 2.6796408, 0.92363715, 1.6550897, 0.59878, 2.230065, -0.45460713, -3.7071376, 2.1610408, 2.2020156, 0.5176013, 2.7118642, -2.2056417, -2.1799808, 1.7247983, -3.093075, -1.2397361, -1.0493253, -2.4660306, -1.2312139, -2.3241649, -2.183359, -0.06231726, -2.68...","[1120.1495, 31.526802, 39.001854, 5.945431, 25.11985, 33.090694, 14.389123, 54.48872, 103.39136, 45.361294, 20.500359, 37.554153, 30.102287, 32.91558, 25.807213, 6.479479, 37.005894, 8.8769865, 5.40766, 12.102623, 24.476166, 7.387661, 5.615821, 3.6331038, 16.254793, 53.17531, 88.23146, 43.510605, 62.976486, 36.00425, 14.410126, 2.8967457, 6.471522, 5.7936172, 50.04664, 37.672638, 23.30697, 33.88945, 27.36066, 47.65985, 5.3083935, 20.610163, 10.587261, 8.851594, 7.4707603, 29.955612, 33.31729, 11.6459675...","[[-0.07712949, 0.04119066, 0.5740616, -0.23945667, 0.7625394, 0.058966346, 0.74162614, 0.17498043, -0.330835, -0.87566483, -0.2806018, 0.3324056, -0.8440861, 0.76533794, -0.10100064, 0.88795125, 0.8882414, 0.09348645, 0.27557504, -0.3691618, -0.034016978, -0.39419907, 0.7757827, 0.41203147, 0.39542487, 0.3114369, 0.6462473, -0.16210935, -0.8540773, 0.67692095, 0.3993083, 0.09692501, 0.7440892, -0.44931737, -0.56951386, 0.37498856, -0.7596176, -0.3434712, -0.20182839, -0.71162796, -0.38206324, -0.7360154...","[[CLS], mary, todd, lincoln, mary, ann, todd, lincoln, (, december, 13, ,, 1818, -, july, 16, ,, 1882, ), was, the, wife, of, the, 16th, president, of, the, united, states, ,, 
abraham, lincoln, ,, and, as, such, the, first, lady, of, the, united, states, from, 1861, to, 1865, ., she, dropped, the, name, ann, after, her, younger, sister, ,, ann, todd, (, clark, ), ,, was, born, ,, and, did, not, use, the, name, todd, after, marrying, ., mary, was, a, member, of, a, large, ,, wealthy, kentucky, family, ,,...","[None, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, ...]","[-1.0916798, 0.39398617, 2.008545, -0.98176825, 2.3703234, 1.5423791, 3.020127, 0.5031506, -1.4202002, -3.1331706, -0.7475092, 0.9300419, -3.2455425, 2.8623214, -0.1331398, 3.440537, 3.6707485, 1.0735432, 0.6769515, -1.9819801, 0.07145573, -1.8543757, 2.653778, 1.1146747, 1.1576549, 0.7821789, 2.6522737, -0.784687, -3.15896, 2.272624, 1.8623266, 0.24342711, 2.7140768, -2.1095245, -2.1577313, 1.0974652, -3.1035237, -1.6031094, -0.67464405, -2.1988223, -1.7791698, -2.7738137, -2.3679419, -0.035613053, -2....","[798.66833, 3.061686, 51.08542, 24.377144, 19.496555, 28.85645, 53.381287, 3.4527562, 38.507835, 20.442865, 26.17567, 11.011853, 46.629433, 10.398182, 35.466396, 24.711912, 8.053465, 66.907196, 21.527662, 56.34488, 1.9934406, 76.345116, 25.280922, 6.8011045, 49.450645, 10.284779, 22.45794, 9.461409, 13.10537, 11.158516, 15.29443, 0.14028683, 3.4427114, 12.955291, 15.332564, 10.259224, 4.633337, 10.449615, 21.708994, 13.862354, 20.483875, 10.116526, 10.828507, 8.118627, 4.893776, 36.12348, 4.499053, 18.8...","[[-0.058725342, 0.055819966, 0.41881943, -0.23026933, 0.5326824, -0.0003420785, 0.5792596, 0.121086895, -0.25727177, -0.58869225, -0.17055312, 0.23294936, -0.61261797, 0.57049316, -0.114162005, 0.64926744, 0.638925, 0.114589706, 
0.21657261, -0.2641573, 0.015384605, -0.30829498, 0.5663818, 0.25803387, 0.289661, 0.23462483, 0.4558001, -0.15692094, -0.5707933, 0.5552913, 0.3049983, 0.045167893, 0.5575188, -0.2813111, -0.40175024, 0.30562252, -0.528683, -0.21958035, -0.118786186, -0.52523685, -0.33228713, -...",True,Who is the spouse of Ann Todd?,When was Ann Todd born?,"{P57, P26, P272, P800, P577, P112, P161}",P569,"The Sound Barrier ( known in the United States , as Breaking Through the Sound Barrier and Breaking the Sound Barrier ) is a 1952 British film directed by David Lean . It is a fictional story about attempts by aircraft designers and test pilots to break the sound barrier . It was David Lean 's third and final film with his wife Ann Todd , but it was his first for Alexander Korda 's London Films following the break - up of Cineguild . The Sound Barrier stars Ralph Richardson , Ann Todd , and Nigel Patric...",369.504181,361.687897,353.011749,341.206177,0.927676,0.846875,0.670109,0.674712,205.596939,205.091827,42.058804,42.559261


In [None]:
# Start from a copy of the scored frame so df_dot itself is left untouched by this cell.
df = df_dot.copy()
# List the score columns available for the paired comparisons.
print(plot_col_dots)

def standard_ttest_ppf(n, confidence_level=0.95):
    """Quantile of the standard Student-t distribution at ``1 - confidence_level``.

    Uses ``n - 1`` degrees of freedom. With the default ``confidence_level`` the
    probability passed to ``ppf`` is 0.05, i.e. a lower-tail quantile, so the
    returned value is negative.

    NOTE(review): this is a one-sided critical value; a two-sided interval would
    use ``1 - (1 - confidence_level) / 2`` instead - confirm which is intended.
    """
    tail_probability = 1 - confidence_level
    degrees_of_freedom = n - 1
    return stats.t.ppf(tail_probability, degrees_of_freedom)

### T-TEST
# Score columns are named "<query_model>_<query_col><sent_col>_dot". The column
# whose name contains this tag is used as the reference (col1) of each pair.
# The previous filter ("query_short_namegold_doc_short") matched none of the
# columns produced in this notebook, which left the results table empty.
# NOTE(review): assumes the original-query column is the intended reference and
# the remaining column(s) of the same model are the comparison - confirm.
REFERENCE_COL_TAG = "_query_orig"

# Both depend only on the sample size, so compute them once.
n_samples = len(df)
t_critical = np.abs(standard_ttest_ppf(n_samples))

ttest_records = []
for query_model, context_model, POOLING in tqdm(cfgs):
    # Columns whose model prefix (text before the first "_query") is this model.
    model_cols = [col for col in plot_col_dots if col.split("_query")[0] == query_model]
    for col1 in model_cols:
        if REFERENCE_COL_TAG not in col1:
            continue
        for col2 in model_cols:
            if col1 == col2:
                continue
            paired_diff = df[col1] - df[col2]
            ttest = stats.ttest_rel(df[col1], df[col2])  # run the test once per pair
            ttest_records.append({
                "query_model": query_model,
                "col1": col1,
                "col2": col2,
                "ttest_stats": ttest.statistic,
                "ttest_pvalue": ttest.pvalue,
                "ttest_ci_low": t_critical,
                "ttest_ci_high": t_critical,
                "mean_diff": paired_diff.mean(),
                "std_diff": paired_diff.std(),
                "n": n_samples,
            })

if not ttest_records:
    raise ValueError(
        f"No score-column pairs found: no column in plot_col_dots contains {REFERENCE_COL_TAG!r} "
        "for any configured model."
    )

# Build the table once (instead of concatenating inside the loop) and order the bars.
results_df = pd.DataFrame(ttest_records).sort_values("ttest_stats", ascending=True)

### PLOT
# Checkpoint name -> (display family, display variant).
model_mappings = {
    "OpenMatch/cocodr-base-msmarco": ("COCO-DR", "Base MSMARCO"),
    "Shitao/RetroMAE_MSMARCO_finetune": ("RetroMAE", "MSMARCO FT"),
    "Shitao/RetroMAE_MSMARCO": ("RetroMAE", "MSMARCO"),
    "Shitao/RetroMAE": ("RetroMAE", ""),
    "facebook/contriever-msmarco": ("Contriever", "MSMARCO"),
    "facebook/contriever": ("Contriever", ""),
    "facebook/dragon-plus-query-encoder": ("Dragon+", ""),
    "facebook/dragon-roberta-query-encoder": ("Dragon RoBERTa", ""),
}
results_df = results_df.assign(
    query_model=results_df["query_model"].map(lambda name: model_mappings[name][0] + " " + model_mappings[name][1])
).rename(columns={"ttest_stats": "Paired t-Test Statistic", "query_model": "Model"})

# Make sure the output locations exist before anything is written.
os.makedirs("results", exist_ok=True)
os.makedirs("figs", exist_ok=True)

fig, ax = plt.subplots(figsize=(6, 4))
# NOTE(review): this title may be stale - the compared columns are original vs.
# foil queries and the outputs are named "relation_*"; confirm the wording.
ax.set_title("Literal Bias: Matching vs. Different Names")
sns.barplot(
    data=results_df,
    y="Model",
    x="Paired t-Test Statistic",
    hue="Model",
    # One colour per bar, so the palette matches however many models are plotted.
    palette=sns.color_palette("RdYlGn_r", n_colors=results_df["Model"].nunique()),
    ax=ax,
)
for container in ax.containers:
    ax.bar_label(container, fmt='%.2f', label_type='center', fontsize=10)
# Symmetric whiskers of +/- the critical t value around each statistic.
ax.errorbar(
    x=results_df["Paired t-Test Statistic"],
    y=results_df["Model"],
    xerr=results_df[["ttest_ci_low", "ttest_ci_high"]].T.to_numpy(),
    fmt="none", c="k", capsize=5, elinewidth=1, markeredgewidth=1, alpha=0.5,
)
results_df.to_json("results/relation_df.json", orient="records")

fig.tight_layout()
fig.savefig("figs/relation_ttest.pdf")
results_df