In [1]:
# TASK 1: Analysis of first-stage (step-one) retrieval performance.
# Start simple: build two indexes and evaluate their top-m retrievals with the
# Hit-once and Hit-all metrics. Then look at other metrics such as precision & recall, plus Roni's analysis.
# Mount Google Drive at /content/drive so the project files stored there can be read and written.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# cleaning sample_data directory
!rm -r sample_data

In [3]:
# List the project folder on the mounted Drive to check which files are available.
!ls drive/My\ Drive/touche-2022-prototyping

dataset-prep-and-retrieval-diagnostic-analysis.ipynb
indexes
initial-retrieval-metric-analysis.ipynb
merged_documents
missed_1000.csv
missed_1200.csv
missed_1375.csv
missed_1500.csv
missed_250.csv
missed_500.csv
missed_786.csv
missed_full_1000.csv
missed_full_1375.csv
missed_full_1500.csv
missed_full_250.csv
missed_full_500.csv
missed_full_786.csv
mon-duo-retrieval-prototyping-and-analysis.ipynb
query-expansion-retrieval-metric-analysis.ipynb
topics-task2-51-100.xml
topics-task-2.xml
touche2020-task2-relevance-withbaseline.qrels
touche_complete_topics.csv
touche_ground_truth.csv
touche_results_2021.csv
touche-task2-51-100-relevance.qrels
touche-task2-passages-version-002-expanded-with-doc-t5-query.jsonl
touche_topics_query_expansion.csv


In [4]:
import logging
import tarfile
import nltk
# Fetch the WordNet corpus so that nltk.corpus.wordnet can be queried later on.
nltk.download('wordnet')
from nltk.corpus import wordnet
# from typing import List
import spacy
# spaCy model loaded further below: en_core_web_sm
import string
from tqdm import tqdm
from spacy.lang.en.stop_words import STOP_WORDS
import random
import numpy as np
import pandas as pd
# Seeds Python's `random` module only; NumPy's generator is not seeded here.
random.seed(10)

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.


In [5]:
# List the current working directory.
!ls

drive


In [5]:
# one-time pre-loading of libraries.
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
# `sp` is the spaCy pipeline used by the cells below to tokenize and POS-tag queries.
sp = spacy.load("en_core_web_sm")
# Stop-word collection exposed by the loaded pipeline's language defaults.
all_stopwords = sp.Defaults.stop_words

In [6]:
# Sanity check of the spaCy pipeline: run one sentence through `sp` and print
# the token texts followed by the `tag_` value of each token.
text = sp("Nick likes to play football, however he is not too fond of tennis.")
token_list = [token.text for token in text]
token_tag_list = [token.tag_ for token in text]
print(token_list)
print(token_tag_list)

['Nick', 'likes', 'to', 'play', 'football', ',', 'however', 'he', 'is', 'not', 'too', 'fond', 'of', 'tennis', '.']
['NNP', 'VBZ', 'TO', 'VB', 'NN', ',', 'RB', 'PRP', 'VBZ', 'RB', 'RB', 'JJ', 'IN', 'NN', '.']


In [7]:
def synonym_antonym_extractor(w_):
    """Collect WordNet synonyms and antonyms for a word.

    Every lemma of every synset returned by ``wordnet.synsets(w_)`` contributes
    its name as a synonym (so ``w_`` itself may be part of the result). For a
    lemma that has antonyms, the name of its first antonym is collected.
    Underscores in lemma names are replaced by spaces.

    Args:
        w_: The word to look up.

    Returns:
        A ``(synonyms, antonyms)`` tuple of duplicate-free lists, ordered by
        first appearance while iterating the synsets. Both lists are empty when
        no synset is found.
    """
    from nltk.corpus import wordnet

    # A dict keeps insertion order, so de-duplicating through it yields the same
    # list order on every run. The previous list(set(...)) depended on string
    # hash randomization and could change between kernel restarts.
    synonyms = {}
    antonyms = {}
    for syn in wordnet.synsets(w_):
        for lemma in syn.lemmas():
            synonyms[str(lemma.name()).replace('_', ' ')] = None
            lemma_antonyms = lemma.antonyms()
            if lemma_antonyms:
                # Only the first antonym of a lemma is used.
                antonyms[str(lemma_antonyms[0].name()).replace('_', ' ')] = None
    return list(synonyms), list(antonyms)
# synonym_antonym_extractor('better')

In [8]:
import gensim.downloader as api
# overview of all models in gensim: https://github.com/RaRe-Technologies/gensim-data
# Load the "glove-wiki-gigaword-300" vectors through gensim's downloader API;
# `model_glove` is the global embedding model used by best_words_extractor.
model_glove = api.load("glove-wiki-gigaword-300")



In [13]:
# Pick the 3 closest synonyms and 2 antonyms for a given word (typically an adjective).
def best_words_extractor(w_, syn_list, ann_list):
    """Select expansion words for ``w_`` using GloVe distances.

    Candidates that equal ``w_`` or are missing from ``model_glove``'s vocabulary
    (e.g. multi-word lemmas) are dropped. The remaining ones are ranked by
    ``model_glove.distance(w_, candidate)``, smallest distance first.

    Args:
        w_: The word the candidates are compared against.
        syn_list: Candidate synonyms.
        ann_list: Candidate antonyms.

    Returns:
        ``(syn_data, ann_data)``:
        * ``syn_data`` - the three closest synonyms, or the default
          ``['good', 'well', 'best']`` when fewer than three are usable.
        * ``ann_data`` - always two words: the single closest antonym followed by
          ``'worse'`` (or ``'different'`` if the closest antonym already is
          ``'worse'``), or the default ``['worse', 'badly']`` when no antonym is usable.

    If ``w_`` itself is not in the vocabulary no distance can be computed, so
    both defaults are returned instead of raising ``KeyError``.
    """
    def _in_vocab(word):
        # get_vector raises KeyError for out-of-vocabulary words.
        try:
            return model_glove.get_vector(word) is not None
        except KeyError:
            return False

    def _closest(candidates, limit):
        # dict keeps first-seen order, and sorted() is stable, so ties keep input order.
        scored = {
            cand: model_glove.distance(w_, cand)
            for cand in candidates
            if cand != w_ and _in_vocab(cand)
        }
        return sorted(scored, key=scored.get)[:limit]

    w_known = _in_vocab(w_)
    syn_data = _closest(syn_list, 3) if w_known else []
    # NOTE(review): only ONE antonym is taken by distance although two are
    # returned; the second slot is always filled by the fallback below. Kept
    # as-is because previously generated queries depend on it - confirm intent.
    ann_data = _closest(ann_list, 1) if w_known else []

    if len(syn_data) < 3:
        syn_data = ['good', 'well', 'best']

    if not ann_data:
        ann_data = ['worse', 'badly']
    elif ann_data[0] != 'worse':
        ann_data.append('worse')
    else:
        ann_data.append('different')
    return syn_data, ann_data

In [14]:
# Smoke test of the two helpers defined above: print the raw WordNet synonyms and
# antonyms for 'better', then the words selected from them by best_words_extractor.
syns, anons = synonym_antonym_extractor('better')
print(syns)
print(anons)
print(best_words_extractor('better', syns, anons))

['sound', 'advantageously', 'better', 'proficient', 'dear', 'expert', 'dependable', 'best', 'wagerer', 'safe', 'effective', 'adept', 'practiced', 'ripe', 'bettor', 'upright', 'good', 'in force', 'unspoilt', 'comfortably', 'punter', 'respectable', 'skillful', 'skilful', 'ameliorate', 'improve', 'well', 'in effect', 'undecomposed', 'easily', 'right', 'estimable', 'salutary', 'considerably', 'substantially', 'honorable', 'amend', 'full', 'beneficial', 'honest', 'unspoiled', 'serious', 'just', 'secure', 'break', 'intimately', 'meliorate', 'near']
['disadvantageously', 'worsen', 'worse', 'evil', 'ill', 'badly', 'bad']
(['good', 'well', 'improve'], ['worse', 'different'])


In [15]:
# Build the base query, a nouns-only query and five expanded variants
# (three synonym-led, two antonym-led) for one topic title.
def get_comparation_superlation_nouns(query):
    """Derive seven query strings from a natural-language question.

    The question is tagged with the global spaCy pipeline ``sp`` and only
    tokens whose ``tag_`` is in the kept tag list below contribute. The first
    kept token carrying a "modifier" tag (lower-cased; ``'better'`` if there is
    none) is the pivot word whose synonyms/antonyms are looked up with
    ``synonym_antonym_extractor`` and ranked with ``best_words_extractor``.

    Args:
        query: The question text.

    Returns:
        A 7-tuple of stripped strings:
        ``(base, nouns_only, syn1, syn2, syn3, ant1, ant2)`` where ``base`` joins
        all kept tokens, ``nouns_only`` joins the noun-tagged (and ``VB``) tokens,
        and each expanded query is one selected word followed by the kept tokens
        that do not carry a modifier tag.
    """
    modifier_tags = ["CC", "CD", "JJ", "JJR", "JJS",
                     "RB", "RBR", "RBS", "VB"]
    noun_like_tags = ["NN", "NNS", "NNP", "NNPS", "VB"]
    kept_tags = ["CC", "CD", "JJ", "JJR", "JJS",
                 "RB", "RBR", "RBS", "NN", "NNS", "NNP",
                 "NNPS", "VB"]

    kept_words = []
    noun_words = []
    non_modifier_words = []
    pivot = None  # first modifier-tagged token, if any

    for token in sp(query):
        tag = token.tag_
        if tag not in kept_tags:
            continue
        kept_words.append(token.text)
        if tag in modifier_tags:
            if pivot is None:
                pivot = token.text
        else:
            non_modifier_words.append(token.text)
        # "VB" is both a modifier tag and a noun-like tag, so it lands here too.
        if tag in noun_like_tags:
            noun_words.append(token.text)

    # default value, query objectives.
    pivot = ('better' if pivot is None else pivot).lower()

    syns, anons = synonym_antonym_extractor(pivot)
    if not syns:
        syns, _ = synonym_antonym_extractor('different')
    if not anons:
        _, anons = synonym_antonym_extractor('better')

    best_syns, best_anons = best_words_extractor(pivot, syns, anons)

    # Assemble the final query strings.
    tail = " ".join(non_modifier_words)
    lead_words = (best_syns[0], best_syns[1], best_syns[2], best_anons[0], best_anons[1])
    expanded = [(word.strip() + " " + tail).strip() for word in lead_words]

    return (" ".join(kept_words).strip(), " ".join(noun_words).strip(),
            expanded[0], expanded[1], expanded[2], expanded[3], expanded[4])

In [16]:
# Try the query expansion on three example questions and print the resulting 7-tuples.
topics = [
    "What is the difference between sex and love?",
    "Which is the highest mountain in the world?",
    "Which is better, a laptop or a desktop?",
]
for q in topics:
    print(get_comparation_superlation_nouns(q))
    print('\n')

('difference sex and love', 'difference sex love', 'different difference sex love', 'unlike difference sex love', 'dissimilar difference sex love', 'bad difference sex love', 'worse difference sex love')


('highest mountain world', 'mountain world', 'high mountain world', 'eminent mountain world', 'high-pitched mountain world', 'low mountain world', 'worse mountain world')


('better laptop or desktop', 'laptop desktop', 'good laptop desktop', 'well laptop desktop', 'improve laptop desktop', 'worse laptop desktop', 'different laptop desktop')




In [17]:
# Larger hand-written list of comparative questions, used to eyeball the expansion
# output before running it on the topics parsed from the XML files.
topics_large = [
    "What is the difference between sex and love?",
    "Which is better, a laptop or a desktop?",
    "Which is better, Canon or Nikon?",
    "What are the best dish detergents?",
    "What are the best cities to live in?",
    "What is the longest river in the U.S.?",
    "Which is healthiest: coffee, green tea or black tea and why?",
    "What are the advantages and disadvantages of PHP over Python and vice versa?",
    "Why is Linux better than Windows?",
    "How to sleep better?",
    "Should I buy an LCD TV or a plasma TV?",
    "Train or plane? Which is the better choice?",
    "What is the highest mountain on Earth?",
    "Should one prefer Chinese medicine or Western medicine?",
    "What are the best washing machine brands?",
    "Should I buy or rent?",
    "Do you prefer cats or dogs, and why?",
    "What is the better way to grill outdoors: gas or charcoal?",
    "Which is better, MAC or PC?",
    "What is better: to use a brush or a sponge?",
    "Which is better, Linux or Microsoft?",
    "Which is better, Pepsi or Coke?",
    "What is better, Google search or Yahoo search?",
    "Which one is better, Netflix or Blockbuster?",
    "Which browser is better, Internet Explorer or Firefox?",
    "Which is a better vehicle: BMW or Audi?",
    "Which one is better, an electric stove or a gas stove?",
    "What planes are best, Boeing or Airbus?",
    "Which is better, Disneyland or Disney World?",
    "Should I buy an Xbox or a PlayStation?",
    "Which has more caffeine, coffee or tea?",
    "Which is better, LED or LCD Reception Displays?",
    "What is better: ASP or PHP?",
    "What is better for the environment, a real or a fake Christmas tree?",
    "Do you prefer tampons or pads?",
    "What IDE is better for Java: NetBeans or Eclipse?",
    "Is OpenGL better than Direct3D in terms of portability to different platforms?",
    "What are the differences between MySQL and PostgreSQL in performance?",
    "Is Java code more readable than code written in Scala?",
    "Which operating system has better performance: Windows 7 or Windows 8?",
    "Which smartphone has a better battery life: Xperia or iPhone?",
    "Which four wheel truck is better: Ford or Toyota?",
    "Should I prefer a Leica camera over Nikon for portrait photographs?",
    "Which company has a larger capitalization: Apple or Microsoft?",
    "Which laptop has a better durability: HP or Dell?",
    "Which beverage has more calories per glass: beer or cider?",
    "Is admission rate in Stanford higher than that of MIT?",
    "Is pasta healthier than pizza?",
    "Which city is more expensive to live in: San Francisco or New York?",
    "Whose salary is higher: basketball or soccer players?",
]
# Print the 7-tuple of generated queries for every question.
for q in topics_large:
    print(get_comparation_superlation_nouns(q))
    print('\n')

('difference sex and love', 'difference sex love', 'different difference sex love', 'unlike difference sex love', 'dissimilar difference sex love', 'bad difference sex love', 'worse difference sex love')


('better laptop or desktop', 'laptop desktop', 'good laptop desktop', 'well laptop desktop', 'improve laptop desktop', 'worse laptop desktop', 'different laptop desktop')


('better Canon or Nikon', 'Canon Nikon', 'good Canon Nikon', 'well Canon Nikon', 'improve Canon Nikon', 'worse Canon Nikon', 'different Canon Nikon')


('best dish detergents', 'detergents', 'better detergents', 'good detergents', 'well detergents', 'worst detergents', 'worse detergents')


('best cities live', 'cities live', 'better cities', 'good cities', 'well cities', 'worst cities', 'worse cities')


('longest river U.S.', 'river U.S.', 'long river U.S.', 'tenacious river U.S.', 'retentive river U.S.', 'short river U.S.', 'worse river U.S.')


('healthiest coffee green tea or black tea and', 'coffee tea tea',

In [29]:
from xml.dom import minidom
# Load every topic (number and title) from a topics XML file.
def parse_xml(path):
    """Parse a topics XML file into a DataFrame.

    Reads the ``<topic>`` elements below the first ``<topics>`` element and,
    for each one, takes the text of its first ``<number>`` and first ``<title>``
    child. Values are returned exactly as stored in the file (strings, with any
    surrounding whitespace/newlines left in place).

    Args:
        path: Location of the XML file, passed to ``minidom.parse``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``number`` and ``title``,
        one row per topic in document order.
    """
    def _first_text(node, tag):
        # Text of the first <tag> element found under `node`.
        return node.getElementsByTagName(tag)[0].firstChild.nodeValue

    topics_root = minidom.parse(path).getElementsByTagName('topics')[0]
    rows = [
        (_first_text(topic, 'number'), _first_text(topic, 'title'))
        for topic in topics_root.getElementsByTagName('topic')
    ]
    return pd.DataFrame(rows, columns=["number", "title"])

# Parse both topic files and stack them into one dataframe with a fresh 0..n-1 index.
topics_2020 = parse_xml("/content/drive/MyDrive/touche-2022-prototyping/topics-task-2.xml")
topics_2021 = parse_xml("/content/drive/MyDrive/touche-2022-prototyping/topics-task2-51-100.xml")
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat with ignore_index=True produces the same combined frame.
touche_topics = pd.concat([topics_2020, topics_2021], ignore_index=True)
touche_topics

Unnamed: 0,number,title
0,1,\nWhat is the difference between sex and love?\n
1,2,"\nWhich is better, a laptop or a desktop?\n"
2,3,"\nWhich is better, Canon or Nikon?\n"
3,4,\nWhat are the best dish detergents?\n
4,5,\nWhat are the best cities to live in?\n
...,...,...
95,96,"Which is healthier to wear, boxers or briefs?"
96,97,What is the difference between a blender vs a ...
97,98,"Which is better, rock or rap?"
98,99,Do you think imagination is better than knowle...


In [30]:
 # Strip the whitespace/newlines that surround the titles in the XML files.
 touche_topics['title'] = touche_topics['title'].apply(lambda x: x.strip())
 # Run the expansion ONCE per title (it involves spaCy tagging, WordNet lookups and
 # GloVe distances) and fan the resulting 7-tuple out into the seven columns,
 # instead of recomputing the whole tuple for every column.
 expansion_columns = ['query_base', 'query_noun', 'query_synonym1', 'query_synonym2',
                      'query_synonym3', 'query_antonym1', 'query_antonym2']
 expanded_queries = [get_comparation_superlation_nouns(title) for title in touche_topics['title']]
 for position, column in enumerate(expansion_columns):
     touche_topics[column] = [variants[position] for variants in expanded_queries]

In [31]:
# Display the topics together with the generated query columns.
touche_topics

Unnamed: 0,number,title,query_base,query_noun,query_synonym1,query_synonym2,query_synonym3,query_antonym1,query_antonym2
0,1,What is the difference between sex and love?,difference sex and love,difference sex love,different difference sex love,unlike difference sex love,dissimilar difference sex love,bad difference sex love,worse difference sex love
1,2,"Which is better, a laptop or a desktop?",better laptop or desktop,laptop desktop,good laptop desktop,well laptop desktop,improve laptop desktop,worse laptop desktop,different laptop desktop
2,3,"Which is better, Canon or Nikon?",better Canon or Nikon,Canon Nikon,good Canon Nikon,well Canon Nikon,improve Canon Nikon,worse Canon Nikon,different Canon Nikon
3,4,What are the best dish detergents?,best dish detergents,detergents,better detergents,good detergents,well detergents,worst detergents,worse detergents
4,5,What are the best cities to live in?,best cities live,cities live,better cities,good cities,well cities,worst cities,worse cities
...,...,...,...,...,...,...,...,...,...
95,96,"Which is healthier to wear, boxers or briefs?",healthier wear boxers or briefs,wear boxers briefs,healthy boxers briefs,fitter boxers briefs,salubrious boxers briefs,unhealthy boxers briefs,worse boxers briefs
96,97,What is the difference between a blender vs a ...,difference blender food processor,difference blender food processor,good difference blender food processor,well difference blender food processor,improve difference blender food processor,worse difference blender food processor,different difference blender food processor
97,98,"Which is better, rock or rap?",better rock or rap,rock rap,good rock rap,well rock rap,improve rock rap,worse rock rap,different rock rap
98,99,Do you think imagination is better than knowle...,think imagination better knowledge,think imagination knowledge,believe imagination knowledge,guess imagination knowledge,imagine imagination knowledge,forget imagination knowledge,worse imagination knowledge


In [32]:
# Persist the expanded topics to Drive (without the index column) so the evaluation part can reload them.
touche_topics.to_csv("/content/drive/MyDrive/touche-2022-prototyping/touche_topics_query_expansion.csv", index=False)

In [None]:
# Metric Evaluation for the query expansion approach.
# 1. Loading the data from the touche topics file.
# 2. Loading the corresponding queries into List, making predictions of k=275 (min) on the built index.
# 3. Merging Logic: For additional queries, remove the matching documents & keep only first non-existing docs.
# 4. Evaluation Metric: Average coverage calculation for the query expansion.

In [33]:
# import statements for retrieval evaluation.
import pandas as pd

In [34]:
# Reload the expanded topics from the CSV written above; this rebinds `touche_topics`
# to the on-disk version, so the evaluation below can start from the file alone.
touche_topics = pd.read_csv('/content/drive/MyDrive/touche-2022-prototyping/touche_topics_query_expansion.csv')
touche_topics.head()

Unnamed: 0,number,title,query_base,query_noun,query_synonym1,query_synonym2,query_synonym3,query_antonym1,query_antonym2
0,1,What is the difference between sex and love?,difference sex and love,difference sex love,different difference sex love,unlike difference sex love,dissimilar difference sex love,bad difference sex love,worse difference sex love
1,2,"Which is better, a laptop or a desktop?",better laptop or desktop,laptop desktop,good laptop desktop,well laptop desktop,improve laptop desktop,worse laptop desktop,different laptop desktop
2,3,"Which is better, Canon or Nikon?",better Canon or Nikon,Canon Nikon,good Canon Nikon,well Canon Nikon,improve Canon Nikon,worse Canon Nikon,different Canon Nikon
3,4,What are the best dish detergents?,best dish detergents,detergents,better detergents,good detergents,well detergents,worst detergents,worse detergents
4,5,What are the best cities to live in?,best cities live,cities live,better cities,good cities,well cities,worst cities,worse cities


In [35]:
# Pull each query variant out of the topics frame as its own Series.
(queries, base_queries, noun_queries,
 syn1_queries, syn2_queries, syn3_queries,
 ant1_queries, ant2_queries) = (
    touche_topics[column]
    for column in ('title', 'query_base', 'query_noun',
                   'query_synonym1', 'query_synonym2', 'query_synonym3',
                   'query_antonym1', 'query_antonym2')
)

In [36]:
# installing linux related stuff for pyserini
!sudo apt-get install libomp-dev
# installing the Python packages used for searching the index built on the merged documents.
# NOTE(review): versions are not pinned, so a fresh run may pull different releases
# than the ones these results were produced with.
!pip install pyserini
!pip install faiss

Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following additional packages will be installed:
  libomp5
Suggested packages:
  libomp-doc
The following NEW packages will be installed:
  libomp-dev libomp5
0 upgraded, 2 newly installed, 0 to remove and 39 not upgraded.
Need to get 239 kB of archives.
After this operation, 804 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libomp5 amd64 5.0.1-1 [234 kB]
Get:2 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libomp-dev amd64 5.0.1-1 [5,088 B]
Fetched 239 kB in 1s (377 kB/s)
debconf: unable to initialize frontend: Dialog
debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 76, <> line 2.)
debconf: falling back to frontend: Readline
debconf: unable to initialize frontend: Readline
debconf: (This frontend requires a controlling tty

Collecting faiss
  Downloading faiss-1.5.3-cp37-cp37m-manylinux1_x86_64.whl (4.7 MB)
[K     |████████████████████████████████| 4.7 MB 7.5 MB/s 
Installing collected packages: faiss
Successfully installed faiss-1.5.3


In [37]:
from pyserini.search.lucene import LuceneSearcher
# Open the Lucene index stored in the project's Drive folder.
searcher_opt = LuceneSearcher('/content/drive/MyDrive/touche-2022-prototyping/indexes/baseline_index')
# Use BM25 scoring with custom parameters.
# NOTE(review): presumably (k1=1.2, b=0.68) in that positional order - confirm against pyserini's set_bm25 signature.
searcher_opt.set_bm25(1.2, 0.68)

In [62]:
# Original topic titles: retrieve up to 1375 hits per topic and keep, in rank
# order, the distinct ids obtained by cutting each hit's docid at the first '___'.
solution_dict = {}
for topic_id, query_text in zip(touche_topics['number'], queries):
    ranked_hits = searcher_opt.search(query_text.strip(), k=1375)
    # dict.fromkeys drops repeated ids while preserving first-seen order.
    solution_dict[topic_id] = list(
        dict.fromkeys(hit.docid.split('___')[0] for hit in ranked_hits)
    )

In [39]:
# Nouns-only queries: retrieve up to 2725 hits per topic and keep, in rank
# order, the distinct ids obtained by cutting each hit's docid at the first '___'.
solution_dict_noun = {}
for topic_id, query_text in zip(touche_topics['number'], noun_queries):
    ranked_hits = searcher_opt.search(query_text.strip(), k=2725)
    # dict.fromkeys drops repeated ids while preserving first-seen order.
    solution_dict_noun[topic_id] = list(
        dict.fromkeys(hit.docid.split('___')[0] for hit in ranked_hits)
    )

In [40]:
# Base queries: retrieve up to 2725 hits per topic and keep, in rank
# order, the distinct ids obtained by cutting each hit's docid at the first '___'.
solution_dict_base = {}
for topic_id, query_text in zip(touche_topics['number'], base_queries):
    ranked_hits = searcher_opt.search(query_text.strip(), k=2725)
    # dict.fromkeys drops repeated ids while preserving first-seen order.
    solution_dict_base[topic_id] = list(
        dict.fromkeys(hit.docid.split('___')[0] for hit in ranked_hits)
    )

In [41]:
# First synonym queries: retrieve up to 2725 hits per topic and keep, in rank
# order, the distinct ids obtained by cutting each hit's docid at the first '___'.
solution_dict_syn1 = {}
for topic_id, query_text in zip(touche_topics['number'], syn1_queries):
    ranked_hits = searcher_opt.search(query_text.strip(), k=2725)
    # dict.fromkeys drops repeated ids while preserving first-seen order.
    solution_dict_syn1[topic_id] = list(
        dict.fromkeys(hit.docid.split('___')[0] for hit in ranked_hits)
    )

In [42]:
# Second synonym queries: retrieve up to 2725 hits per topic and keep, in rank
# order, the distinct ids obtained by cutting each hit's docid at the first '___'.
solution_dict_syn2 = {}
for topic_id, query_text in zip(touche_topics['number'], syn2_queries):
    ranked_hits = searcher_opt.search(query_text.strip(), k=2725)
    # dict.fromkeys drops repeated ids while preserving first-seen order.
    solution_dict_syn2[topic_id] = list(
        dict.fromkeys(hit.docid.split('___')[0] for hit in ranked_hits)
    )

In [43]:
# Third synonym queries: retrieve up to 2725 hits per topic and keep, in rank
# order, the distinct ids obtained by cutting each hit's docid at the first '___'.
solution_dict_syn3 = {}
for topic_id, query_text in zip(touche_topics['number'], syn3_queries):
    ranked_hits = searcher_opt.search(query_text.strip(), k=2725)
    # dict.fromkeys drops repeated ids while preserving first-seen order.
    solution_dict_syn3[topic_id] = list(
        dict.fromkeys(hit.docid.split('___')[0] for hit in ranked_hits)
    )

In [44]:
# First antonym queries: retrieve up to 2000 hits per topic and keep, in rank
# order, the distinct ids obtained by cutting each hit's docid at the first '___'.
solution_dict_ant1 = {}
for topic_id, query_text in zip(touche_topics['number'], ant1_queries):
    ranked_hits = searcher_opt.search(query_text.strip(), k=2000)
    # dict.fromkeys drops repeated ids while preserving first-seen order.
    solution_dict_ant1[topic_id] = list(
        dict.fromkeys(hit.docid.split('___')[0] for hit in ranked_hits)
    )

In [45]:
# Second antonym queries: retrieve up to 2000 hits per topic and keep, in rank
# order, the distinct ids obtained by cutting each hit's docid at the first '___'.
solution_dict_ant2 = {}
for topic_id, query_text in zip(touche_topics['number'], ant2_queries):
    ranked_hits = searcher_opt.search(query_text.strip(), k=2000)
    # dict.fromkeys drops repeated ids while preserving first-seen order.
    solution_dict_ant2[topic_id] = list(
        dict.fromkeys(hit.docid.split('___')[0] for hit in ranked_hits)
    )

In [67]:
def _extend_until_seen(merged, seen, candidates, min_scanned):
    """Append not-yet-merged candidates to ``merged`` in rank order.

    ``seen`` mirrors the content of ``merged`` as a set for fast membership
    tests; both are updated in place.

    NOTE(review): ``min_scanned`` is not a cap on the number of added documents.
    Scanning only stops at the first already-merged candidate found at rank
    position ``min_scanned`` or later; earlier already-merged candidates are
    simply skipped. Kept exactly as in the original merge - confirm intent.
    """
    for position, doc in enumerate(candidates):
        if doc not in seen:
            merged.append(doc)
            seen.add(doc)
        elif position >= min_scanned:
            break


# Merge the eight per-query result lists of every topic into one document list.
# Positional pairing, in zip order: (k, v) original title, (k1, v1) noun query,
# (k2, v2) base query, (k3..k5, v3..v5) synonym queries 1-3, (k6, v6) and
# (k7, v7) antonym queries 1-2.
solution_dict_fin = {}
for (k,v), (k1,v1), (k2,v2), (k3,v3), (k4,v4), (k5,v5), (k6,v6), (k7,v7) \
    in zip(solution_dict.items(), solution_dict_noun.items(), solution_dict_base.items(), solution_dict_syn1.items(), \
           solution_dict_syn2.items(), solution_dict_syn3.items(), solution_dict_ant1.items(), solution_dict_ant2.items()):
    # Topics whose ids do not line up across all eight dictionaries are skipped.
    if k == k1 == k2 == k3 == k4 == k5 == k6 == k7:
        # Seed: title-query documents that also appear in the noun, base or any
        # synonym result list (antonym lists are not consulted), in title-query rank order.
        expansion_docs = set(v1).union(v2, v3, v4, v5)
        merged_docs = [doc for doc in v if doc in expansion_docs]
        seen_docs = set(merged_docs)

        # Then add new documents list by list: noun, base, syn1-3, ant1-2.
        for candidates, min_scanned in ((v1, 100), (v2, 100), (v3, 100), (v4, 100),
                                        (v5, 100), (v6, 75), (v7, 75)):
            _extend_until_seen(merged_docs, seen_docs, candidates, min_scanned)

        solution_dict_fin[k] = merged_docs

In [68]:
# Peek at the merge result: topic id and number of merged documents for the
# first 11 topics (fewer if the dictionary is smaller).
for x, y in list(solution_dict_fin.items())[:11]:
    print(x, len(y))

1 577
2 627
3 429
4 630
5 903
6 658
7 496
8 941
9 578
10 859
11 523


In [55]:
# loading the baseline qrel files.
new_rel_2021 = pd.read_csv('/content/drive/MyDrive/touche-2022-prototyping/touche_ground_truth.csv')
new_rel_2021.head()

Unnamed: 0,qid,no,doc,rel
0,1,0,clueweb12-0001wb-05-12311,0
1,1,0,clueweb12-1811wb-62-08424,1
2,1,0,clueweb12-1811wb-62-08423,1
3,1,0,clueweb12-1217wb-47-14048,0
4,1,0,clueweb12-1811wb-62-08425,1


In [56]:
from collections import defaultdict
# Group the judged documents per topic id (as int). Documents with rel > 0 go
# into ground_truth_dict; independently of that, documents with rel exactly
# 0, 1 or 2 go into the dictionary of that relevance level.
ground_truth_dict = defaultdict(list)
rel0_truth_dict = defaultdict(list)
rel1_truth_dict = defaultdict(list)
rel2_truth_dict = defaultdict(list)
per_level_dicts = {0: rel0_truth_dict, 1: rel1_truth_dict, 2: rel2_truth_dict}

for qid, doc_id, rel in zip(new_rel_2021['qid'], new_rel_2021['doc'], new_rel_2021['rel']):
    qid, doc_id, rel = int(qid), str(doc_id), int(rel)
    if rel > 0:
        ground_truth_dict[qid].append(doc_id)
    if rel in per_level_dicts:
        per_level_dicts[rel][qid].append(doc_id)

In [69]:
# Basic coverage metrics of the merged retrieval result against the judged documents:
#   Hit one              - share of topics with at least one judged document retrieved
#   Hit all              - share of topics with every judged document retrieved
#   Average common ratio - mean fraction of a topic's judged documents that were retrieved
# Reported for all documents with rel > 0 and separately for rel == 0, 1 and 2.
# NOTE(review): the denominator is hard-coded, so topics without any judged
# document at a given relevance level still count in it - confirm this is intended.
total = 100


def _coverage_counts(truth_by_topic, retrieved_by_topic):
    """Count coverage of judged documents by the retrieved documents.

    Only topics present in both dictionaries are considered.

    Returns:
        ``(hit_one, hit_all, ratio_sum)``: number of topics with at least one
        judged document retrieved, number of topics with all judged documents
        retrieved, and the sum over topics of the retrieved fraction.
    """
    hit_once = 0
    hit_every = 0
    ratio_sum = 0
    for topic_id, truth_docs in truth_by_topic.items():
        if topic_id not in retrieved_by_topic:
            continue
        truth_docs = set(truth_docs)
        found = truth_docs.intersection(retrieved_by_topic[topic_id])
        if found:
            hit_once += 1
        # `found` is a subset of `truth_docs`, so equal sizes means everything was retrieved.
        if len(found) == len(truth_docs):
            hit_every += 1
        ratio_sum += len(found) / len(truth_docs)
    return hit_once, hit_every, ratio_sum


hit_one, hit_all, per_comm_avg = _coverage_counts(ground_truth_dict, solution_dict_fin)
print(f'Hit one: {round(hit_one / total, 4)}')
print(f'Hit all: {round(hit_all / total, 4)}')
print(f'Average common ratio: {round(per_comm_avg / total, 4)}')

hit0_one, hit0_all, per0_comm_avg = _coverage_counts(rel0_truth_dict, solution_dict_fin)
print(f'Zero Relevance, Hit one: {round(hit0_one / total, 4)}')
print(f'Zero Relevance, Hit all: {round(hit0_all / total, 4)}')
print(f'Zero Relevance, Average common ratio: {round(per0_comm_avg / total, 4)}')

hit1_one, hit1_all, per1_comm_avg = _coverage_counts(rel1_truth_dict, solution_dict_fin)
print(f'One Relevance, Hit one: {round(hit1_one / total, 4)}')
print(f'One Relevance, Hit all: {round(hit1_all / total, 4)}')
print(f'One Relevance, Average common ratio: {round(per1_comm_avg / total, 4)}')

hit2_one, hit2_all, per2_comm_avg = _coverage_counts(rel2_truth_dict, solution_dict_fin)
print(f'Two Relevance, Hit one: {round(hit2_one / total, 4)}')
print(f'Two Relevance, Hit all: {round(hit2_all / total, 4)}')
print(f'Two Relevance, Average common ratio: {round(per2_comm_avg / total, 4)}')

Hit one: 1.0
Hit all: 0.3
Average common ratio: 0.8766
Zero Relevance, Hit one: 1.0
Zero Relevance, Hit all: 0.07
Zero Relevance, Average common ratio: 0.756
One Relevance, Hit one: 0.99
One Relevance, Hit all: 0.4
One Relevance, Average common ratio: 0.856
Two Relevance, Hit one: 0.9
Two Relevance, Hit all: 0.49
Two Relevance, Average common ratio: 0.8034
