In [37]:
import os
import re
import shlex

In [38]:
# Sub-directories of the current working directory, in sorted order.
sorted(filter(os.path.isdir, os.listdir()))

['vanilla-mfq']

In [39]:
# Folder (relative to the working directory) whose "output" and "trec"
# sub-folders are used by the cells below.
dataset_name = "vanilla-mfq"

In [40]:
# All paths are relative, so they resolve against the notebook's working directory.
trec_executable = "../trec_eval/trec_eval"     # path to the TREC executable

# File passed to trec_eval as its first positional argument (presumably the
# relevance judgments). The commented-out lines are alternative files that
# can be swapped in by hand.
qrels_file_path = "../qrels/qrels.txt"
# qrels_file_path = "qrels/qrels_without_empty.txt"
# qrels_file_path = "qrels/qrels_without_empty_nodes_file.txt"

run_output_folder = f"{dataset_name}/output"      # folder that contains the output of the runs
trec_output_folder = f"{dataset_name}/trec"       # folder that will contain the TREC output 

## TREC
Generate TREC output files

In [41]:
# os.listdir() returns entries in arbitrary order and also lists directories
# (e.g. Jupyter's .ipynb_checkpoints). Keep only regular files, sorted, so the
# evaluation commands are deterministic and never point at a folder.
output_run = sorted(
    entry
    for entry in os.listdir(run_output_folder)
    if os.path.isfile(os.path.join(run_output_folder, entry))
)

In [42]:
# Command prefix shared by every evaluation; the run file is appended later.
# NOTE(review): per the trec_eval usage text, -q adds per-query rows, -m all_trec
# selects the standard TREC measure set and -c averages over all queries in the
# qrels file -- confirm against the installed trec_eval version.
trec_cmd = f"{trec_executable} -q -m all_trec -c {qrels_file_path}"

In [43]:
shell_commands = list()
trec_output_files = list()

for file in output_run:
    # The result file mirrors the run file name, with "output" swapped for "trec".
    trec_output_file = file.replace("output", "trec")

    # Quote both paths so file names containing spaces or shell metacharacters
    # cannot break the command or the redirection target.
    run_path = shlex.quote(f"{run_output_folder}/{file}")
    trec_path = shlex.quote(f"{trec_output_folder}/{trec_output_file}")
    cmd = f"{trec_cmd} {run_path} > {trec_path}"

    trec_output_files.append(trec_output_file)
    shell_commands.append(cmd)

In [44]:
# The "> file" redirection in each command fails when the target folder is
# missing, so make sure it exists before running anything.
os.makedirs(trec_output_folder, exist_ok=True)

for cmd in shell_commands:
    status = os.system(cmd)
    # A non-zero status means trec_eval (or the shell) failed; report it instead
    # of silently leaving an empty/partial result file behind.
    if status != 0:
        print(f"Command failed with status {status}: {cmd}")

## Results
Extract the metrics from the TREC output files

In [45]:
# Names of the generated TREC result files, in sorted order.
files = sorted(os.listdir(trec_output_folder))

In [46]:
# measurements = ["ndcg_cut_5", "ndcg_cut_10", "map_cut_5", "map_cut_10", "ndcg", "map" ,"set_recall"]
measurements = ["ndcg_cut_5", "ndcg_cut_10", "map_cut_5", "map_cut_10"]

def get_metrics(file_path: str)->dict:
    """Collect the values of the selected measurements from a trec_eval output file.

    A line is taken into account when it starts with one of the names in
    ``measurements`` immediately followed by whitespace; the stored value is
    the last whitespace-separated token of that line. When a measurement
    appears on several lines, the last occurrence wins.

    Args:
        file_path: path of the text file to parse.

    Returns:
        A dict mapping measurement name to its value, kept as the string found
        in the file. Measurements that never appear are absent from the dict.
    """
    d = dict()
    if not measurements:
        return d

    # One raw-string pattern compiled once: names are escaped so they are matched
    # literally, and the trailing \s keeps "ndcg_cut_5" from matching a longer
    # name such as "ndcg_cut_50".
    names = "|".join(re.escape(m) for m in measurements)
    pattern = re.compile(rf"({names})\s")

    with open(file_path) as f:
        for line in f:
            hit = pattern.match(line)
            if hit:
                d[hit.group(1)] = line.split()[-1]
    return d


In [47]:
# Map each model name (result file name without the '-trec.txt' suffix) to the
# metrics parsed from its result file.
out = {
    trec_file.replace('-trec.txt', ''): get_metrics(f"{trec_output_folder}/{trec_file}")
    for trec_file in files
}

In [48]:
import pandas as pd

# One row per model, one column per metric, with both axes labelled.
df = pd.DataFrame(out).T.rename_axis(index='Model', columns='Metric')

# Render as a markdown table; the string is reused by later cells.
markdown_table = df.to_markdown()
print(markdown_table)

| Model                            |   ndcg_cut_5 |   ndcg_cut_10 |   map_cut_5 |   map_cut_10 |
|:---------------------------------|-------------:|--------------:|------------:|-------------:|
| BM25-EXTRACTED-ONLY-ALL-QUERIES  |       0.1306 |        0.1431 |      0.0747 |       0.0906 |
| BM25-EXTRACTED-ONLY-SYN-QUERIES  |       0.0806 |        0.0892 |      0.0434 |       0.0538 |
| BM25-EXTRACTED-ONLY-TREC-QUERIES |       0.05   |        0.0539 |      0.0314 |       0.0368 |
| BM25-META+EXTRACTED-ALL-QUERIES  |       0.3757 |        0.414  |      0.2087 |       0.2793 |
| BM25-META+EXTRACTED-SYN-QUERIES  |       0.2286 |        0.2459 |      0.1275 |       0.168  |
| BM25-META+EXTRACTED-TREC-QUERIES |       0.1471 |        0.1681 |      0.0812 |       0.1112 |
| BM25-META-ONLY-ALL-QUERIES       |       0.5059 |        0.5222 |      0.2865 |       0.382  |
| BM25-META-ONLY-SYN-QUERIES       |       0.2997 |        0.2996 |      0.1679 |       0.2185 |
| BM25-META-ONLY-TREC-QUERIES 

Synthetic row analysis

In [49]:
def find_row(markdown_table: str, target_name: str) -> "str | None":
    """Return the markdown table row whose first cell is exactly ``target_name``.

    Args:
        markdown_table: table text with one ``| cell | cell |`` row per line.
        target_name: expected content of the first cell; matched literally.

    Returns:
        The first matching line, or ``None`` when no row has that first cell.
    """
    # Raw string avoids the invalid "\|" escape. Requiring the closing pipe after
    # optional padding makes this a whole-cell match, so "A" no longer selects
    # the row of "A-B".
    row_pattern = re.compile(rf"\|\s*{re.escape(target_name)}\s*\|")

    for line in markdown_table.strip().split("\n"):
        if row_pattern.match(line):
            return line
    return None

In [52]:
# Every retrieval model crossed with every field configuration, restricted to
# the runs that use all queries. The order is model-major: CS, BM25, LMD.
synthetic_rows = [
    f"{model}-{fields}-ALL-QUERIES"
    for model in ("CS", "BM25", "LMD")
    for fields in ("META+EXTRACTED", "META-ONLY", "EXTRACTED-ONLY")
]

# Print the matching table row for each selected run, one per line.
for row_name in synthetic_rows:
    row = find_row(markdown_table, row_name)
    print(row)

| CS-META+EXTRACTED-ALL-QUERIES    |       0.4756 |        0.5042 |      0.2692 |       0.3669 |
| CS-META-ONLY-ALL-QUERIES         |       0.4599 |        0.474  |      0.2583 |       0.3436 |
| CS-EXTRACTED-ONLY-ALL-QUERIES    |       0.1304 |        0.1384 |      0.0677 |       0.0802 |
| BM25-META+EXTRACTED-ALL-QUERIES  |       0.3757 |        0.414  |      0.2087 |       0.2793 |
| BM25-META-ONLY-ALL-QUERIES       |       0.5059 |        0.5222 |      0.2865 |       0.382  |
| BM25-EXTRACTED-ONLY-ALL-QUERIES  |       0.1306 |        0.1431 |      0.0747 |       0.0906 |
| LMD-META+EXTRACTED-ALL-QUERIES   |       0.2715 |        0.2982 |      0.1549 |       0.1926 |
| LMD-META-ONLY-ALL-QUERIES        |       0.4538 |        0.4699 |      0.2679 |       0.3446 |
| LMD-EXTRACTED-ONLY-ALL-QUERIES   |       0.1497 |        0.1655 |      0.0801 |       0.0998 |
