In [4]:
import os
import re

import pandas as pd

In [5]:
# Show the sub-directories of the current working directory, in sorted order.
sorted(filter(os.path.isdir, os.listdir()))

['vanilla-mfq']

In [6]:
# Name of the dataset folder; the run-output and TREC-output folders are derived from it.
dataset_name = "vanilla-mfq"

In [7]:
# Paths used by the evaluation. They are relative, so they are resolved against
# the directory the notebook kernel is running in.
trec_executable = "../trec_eval/trec_eval"     # path to the TREC executable

# Relevance judgements (qrels) file passed to trec_eval.
qrels_file_path = "../qrels/qrels.txt"
# Alternative qrels files, kept for reference (uncomment one to use it instead).
# NOTE(review): these lack the "../" prefix used by the active path - confirm before enabling.
# qrels_file_path = "qrels/qrels_without_empty.txt"
# qrels_file_path = "qrels/qrels_without_empty_nodes_file.txt"

run_output_folder = f"{dataset_name}/output"      # folder that contains the output of the runs
trec_output_folder = f"{dataset_name}/trec"       # folder that will contain the TREC output

## TREC
Generate TREC output files

In [8]:
# Run files to evaluate. os.listdir() returns entries in arbitrary order, so the
# names are sorted to make the generated commands reproducible across machines.
output_run = sorted(os.listdir(run_output_folder))

In [9]:
# Common prefix of every trec_eval invocation; the run file is appended per command.
# Flags, as described by trec_eval's usage text (verify against the installed version):
#   -q           also print per-query values, not only the summary
#   -m all_trec  compute the full set of standard TREC measures
#   -c           average over all queries in the qrels file
trec_cmd = f"{trec_executable} -q -m all_trec -c {qrels_file_path}"

In [10]:
# Name of the TREC output file for each run file: every "output" in the run
# file name is replaced by "trec".
trec_output_files = [run_file.replace("output", "trec") for run_file in output_run]

# One shell command per run file; trec_eval's stdout is redirected into the
# matching file inside the TREC output folder.
shell_commands = [
    f"{trec_cmd} {run_output_folder}/{run_file} > {trec_output_folder}/{trec_file}"
    for run_file, trec_file in zip(output_run, trec_output_files)
]

In [11]:
# Each command redirects trec_eval's stdout into trec_output_folder, so the
# folder has to exist before the shell tries to open the target file.
os.makedirs(trec_output_folder, exist_ok=True)

for cmd in shell_commands:
    exit_status = os.system(cmd)
    # os.system() does not raise on failure; report a non-zero status so a
    # failed evaluation is not silently left as an empty or partial file.
    if exit_status != 0:
        print(f"Command failed (status {exit_status}): {cmd}")

## Results
Extract the selected metrics from the TREC output files

In [12]:
# Sorted names of the entries in the TREC output folder.
files = sorted(os.listdir(trec_output_folder))

In [13]:
# measurements = ["ndcg_cut_5", "ndcg_cut_10", "map_cut_5", "map_cut_10", "ndcg", "map" ,"set_recall"]
measurements = ["ndcg_cut_5", "ndcg_cut_10", "map_cut_5", "map_cut_10"]

def get_metrics(file_path: str, metrics=None) -> dict:
    """Read the selected measures from one trec_eval output file.

    A line belongs to a measure when it starts with the measure name followed
    by whitespace. The value is the last whitespace-separated token of that
    line and is kept as a string.

    Args:
        file_path: Path of the trec_eval output file to parse.
        metrics: Names of the measures to collect. Defaults to the
            module-level ``measurements`` list.

    Returns:
        A dict mapping measure name to its value string. Measures that never
        appear in the file are omitted. When a measure appears on several
        lines, the last occurrence wins.
    """
    wanted = measurements if metrics is None else metrics

    # Raw f-string so "\s" reaches the regex engine as written, and re.escape so
    # characters such as "." in a measure name are matched literally. The
    # patterns are compiled once instead of once per line.
    patterns = [(name, re.compile(rf"{re.escape(name)}\s")) for name in wanted]

    found = dict()
    with open(file_path) as f:
        for line in f:
            for name, pattern in patterns:
                if pattern.match(line):
                    # Later lines overwrite earlier ones.
                    # NOTE(review): the commands use -q, so per-query rows are
                    # present too; this assumes the summary row comes last - confirm.
                    found[name] = line.split()[-1]
    return found


In [14]:
# Map each model name (the file name without its '-trec.txt' suffix) to the
# metrics parsed from that file.
out = {
    trec_file.replace('-trec.txt', ''): get_metrics(f"{trec_output_folder}/{trec_file}")
    for trec_file in files
}

In [15]:
# `out` maps model -> {metric: value}; transpose so that each model is a row
# and each metric is a column.
df = pd.DataFrame(out).T
df.index.name = 'Model'
df.columns.name = 'Metric'

# Keep the markdown text in a variable: it is printed here and searched
# line by line further down.
markdown_table = df.to_markdown()
print(markdown_table)

| Model                            |   ndcg_cut_5 |   ndcg_cut_10 |   map_cut_5 |   map_cut_10 |
|:---------------------------------|-------------:|--------------:|------------:|-------------:|
| BM25-EXTRACTED-ALL-QUERIES       |       0.1263 |        0.1383 |      0.0724 |       0.088  |
| BM25-EXTRACTED-SYN-QUERIES       |       0.0759 |        0.085  |      0.0407 |       0.0514 |
| BM25-EXTRACTED-TREC-QUERIES      |       0.0504 |        0.0533 |      0.0317 |       0.0366 |
| BM25-META+EXTRACTED-ALL-QUERIES  |       0.3704 |        0.4099 |      0.2051 |       0.2752 |
| BM25-META+EXTRACTED-SYN-QUERIES  |       0.2265 |        0.2435 |      0.1265 |       0.1661 |
| BM25-META+EXTRACTED-TREC-QUERIES |       0.1439 |        0.1664 |      0.0786 |       0.1091 |
| BM25-META-ALL-QUERIES            |       0.5059 |        0.5222 |      0.2865 |       0.382  |
| BM25-META-SYN-QUERIES            |       0.2997 |        0.2996 |      0.1679 |       0.2185 |
| BM25-META-TREC-QUERIES      

Synthetic row analysis

In [16]:
def find_row(markdown_table: str, target_name: str) -> "str | None":
    """Return the markdown table row whose first cell is exactly ``target_name``.

    Args:
        markdown_table: Pipe-delimited markdown table, one row per line.
        target_name: Expected content of the first cell, without padding.

    Returns:
        The first matching line, unchanged, or ``None`` when no row has that name.
    """
    # Raw f-string so "\|" is passed to the regex engine as an escaped pipe.
    # The name must be followed by optional padding and then the next cell
    # separator (or end of line); otherwise a name that is a prefix of another
    # row's name would return the wrong row.
    row_pattern = re.compile(rf"\|\s*{re.escape(target_name)}\s*(?:\||$)")

    for line in markdown_table.strip().split("\n"):
        if row_pattern.match(line):
            return line
    return None

In [17]:
# Rows to pull out of the results table: the "-ALL-QUERIES" entries for the
# CS, BM25 and LMD prefixes, in the order they should be printed.
synthetic_rows = [
    # CS
    "CS-META+EXTRACTED-ALL-QUERIES",
    "CS-META-ALL-QUERIES",
    "CS-EXTRACTED-ALL-QUERIES",
    # BM25
    "BM25-META+EXTRACTED-ALL-QUERIES",
    "BM25-META-ALL-QUERIES",
    "BM25-EXTRACTED-ALL-QUERIES",
    # LMD
    "LMD-META+EXTRACTED-ALL-QUERIES",
    "LMD-META-ALL-QUERIES",
    "LMD-EXTRACTED-ALL-QUERIES",
]

# Look each row up lazily and print it as soon as it is found.
for row in (find_row(markdown_table, name) for name in synthetic_rows):
    print(row)

| CS-META+EXTRACTED-ALL-QUERIES    |       0.4751 |        0.5042 |      0.2691 |       0.3667 |
| CS-META-ALL-QUERIES              |       0.4599 |        0.474  |      0.2583 |       0.3436 |
| CS-EXTRACTED-ALL-QUERIES         |       0.128  |        0.1362 |      0.0667 |       0.0789 |
| BM25-META+EXTRACTED-ALL-QUERIES  |       0.3704 |        0.4099 |      0.2051 |       0.2752 |
| BM25-META-ALL-QUERIES            |       0.5059 |        0.5222 |      0.2865 |       0.382  |
| BM25-EXTRACTED-ALL-QUERIES       |       0.1263 |        0.1383 |      0.0724 |       0.088  |
| LMD-META+EXTRACTED-ALL-QUERIES   |       0.2775 |        0.2997 |      0.159  |       0.1952 |
| LMD-META-ALL-QUERIES             |       0.4538 |        0.4699 |      0.2679 |       0.3446 |
| LMD-EXTRACTED-ALL-QUERIES        |       0.1481 |        0.1624 |      0.0799 |       0.0992 |
