In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from pprint import pprint

from terminusdb_client import WOQLQuery as WQ

from ads_query_eval.config import get_terminus_client, get_terminus_config
from ads_query_eval.app.bootstrap import QUERIES
from ads_query_eval.frame import s3
from ads_query_eval.config import get_s3_client

# Shared TerminusDB client; later cells pass it to WOQL `.execute(...)` and
# call `client.query_document(...)` on it.
client = get_terminus_client()

# S3 client handed to `s3.get_json` when loading stored item lists.
s3_client = get_s3_client()

def list_of_bibcodes(items):
    """Return the ``"bibcode"`` value of every item, preserving input order."""
    bibcodes = []
    for item in items:
        bibcodes.append(item["bibcode"])
    return bibcodes

def get_all_items_responses(keys):
    """Load the stored JSON for each key, in the order the keys are given.

    Every key is prefixed with ``"items_all__"`` and fetched through
    ``s3.get_json`` using the notebook-level ``s3_client``.
    """
    responses = []
    for key in keys:
        prefixed_key = "items_all__" + key
        responses.append(s3.get_json(client=s3_client, key=prefixed_key))
    return responses

Checking for 200 response from http://terminus:6363/api/info...


In [None]:
# """commands to launch backfill for each job"""

# from ads_query_eval.frame.dagster import query_literal_to_dagster_name

# pairs = [(q, "retrieval__" + query_literal_to_dagster_name(q)) for q in QUERIES]

# for q, n in pairs:
#     print("docker-compose exec dagster-daemon dagster job launch -j '"
#           + n + 
#           "' \\\n--config-json '{\"ops\": {\"retrieval_op\": {\"config\": {\"date\": \"2022-09-26\", \"query_literal\": \""
#           + q.replace(r'"', r'\"') + 
#           "\"}}}}'")
#     print()

In [None]:
from tqdm.notebook import tqdm

# For every query, collect the s3 keys of its finished retrievals
# (`done` is True) into "keys".
bindings = (
    WQ()
    .group_by("v:q", "v:s3_key", "v:keys")
    .triple("v:r", "type", "@schema:Retrieval")
    .triple("v:r", "query", "v:q")
    .triple("v:r", "done", True)
    .triple("v:r", "s3_key", "v:s3_key")
    .execute(client)
)["bindings"]

# Report queries whose retrievals all returned the same bibcodes in the same order.
for b in tqdm(bindings):
    keys = [k["@value"] for k in b["keys"]]
    if len(keys) < 2:
        # Nothing to compare against; unpacking into exactly two responses
        # used to raise ValueError here (and for three or more keys).
        continue
    bibcode_lists = [list_of_bibcodes(r) for r in get_all_items_responses(keys)]
    reference = bibcode_lists[0]
    if all(other == reference for other in bibcode_lists[1:]):
        print("same!")

No "same!" was printed above: all returns are different! Huh!

In [None]:
# def delete_retrieval(r_id: str):
#     items = client.query_document({
#         "retrieval": r_id,
#         "@type": "RetrievedItem"
#     }, as_list=True)
#     item_ids = [i["@id"] for i in items]
#     return client.delete_document(item_ids + [r_id], commit_msg=f"deleting {r_id}")

# Run evaluations

In [81]:
import requests
from urllib.parse import quote

from ads_query_eval.lib.io import fetch_first_page

def get_items(s3_key, timeout=30):
    """Download the stored item list for one retrieval from the public CDN.

    Args:
        s3_key: Retrieval key; it is URL-quoted and prefixed with
            ``"items_all__"`` to form the object name.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    url = (
        "https://polyneme.nyc3.cdn.digitaloceanspaces.com/ads-query-eval/"
        + "items_all__"
        + quote(s3_key)
    )
    response = requests.get(url, timeout=timeout)
    # Fail loudly on a missing/forbidden object instead of trying to parse
    # an error page as JSON.
    response.raise_for_status()
    return response.json()

In [98]:
# Look up the EvaluatingProcedure document by its fully qualified name and
# keep the first match; `next` raises StopIteration when there is none.
ep_matches = client.query_document(
    {
        "@type": "EvaluatingProcedure",
        "fqn": "ads_query_eval.frame.evaluators.topic_review_references",
    },
    count=1,
)
ep = next(ep_matches)

In [122]:
day = "2022-08-24"

# Retrievals whose s3_key contains `day`, joined with the literal text of
# the query they belong to.
day_bindings = (WQ()
 .triple("v:r", "type", "@schema:Retrieval")
 .triple("v:r", "query", "v:q")
 .triple("v:q", "query_literal", "v:qliteral")
 .triple("v:r", "s3_key", "v:s3_key")
 .re(f".*({day}).*", "v:s3_key",["v:s3_key", "v:sub"])
 .execute(client)
)["bindings"]

# One entry per query: 1-based position of the first relevant result, or
# None when no returned item was relevant.
ranks = []

print(f"Evaluation for returns on {day}")
print("by EvaluatingProcedure ads_query_eval.frame.evaluators.topic_review_references")

for i, b in enumerate(day_bindings):
    query_literal = b["qliteral"]["@value"]
    items = get_items(b["s3_key"]["@value"])

    qa = {
        "returned": items,
        "topic_review_info": [],
        "relevant_bibcodes": set(),
    }

    # ep["config"] maps a query literal to the bibcodes fetched here
    # (presumably the topic-review papers for that query — confirm).
    for q_bibcode in ep["config"][query_literal]:
        qa["topic_review_info"].append(fetch_first_page(q_bibcode))

    # The relevant set is the union of the reference lists of the first doc
    # in each fetched response.
    for response in qa["topic_review_info"]:
        qa["relevant_bibcodes"] |= set(response['response']['docs'][0]['reference'])

    for doc in qa['returned']:
        doc_bibcodes = {doc['bibcode']} | set(doc['identifier']) # non-bibcodes don't affect intersection check.
        doc["_relevant_as_topic_review_ref"] = bool(doc_bibcodes & qa["relevant_bibcodes"])

    relevant = [doc["_relevant_as_topic_review_ref"] for doc in qa['returned']]
    print(f"({i+1}/{len(day_bindings)}) query: '{query_literal}'")
    # NOTE(review): this is the share of *returned* items that are relevant,
    # printed under the label "R@1000" — confirm that is the intended metric.
    # Guarded so an empty return prints 0.00% instead of dividing by zero.
    relevant_share = sum(relevant) / len(relevant) if relevant else 0.0
    print(f"--  R@1000: {relevant_share:.2%}")
    # Keep "no relevant result" as None; adding 1 to the None default used to
    # raise TypeError and made the '>1000' branch below unreachable.
    first_hit = next((pos for pos, is_rel in enumerate(relevant) if is_rel), None)
    rank = first_hit + 1 if first_hit is not None else None
    print(f"--  first relevant result at position: {rank if rank is not None else '>1000'}")
    print(f"--  P@25: {sum(relevant[:25])/25:.2%}")
    ranks.append(rank)
print()
# `rank` is already 1-based, so the reciprocal rank is 1/rank (a hit at
# position 1 scores 1.0). Queries with no relevant result contribute 0.
reciprocal_ranks = [1 / r if r is not None else 0.0 for r in ranks]
mrr = sum(reciprocal_ranks) / len(reciprocal_ranks) if reciprocal_ranks else float("nan")
print(f"Mean Reciprocal Rank (MRR): {mrr:.4}")

Evaluation for returns on 2022-08-24
by EvaluatingProcedure ads_query_eval.frame.evaluators.topic_review_references
(1/14) query: 'full:"coronal mass ejection"'
--  R@1000: 14.20%
--  first relevant result at position: 1
--  P@25: 32.00%
(2/14) query: 'full:"solar wind"'
--  R@1000: 2.10%
--  first relevant result at position: 11
--  P@25: 12.00%
(3/14) query: 'full:"ionospheric_conductivity"'
--  R@1000: 1.80%
--  first relevant result at position: 8
--  P@25: 12.00%
(4/14) query: 'full:"space weather"'
--  R@1000: 2.70%
--  first relevant result at position: 4
--  P@25: 16.00%
(5/14) query: 'full:"geomagnetically induced current"'
--  R@1000: 6.40%
--  first relevant result at position: 1
--  P@25: 48.00%
(6/14) query: 'full:("solar wind" AND magnetosphere AND coupling)'
--  R@1000: 3.00%
--  first relevant result at position: 1
--  P@25: 4.00%
(7/14) query: 'full:(magnetosphere AND ionosphere AND coupling)'
--  R@1000: 1.30%
--  first relevant result at position: 38
--  P@25: 0.00%


In [123]:
day = "2022-09-26"

# Retrievals whose s3_key contains `day`, joined with the literal text of
# the query they belong to.
day_bindings = (WQ()
 .triple("v:r", "type", "@schema:Retrieval")
 .triple("v:r", "query", "v:q")
 .triple("v:q", "query_literal", "v:qliteral")
 .triple("v:r", "s3_key", "v:s3_key")
 .re(f".*({day}).*", "v:s3_key",["v:s3_key", "v:sub"])
 .execute(client)
)["bindings"]

# One entry per query: 1-based position of the first relevant result, or
# None when no returned item was relevant.
ranks = []

print(f"Evaluation for returns on {day}")
print("by EvaluatingProcedure ads_query_eval.frame.evaluators.topic_review_references")

for i, b in enumerate(day_bindings):
    query_literal = b["qliteral"]["@value"]
    items = get_items(b["s3_key"]["@value"])

    qa = {
        "returned": items,
        "topic_review_info": [],
        "relevant_bibcodes": set(),
    }

    # ep["config"] maps a query literal to the bibcodes fetched here
    # (presumably the topic-review papers for that query — confirm).
    for q_bibcode in ep["config"][query_literal]:
        qa["topic_review_info"].append(fetch_first_page(q_bibcode))

    # The relevant set is the union of the reference lists of the first doc
    # in each fetched response.
    for response in qa["topic_review_info"]:
        qa["relevant_bibcodes"] |= set(response['response']['docs'][0]['reference'])

    for doc in qa['returned']:
        doc_bibcodes = {doc['bibcode']} | set(doc['identifier']) # non-bibcodes don't affect intersection check.
        doc["_relevant_as_topic_review_ref"] = bool(doc_bibcodes & qa["relevant_bibcodes"])

    relevant = [doc["_relevant_as_topic_review_ref"] for doc in qa['returned']]
    print(f"({i+1}/{len(day_bindings)}) query: '{query_literal}'")
    # NOTE(review): this is the share of *returned* items that are relevant,
    # printed under the label "R@1000" — confirm that is the intended metric.
    # Guarded so an empty return prints 0.00% instead of dividing by zero.
    relevant_share = sum(relevant) / len(relevant) if relevant else 0.0
    print(f"--  R@1000: {relevant_share:.2%}")
    # Keep "no relevant result" as None; adding 1 to the None default used to
    # raise TypeError and made the '>1000' branch below unreachable.
    first_hit = next((pos for pos, is_rel in enumerate(relevant) if is_rel), None)
    rank = first_hit + 1 if first_hit is not None else None
    print(f"--  first relevant result at position: {rank if rank is not None else '>1000'}")
    print(f"--  P@25: {sum(relevant[:25])/25:.2%}")
    ranks.append(rank)
print()
# `rank` is already 1-based, so the reciprocal rank is 1/rank (a hit at
# position 1 scores 1.0). Queries with no relevant result contribute 0.
reciprocal_ranks = [1 / r if r is not None else 0.0 for r in ranks]
mrr = sum(reciprocal_ranks) / len(reciprocal_ranks) if reciprocal_ranks else float("nan")
print(f"Mean Reciprocal Rank (MRR): {mrr:.4}")

Evaluation for returns on 2022-09-26
by EvaluatingProcedure ads_query_eval.frame.evaluators.topic_review_references
(1/14) query: 'full:"coronal mass ejection"'
--  R@1000: 13.90%
--  first relevant result at position: 1
--  P@25: 28.00%
(2/14) query: 'full:"solar wind"'
--  R@1000: 2.00%
--  first relevant result at position: 27
--  P@25: 0.00%
(3/14) query: 'full:"ionospheric_conductivity"'
--  R@1000: 2.70%
--  first relevant result at position: 9
--  P@25: 12.00%
(4/14) query: 'full:"space weather"'
--  R@1000: 3.80%
--  first relevant result at position: 2
--  P@25: 12.00%
(5/14) query: 'full:"geomagnetically induced current"'
--  R@1000: 5.80%
--  first relevant result at position: 1
--  P@25: 36.00%
(6/14) query: 'full:(magnetosphere AND ionosphere AND coupling)'
--  R@1000: 1.40%
--  first relevant result at position: 57
--  P@25: 0.00%
(7/14) query: 'full:("interplanetary magnetic field" AND reconnection)'
--  R@1000: 1.20%
--  first relevant result at position: 12
--  P@25: 4