In [53]:
import os
from urllib.parse import urlencode

import requests

url = "https://ui.adsabs.harvard.edu/v1/search/query"

headers = {
    "Accept": "application/json",
    "Content-type": "application/json",
    "Authorization": f"Bearer {os.getenv('ADS_API_TOKEN')}"
}

def params_for(q=""):
    return {
        "__clearBigQuery": "true",
        "fl": (
            "identifier,[citations],reference,abstract,author,book_author,"
            "orcid_pub,orcid_user,orcid_other,bibcode,citation_count,"
            "comment,doi,id,keyword,page,property,pub,pub_raw,pubdate,pubnote,"
            "read_count,title,volume,links_data,esources,data,"
            "citation_count_norm,email,doctype"
        ),
        "q": q,
        "hl": "true",
        "hl.fl": "title,abstract,body,ack,*",
        "hl.maxAnalyzedChars": "150000",
        "hl.requireFieldMatch": "true",
        "hl.usePhraseHighlighter": "true",
        "rows": "25",
        "sort": "date desc, bibcode desc",
        "start": "0"
    }

def fetch_first_page(q):
    encoded_query = urlencode(params_for(q))
    response = requests.get(f"{url}?{encoded_query}", headers=headers)
    if response.status_code != 200:
            raise Exception(response.text)
    return response.json()
    

def fetch_first_n(q, n=1000):
    responses = []
    n_fetched_total = 0
    start = 0
    rows = 200 if n > 200 else n
    while True:
        params = params_for(q)
        params["rows"] = str(rows)
        params["start"] = str(start)
        response = requests.get(f"{url}?{urlencode(params)}", headers=headers)
        if response.status_code != 200:
            raise Exception(response.text)
        rv = response.json()
        responses.append(rv)
        n_fetched = len(rv["response"]["docs"])
        n_fetched_total += n_fetched
        if n_fetched_total >= n or n_fetched < rows:
            print(n_fetched_total, n, n_fetched, rows)
            break
        else:
            start += rows
            print(f"q: {q} start: {start}...")
    return responses

In [49]:
queries = (
    'full:"coronal mass ejection"',
    'full:"solar wind"',
    'full:"ionospheric_conductivity"',
    'full:"space weather"',
    'full:"geomagnetically induced current"',
    'full:("solar wind" AND magnetosphere AND coupling)',
    'full:(magnetosphere AND ionosphere AND coupling)',
    'full:("interplanetary magnetic field" AND reconnection)',
    'full:"substorm"',
    'full:"particle acceleration"',
    #'similar(bibcode:2015AdSpR..55.2745S)',
    #'useful(topn(200,similar(1958ApJ...128..664P)))',
    #'useful(topn(200,similar(1961PhRvL...6...47D)))',
    #'trending(full:"space weather")'
)

In [None]:
responses = []

In [59]:
from tqdm import tqdm

for q in tqdm(queries[1:]):
    responses.extend(fetch_first_n(q, n=1000))

  0%|          | 0/9 [00:00<?, ?it/s]

q: full:"solar wind" start: 200...
q: full:"solar wind" start: 400...
q: full:"solar wind" start: 600...
q: full:"solar wind" start: 800...


 11%|█         | 1/9 [01:20<10:47, 80.89s/it]

1000 1000 200 200
q: full:"ionospheric_conductivity" start: 200...
q: full:"ionospheric_conductivity" start: 400...
q: full:"ionospheric_conductivity" start: 600...
q: full:"ionospheric_conductivity" start: 800...


 22%|██▏       | 2/9 [02:45<09:43, 83.30s/it]

1000 1000 200 200
q: full:"space weather" start: 200...
q: full:"space weather" start: 400...
q: full:"space weather" start: 600...
q: full:"space weather" start: 800...


 33%|███▎      | 3/9 [04:06<08:12, 82.16s/it]

1000 1000 200 200
q: full:"geomagnetically induced current" start: 200...
q: full:"geomagnetically induced current" start: 400...
q: full:"geomagnetically induced current" start: 600...
q: full:"geomagnetically induced current" start: 800...


 44%|████▍     | 4/9 [04:59<05:52, 70.54s/it]

1000 1000 200 200
q: full:("solar wind" AND magnetosphere AND coupling) start: 200...
q: full:("solar wind" AND magnetosphere AND coupling) start: 400...
q: full:("solar wind" AND magnetosphere AND coupling) start: 600...
q: full:("solar wind" AND magnetosphere AND coupling) start: 800...


 56%|█████▌    | 5/9 [06:35<05:18, 79.63s/it]

1000 1000 200 200
q: full:(magnetosphere AND ionosphere AND coupling) start: 200...
q: full:(magnetosphere AND ionosphere AND coupling) start: 400...
q: full:(magnetosphere AND ionosphere AND coupling) start: 600...
q: full:(magnetosphere AND ionosphere AND coupling) start: 800...


 67%|██████▋   | 6/9 [07:58<04:02, 80.83s/it]

1000 1000 200 200
q: full:("interplanetary magnetic field" AND reconnection) start: 200...
q: full:("interplanetary magnetic field" AND reconnection) start: 400...
q: full:("interplanetary magnetic field" AND reconnection) start: 600...
q: full:("interplanetary magnetic field" AND reconnection) start: 800...


 78%|███████▊  | 7/9 [10:04<03:11, 95.60s/it]

1000 1000 200 200
q: full:"substorm" start: 200...
q: full:"substorm" start: 400...
q: full:"substorm" start: 600...
q: full:"substorm" start: 800...


 89%|████████▉ | 8/9 [11:25<01:30, 90.95s/it]

1000 1000 200 200
q: full:"particle acceleration" start: 200...
q: full:"particle acceleration" start: 400...
q: full:"particle acceleration" start: 600...
q: full:"particle acceleration" start: 800...


100%|██████████| 9/9 [12:50<00:00, 85.65s/it]

1000 1000 200 200





In [60]:
len(responses)

50

In [63]:
import json

with open('query_responses.json','w') as f:
    json.dump(responses, f, indent=2)

In [80]:
query_analysis = {}

for r in responses:
    highlighting = r['highlighting']
    q = r['responseHeader']['params']['q']
    docs = r['response']['docs']
    docs_with_highlighting = []
    for d in docs:
        dwh = {k: v for k, v in d.items()}
        h_for_doc = highlighting.get(d['id'])
        if h_for_doc:
            dwh['highlighting'] = h_for_doc
        docs_with_highlighting.append(dwh)
    if q not in query_analysis:
        query_analysis[q] = {"returned": []}
    query_analysis[q]["returned"].extend(docs_with_highlighting)

ADS display template: https://github.com/adsabs/bumblebee/blob/752b9146a404de2cfefebf55cb0cc983907f7519/src/js/widgets/list_of_things/templates/item-template.html

The template depends on preprocessing of the API JSON response by custom JS code, e.g. to produce a `formattedDate` field and a `links` field which is derived from various raw-API-response fields.

In [82]:
query_topic_reviews = {
    'full:"coronal mass ejection"': ['bibcode:2017LRSP...14....5K', 'bibcode:2012LRSP....9....3W'],
    'full:"solar wind"': ['bibcode:2021LRSP...18....3V'],
    'full:"ionospheric_conductivity"': ['bibcode:1993JATP...55.1493B', 'bibcode:2008AnGeo..26.3913A', 'bibcode:2012GMS...197..143M', 'bibcode:1956NCim....4S1385C'],
    'full:"space weather"': ['bibcode:2006LRSP....3....2S', 'bibcode:2021LRSP...18....4T', 'bibcode:2007LRSP....4....1P', 'bibcode:2015AdSpR..55.2745S', 'bibcode:2022FrASS...8..253B'],
    'full:"geomagnetically induced current"': ['bibcode:2017SpWea..15..828P', 'bibcode:2017SpWea..15..258B'],
    'full:("solar wind" AND magnetosphere AND coupling)': ['bibcode:2021LRSP...18....3V', 'bibcode:2007LRSP....4....1P', 'bibcode:2022FrASS...908629D'],
    'full:(magnetosphere AND ionosphere AND coupling)': ['bibcode:2007LRSP....4....1P', 'bibcode:2008SSRv..139..235W'],
    'full:("interplanetary magnetic field" AND reconnection)': ['bibcode:2012SSRv..172..187G', 'bibcode:2011SSRv..160...95F', 'bibcode:2021LRSP...18....3V'],
    'full:"substorm"': ['bibcode:2015SSRv..190....1K'],
    'full:"particle acceleration"': ['bibcode:2012SSRv..173..433F', 'bibcode:2012SSRv..173..103M'],
}

In [85]:
for q, a in tqdm(query_analysis.items(), total=len(query_analysis)):
    print(q, len(a["returned"]))
    if "topic_review_info" not in a:
        a["topic_review_info"] = []
    for q_bibcode in query_topic_reviews[q]:
        print(f"fetching {q_bibcode} for {q}")
        a["topic_review_info"].append(fetch_first_page(q_bibcode))

  0%|          | 0/10 [00:00<?, ?it/s]

full:"coronal mass ejection" 1000
fetching bibcode:2017LRSP...14....5K for full:"coronal mass ejection"


 10%|█         | 1/10 [00:00<00:04,  1.93it/s]

fetching bibcode:2012LRSP....9....3W for full:"coronal mass ejection"
full:"solar wind" 1000
fetching bibcode:2021LRSP...18....3V for full:"solar wind"


 20%|██        | 2/10 [00:00<00:03,  2.37it/s]

full:"ionospheric_conductivity" 1000
fetching bibcode:1993JATP...55.1493B for full:"ionospheric_conductivity"
fetching bibcode:2008AnGeo..26.3913A for full:"ionospheric_conductivity"
fetching bibcode:2012GMS...197..143M for full:"ionospheric_conductivity"
fetching bibcode:1956NCim....4S1385C for full:"ionospheric_conductivity"


 30%|███       | 3/10 [00:01<00:04,  1.65it/s]

full:"space weather" 1000
fetching bibcode:2006LRSP....3....2S for full:"space weather"
fetching bibcode:2021LRSP...18....4T for full:"space weather"
fetching bibcode:2007LRSP....4....1P for full:"space weather"
fetching bibcode:2015AdSpR..55.2745S for full:"space weather"


 40%|████      | 4/10 [00:03<00:05,  1.08it/s]

fetching bibcode:2022FrASS...8..253B for full:"space weather"
full:"geomagnetically induced current" 1000
fetching bibcode:2017SpWea..15..828P for full:"geomagnetically induced current"
fetching bibcode:2017SpWea..15..258B for full:"geomagnetically induced current"


 50%|█████     | 5/10 [00:03<00:03,  1.26it/s]

full:("solar wind" AND magnetosphere AND coupling) 1000
fetching bibcode:2021LRSP...18....3V for full:("solar wind" AND magnetosphere AND coupling)
fetching bibcode:2007LRSP....4....1P for full:("solar wind" AND magnetosphere AND coupling)


 60%|██████    | 6/10 [00:04<00:03,  1.27it/s]

fetching bibcode:2022FrASS...908629D for full:("solar wind" AND magnetosphere AND coupling)
full:(magnetosphere AND ionosphere AND coupling) 1000
fetching bibcode:2007LRSP....4....1P for full:(magnetosphere AND ionosphere AND coupling)


 70%|███████   | 7/10 [00:04<00:01,  1.50it/s]

fetching bibcode:2008SSRv..139..235W for full:(magnetosphere AND ionosphere AND coupling)
full:("interplanetary magnetic field" AND reconnection) 1000
fetching bibcode:2012SSRv..172..187G for full:("interplanetary magnetic field" AND reconnection)
fetching bibcode:2011SSRv..160...95F for full:("interplanetary magnetic field" AND reconnection)
fetching bibcode:2021LRSP...18....3V for full:("interplanetary magnetic field" AND reconnection)


 90%|█████████ | 9/10 [00:05<00:00,  2.02it/s]

full:"substorm" 1000
fetching bibcode:2015SSRv..190....1K for full:"substorm"
full:"particle acceleration" 1000
fetching bibcode:2012SSRv..173..433F for full:"particle acceleration"


100%|██████████| 10/10 [00:05<00:00,  1.70it/s]

fetching bibcode:2012SSRv..173..103M for full:"particle acceleration"





In [104]:
for q, a in query_analysis.items():
    a["relevant_bibcodes"] = set()
    for response in a["topic_review_info"]:
        a["relevant_bibcodes"] |= set(response['response']['docs'][0]['reference'])
    a["relevant_bibcodes"] = list(a["relevant_bibcodes"])

In [105]:
for q, a in query_analysis.items():
    for doc in a['returned']:
        doc_bibcodes = set()
        doc_bibcodes.add(doc['bibcode'])
        doc_bibcodes |= set(doc['identifier']) # non-bibcodes don't affect intersection check below.
        doc["_relevant_as_topic_review_ref"] = bool(doc_bibcodes & set(a["relevant_bibcodes"]))

In [129]:
for q, a in query_analysis.items():
    relevant = [doc["_relevant_as_topic_review_ref"] for doc in a['returned']]
    print(f"query '{q}':")
    print(f"  first relevant result at position: {next((i for i, r in enumerate(relevant) if r), 'N/A')}")

query 'full:"coronal mass ejection"':
  first relevant result at position: N/A
query 'full:"solar wind"':
  first relevant result at position: N/A
query 'full:"ionospheric_conductivity"':
  first relevant result at position: N/A
query 'full:"space weather"':
  first relevant result at position: N/A
query 'full:"geomagnetically induced current"':
  first relevant result at position: 679
query 'full:("solar wind" AND magnetosphere AND coupling)':
  first relevant result at position: 839
query 'full:(magnetosphere AND ionosphere AND coupling)':
  first relevant result at position: N/A
query 'full:("interplanetary magnetic field" AND reconnection)':
  first relevant result at position: N/A
query 'full:"substorm"':
  first relevant result at position: N/A
query 'full:"particle acceleration"':
  first relevant result at position: N/A


In [130]:
import json

with open('query_analysis.json','w') as f:
    json.dump(query_analysis, f, indent=2)

In [131]:
import json

with open('query_analysis.json','r') as f:
    qa_loaded = json.load(f)
    
for q, a in qa_loaded.items():
    relevant = [doc["_relevant_as_topic_review_ref"] for doc in a['returned']]
    print(f"query '{q}':")
    print(f"  first relevant result at position: {next((i for i, r in enumerate(relevant) if r), 'N/A')}")

query 'full:"coronal mass ejection"':
  first relevant result at position: N/A
query 'full:"solar wind"':
  first relevant result at position: N/A
query 'full:"ionospheric_conductivity"':
  first relevant result at position: N/A
query 'full:"space weather"':
  first relevant result at position: N/A
query 'full:"geomagnetically induced current"':
  first relevant result at position: 679
query 'full:("solar wind" AND magnetosphere AND coupling)':
  first relevant result at position: 839
query 'full:(magnetosphere AND ionosphere AND coupling)':
  first relevant result at position: N/A
query 'full:("interplanetary magnetic field" AND reconnection)':
  first relevant result at position: N/A
query 'full:"substorm"':
  first relevant result at position: N/A
query 'full:"particle acceleration"':
  first relevant result at position: N/A
