In [5]:
import os
from urllib.parse import urlencode

import requests

# Search endpoint that every request in this notebook is sent to.
url = "https://ui.adsabs.harvard.edu/v1/search/query"

# Headers shared by all requests. The bearer token is read from the
# ADS_API_TOKEN environment variable when this cell runs; if the variable is
# unset, os.getenv returns None and the header value becomes "Bearer None".
headers = {
    "Accept": "application/json",
    "Content-type": "application/json",
}
headers["Authorization"] = f"Bearer {os.getenv('ADS_API_TOKEN')}"

def params_for(q="", rows=25, start=0):
    """Build the query-string parameters for one search request.

    Args:
        q: Search string placed in the ``q`` parameter.
        rows: Value for the ``rows`` parameter (presumably the page size).
            Defaults to 25, the value that used to be hard-coded.
        start: Value for the ``start`` parameter (presumably the offset of the
            first result). Defaults to 0, the value that used to be hard-coded.

    Returns:
        A new dict mapping parameter names to string values. ``rows`` and
        ``start`` are converted with ``str`` so ints and strings are both
        accepted and the value type stays the same as before.
    """
    return {
        "__clearBigQuery": "true",
        # Comma-separated list of document fields to return.
        "fl": (
            "identifier,[citations],abstract,author,book_author,"
            "orcid_pub,orcid_user,orcid_other,bibcode,citation_count,"
            "comment,doi,id,keyword,page,property,pub,pub_raw,pubdate,pubnote,"
            "read_count,title,volume,links_data,esources,data,"
            "citation_count_norm,email,doctype"
        ),
        "q": q,
        # Highlighting options; the responses' 'highlighting' section is
        # merged into the documents later in this notebook.
        "hl": "true",
        "hl.fl": "title,abstract,body,ack,*",
        "hl.maxAnalyzedChars": "150000",
        "hl.requireFieldMatch": "true",
        "hl.usePhraseHighlighter": "true",
        "rows": str(rows),
        "sort": "date desc, bibcode desc",
        "start": str(start),
    }

def fetch(q, timeout=30):
    """Send one search request for ``q`` and return the raw response.

    Args:
        q: Search string; expanded into the full parameter set by
            ``params_for``.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Without a timeout a stalled connection would block the notebook
            indefinitely.

    Returns:
        The ``requests.Response`` object. The HTTP status is not checked here.

    Raises:
        requests.exceptions.Timeout: if the server does not respond in time.
    """
    encoded_query = urlencode(params_for(q))
    return requests.get(f"{url}?{encoded_query}", headers=headers, timeout=timeout)

In [6]:
# Search strings sent to the API, one request per entry, in this order.
queries = (
    # Fielded `full:` searches: quoted phrases and boolean combinations.
    'full:"coronal mass ejection"',
    'full:"solar wind"',
    # NOTE(review): this is the only phrase written with an underscore instead
    # of a space — confirm it is intentional and not a typo.
    'full:"ionospheric_conductivity"',
    'full:"geomagnetically induced current"',
    'full:("solar wind" AND magnetosphere AND coupling)',
    'full:(magnetosphere AND ionosphere AND coupling)',
    'full:("interplanetary magnetic field" AND reconnection)',
    'full:"substorm"',
    'full:"particle acceleration"',
    # Function-style queries (similar / useful / topn / trending); presumably
    # ADS second-order operators — see the ADS search syntax docs.
    'similar(bibcode:2015AdSpR..55.2745S)',
    # NOTE(review): the next two pass a bare bibcode to similar(), whereas the
    # entry above uses the `bibcode:` field prefix — confirm both forms work.
    'useful(topn(200,similar(1958ApJ...128..664P)))',
    'useful(topn(200,similar(1961PhRvL...6...47D)))',
    'trending(full:"space weather")'
)

In [7]:
from tqdm import tqdm

from time import sleep

# Issue the queries one at a time, pausing two seconds after each request,
# and keep the raw HTTP responses in the same order as `queries`.
responses = []

for query in tqdm(queries):
    reply = fetch(query)
    responses.append(reply)
    sleep(2)

100%|██████████| 13/13 [01:23<00:00,  6.39s/it]


In [9]:
# Collect every non-200 response so that a failure names the offending
# request(s) instead of raising a bare AssertionError with no context.
failed = [(rv.status_code, rv.url) for rv in responses if rv.status_code != 200]
assert not failed, f"{len(failed)} of {len(responses)} requests failed: {failed}"

In [11]:
# Parse the JSON body of every HTTP response, keeping the request order.
api_responses = [http_response.json() for http_response in responses]

In [34]:
# Map each query string (as echoed back in the response header) to its list of
# documents, attaching the matching highlighting entry to every document that
# has a non-empty one.
docs_for_query = {}

for api_response in api_responses:
    snippets_by_id = api_response['highlighting']
    query_string = api_response['responseHeader']['params']['q']
    merged_docs = []
    for doc in api_response['response']['docs']:
        merged = dict(doc)  # shallow copy; the parsed response is left untouched
        snippets = snippets_by_id.get(doc['id'])
        if snippets:
            merged['highlighting'] = snippets
        merged_docs.append(merged)
    docs_for_query[query_string] = merged_docs

In [None]:
# Inspect the raw `pubdate` of the first document returned for the first query.
# `api_responses[0]` is used because no bare `response` variable is defined by
# any cell visible in this notebook, so the old expression raised NameError on
# a fresh kernel.
# The ADS UI shows a `formattedDate` in YYYY/MM form (presumably derived from
# this field — confirm).
api_responses[0]['response']['docs'][0]['pubdate']

ADS display template: https://github.com/adsabs/bumblebee/blob/752b9146a404de2cfefebf55cb0cc983907f7519/src/js/widgets/list_of_things/templates/item-template.html

The template does not render the raw API JSON response directly: custom JS code preprocesses it first, e.g. to produce a `formattedDate` field and a `links` field, the latter derived from several fields of the raw API response.

In [35]:
import json

# Persist the per-query document lists as indented JSON in the working directory.
with open(file='query_responses.json', mode='w') as out_file:
    json.dump(docs_for_query, fp=out_file, indent=2)