In [None]:
# !pip install pkgman
# !pip install git+https://github.com/clarivate/wosstarter_python_client.git

In [None]:
import clarivate.wos_starter.client
from pkgman import include
import time
import clarivate.wos_starter.client
from clarivate.wos_starter.client.rest import ApiException
from pprint import pprint

include(["pandas", "requests", "numpy", "tqdm", "os", "rich"])
pd = pandas
req = requests
np = numpy
os = os
pp = rich.print
tqdm = tqdm.tqdm

In [None]:
config_file = "../config/starter.key"
if os.path.exists(config_file):
    with open(config_file, "r") as f:
        api_key = f.read().strip()
    pp("[green bold]API key successfully loaded from config file.[/green bold]")
else:
    api_key = input("Please enter your API key: ")
    with open(config_file, "w") as f:
        f.write(api_key)
        pp("[green bold]API key saved to config file.[/green bold]")

In [82]:
def req_wos(
    query: str = "((DT=ARTICLE) OR (DT=Meeting) OR (DT=Proceedings Paper) OR (DT=Book) ) AND (PY=2024)",
    api_key: str = api_key,
    page: int = 1,
):
    """
    Function to make a request to the Web of Science API.
    """  # Defining the host is optional and defaults to http://api.clarivate.com/apis/wos-starter/v1

    # See configuration.py for a list of all supported configuration parameters.
    configuration = clarivate.wos_starter.client.Configuration(
        host="https://api.clarivate.com/apis/wos-starter/v1"
    )

    # The client must configure the authentication and authorization parameters
    # in accordance with the API server security policy.
    # Examples for each auth method are provided below, use the example that
    # satisfies your auth use case.

    # Configure API key authorization: ClarivateApiKeyAuth
    configuration.api_key["ClarivateApiKeyAuth"] = api_key
    ""
    # Uncomment below to setup prefix (e.g. Bearer) for API key, if needed
    # configuration.api_key_prefix['ClarivateApiKeyAuth'] = 'Bearer'

    # Enter a context with an instance of the API client
    with clarivate.wos_starter.client.ApiClient(configuration) as api_client:
        # Create an instance of the API class
        api_instance = clarivate.wos_starter.client.DocumentsApi(api_client)
        q = (
            query if query else "PY=2024"
        )  # str | Web of Science advanced [advanced search query builder](https://webofscience.help.clarivate.com/en-us/Content/advanced-search.html). The supported field tags are listed in description.
        db = "WOS"
        limit = 50  # int | set the limit of records on the page (1-50) (optional) (default to 10)
        page = page  # int | set the result page (optional) (default to 1)
        sort_field = "LD+D"  # str | Order by field(s). Field name and order by clause separated by '+', use A for ASC and D for DESC, ex: PY+D. Multiple values are separated by comma. Supported fields:  * **LD** - Load Date * **PY** - Publication Year * **RS** - Relevance * **TC** - Times Cited  (optional)
        modified_time_span = None  # str | Defines a date range in which the results were most recently modified. Beginning and end dates must be specified in the yyyy-mm-dd format separated by '+' or ' ', e.g. 2023-01-01+2023-12-31. This parameter is not compatible with the all databases search, i.e. db=WOK is not compatible with this parameter. (optional)
        tc_modified_time_span = None  # str | Defines a date range in which times cited counts were modified. Beginning and end dates must be specified in the yyyy-mm-dd format separated by '+' or ' ', e.g. 2023-01-01+2023-12-31. This parameter is not compatible with the all databases search, i.e. db=WOK is not compatible with this parameter. (optional)
        detail = None  # str | it will returns the full data by default, if detail=short it returns the limited data (optional)

        try:
            # Query Web of Science documents
            api_response = api_instance.documents_get(
                q,
                db=db,
                limit=limit,
                page=page,
                sort_field=sort_field,
                modified_time_span=modified_time_span,
                tc_modified_time_span=tc_modified_time_span,
                detail=detail,
            )
            pp("[bold green]The response of DocumentsApi->documents_get:\n[/]")
            return api_response
        except ApiException as e:
            pp(
                "[bold red][Warning] Exception when calling DocumentsApi->documents_get: %s\n[/]"
                % e
            )
            return None
        except Exception as e:
            pp(
                "[bold red][Warning] Exception when calling DocumentsApi->documents_get: %s\n[/]"
                % e
            )
            return None


def parse_wos(content, include_meta: bool = True) -> dict:
    """
    Function to parse the result of the Web of Science API request.
    """

    # =[AuthorName(display_name='Nemanov, A. D.', wos_standard='Nemanov, AD', researcher_id=None),
    def parse_wos_item(item):
        parsed_item = {}
        for prop in [
            "uid",
            "title",
            "types",
            "sourceTypes",
            "source",
            "names",
            "links",
            "citations",
            "identifiers",
            "keywords",
        ]:
            if hasattr(item, prop):
                parsed_item[prop] = getattr(item, prop)
        # [i for i in parse_wos(result)["records"][0]["names"].authors]
        if parsed_item["names"] is not None:
            if parsed_item["names"].authors is not None:
                parsed_item["authors"] = [
                    {
                        "display_name": i.display_name,
                        "wos_standard": i.wos_standard,
                        "researcher_id": i.researcher_id,
                    }
                    for i in parsed_item["names"].authors
                ]
            # del parsed_item["names"]
        if parsed_item["source"] is not None:
            parsed_item["source_title"] = parsed_item["source"].source_title
            parsed_item["year"] = parsed_item["source"].publish_year
            parsed_item["month"] = parsed_item["source"].publish_month
            parsed_item["volume"] = parsed_item["source"].volume
            parsed_item["issue"] = parsed_item["source"].issue
            parsed_item["page_range"] = parsed_item["source"].pages.range
            parsed_item["page_begin"] = parsed_item["source"].pages.begin
            parsed_item["page_end"] = parsed_item["source"].pages.end
            parsed_item["page_count"] = parsed_item["source"].pages.count
            del parsed_item["source"]
        if parsed_item["citations"] is not None:
            parsed_item["citation_count"] = parsed_item["citations"][0].count
            parsed_item["citations_db"] = parsed_item["citations"][0].db
            del parsed_item["citations"]
        if parsed_item["keywords"] is not None:
            parsed_item["keywords"] = parsed_item["keywords"].author_keywords
            del parsed_item["keywords"]
        if parsed_item["identifiers"] is not None:
            parsed_item["doi"] = parsed_item["identifiers"].doi
            parsed_item["issn"] = parsed_item["identifiers"].issn
            parsed_item["isbn"] = parsed_item["identifiers"].isbn
            parsed_item["eisbn"] = parsed_item["identifiers"].eisbn
            del parsed_item["identifiers"]
        if parsed_item["links"] is not None:
            parsed_item["link_record"] = parsed_item["links"].record
            parsed_item["link_citing"] = parsed_item["links"].citing_articles
            parsed_item["link_references"] = parsed_item["links"].references
            parsed_item["link_related"] = parsed_item["links"].related
            del parsed_item["links"]
        return parsed_item

    if not content:
        return None

    docs = {
        "records": [parse_wos_item(i) for i in content.hits],
        "meta": {
            "total": content.metadata.total,
            "limit": content.metadata.limit,
            "page": content.metadata.page,
        },
    }

    if include_meta:
        return docs
    else:
        return docs["records"]

In [83]:
result = req_wos()
# pp(result)
parse_wos(result)["records"][0]

{'uid': 'WOS:001429163300076',
 'title': 'Collaborative Content Caching in IIoT: A Multi-Agent Reinforcement Learning-Based Approach',
 'types': ['Meeting'],
 'names': DocumentNames(authors=[AuthorName(display_name='Kang, Bingtao', wos_standard='Kang, BT', researcher_id=None), AuthorName(display_name='Peng, Kai', wos_standard='Peng, K', researcher_id=None), AuthorName(display_name='Chen, Yining', wos_standard='Chen, YN', researcher_id=None)], inventors=None, book_corp=[OtherName(display_name='IEEE COMPUTER SOC')], book_editors=None, books=None, additional_authors=None, anonymous=None, assignees=None, corp=None, editors=None, investigators=None, sponsors=None, issuing_organizations=None),
 'authors': [{'display_name': 'Kang, Bingtao',
   'wos_standard': 'Kang, BT',
   'researcher_id': None},
  {'display_name': 'Peng, Kai',
   'wos_standard': 'Peng, K',
   'researcher_id': None},
  {'display_name': 'Chen, Yining',
   'wos_standard': 'Chen, YN',
   'researcher_id': None}],
 'source_title'

In [85]:
df = pd.DataFrame(parse_wos(result)["records"])
df.to_csv(
    "../data/wos.tsv",
    index=False,
    sep="\t",
    encoding="utf-8",
)
df

Unnamed: 0,uid,title,types,names,authors,source_title,year,month,volume,issue,...,citation_count,citations_db,doi,issn,isbn,eisbn,link_record,link_citing,link_references,link_related
0,WOS:001429163300076,Collaborative Content Caching in IIoT: A Multi...,[Meeting],"authors=[AuthorName(display_name='Kang, Bingta...","[{'display_name': 'Kang, Bingtao', 'wos_standa...",2024 IEEE INTERNATIONAL CONFERENCE ON SMART IN...,2024,,,,...,0,WOS,10.1109/SmartIoT62235.2024.00084,2770-2669,979-8-3503-6645-7,979-8-3503-6644-0,https://www.webofscience.com/api/gateway?GWVer...,,https://www.webofscience.com/api/gateway?GWVer...,https://www.webofscience.com/api/gateway?GWVer...
1,WOS:001429163300022,Real-Scene 3D Urban Elements Modeling for Digi...,[Meeting],"authors=[AuthorName(display_name='Li, Yuejin',...","[{'display_name': 'Li, Yuejin', 'wos_standard'...",2024 IEEE INTERNATIONAL CONFERENCE ON SMART IN...,2024,,,,...,0,WOS,10.1109/SmartIoT62235.2024.00031,2770-2669,979-8-3503-6645-7,979-8-3503-6644-0,https://www.webofscience.com/api/gateway?GWVer...,,https://www.webofscience.com/api/gateway?GWVer...,https://www.webofscience.com/api/gateway?GWVer...
2,WOS:001424958800103,Angle-Aware Full 3D Coverage Control with ADMM...,[Meeting],"authors=[AuthorName(display_name='Lu, Zhiyuan'...","[{'display_name': 'Lu, Zhiyuan', 'wos_standard...","2024 SICE FESTIVAL WITH ANNUAL CONFERENCE, SIC...",2024,,,,...,0,WOS,,,979-8-3315-4446-1,978-4-9077-6483-8,https://www.webofscience.com/api/gateway?GWVer...,,https://www.webofscience.com/api/gateway?GWVer...,https://www.webofscience.com/api/gateway?GWVer...
3,WOS:001428150100468,Low-Intensity Pulsed Ultrasound Improved the o...,[Meeting],"authors=[AuthorName(display_name='He, Min', wo...","[{'display_name': 'He, Min', 'wos_standard': '...","2024 IEEE ULTRASONICS, FERROELECTRICS, AND FRE...",2024,,,,...,0,WOS,10.1109/UFFC-JS60046.2024.10794007,1099-4734,979-8-3503-7191-8,979-8-3503-7190-1,https://www.webofscience.com/api/gateway?GWVer...,,https://www.webofscience.com/api/gateway?GWVer...,https://www.webofscience.com/api/gateway?GWVer...
4,WOS:001436568400001,On the Behaviors of Fuzzy Knowledge Graphs,[Meeting],"authors=[AuthorName(display_name='Ye, Yu', wos...","[{'display_name': 'Ye, Yu', 'wos_standard': 'Y...","COMPUTER APPLICATIONS, CCF NCCA 2024, PT I",2024,,2274,,...,0,WOS,10.1007/978-981-97-9671-7_1,1865-0929,978-981-97-9670-0,978-981-97-9671-7,https://www.webofscience.com/api/gateway?GWVer...,,https://www.webofscience.com/api/gateway?GWVer...,https://www.webofscience.com/api/gateway?GWVer...
5,WOS:001428150100169,Study on Attenuation Tomography Using Low-freq...,[Meeting],"authors=[AuthorName(display_name='Zhang, Tong'...","[{'display_name': 'Zhang, Tong', 'wos_standard...","2024 IEEE ULTRASONICS, FERROELECTRICS, AND FRE...",2024,,,,...,0,WOS,10.1109/UFFC-JS60046.2024.10793704,1099-4734,979-8-3503-7191-8,979-8-3503-7190-1,https://www.webofscience.com/api/gateway?GWVer...,,https://www.webofscience.com/api/gateway?GWVer...,https://www.webofscience.com/api/gateway?GWVer...
6,WOS:001428150100022,Wideband Air-coupled Piezoelectric MEMS Ultras...,[Meeting],"authors=[AuthorName(display_name='Nabavi, Seye...","[{'display_name': 'Nabavi, Seyedfakhreddin', '...","2024 IEEE ULTRASONICS, FERROELECTRICS, AND FRE...",2024,,,,...,0,WOS,10.1109/UFFC-JS60046.2024.10793501,1099-4734,979-8-3503-7191-8,979-8-3503-7190-1,https://www.webofscience.com/api/gateway?GWVer...,,https://www.webofscience.com/api/gateway?GWVer...,https://www.webofscience.com/api/gateway?GWVer...
7,WOS:001432930500174,A Study of Structured Pruning for Hybrid Neura...,[Meeting],"authors=[AuthorName(display_name='Ghimire, Dee...","[{'display_name': 'Ghimire, Deepak', 'wos_stan...","2024 24TH INTERNATIONAL CONFERENCE ON CONTROL,...",2024,,,,...,0,WOS,,2093-7121,979-8-3315-1793-9,978-89-93215-38-0,https://www.webofscience.com/api/gateway?GWVer...,,https://www.webofscience.com/api/gateway?GWVer...,https://www.webofscience.com/api/gateway?GWVer...
8,WOS:001423279900102,A DRL Planning Framework for Resilience of FEM...,[Meeting],"authors=[AuthorName(display_name='Zhang, Zheng...","[{'display_name': 'Zhang, Zhenghan', 'wos_stan...","2024 56TH NORTH AMERICAN POWER SYMPOSIUM, NAPS...",2024,,,,...,0,WOS,10.1109/NAPS61145.2024.10741793,2163-4939,979-8-3315-2104-2,979-8-3315-2103-5,https://www.webofscience.com/api/gateway?GWVer...,,https://www.webofscience.com/api/gateway?GWVer...,https://www.webofscience.com/api/gateway?GWVer...
9,WOS:001429153800070,Bangla SBERT - Sentence Embedding Using Multil...,[Meeting],"authors=[AuthorName(display_name='Uddin, Md. S...","[{'display_name': 'Uddin, Md. Shihab', 'wos_st...","2024 IEEE 15TH ANNUAL UBIQUITOUS COMPUTING, EL...",2024,,,,...,0,WOS,10.1109/UEMCON62879.2024.10754765,,979-8-3315-4091-3,979-8-3315-4090-6,https://www.webofscience.com/api/gateway?GWVer...,,https://www.webofscience.com/api/gateway?GWVer...,https://www.webofscience.com/api/gateway?GWVer...
