The aim of this script is to retrieve information about a given researcher's works in such a way that the only manual input (and the only thing the person running the script needs to know) is the author's ORCID.

Information about the author's works is retrieved using the ORCID API and saved into a JSON file in the local directory. That file is then parsed to obtain the year of publication, DOI and title of each work, which are saved in CSV format.

Input: ORCID

Output(s): author's works in JSON, publication data in CSV


In [20]:
import requests
import json
import csv

## PART I. MANUAL INPUTS
# ORCID iD of the researcher/author whose works are retrieved (edit this value).
orcid = "0000-0001-6960-357X"

# Works endpoint of the public ORCID API for that iD, requested as JSON.
url = "https://pub.orcid.org/v3.0/{}/works".format(orcid)
headers = dict(Accept="application/json")

# Output files, created in the current working directory.
works_file = "works.json"  # raw API response with the author's works
pub_file = "publications.csv"  # year / DOI / title table derived from works_file


##PART II. GETTING THE AUTHOR'S WORKS into works_file
# Query the API *before* touching works_file, so that a failed request does not
# truncate a previously saved file or leave an empty one behind.
# The timeout keeps the script from hanging forever on an unresponsive server.
response = requests.get(url, headers=headers, timeout=30)
if response.status_code != 200:
    # Stop here: without a fresh response there is nothing valid to parse in
    # PART III, which would otherwise fail with a confusing JSON decode error.
    raise RuntimeError(
        f"Failed to retrieve data. Status code: {response.status_code}"
    )

data = response.json()

# Written as UTF-8 to match the encoding used when the file is read back.
with open(works_file, "w", encoding="utf-8") as output:
    json.dump(data, output, indent=4)

##PART III. GETTING THE PUBLICATIONS INFORMATION into pub_file

def _value_of(node):
    """Return the stripped ``"value"`` entry of a JSON node as a string.

    Returns "" when ``node`` is not a dict (e.g. JSON ``null``) or when its
    ``"value"`` entry is missing or null, so callers never call ``.strip()``
    or ``.get()`` on ``None``.
    """
    if not isinstance(node, dict):
        return ""
    value = node.get("value")
    return "" if value is None else str(value).strip()


# Load the saved works first, so that pub_file is not truncated when works_file
# is missing or does not contain valid JSON.
with open(works_file, "r", encoding="utf-8") as f:
    record = json.load(f)

with open(pub_file, "w", newline="", encoding="utf-8") as output:
    writer = csv.writer(output)
    writer.writerow(["year", "doi", "title"])  #headers

    # NOTE: `x.get(key) or default` is used instead of `x.get(key, default)`
    # because the default of dict.get() only applies to *missing* keys, while
    # the JSON may contain keys that are present with an explicit null value.
    for work in record.get("group") or []:
        # doi is reset once per work (group), not per summary: a summary that
        # has external ids but no DOI among them reuses the DOI found earlier
        # in the same group.
        doi = ""

        #get a summary for each work
        for summary in work.get("work-summary") or []:
            if not summary or not summary.get("title"):
                continue

            #get title
            title = _value_of(summary["title"].get("title"))

            #get DOI from external IDs
            external_ids = summary.get("external-ids") or {}
            if not external_ids:
                continue
            for external_id in external_ids.get("external-id") or []:
                if external_id and external_id.get("external-id-type") == "doi":
                    # no break: when several DOI ids are listed, the last one wins
                    doi = (external_id.get("external-id-value") or "").strip()

            #get year ("" when the publication date or its year is missing/null)
            publication_date = summary.get("publication-date") or {}
            year = _value_of(publication_date.get("year"))

            #skip if DOI is empty
            if not doi:
                continue

            writer.writerow([year, doi, title])  #save data
