In [11]:
import os
import requests
import json
import csv
import pandas as pd
#conda install -c conda-forge python-dotenv
from dotenv import load_dotenv
from time import sleep
load_dotenv()

True

In [12]:
API_KEY = os.getenv("API_KEY")
API_COUNT = 25
header = {
    "X-ELS-APIKey": API_KEY,
}
api_url = "http://api.elsevier.com/content/search/author?"

In [14]:
###### !!! CHANGE THESE ######
# columns of the input file

PATH_TO_INPUT_CSV = "sample.csv"
OUTPUT_FILEPATH = "output.csv"

cols = ['Last Name','First Name','Member Type','Mental Health & Addictions','Brain Development & Neurodevelopmental Disorders','Learning Memory & Dementias','Sensory Motor Systems & Movement Disorders','Brain Injury & Repair','Google Scholar?','GS Link']

# subjects that can be matched with the list of authors
subjects_list = ["NEUR", "BIOC", "MEDI", "PSYC"]

# affiliated institutions, cities, or countries
affiliation_list = ["The University of British Columbia", "Djavad Mowafaghian Centre for Brain Health", "BC Children's Hospital Research Institute", "University of British Columbia Okanagan", "University of British Columbia, Faculty of Medicine", "Vancouver"]


In [15]:
def get_api_json(query: str, start=0):
    params = {
        "query":query,
        "start":start,
        "count":API_COUNT,
    }
    try:
        response = requests.get(url=api_url, params=params, headers=header)
        response.raise_for_status
        data = json.loads(response.text)
    except requests.exceptions.HTTPError as e:
        print("HTTPS error: ", e)
    except requests.exceptions.RetryError as e:
        print("Max retries exceeded: ", e)
    return data

In [137]:
member_arr = []
with open(PATH_TO_INPUT_CSV, "r") as f:
    r = csv.reader(f)
    for row in r:
        if row[0] != "":
            member_arr.append(row)

# skip header rows
member_arr = member_arr[2:]
print(len(member_arr))

129


In [138]:
with open(PATH_TO_INPUT_CSV, "r") as csvfile:
    r = csv.reader(csvfile)
    

In [139]:
last_names = []
first_names = []
affils = []
auth_ids = []
orcids = []
tags = []

warnings = []
subjects = []

no_search_results = []

for member in member_arr:
    start = 0
    sleep(1)

    lastname = member[0].strip()
    firstname = member[1].strip()

    print("searching for " + firstname + " " + lastname)

    query = f"authlastname({lastname}) and authfirst({firstname})"
    data = get_api_json(query, start=0)

    warning = ""

    num_authors_found = int(data["search-results"]['opensearch:totalResults'])
    
    author_idx = 0
    if num_authors_found == 0:
        print("no scival profile found for " + firstname + " " + lastname + "\n")
        no_search_results.append(member)
        continue

    # pages_to_search = int(num_authors_found//API_COUNT)
    
    found_match = True
    if num_authors_found > 1:
        print("multiple profiles found for " + firstname + " " + lastname)
        warning = warning + "multiple profiles; "
        first_match = False
        best_match = 0
        for author_number in range(num_authors_found):
            if author_number >= API_COUNT:
                break

            print("looking at profile " + str(author_number))
            try:
                if any([x in str(data["search-results"]["entry"][author_number]["affiliation-current"]) for x in affiliation_list]) and any([x in str(data["search-results"]["entry"][author_number]["subject-area"]) for x in subjects_list]):
                    print("subject and location match found")
                    print("updating best match")
                    best_match = author_number
                    break
            except:
                print("no affiliation with associated profile")

            try:
                if first_match == False and any([x in str(data["search-results"]["entry"][author_number]["subject-area"]) for x in subjects_list]):
                    print("subject match found")
                    if first_match == False:
                        print("updating best match")
                        best_match = author_number
                    first_match = True
            except:
                print("no subject with associated profile")

        if found_match == False:
            warning = warning + "no match! "
            print("no matching profile found")
        author_idx = best_match
    
    affil_name = ""
    try:
        affil_name = data["search-results"]["entry"][author_idx]["affiliation-current"]["affiliation-name"]
        if affil_name not in affiliation_list:
            warning = warning + "affiliation not in list"
    except:
        warning = "no affiliation"
        
    author_id = data["search-results"]["entry"][author_idx]["dc:identifier"].split(":")[1]
    # affil_id = data["search-results"]["entry"][0]["affiliation-current"]["affiliation-id"]

    subject = ""
    try:
        for i in data["search-results"]["entry"][author_idx]["subject-area"]:
            subject = f'{i["@abbrev"]} - {i["@frequency"]} publications; ' + subject
    except:
        pass

    orcid = ""
    try:
        orcid = data["search-results"]["entry"][author_idx]["orcid"]
    except:
        pass
    
    #find primary IRP to add as tag
    for i in range(3,8):
        if member[i].strip() == "Primary":
            tags.append(cols[i])

    scopus_lastname = data["search-results"]["entry"][i]["preferred-name"]['surname']
    scopus_firstname = data["search-results"]["entry"][i]["preferred-name"]['given-name']

    last_names.append(scopus_lastname)
    first_names.append(scopus_firstname)
    affils.append(affil_name)
    auth_ids.append(author_id)
    warnings.append(warning)
    subjects.append(subject)
    orcids.append(orcid)

    print("\n")

searching for Khaled Abd-Elrahman


searching for Douglas Allan
multiple profiles found for Douglas Allan
looking at profile 0
looking at profile 1
subject and location match found
updating best match


searching for Doug Altshuler
no scival profile found for Doug Altshuler

searching for Vanessa Auld


searching for Jehannine Austin


searching for Shelina Babul
multiple profiles found for Shelina Babul
looking at profile 0
subject and location match found
updating best match


searching for Shernaz Bamji


searching for Phil Barker
multiple profiles found for Phil Barker
looking at profile 0
subject match found
updating best match
looking at profile 1
subject and location match found
updating best match


searching for Steven Barnes
multiple profiles found for Steven Barnes
looking at profile 0
subject match found
updating best match
looking at profile 1
subject and location match found
updating best match


searching for Alasdair Barr


searching for Jason Barton
multiple profiles f

In [140]:
out_dict = {
    "Last Name": last_names,
    "First Name": first_names,
    "Affiliation": affils,
    "Scopus Author ID": auth_ids,
    "ORCID": orcids,
    "Tags": tags,
    "Research Areas": subjects,
    "Warnings": warnings,
}
print(no_search_results)

for key in out_dict:
    print(len(out_dict[key]))

[['Altshuler', 'Doug', 'Full', '', '', '', 'Primary', '', '1', '', '', 'https://scholar.google.com/citations?hl=en&user=EVACejsAAAAJ']]
128
128
128
128
128
128
128
128


In [None]:
df = pd.DataFrame.from_dict(out_dict)
df.to_csv(OUTPUT_FILEPATH)

use API to look through the any particular profiles if needed

In [16]:
# !!! modify lastname and firstname
lastname = "West"
firstname = "Christopher"



In [17]:
query = f"authlastname({lastname}) and authfirst({firstname})"
data = get_api_json(query, start=0)
print(data)

{'service-error': {'status': {'statusCode': 'AUTHORIZATION_ERROR', 'statusText': 'The requestor is not authorized to access the requested view or fields of the resource'}}}


In [None]:
print(int(data["search-results"]['opensearch:totalResults']))

for i in range(int(data["search-results"]['opensearch:totalResults'])):
    print("profile "+str(i))
    try:
        print(data["search-results"]["entry"][i]["preferred-name"])
        print("author ID: " + data["search-results"]["entry"][i]["dc:identifier"].split(":")[1])
        print("affiliation: " + str(data["search-results"]["entry"][i]["affiliation-current"]['affiliation-name']))
        print("subjects: " + str(data["search-results"]["entry"][i]["subject-area"]))
        print(any([x in str(data["search-results"]["entry"][i]["subject-area"]) for x in subjects_list]))
    except:
        print("info incomplete")
    
    print("\n")