In [15]:
import os
import requests
import json
import csv
import pandas as pd
from dotenv import load_dotenv
from time import sleep
load_dotenv()

True

In [16]:
API_KEY = os.getenv("API_KEY")
API_COUNT = 25
header = {
    "X-ELS-APIKey": API_KEY,
}
API_ENDPOINT = "http://api.elsevier.com/content/search/author?"

In [14]:
###### !!! CHANGE THESE ######
# columns of the input file

# PATH_TO_INPUT_CSV = "in/sample_irp_sorted.csv"
# OUTPUT_FILEPATH = "out/author_lookup_output.csv"

# cols = ['Last Name','First Name','Member Type','Mental Health & Addictions','Brain Development & Neurodevelopmental Disorders','Learning Memory & Dementias','Sensory Motor Systems & Movement Disorders','Brain Injury & Repair','Google Scholar?','GS Link']


# subjects that can be matched with the list of authors
# subjects_list = ["NEUR", "BIOC", "MEDI", "PSYC"]

# affiliated institutions, cities, or countries
# affiliation_list = ["The University of British Columbia", "Djavad Mowafaghian Centre for Brain Health", "BC Children's Hospital Research Institute", "University of British Columbia Okanagan", "University of British Columbia, Faculty of Medicine", "Vancouver"]


In [19]:
PATH_TO_INPUT_CSV = "in/PSYT faculty member lists UTF8.csv"
OUTPUT_FILEPATH = "out/PSYT faculty member lists SciVal formatted.csv"
cols = ['Last Name', 'First Name']
subjects_list = ["NEUR", "BIOC", "MEDI", "PSYC", "HEAL", "IMMU", "NURS", "PHAR"]
affiliation_list = ["The University of British Columbia", "Djavad Mowafaghian Centre for Brain Health", "BC Children's Hospital Research Institute", "University of British Columbia Okanagan", "University of British Columbia, Faculty of Medicine", "Vancouver"]

In [20]:
def get_api_json(query: str, start=0):
    params = {
        "query":query,
        "start":start,
        "count":API_COUNT,
    }
    try:
        response = requests.get(url=API_ENDPOINT, params=params, headers=header)
        response.raise_for_status
        data = json.loads(response.text)
    except requests.exceptions.HTTPError as e:
        print("HTTPS error: ", e)
    except requests.exceptions.RetryError as e:
        print("Max retries exceeded: ", e)
    return data

In [21]:
member_arr = []
with open(PATH_TO_INPUT_CSV, "r") as f:
    r = csv.reader(f)
    for row in r:
        if row[0] != "":
            member_arr.append(row)

# skip header rows
member_arr = member_arr[1:]
print(len(member_arr))

802


In [30]:
last_names = []
first_names = []
affils = []
auth_ids = []
orcids = []
tags = []

warnings = []
subjects = []

no_search_results = []

#go through all members
for member in member_arr:
    start = 0
    sleep(1) #pause for one second to prevent API warning

    #format first and last names
    lastname = member[0].strip()
    firstname = member[1].strip()

    print("searching for " + firstname + " " + lastname)

    query = f"authlastname({lastname}) and authfirst({firstname})"
    data = get_api_json(query, start=0)

    warning = ""

    num_authors_found = int(data["search-results"]['opensearch:totalResults'])
    print(f"{num_authors_found} results")
    
    #returns number of authors found
    author_idx = 0
    if num_authors_found == 0:
        print("no scival profile found for " + firstname + " " + lastname + "\n")
        last_names.append(lastname)
        first_names.append(firstname)
        affils.append("")
        auth_ids.append("")
        warnings.append("no profile found")
        subjects.append("")
        orcids.append("")
        no_search_results.append(member)
        continue

    # pages_to_search = int(num_authors_found//API_COUNT)
    
    found_match = True
    #multiple authors found with first/last name search
    if num_authors_found > 1:
        print("multiple profiles found for " + firstname + " " + lastname)
        warning = warning + "multiple profiles; "
        first_match = False
        best_match = 0

        #look through all the authors
        for author_idx in range(num_authors_found):
            if author_idx >= API_COUNT:
                break

            print("looking at profile " + str(author_idx))
            #match author for desired affiliation
            try:
                if any([x in str(data["search-results"]["entry"][author_idx]["affiliation-current"]) for x in affiliation_list]) and any([x in str(data["search-results"]["entry"][author_idx]["subject-area"]) for x in subjects_list]):
                    print("subject and location match found")
                    print("updating best match")
                    best_match = author_idx #sets best match as first author at institution
                    break
            except:
                print("no affiliation with associated profile")

            #match author for desired subjects
            try:
                if first_match == False and any([x in str(data["search-results"]["entry"][author_idx]["subject-area"]) for x in subjects_list]):
                    print("subject match found")
                    if first_match == False:
                        print("updating best match")
                        best_match = author_idx #sets best match as author that publishes in the subject matter
                    first_match = True
            except:
                print("no subject with associated profile")

        if found_match == False:
            warning = warning + "no match! "
            print("no matching profile found")
        author_idx = best_match
    
    #get author affiliation name
    affil_name = ""
    try:
        affil_name = data["search-results"]["entry"][author_idx]["affiliation-current"]["affiliation-name"]
        if affil_name not in affiliation_list:
            warning = warning + "affiliation not in list; "
    except:
        warning = "no affiliation"
    
    #get author scopus id
    author_id = data["search-results"]["entry"][author_idx]["dc:identifier"].split(":")[1]
    # affil_id = data["search-results"]["entry"][0]["affiliation-current"]["affiliation-id"]

    #get author publication topics
    subject = ""
    try:
        for i in data["search-results"]["entry"][author_idx]["subject-area"]:
                subject = f'{i["@abbrev"]} - {i["@frequency"]} publications; ' + subject
        if not any([x in str(data["search-results"]["entry"][author_idx]["subject-area"]) for x in subjects_list]):
            warning = warning + "subject mismatch"
    except:
        warning = warning + "subject not in list; "

    #get author orcid if available
    orcid = ""
    try:
        orcid = data["search-results"]["entry"][author_idx]["orcid"]
    except:
        pass
    
    #find primary IRP to add as tag
    # for i in range(3,8):
    #     if member[i].strip() == "Primary":
    #         tags.append(cols[i])
    
    #add variables to respective arrays to turn into csv
    try:
        scopus_lastname = data["search-results"]["entry"][author_idx]["preferred-name"]['surname']
        scopus_firstname = data["search-results"]["entry"][author_idx]["preferred-name"]['given-name']
    except:
        scopus_lastname = lastname
        scopus_firstname = firstname


    last_names.append(scopus_lastname)
    first_names.append(scopus_firstname)
    affils.append(affil_name)
    auth_ids.append(author_id)
    warnings.append(warning)
    subjects.append(subject)
    orcids.append(orcid)

    print("\n")

searching for Pieter Aartsma
0 results
no scival profile found for Pieter Aartsma

searching for Mohamed Abdel-Fattah
21 results
multiple profiles found for Mohamed Abdel-Fattah
looking at profile 0
subject match found
updating best match
looking at profile 1
looking at profile 2
looking at profile 3
looking at profile 4
looking at profile 5
looking at profile 6
looking at profile 7
looking at profile 8
looking at profile 9
looking at profile 10
looking at profile 11
looking at profile 12
looking at profile 13
looking at profile 14
looking at profile 15
looking at profile 16
looking at profile 17
looking at profile 18
looking at profile 19
looking at profile 20


searching for Ozotu Abu
0 results
no scival profile found for Ozotu Abu

searching for Trudy Jean Adam
0 results
no scival profile found for Trudy Jean Adam

searching for Leona Adams
0 results
no scival profile found for Leona Adams

searching for Adetokunbo Adeshina
1 results


searching for Qasim Afridi
0 results
no scival 

KeyboardInterrupt: 

In [28]:
out_dict = {
    "Last Name": last_names,
    "First Name": first_names,
    "Affiliation": affils,
    "Scopus Author ID": auth_ids,
    "ORCID": orcids,
    "Research Areas": subjects,
    "Warnings": warnings,
}
print(no_search_results)

# out_dict = {
#     "Last Name": last_names,
#     "First Name": first_names,
#     "Affiliation": affils,
#     "Scopus Author ID": auth_ids,
#     "ORCID": orcids,
#     "Tags": tags,
#     "Research Areas": subjects,
#     "Warnings": warnings,
# }
# print(no_search_results)

for key in out_dict:
    print(len(out_dict[key]))

[['Aartsma', 'Pieter'], ['Abu', 'Ozotu'], ['Adam', 'Trudy Jean'], ['Adams', 'Leona'], ['Afridi', 'Qasim'], ['Agafonov', 'Marina'], ['Agboji', 'Hezekiah'], ['Ainsworth', 'Nick'], ['Allibhai', 'Fatima'], ['Almas', 'Naveed'], ['Amarjothi Nagendran', 'Sreedharan'], ['Argouarch', 'Sylvie']]
31
31
31
31
31
31
31


In [29]:
df = pd.DataFrame.from_dict(out_dict)
df.to_csv("original1.csv")

use API to look through the any particular profiles if needed

In [16]:
# !!! modify lastname and firstname
lastname = "West"
firstname = "Christopher"

In [17]:
query = f"authlastname({lastname}) and authfirst({firstname})"
data = get_api_json(query, start=0)
print(data)

{'service-error': {'status': {'statusCode': 'AUTHORIZATION_ERROR', 'statusText': 'The requestor is not authorized to access the requested view or fields of the resource'}}}


In [None]:
print(int(data["search-results"]['opensearch:totalResults']))

for i in range(int(data["search-results"]['opensearch:totalResults'])):
    print("profile "+str(i))
    try:
        print(data["search-results"]["entry"][i]["preferred-name"])
        print("author ID: " + data["search-results"]["entry"][i]["dc:identifier"].split(":")[1])
        print("affiliation: " + str(data["search-results"]["entry"][i]["affiliation-current"]['affiliation-name']))
        print("subjects: " + str(data["search-results"]["entry"][i]["subject-area"]))
        print(any([x in str(data["search-results"]["entry"][i]["subject-area"]) for x in subjects_list]))
    except:
        print("info incomplete")
    
    print("\n")