In [1]:
import os
import requests
import json
import csv
import pandas as pd
#conda install -c conda-forge python-dotenv
from dotenv import load_dotenv
from time import sleep
load_dotenv()

True

In [2]:

PATH_TO_DMCBH_MEMBERS_CSV = "authorlist_names_only.csv"
API_KEY = os.getenv("API_KEY")
header = {
    "X-ELS-APIKey": API_KEY,
}
api_url = "http://api.elsevier.com/content/search/author?"

In [3]:
def get_api_json(params: dict):
    try:
        response = requests.get(url=api_url, params=params, headers=header)
        response.raise_for_status
        data = json.loads(response.text)
    except requests.exceptions.HTTPError as e:
        print("HTTPS error: ", e)
    except requests.exceptions.RetryError as e:
        print("Max retries exceeded: ", e)
    return data

In [4]:
member_arr = []
with open(PATH_TO_DMCBH_MEMBERS_CSV, "r") as f:
    r = csv.reader(f)
    for row in r:
        if row[0] != "":
            member_arr.append(row)

# skip header rows
member_arr = member_arr[2:]
print(len(member_arr))

# columns of the DMCBH CSV file
cols = ['Last Name','First Name','Member Type','Mental Health & Addictions','Brain Development & Neurodevelopmental Disorders','Learning Memory & Dementias','Sensory Motor Systems & Movement Disorders','Brain Injury & Repair','Google Scholar?','GS Link']




39


In [5]:
subjects_list = ["NEUR", "BIOC", "MEDI", "PSYC"]
affiliation_list = ["The University of British Columbia", "Djavad Mowafaghian Centre for Brain Health", "BC Children's Hospital Research Institute", "Vancouver"]

In [6]:
last_names = []
first_names = []
affils = []
auth_ids = []
orcids = []
tags = []

warnings = []
subjects = []

no_search_results = []

for member in member_arr:
    sleep(1)
    lastname = member[0].strip()
    firstname = member[1].strip()

    print("searching for " + firstname + " " + lastname)

    query = f"authlastname({lastname}) and authfirst({firstname})"
    params = {
        "query":query
    }

    data = get_api_json(params)

    warning = ""

    num_authors_found = int(data["search-results"]['opensearch:totalResults'])
    author_idx = 0
    if num_authors_found == 0:
        print("no scival profile found for " + firstname + " " + lastname + "\n")
        no_search_results.append(member)
        continue

    found_match = True
    
    if num_authors_found > 1:
        print("multiple profiles found for " + firstname + " " + lastname)
        warning = warning + "multiple profiles; "
        first_match = False
        best_match = 0
        for author_number in range(num_authors_found):
            print("looking at profile " + str(author_number))

            try:
                if any([x in str(data["search-results"]["entry"][author_number]["subject-area"]) for x in subjects_list]):
                    print(f"subject match found")
                    if first_match == False:
                        print("updating best match")
                        best_match = author_number    
                    first_match = True
            except:
                print("no subject with associated profile")
                
            try:
                if any([x in str(data["search-results"]["entry"][author_number]["affiliation-current"]) for x in affiliation_list]) and any([x in str(data["search-results"]["entry"][author_number]["subject-area"]) for x in subjects_list]):
                    print(f"subject and location match found")
                    print("updating best match")
                    best_match = author_number
                    break
            except:
                print("no affiliation with associated profile")

        if found_match == False:
            warning = warning + "no match! "
            print("no matching profile found")
        author_idx = best_match
        
    try:
        affil_name = data["search-results"]["entry"][author_idx]["affiliation-current"]["affiliation-name"]
        if affil_name not in affiliation_list:
            warning = warning + "affiliation not in list"
    except:
        warning = "no affiliation"
        affil_name = ""

    author_id = data["search-results"]["entry"][author_idx]["dc:identifier"].split(":")[1]
    # affil_id = data["search-results"]["entry"][0]["affiliation-current"]["affiliation-id"]

    subject = ""
    try:
        for i in data["search-results"]["entry"][author_idx]["subject-area"]:
            subject = f'{i["@abbrev"]} - {i["@frequency"]} publications; ' + subject
    except:
        pass

    orcid = ""
    try:
        orcid = data["search-results"]["entry"][author_idx]["orcid"]
    except:
        pass
    
    #find primary IRP
    for i in range(3,8):
        if member[i].strip() == "Primary":
            tags.append(cols[i])

    last_names.append(lastname)
    first_names.append(firstname)
    affils.append(affil_name)
    auth_ids.append(author_id)
    warnings.append(warning)
    subjects.append(subject)
    orcids.append(orcid)

    print("\n")

IndexError: list index out of range

In [102]:
out_dict = {
    "Last Name": last_names,
    "First Name": first_names,
    "Affiliation": affils,
    "Scopus Author ID": auth_ids,
    "ORCID": orcids,
    "Tags": tags,
    "Research Areas": subjects,
    "Warnings": warnings,
}
print(no_search_results)

[['Altshuler', 'Doug', 'Full', '', '', '', 'Primary', '', '1', '', '', 'https://scholar.google.com/citations?hl=en&user=EVACejsAAAAJ']]


In [76]:
df = pd.DataFrame.from_dict(out_dict)
df.to_csv("DMCBH_scival_input_bestmatch.csv")

use API to find one researcher in particular

In [95]:
lastname = "Gordon"
firstname = "Michael"
query = f"authlastname({lastname}) and authfirst({firstname})"
params = {
    "query":query
}
data = get_api_json(params)

In [97]:
print(data["search-results"])

{'opensearch:totalResults': '180', 'opensearch:startIndex': '0', 'opensearch:itemsPerPage': '25', 'opensearch:Query': {'@role': 'request', '@searchTerms': 'authlastname(Gordon) and authfirst(Michael)', '@startPage': '0'}, 'link': [{'@_fa': 'true', '@ref': 'self', '@href': 'https://api.elsevier.com/content/search/author?start=0&count=25&query=authlastname%28Gordon%29+and+authfirst%28Michael%29', '@type': 'application/json'}, {'@_fa': 'true', '@ref': 'first', '@href': 'https://api.elsevier.com/content/search/author?start=0&count=25&query=authlastname%28Gordon%29+and+authfirst%28Michael%29', '@type': 'application/json'}, {'@_fa': 'true', '@ref': 'next', '@href': 'https://api.elsevier.com/content/search/author?start=25&count=25&query=authlastname%28Gordon%29+and+authfirst%28Michael%29', '@type': 'application/json'}, {'@_fa': 'true', '@ref': 'last', '@href': 'https://api.elsevier.com/content/search/author?start=155&count=25&query=authlastname%28Gordon%29+and+authfirst%28Michael%29', '@type'

In [98]:
# print(data)
#BIOC, MEDI, NEUR
print(int(data["search-results"]['opensearch:totalResults']))
subjects = ["NEUR", "BIOC", "MEDI", "PSYC", ]
for i in range(int(data["search-results"]['opensearch:totalResults'])):
    print("profile "+str(i))
    try:
        print(data["search-results"]["entry"][i]["preferred-name"])
        print("author ID: " + data["search-results"]["entry"][i]["dc:identifier"].split(":")[1])
        print("affiliation: " + str(data["search-results"]["entry"][i]["affiliation-current"]))
        print("subjects: " + str(data["search-results"]["entry"][i]["subject-area"]))
        print(any([x in str(data["search-results"]["entry"][i]["subject-area"]) for x in subjects]))
    except:
        print("info incomplete")
    
    print("\n")



180
profile 0
{'surname': 'Gordon', 'given-name': 'Michael S.', 'initials': 'M.S.'}
author ID: 7402801321
affiliation: {'affiliation-url': 'https://api.elsevier.com/content/affiliation/affiliation_id/60272352', 'affiliation-id': '60272352', 'affiliation-name': 'HonorHealth', 'affiliation-city': 'Scottsdale', 'affiliation-country': 'United States'}
subjects: [{'@abbrev': 'MEDI', '@frequency': '305', '$': 'Medicine (all)'}, {'@abbrev': 'BIOC', '@frequency': '159', '$': 'Biochemistry, Genetics and Molecular Biology (all)'}, {'@abbrev': 'PHAR', '@frequency': '52', '$': 'Pharmacology, Toxicology and Pharmaceutics (all)'}]
True


profile 1
{'surname': 'Gordon', 'given-name': 'Michael S.', 'initials': 'M.S.'}
author ID: 35069411100
affiliation: {'affiliation-url': 'https://api.elsevier.com/content/affiliation/affiliation_id/60016849', 'affiliation-id': '60016849', 'affiliation-name': 'University of Toronto', 'affiliation-city': 'Toronto', 'affiliation-country': 'Canada'}
subjects: [{'@abbrev'

In [12]:
print(data["search-results"]["entry"][0])
print(data["search-results"]["entry"][0]["dc:identifier"].split(":")[1])
print(data["search-results"]["entry"][0]["affiliation-current"])
print(data["search-results"]["entry"][0]["subject-area"])



{'@_fa': 'true', 'link': [{'@_fa': 'true', '@ref': 'self', '@href': 'https://api.elsevier.com/content/author/author_id/7004622834'}, {'@_fa': 'true', '@ref': 'search', '@href': 'https://api.elsevier.com/content/search/author?query=au-id%287004622834%29'}, {'@_fa': 'true', '@ref': 'scopus-citedby', '@href': 'https://www.scopus.com/author/citedby.uri?partnerID=HzOxMe3b&citedAuthorId=7004622834&origin=inward'}, {'@_fa': 'true', '@ref': 'scopus-author', '@href': 'https://www.scopus.com/authid/detail.uri?partnerID=HzOxMe3b&authorId=7004622834&origin=inward'}], 'prism:url': 'https://api.elsevier.com/content/author/author_id/7004622834', 'dc:identifier': 'AUTHOR_ID:7004622834', 'eid': '9-s2.0-7004622834', 'orcid': '0000-0003-3976-9186', 'preferred-name': {'surname': 'Auld', 'given-name': 'Vanessa J.', 'initials': 'V.J.'}, 'name-variant': [{'@_fa': 'true', 'surname': 'Auld', 'given-name': 'V.', 'initials': 'V.'}, {'@_fa': 'true', 'surname': 'Auld', 'given-name': 'Vanessa', 'initials': 'V.'}, {

In [34]:
#UBC "affiliation-id":"60010365"

num_authors_found = data["search-results"]["opensearch:totalResults"]

if num_authors_found == 0:
    print("no author found")
if num_authors_found > 1:
    print("more than one author found")
    #find author with matching affiliation id

1


In [None]:
#add name, institution, etc to csv. tagged in diff irp groups?
