In [30]:
import os
import requests
import json
import csv
import pandas as pd
from dotenv import load_dotenv
from time import sleep
load_dotenv()

from pybliometrics.scopus.utils import config
print(config['Authentication'] ['APIKey'])
config['Authentication'] ['APIKey'] = "127f5bf5de20d338f686704a9a328b86"
print(config['Authentication'] ['APIKey'])

2e5922c95a0e4f1fb73cede25eca779a
127f5bf5de20d338f686704a9a328b86


In [31]:
from pybliometrics.scopus import CitationOverview, AuthorRetrieval, AuthorSearch

In [46]:
PATH_TO_INPUT_CSV = "authorlist_names_only.csv"
OUTPUT_FILEPATH = "ss_scival_authorlist.csv"

# https://dev.elsevier.com/sc_author_search_tips.html for list of subjects
subjects_list = ["NEUR", "BIOC", "MEDI", "PSYC", "HEAL", "IMMU", "NURS", "PHAR"]
affiliation_list = ['University of British Columbia', 'UBC', 'Djavad Mowafaghian', 'Simon Fraser University', 'University of Victoria', 'University of Washington']
city_list = ["Vancouver", "Victoria", "Seattle"]
# affil_id_list = ['60023077', '60010365', '60012423'] #[UBC faculty of medicine, UBC, UBC hospital]

affil_search_string = "affil(UBC or University of British Columbia or Djavad Mowafaghian or Simon Fraser University or University of Victoria or University of Washington') and subjarea(NEUR or BIOC or MEDI or PSYC or HEAL or IMMU or NURS or PHAR)"
cols_of_interest = ["Last Name", "First Name", "Affiliation", "Author ID", "ORCID", "Research Areas", "Warning"]

In [47]:
def is_matching_subjects(author) -> bool:
    try:
        return any([topic in author.areas for topic in subjects_list])
    except:
        print("no subjects on profile")
        return False

In [48]:
def is_matching_affils(author) -> bool:
    try:
        # return (any([affil in author.affiliation for affil in affiliation_list]) 
        #         and any([city in author.city for city in city_list]))
        return (any([affil in author.affiliation for affil in affiliation_list]) 
                or any([city in author.city for city in city_list]))
    except:
        print("no affiliation on profile")
        return False

In [64]:
def is_target_profile(author) -> bool:
    '''criteria for whether or not profile matches the target requirements'''
    print("subject match:",is_matching_subjects(author))
    print("affiliation match:",is_matching_affils(author))
    return (is_matching_subjects(author) or is_matching_affils(author))
    # return (is_matching_subjects(author) and is_matching_affils(author))

    # subject_check = False
    # affiliation_check = False
    # city_check = False
    # country_check = False

    # try:
    #     subject_check = any([topic in author.areas for topic in subjects_list])
    # except:
    #     print("no subject with associated profile")

    # try:
    #     affiliation_check = any([affil in affiliation_list for affil in author.affiliation])
    #     city_check = any([city in city_list for city in author.city]) 
    #     country_check = any([country in affiliation_list for country in author.country])
    # except:
    #     print("no affiliation with associated profile")

    # # if subject and affiliations match
    # if subject_check and affiliation_check:
    #     return True
    # elif affiliation_check:
    #     return True
    # elif (city_check or country_check) and subject_check:
    #     return True
    # else:
    #     return False

In [65]:
def add_author_row(author:tuple, is_target, out_dict, warning="", affil_override="", subj_override=""):
    '''
    fills in author information to the output dictionary.
    if is_target is True, meaning the author matches criteria, author variable should be a pybliometrics Author tuple. 
    if is_target is False, meaning author is not found or does not match criteria, author variable should be a tuple of
    ("author last name", "author first name")
    '''
    match is_target:
        case True:
            #add author information to dictionary
            lastname = author.surname
            firstname = author.givenname
            print("adding " + firstname + " " + lastname + "...\n")

            affil_name = author.affiliation
            author_id = author.eid.split("-")[-1]
            orcid = ""
            try:
                orcid = author.orcid
            except:
                pass
            subject = author.areas

            out_dict[cols_of_interest[0]].append(lastname) #"Last Name"
            out_dict[cols_of_interest[1]].append(firstname) #"First Name"
            out_dict[cols_of_interest[2]].append(affil_name) #"Affiliation"
            out_dict[cols_of_interest[3]].append(author_id) #"Author ID"
            out_dict[cols_of_interest[4]].append(orcid) #"ORCID"
            out_dict[cols_of_interest[5]].append(subject) #"Research Areas"
            out_dict[cols_of_interest[6]].append("") #"Warning"
        case False:
            #add blank rows with warning or affiliation/research area overrides
            lastname = author[0]
            firstname = author[1]
            print("no scival profile found\n")
            out_dict[cols_of_interest[0]].append(lastname) #"Last Name"
            out_dict[cols_of_interest[1]].append(firstname) #"First Name"
            out_dict[cols_of_interest[2]].append(affil_override) #"Affiliation"
            out_dict[cols_of_interest[3]].append("") #"Author ID"
            out_dict[cols_of_interest[4]].append("") #"ORCID"
            out_dict[cols_of_interest[5]].append(subj_override) #"Research Areas"
            out_dict[cols_of_interest[6]].append(warning) #"Warning"
            

In [66]:
def compare_all_profiles(authors):
    for i in range(len(authors)):
        print("looking at profile " + str(i))
        if is_target_profile(authors[i]):
            return authors[i]
    return None

In [67]:
#read member csv file accordingly
member_arr = []
with open(PATH_TO_INPUT_CSV, "r") as f:
    r = csv.reader(f)
    for row in r:
        if row[0] != "":
            member_arr.append(row)

# skip header rows
# member_arr = member_arr[1:]
print(len(member_arr))

40


In [70]:
# if changing columns, make sure to change code in add_author_row() as well!
output_dict = {f"{key}":[] for key in cols_of_interest}

#go through all members
for member in member_arr:
    start = 0
    sleep(1) #pause for one second to prevent API warning

    #search for author
    lastname = member[0].rsplit(" ",1)[1]
    firstname = member[0].rsplit(" ",1)[0]
    # lastname = member[0].strip()
    # firstname = member[1].strip()
    print(f"searching for author {firstname} {lastname}")

    # query = f"authlastname({lastname}) and authfirst({firstname}) and {affil_search_string}"
    query = f"authlastname({lastname}) and authfirst({firstname})"
    s = AuthorSearch(query)
    authors = s.authors
    
    num_authors_found = s.get_results_size()

    match num_authors_found:
        case 0:
            #if there are no profiles, add blank row
            add_author_row((lastname, firstname), False, output_dict, warning=f"no profiles found")
            # add_author_row((lastname, firstname), False, output_dict, warning=f"no profiles found with query ({affil_search_string})")
        case 1:
            #if there is a profile, save if subjects and affiliations match
            if is_target_profile(authors[0]):
                add_author_row(authors[0], True, output_dict)
            else:
                add_author_row((authors[0].surname, authors[0].givenname), 
                               False, output_dict, warning=f"!!! profile for {firstname} {lastname} does not pass addition condition",
                               affil_override=authors[0].affiliation,
                               subj_override=authors[0].areas)
        case _:
            #if there are multiple profile, take a look through each
            best_match_author = compare_all_profiles(authors)
            if best_match_author == None:
                add_author_row((authors[0].surname, authors[0].givenname), 
                               False, output_dict, warning=f"!!! FIRST profile for {firstname} {lastname} does not pass addition condition",
                               affil_override=authors[0].affiliation,
                               subj_override=authors[0].areas)
                print("no match found among profiles\n")
            else:
                add_author_row(best_match_author, True, output_dict)

searching for author Tim H. Murphy
looking at profile 0
subject match: True
affiliation match: True
adding Timothy H. Murphy...

searching for author Annie Ciernia
looking at profile 0
subject match: True
affiliation match: True
adding Annie Vogel-Ciernia...

searching for author Brian MacVicar
subject match: True
affiliation match: True
adding Brian Archibald MacVicar...

searching for author Fidel Vila-Rodriguez
subject match: True
affiliation match: True
adding Fidel Vila-Rodriguez...

searching for author Shernaz Bamji
subject match: True
affiliation match: True
adding Shernaz X. Bamji...

searching for author Lara Boyd
looking at profile 0
subject match: True
affiliation match: True
adding Lara A. Boyd...

searching for author Paul Pavlidis
looking at profile 0
subject match: True
affiliation match: True
adding Paul Pavlidis...

searching for author Martin McKeown
subject match: True
affiliation match: True
adding Martin J. McKeown...

searching for author A Jon Stoessl
subject ma

KeyboardInterrupt: 

In [69]:
# save file
df = pd.DataFrame.from_dict(output_dict)
df.to_csv("scival_ids_authorlist_adjusted.csv")
# df.to_csv(OUTPUT_FILEPATH)

In [26]:
lastname = "Woodward" 
firstname = "Todd"
query = f"authlastname({lastname}) and authfirst({firstname})"
s = AuthorSearch(query)
authors = s.authors

In [27]:
print(authors)

[Author(eid='9-s2.0-7102848867', orcid='0000-0001-8083-0079', surname='Woodward', initials='T.S.', givenname='Todd Stephen', affiliation='The University of British Columbia', documents=185, affiliation_id='60010365', city='Vancouver', country='Canada', areas='MEDI (203); NEUR (133); PSYC (126)')]


In [72]:
lastname = "sylwestrak" 
firstname = "emily"
query = f"authlastname({lastname}) and authfirst({firstname})"
s = AuthorSearch(query)
authors = s.authors
print(authors)

[Author(eid='9-s2.0-26634611400', orcid=None, surname='Sylwestrak', initials='E.L.', givenname='Emily Lauren', affiliation='University of Oregon', documents=10, affiliation_id='60012317', city='Eugene', country='United States', areas='NEUR (5); MULT (3); MEDI (3)')]
