In [1]:
import os
import requests
import json
import csv
import pandas as pd
from dotenv import load_dotenv
from time import sleep
load_dotenv()

from pybliometrics.scopus.utils import config

In [152]:
from pybliometrics.scopus import CitationOverview, AuthorRetrieval, AuthorSearch

In [153]:
PATH_TO_INPUT_CSV = "in/PSYT faculty member lists UTF8.csv"
OUTPUT_FILEPATH = "out/PSYT faculty member lists SciVal formatted.csv"
cols = ['Last Name', 'First Name']

# https://dev.elsevier.com/sc_author_search_tips.html for list of subjects
subjects_list = ["NEUR", "BIOC", "MEDI", "PSYC", "HEAL", "IMMU", "NURS", "PHAR"]
affiliation_list = ["The University of British Columbia", "Djavad Mowafaghian Centre for Brain Health", "UBC Hospital", 
                    "University of British Columbia, Faculty of Medicine","BC Children's and Women's Hospital",
                    "BC", "British Columbia", "University of California, San Francisco", "Defense Health Agency", "The University of Texas at Austin", "Canadian Centre for Behavioural Neuroscience",
                    "University of Ottawa", "Centre for Addiction and Mental Health", "Simon Fraser University"
                    ]
# affil_id_list = ['60023077', '60010365', '60012423'] #[UBC faculty of medicine, UBC, UBC hospital]
city_list = ["Vancouver", "Victoria", "Kelowna", "Burnaby", "Richmond", "Abbotsford", "Coquitlam"]
# country_list = ["Canada"]
affil_search_string = "affil(UBC or University of British Columbia or Hospital or BC or Canada) and subjarea(NEUR or BIOC or MEDI or PSYC or HEAL or IMMU or NURS or PHAR)"
cols_of_interest = ["Last Name", "First Name", "Affiliation", "Author ID", "ORCID", "Research Areas", "Warning"]

In [154]:
def is_matching_subjects(author) -> bool:
    try:
        return any([topic in author.areas for topic in subjects_list])
    except:
        print("no subjects on profile")
        return False

In [155]:
def is_matching_affils(author) -> bool:
    try:
        # return (any([affil in author.affiliation for affil in affiliation_list]) 
        #         and any([city in author.city for city in city_list]))
        return (any([affil in author.affiliation for affil in affiliation_list]) 
                or any([city in author.city for city in city_list]))
    except:
        print("no affiliation on profile")
        return False

In [156]:
def is_target_profile(author) -> bool:
    '''criteria for whether or not profile matches the target requirements'''
    return (is_matching_subjects(author) and is_matching_affils(author))

    # subject_check = False
    # affiliation_check = False
    # city_check = False
    # country_check = False

    # try:
    #     subject_check = any([topic in author.areas for topic in subjects_list])
    # except:
    #     print("no subject with associated profile")

    # try:
    #     affiliation_check = any([affil in affiliation_list for affil in author.affiliation])
    #     city_check = any([city in city_list for city in author.city]) 
    #     country_check = any([country in affiliation_list for country in author.country])
    # except:
    #     print("no affiliation with associated profile")

    # # if subject and affiliations match
    # if subject_check and affiliation_check:
    #     return True
    # elif affiliation_check:
    #     return True
    # elif (city_check or country_check) and subject_check:
    #     return True
    # else:
    #     return False

In [157]:
def add_author_row(author:tuple, is_target, out_dict, warning="", affil_override="", subj_override=""):
    '''
    fills in author information to the output dictionary.
    if is_target is True, meaning the author matches criteria, author variable should be a pybliometrics Author tuple. 
    if is_target is False, meaning author is not found or does not match criteria, author variable should be a tuple of
    ("author last name", "author first name")
    '''
    match is_target:
        case True:
            #add author information to dictionary
            lastname = author.surname
            firstname = author.givenname
            print("adding " + firstname + " " + lastname + "...\n")

            affil_name = author.affiliation
            author_id = author.eid.split("-")[-1]
            orcid = ""
            try:
                orcid = author.orcid
            except:
                pass
            subject = author.areas

            out_dict[cols_of_interest[0]].append(lastname) #"Last Name"
            out_dict[cols_of_interest[1]].append(firstname) #"First Name"
            out_dict[cols_of_interest[2]].append(affil_name) #"Affiliation"
            out_dict[cols_of_interest[3]].append(author_id) #"Author ID"
            out_dict[cols_of_interest[4]].append(orcid) #"ORCID"
            out_dict[cols_of_interest[5]].append(subject) #"Research Areas"
            out_dict[cols_of_interest[6]].append("") #"Warning"
        case False:
            #add blank rows with warning or affiliation/research area overrides
            lastname = author[0]
            firstname = author[1]
            print("no scival profile found\n")
            out_dict[cols_of_interest[0]].append(lastname) #"Last Name"
            out_dict[cols_of_interest[1]].append(firstname) #"First Name"
            out_dict[cols_of_interest[2]].append(affil_override) #"Affiliation"
            out_dict[cols_of_interest[3]].append("") #"Author ID"
            out_dict[cols_of_interest[4]].append("") #"ORCID"
            out_dict[cols_of_interest[5]].append(subj_override) #"Research Areas"
            out_dict[cols_of_interest[6]].append(warning) #"Warning"
            

In [158]:
def compare_all_profiles(authors):
    for i in range(len(authors)):
        print("looking at profile " + str(i))
        if is_target_profile(authors[i]):
            return authors[i]
    return None

In [159]:
#read member csv file accordingly
member_arr = []
with open(PATH_TO_INPUT_CSV, "r") as f:
    r = csv.reader(f)
    for row in r:
        if row[0] != "":
            member_arr.append(row)

# skip header rows
member_arr = member_arr[1:]
print(len(member_arr))

802


In [160]:
# if changing columns, make sure to change code in add_author_row() as well!
output_dict = {f"{key}":[] for key in cols_of_interest}

#go through all members
for member in member_arr:
    start = 0
    sleep(1) #pause for one second to prevent API warning

    #search for author
    lastname = member[0].strip()
    firstname = member[1].strip()
    print(f"searching for author {firstname} {lastname}")

    query = f"authlastname({lastname}) and authfirst({firstname}) and {affil_search_string}"
    s = AuthorSearch(query)
    authors = s.authors
    
    num_authors_found = s.get_results_size()

    match num_authors_found:
        case 0:
            #if there are no profiles, add blank row
            add_author_row((lastname, firstname), False, output_dict, warning=f"no profiles found with query ({affil_search_string})")
        case 1:
            #if there is a profile, save if subjects and affiliations match
            if is_target_profile(authors[0]):
                add_author_row(authors[0], True, output_dict)
            else:
                add_author_row((authors[0].surname, authors[0].givenname), 
                               False, output_dict, warning=f"!!! profile for {firstname} {lastname} does not pass addition condition",
                               affil_override=authors[0].affiliation,
                               subj_override=authors[0].areas)
        case _:
            #if there are multiple profile, take a look through each
            best_match_author = compare_all_profiles(authors)
            if best_match_author == None:
                add_author_row((authors[0].surname, authors[0].givenname), 
                               False, output_dict, warning=f"!!! FIRST profile for {firstname} {lastname} does not pass addition condition",
                               affil_override=authors[0].affiliation,
                               subj_override=authors[0].areas)
                print("no match found among profiles\n")
            else:
                add_author_row(best_match_author, True, output_dict)

searching for author Pieter Aartsma
no scival profile found

searching for author Mohamed Abdel-Fattah
no scival profile found

searching for author Ozotu Abu
no scival profile found

searching for author Trudy Jean Adam
no scival profile found

searching for author Leona Adams
no scival profile found

searching for author Adetokunbo Adeshina
no scival profile found

searching for author Qasim Afridi
no scival profile found

searching for author Ayesha Afzal
no scival profile found

searching for author Neelam Afzal
no scival profile found

searching for author Marina Agafonov
no scival profile found

searching for author Onome Agbahovbe
no scival profile found

searching for author Hezekiah Agboji
no scival profile found

searching for author Eugene Agranovich
no scival profile found

searching for author Nauman Ahmad
no scival profile found

searching for author Nick Ainsworth
no scival profile found

searching for author Geoffrey Ainsworth
no scival profile found

searching for auth

In [161]:
# save file
df = pd.DataFrame.from_dict(output_dict)
df.to_csv("manual_affils_check_test.csv")
# df.to_csv(OUTPUT_FILEPATH)