In [80]:
import os
import requests
import json
import csv
import pandas as pd
from dotenv import load_dotenv
from time import sleep
load_dotenv()

from pybliometrics.scopus.utils import config
# print(config['Authentication']['APIKey'])  # Show keys
# config['Authentication']['APIKey'] = "7f59af901d2d86f78a1fd60c1bf9426a"
config['Authentication']['APIKey'] = "127f5bf5de20d338f686704a9a328b86"
print(config['Authentication']['APIKey'])

127f5bf5de20d338f686704a9a328b86


In [81]:
from pybliometrics.scopus import CitationOverview, AuthorRetrieval, AuthorSearch

In [82]:
PATH_TO_INPUT_CSV = "in/PSYT faculty member lists UTF8.csv"
OUTPUT_FILEPATH = "out/PSYT faculty member lists SciVal formatted.csv"
cols = ['Last Name', 'First Name']
subjects_list = ["NEUR", "BIOC", "MEDI", "PSYC", "HEAL", "IMMU", "NURS", "PHAR"]
affiliation_list = ["The University of British Columbia", "Djavad Mowafaghian Centre for Brain Health", "UBC Hospital", "BC Children's Hospital Research Institute", "University of British Columbia, Faculty of Medicine",]
affil_id_list = ['60023077', '60010365', '60012423']
#[UBC faculty of medicine, UBC, UBC hospital]
city_list = ["Vancouver"]
# country_list = ["Canada"]
affil_search_string = "affil('UBC') or affil('University of British Columbia') or affil('Hospital')"
cols_of_interest = ["Last Name", "First Name", "Affiliation", "Author ID", "ORCID", "Research Areas", "Warning"]

In [83]:
#read member csv file accordingly
member_arr = []
with open(PATH_TO_INPUT_CSV, "r") as f:
    r = csv.reader(f)
    for row in r:
        if row[0] != "":
            member_arr.append(row)

# skip header rows
member_arr = member_arr[1:]
print(len(member_arr))

802


In [73]:
# member_arr = [["Abdel-Fattah","Mohamed"]]

In [84]:
def is_matching_subjects(author) -> bool:
    # ensures that the author's areas of study/publications matches target, assuming affiliation is already confirmed

    try:
        return any([topic in author.areas for topic in subjects_list])
    except:
        print("no subjects associated with profile")
        return False

In [85]:
def is_matching_affils(author) -> bool:
    try:
        return (any([affil in author.affiliation for affil in affiliation_list]) 
                or any([city in author.city for city in city_list]))
    except:
        print("no affiliation with associated profile")
        return False

In [86]:
def is_target_profile(author) -> bool:
    '''criteria for whether or not profile matches the target requirements'''
    return (is_matching_subjects(author) and is_matching_affils(author))

    # subject_check = False
    # affiliation_check = False
    # city_check = False
    # country_check = False

    # try:
    #     subject_check = any([topic in author.areas for topic in subjects_list])
    # except:
    #     print("no subject with associated profile")

    # try:
    #     affiliation_check = any([affil in affiliation_list for affil in author.affiliation])
    #     city_check = any([city in city_list for city in author.city]) 
    #     country_check = any([country in affiliation_list for country in author.country])
    # except:
    #     print("no affiliation with associated profile")

    # # if subject and affiliations match
    # if subject_check and affiliation_check:
    #     return True
    # elif affiliation_check:
    #     return True
    # elif (city_check or country_check) and subject_check:
    #     return True
    # else:
    #     return False

In [87]:
def add_author_row(author:tuple, is_target, out_dict, warning):
    # adds a row to the dictionary
    match is_target:
        case True:
            #add author information to dictionary
            lastname = author.surname
            firstname = author.givenname
            print("adding " + firstname + " " + lastname + "...\n")

            affil_name = author.affiliation
            author_id = author.eid.split("-")[-1]
            orcid = ""
            try:
                orcid = author.orcid
            except:
                pass
            subject = author.areas

            out_dict[cols_of_interest[0]].append(lastname) #"Last Name"
            out_dict[cols_of_interest[1]].append(firstname) #"First Name"
            out_dict[cols_of_interest[2]].append(affil_name) #"Affiliation"
            out_dict[cols_of_interest[3]].append(author_id) #"Author ID"
            out_dict[cols_of_interest[4]].append(orcid) #"ORCID"
            out_dict[cols_of_interest[5]].append(subject) #"Research Areas"
            out_dict[cols_of_interest[6]].append("") #"Warning"
        case False:
            #add blank rows with warning
            lastname = author[0]
            firstname = author[1]
            print("no scival profile found for " + firstname + " " + lastname + "\n")
            out_dict[cols_of_interest[0]].append(lastname) #"Last Name"
            out_dict[cols_of_interest[1]].append(firstname) #"First Name"
            out_dict[cols_of_interest[2]].append("") #"Affiliation"
            out_dict[cols_of_interest[3]].append("") #"Author ID"
            out_dict[cols_of_interest[4]].append("") #"ORCID"
            out_dict[cols_of_interest[5]].append("") #"Research Areas"
            out_dict[cols_of_interest[6]].append(warning) #"Warning"
            

In [88]:
def compare_all_profiles(authors):
    for i in range(len(authors)):
        print("looking at profile " + str(i))
        if is_target_profile(authors[i]):
            return authors[i]
    return None

In [89]:
# if changing columns, make sure to change code in add_author_row() as well!
output_dict = {f"{key}":[] for key in cols_of_interest}

#go through all members
for member in member_arr:
    start = 0
    sleep(1) #pause for one second to prevent API warning

    #search for author
    lastname = member[0].strip()
    firstname = member[1].strip()
    print(f"searching for author {firstname} {lastname}")

    query = f"authlastname({lastname}) and authfirst({firstname}) and {affil_search_string}"
    s = AuthorSearch(query)
    authors = s.authors
    
    num_authors_found = s.get_results_size()

    match num_authors_found:
        case 0:
            #if there are no members, add blank row
            add_author_row((lastname, firstname), False, output_dict, warning=f"no authors found with criteria")
        case 1:
            #if there is a member, save if subjects match
            if is_target_profile(authors[0]):
                add_author_row(authors[0], True, output_dict, warning="")
        case _:
            #if there are multiple members, take a look through each
            best_match_author = compare_all_profiles(authors)
            if best_match_author == None:
                print("no match found among profiles\n")
            else:
                add_author_row(best_match_author, True, output_dict, warning="multiple profiles found")
    


searching for author Pieter Aartsma
no scival profile found for Pieter Aartsma

searching for author Mohamed Abdel-Fattah
looking at profile 0
looking at profile 1
looking at profile 2
looking at profile 3
no match found among profiles

searching for author Ozotu Abu
no scival profile found for Ozotu Abu

searching for author Trudy Jean Adam
no scival profile found for Trudy Jean Adam

searching for author Leona Adams
no scival profile found for Leona Adams

searching for author Adetokunbo Adeshina
no scival profile found for Adetokunbo Adeshina

searching for author Qasim Afridi
no scival profile found for Qasim Afridi

searching for author Ayesha Afzal
searching for author Neelam Afzal
searching for author Marina Agafonov
no scival profile found for Marina Agafonov

searching for author Onome Agbahovbe
no scival profile found for Onome Agbahovbe

searching for author Hezekiah Agboji
no scival profile found for Hezekiah Agboji

searching for author Eugene Agranovich
no scival profile 

In [90]:
df = pd.DataFrame.from_dict(output_dict)
df.to_csv("restructured1.csv")
# df.to_csv(OUTPUT_FILEPATH)

OLD CODE vvvv

In [None]:
last_names = []
first_names = []
affils = []
auth_ids = []
orcids = []
tags = []
cities = []
countries = []

warnings = []
subjects = []

no_search_results = []

#go through all members
for member in member_arr:
    start = 0
    sleep(1) #pause for one second to prevent API warning

    #search for author
    lastname = member[0].strip()
    firstname = member[1].strip()

    print("searching for " + firstname + " " + lastname)

    query = f"authlastname({lastname}) and authfirst({firstname})"
    s = AuthorSearch(f"authlast({lastname}) and authfirst({firstname})")
    
    warning = ""
    authors = s.authors
    num_authors_found = s.get_results_size()
    
    #returns number of authors found
    if num_authors_found == 0:
        print("no scival profile found for " + firstname + " " + lastname + "\n")
        last_names.append(lastname)
        first_names.append(firstname)
        affils.append("")
        cities.append("")
        countries.append("")
        auth_ids.append("")
        warnings.append("no profile found")
        subjects.append("")
        orcids.append("")
        no_search_results.append(member)
        continue

    if num_authors_found == 1:
        if validate_profile(s.authors[0]):
            ### TODO
            pass

    # search for name + matching institution

    # search for name + within canada

    # add author to the list

    author_idx = 0
    match_found = False
    #multiple authors found with first/last name search
    if num_authors_found > 1:
        print(str(num_authors_found) + " profile(s) found for " + firstname + " " + lastname)
        # warning = warning + "multiple profiles; "
        
        best_match = 0
        #look through all the authors
        for idx in range(num_authors_found):
            # is_suitable_author = validate_profile(authors[idx])
            # if is_suitable_author:
            #     break
            print("looking at profile " + str(idx))

            # run comparison checks
            subject_check = False
            affiliation_check = False
            city_check = False
            country_check = False

            try:
                subject_check = any([topic in authors[idx].areas for topic in subjects_list])
            except:
                print("no subject with associated profile")

            try:
                affiliation_check = any([affil in affiliation_list for affil in authors[idx].affiliation])
                city_check = any([city in city_list for city in authors[idx].city]) 
                country_check = any([country in affiliation_list for country in authors[idx].country])
            except:
                print("no affiliation with associated profile")

            # if subject and affiliations match
            if subject_check and affiliation_check:
                print("subject and affiliation match")
                best_match = idx 
                match_found = True
                break
            elif affiliation_check:
                print("affiliation match")
                best_match = idx 
                match_found = True
                break
            elif (city_check or country_check) and subject_check:
                print("city/country match")
                best_match = idx
                match_found = True
                break
        
        if not match_found:
            print("looked through all profiles. no scival profile found for " + firstname + " " + lastname + "\n")
            last_names.append(lastname)
            first_names.append(firstname)
            affils.append("")
            cities.append("")
            countries.append("")
            auth_ids.append("")
            warnings.append("multiple profiles, no match")
            subjects.append("")
            orcids.append("")
            no_search_results.append(member)
            continue
    
        print("best match found")
        author_idx = best_match

    #get author affiliation name
    affil_name = ""
    city = ""
    country = ""
    try:
        affil_name = authors[author_idx].affiliation
        city = authors[author_idx].city
        country = authors[author_idx].country
        if affil_name not in affiliation_list:
            warning = warning + "affiliation not in list"
    except:
        warning = "no affiliation"
    
    #get author scopus id
    author_id = ""
    try:
        author_id = authors[author_idx].eid.split("-")[-1]
    except:
        pass
    
    # affil_id = data["search-results"]["entry"][0]["affiliation-current"]["affiliation-id"]

    #get author publication topics
    subject = ""
    try:
        subject = authors[author_idx].areas
        if not any([topic in authors[author_idx].areas for topic in subjects_list]):
            warning = warning + " subject mismatch; "
    except:
        warning = warning + " no subjects found; "
        pass

    #get author orcid if available
    orcid = ""
    try:
        orcid = authors[author_idx].orcid
    except:
        pass
    
    #add variables to respective arrays to turn into csv
    try:
        scopus_lastname = authors[author_idx].surname
        scopus_firstname = authors[author_idx].givenname
    except:
        scopus_lastname = lastname
        scopus_firstname = firstname

    last_names.append(scopus_lastname)
    first_names.append(scopus_firstname)
    affils.append(affil_name)
    cities.append(city)
    countries.append(country)
    auth_ids.append(author_id)
    warnings.append(warning)
    subjects.append(subject)
    orcids.append(orcid)

    print("\n")

searching for Pieter Aartsma
no scival profile found for Pieter Aartsma

searching for Mohamed Abdel-Fattah
21 profile(s) found for Mohamed Abdel-Fattah
looking at profile 0
looking at profile 1
looking at profile 2
looking at profile 3
looking at profile 4
looking at profile 5
no affiliation with associated profile
looking at profile 6
looking at profile 7
looking at profile 8
looking at profile 9
looking at profile 10
looking at profile 11
looking at profile 12
looking at profile 13
looking at profile 14
looking at profile 15
looking at profile 16
looking at profile 17
looking at profile 18
looking at profile 19
looking at profile 20
looked through all profiles. no scival profile found for Mohamed Abdel-Fattah

searching for Ozotu Abu
no scival profile found for Ozotu Abu

searching for Trudy Jean Adam
no scival profile found for Trudy Jean Adam

searching for Leona Adams
no scival profile found for Leona Adams

searching for Adetokunbo Adeshina


NameError: name 'validate_profile' is not defined

In [None]:
out_dict = {
    "Last Name": last_names,
    "First Name": first_names,
    "Affiliation": affils,
    "Country": countries,
    "Scopus Author ID": auth_ids,
    "ORCID": orcids,
    "Research Areas": subjects,
    "Warnings": warnings,
}
print(no_search_results)

for key in out_dict:
    print(len(out_dict[key]))

[['Aartsma', 'Pieter'], ['Abdel-Fattah', 'Mohamed'], ['Abu', 'Ozotu'], ['Adam', 'Trudy Jean'], ['Adams', 'Leona'], ['Adeshina', 'Adetokunbo'], ['Afridi', 'Qasim'], ['Afzal', 'Ayesha'], ['Afzal', 'Neelam'], ['Agafonov', 'Marina']]
10
10
10
10
10
10
10
10


In [None]:
df = pd.DataFrame.from_dict(out_dict)
df.to_csv("test.csv")
# df.to_csv(OUTPUT_FILEPATH)