In [None]:
%pylab inline
import scholarly
import pandas as pd

In [None]:
def get_author_list(filename):
    '''
    Read a tab-separated author file and build one search string per author.

    Each non-blank line is expected to hold the last name, a tab, then the
    first name. Any further tab-separated columns are ignored. The suffix
    "ualberta" is appended since the authors are from ualberta.
    WARNING: We assume all punctuation has been removed and encoding is utf-8

    Input: string->filename (path to the author file)
    Output: list[str], each entry formatted as 'FirstName LastName ualberta'
    Raises: IndexError if a non-blank line has no tab-separated first name
    '''
    authors = []
    with open(filename, 'r', encoding='utf-8') as f:
        for line in f:
            record = line.strip()
            if not record:
                # Blank lines (typically a trailing newline at the end of the
                # file) carry no author; skip them instead of failing below.
                continue
            # Strip each field so stray spaces around the tab do not end up
            # inside the search string.
            fields = [field.strip() for field in record.split('\t')]
            last_name, first_name = fields[0], fields[1]
            authors.append(f'{first_name} {last_name} ualberta')
    return authors

In [None]:
def get_pub_list(google_profile):
    '''
    Build one dictionary per publication listed on a google profile.

    Every entry of google_profile.publications is passed to pub_to_dict
    together with the profile id, and the results are collected in order.

    Input: Author->google_profile
    Output: list[dict]
    '''
    records = []
    for publication in google_profile.publications:
        records.append(pub_to_dict(google_profile.id, publication))
    return records
    
def pub_to_dict(author_id, pub):
    '''
    Extracts basic information about a publication into a dictionary
    This list is later transformed into a table that can be queried by author_id

    Input: string->author_id
           Publication->pub (pub.bib is read as a dict with a 'title' key
           and an optional 'year' key)
    Output: dict with keys 'author_id', 'title' and 'year'
    Raises: KeyError if pub.bib has no 'title' entry
    '''
    bib = pub.bib
    return {
        'author_id': author_id,
        'title': bib['title'],
        # A missing year is recorded as 0. Only the absent key is defaulted,
        # so unrelated errors are no longer silently swallowed.
        'year': bib.get('year', 0),
    }

In [None]:
def get_gs_profile(name):
    '''
    Collects the information from the google profile based on the Author Name
    Formats into a dictionary with basic information and returns

    Only the first result of the author search is used.

    Input: string->name
    Output: dict with keys 'author_id', 'name', 'h_index', 'citation_count',
            'citation_count_5y' and 'coauthors_count', or False when the
            profile could not be retrieved
    '''
    print(f"Searching: {name}")
    search_query = scholarly.search_author(name)
    try:
        gs = next(search_query).fill()
    except Exception:
        # Kept broad on purpose: an exhausted search (StopIteration) and any
        # failure while filling the profile are both reported as "not Found".
        # Unlike a bare except, KeyboardInterrupt and SystemExit now
        # propagate, so a long scraping run can be interrupted.
        print(f'{name} not Found')
        return False

    # NOTE(review): the publication list is built here but its result is
    # discarded. Presumably it was meant to be stored or returned; confirm
    # against the caller before removing or wiring it up.
    get_pub_list(gs)

    return {'author_id': gs.id,
            'name': gs.name,
            'h_index': gs.hindex,
            'citation_count': gs.citedby,
            'citation_count_5y': gs.citedby5y,
            'coauthors_count': len(gs.coauthors)}

In [None]:
# Write the collected publication records to disk as a table.
pd.DataFrame(data=pubs).to_csv(path_or_buf='pubs.csv')
# Write the collected author records to disk as a table.
pd.DataFrame(data=aut_table).to_csv(path_or_buf='authors_science.csv')