In [1]:
import vcr
import pandas as pd
import json
from typing import List
from semanticscholar import SemanticScholar

# Read list of professors that will be featured.
with open("professors.json") as f:
    professors = json.load(f)["professors"]

client = SemanticScholar(timeout=1)

class DataParser():

    def __init__(self, professors: List[str]):
        self.get_prof_objs(professors)
    
    def get_prof(self, name: str):
        with vcr.use_cassette('cache/{}.yaml'.format(name)):
            response = sorted(client.search_author(name), key=lambda x: -x['hIndex'])[0]
            return response

    def get_prof_objs(self, professors: List[str]):
        objs = {}
        for prof in professors:
            try:
                objs[prof] = self.get_prof(prof)
            except:
                pass
        
        self.objs = objs

    def get_author_table(self):
        author_data = []
        for i, prof in enumerate(self.objs.keys()):
            author_data.append([str(i + 1), prof, self.objs[prof]['url']])
        
        return author_data

    def get_alias_table(self):
        aliases = set()
        alias_data = []
        for prof in self.objs.keys():
            if self.objs[prof]['aliases'] == None: continue
            for alias in self.objs[prof]['aliases']:
                aliases.add(alias)
        for i, alias in enumerate(aliases):
            alias_data.append([i + 1, alias])
        
        return alias_data
    
    def get_knownas_table(self):
        return []
    
    def get_paper_table(self):
        return []

In [2]:
parser = DataParser(professors)

In [3]:
parser.get_alias_table()

[[1, 'C. Chien'],
 [2, 'D.a. Bauer'],
 [3, 'Clifford S. Stein'],
 [4, 'D. Knowles'],
 [5, 'Daniel I Rubenstein'],
 [6, 'C Wan Chien'],
 [7, 'C. F. Chien'],
 [8, 'A Cannon'],
 [9, 'C. Vondrick'],
 [10, 'Daniel A. Bauer'],
 [11, 'S.k. Feiner'],
 [12, 'A D Cannon'],
 [13, 'C H Papadimitriou'],
 [14, 'M. Yannakakis'],
 [15, 'Marth Kim'],
 [16, 'S. W. Edwards'],
 [17, 'S. Feiner'],
 [18, 'D K Hsu'],
 [19, 'S Edwards'],
 [20, 'Cliff Stein'],
 [21, 'David M. Blei'],
 [22, 'Steven Edwards'],
 [23, 'D I Rubenstein'],
 [24, 'Luis Gravano'],
 [25, 'Mihalis Yannakakis'],
 [26, 'Steven W. Edwards'],
 [27, 'D. Bauer'],
 [28, 'Cli Ord Stein'],
 [29, 'S. K. Feiner'],
 [30, 'Dan I. Rubenstein'],
 [31, 'Chien-lin Chang Chien'],
 [32, 'Daniel Adams Bauer'],
 [33, 'Christos Harilaos Papadimitriou'],
 [34, 'Daniel K Hsu Hsu'],
 [35, 'Martha B. Kim'],
 [36, 'D. A. Knowles'],
 [37, 'D.\u2009a. Bauer'],
 [38, 'Daniel Ian Rubenstein'],
 [39, 'Dan Bauer'],
 [40, 'Adam D. Cannon'],
 [41, 'Da Bauer'],
 [42, 'S K 

In [4]:
parser.get_author_table()

[['1', 'Luis Gravano', 'https://www.semanticscholar.org/author/1684012'],
 ['2', 'Paul Blaer', 'https://www.semanticscholar.org/author/1897655'],
 ['3',
  'Christos Papadimitriou',
  'https://www.semanticscholar.org/author/144102674'],
 ['4', 'Adam Cannon', 'https://www.semanticscholar.org/author/145051113'],
 ['5', 'Nakul Verma', 'https://www.semanticscholar.org/author/39706047'],
 ['6', 'Mihalis Yannakakis', 'https://www.semanticscholar.org/author/1748179'],
 ['7', 'Martha Kim', 'https://www.semanticscholar.org/author/8991961'],
 ['8', 'Tony Dear', 'https://www.semanticscholar.org/author/3258452'],
 ['9', 'Daniel Bauer', 'https://www.semanticscholar.org/author/87520404'],
 ['10', 'David Blei', 'https://www.semanticscholar.org/author/1796335'],
 ['11', 'Shing-Fu Chang', 'https://www.semanticscholar.org/author/145586362'],
 ['12', 'Eleni Drinea', 'https://www.semanticscholar.org/author/2103183'],
 ['13', 'Stephen Edwards', 'https://www.semanticscholar.org/author/144520714'],
 ['14', 'S