In [1]:
from scopuscite.scopus import Scopus
from scopuscite.utils import load_api_key
from scopuscite.aggregate import aggregate_author_info

Create a Scopus object with an API key for querying Scopus.

In [2]:
# The API key is obtained from load_api_key() instead of being written into the notebook.
scopus = Scopus(apikey=load_api_key())

Author ids for all authors who published in Annals of Mathematics in 2016.

In [3]:
# Look up the ids of every author who published in the journal in the given year.
author_ids = scopus.get_authors_from_journal_year(
    year=2016,
    journal='Annals of Mathematics',
    issn='0003486X',
)

Querying Scopus to retrieve list of authors.
Api calls remaining: 18170 / 20000
Authors found: 81



To speed up downloading, we use just a subset of these authors.

In [4]:
# Rebind author_ids to a hand-picked subset of five ids; all later cells use this list.
author_ids = [
    '10043135600',
    '14007991900',
    '14061167300',
    '15751633600',
    '16483725200',
]

Scopus profiles of these authors as a dataframe.

In [5]:
# Fetch the Scopus profile of each author in author_ids; the result is shown in the next cell.
authors = scopus.get_author_info(author_ids)

Retrieving info for 5 authors.
Loading cache file.
Cache size: 240 Bytes
Read from cache: 0
To be read from Scopus: 5
Chunk 1 / 1.
Saving cache file.
Api call remaining: 4915 / 5000
Author info retrieved.



In [6]:
# Preview the first five author profiles (five is also the default row count).
authors.head(n=5)

Unnamed: 0_level_0,name,first_name,last_name,affiliation,first_pub,last_pub,npubs,ncites,ncited_by,ncoauthors,hindex
author_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
10043135600,Dafermos M.,Mihalis,Dafermos,Princeton University,2003,2018,23,601,317,4,13
14007991900,Demeter C.,Ciprian,Demeter,"Indiana University, Department of Mathematics",2004,2017,34,206,143,17,8
14061167300,Sheffield S.,Scott,Sheffield,"Massachusetts Institute of Technology, Departm...",2002,2017,52,1288,821,39,18
15751633600,Reiher C.,Christian,Reiher,Universitat Hamburg,2007,2018,15,43,42,16,3
16483725200,Jannsen U.,Uwe,Jannsen,Universitat Regensburg,1979,2016,7,30,26,4,3


Scopus ids of all their publications.

In [7]:
# Collect the Scopus ids of the publications belonging to the authors in author_ids.
scopus_ids = scopus.get_author_publications(author_ids)

Querying Scopus to retrieve list of publication scopus ids.
Number of authors: 5
Loading cache.
Read from cache: 0
Authors to query Scopus: 5
Chunk 1 / 1
Publications found: 140



Citation information about these publications for the years 1980-2018.
Following the Python range convention, the right endpoint is not included.

In [8]:
# Half-open year window: the right endpoint is excluded, so this covers 1980 through 2018.
citation_years = (1980, 2019)
pubs = scopus.get_publication_info(scopus_ids, citation_years)

Retrieving publication info for 140 ids.
Loading cache file.
Cache size: 240 Bytes
Total read from cache: 0
To be retrieved from Scopus: 140
Saving cache file.
18646 / 20000 api calls remaining.
Publication info retrieved.



In [9]:
# Preview the first five publication records (five is also the default row count).
pubs.head(n=5)

Unnamed: 0_level_0,authors,cites_by_year,cites_start_year,journal,lcc,ncites,pcc,title,year
scopus_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
80054754274,"[6603457363, 14422566500, 14061167300]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1980,Journal of the American Mathematical Society,0,21,0,Logarithmic fluctuations for internal DLA,2012
84455202334,"[14007991900, 7005080281]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1980,Journal of Mathematical Analysis and Applications,0,8,0,"Proof of the HRT conjecture for (2, 2) configu...",2012
84997294389,"[23970039700, 55566387800, 15751633600, 700365...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1980,Journal of Combinatorial Theory. Series B,0,1,0,The chromatic number of finite type-graphs,2017
84891558057,"[14007991900, 24400923600]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1980,Journal of Geometric Analysis,0,4,0,Logarithmic L<sup>p</sup>bounds for maximal di...,2014
33748693121,"[7006449711, 6701591697, 14061167300]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1980,Annals of Mathematics,0,144,0,Dimers and amoebae,2006


Aggregate citation information for each author.

In [10]:
# Aggregate the publication-level data in pubs for each author in authors.
authors_agg = aggregate_author_info(authors, pubs)

In [11]:
# Preview the first five aggregated author rows (five is also the default row count).
authors_agg.head(n=5)

Unnamed: 0_level_0,name,first_name,last_name,affiliation,ncited_by,npubs,first_pub,last_pub,ncites,ncoauthors,hindex,pcc,cites_by_year,lcc,pubs_by_year,ncoauthors_mean,ncoauthors_acc
author_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
10043135600,Dafermos M.,Mihalis,Dafermos,Princeton University,317,24,2003,2018,616,4,13,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.666667,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
14007991900,Demeter C.,Ciprian,Demeter,"Indiana University, Department of Mathematics",143,34,2004,2017,211,17,8,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1.029412,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
14061167300,Sheffield S.,Scott,Sheffield,"Massachusetts Institute of Technology, Departm...",821,52,2002,2017,1329,40,19,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1.519231,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
15751633600,Reiher C.,Christian,Reiher,Universitat Hamburg,42,15,2007,2018,44,16,3,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1.733333,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
16483725200,Jannsen U.,Uwe,Jannsen,Universitat Regensburg,26,15,1979,2016,297,6,7,0,"[1, 1, 4, 2, 4, 2, 2, 4, 2, 2, 2, 2, 2, 5, 1, ...",0,"[1, 0, 4, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, ...",0.733333,"[2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ..."


Note that if we run the code again, Scopus is not queried again; the results are read from the cache.

In [12]:
# Same request as before (years 1980 through 2018, right endpoint excluded),
# repeated to show that the results now come from the cache.
pubs = scopus.get_publication_info(
    scopus_ids,
    (1980, 2019),
)

Retrieving publication info for 140 ids.
Loading cache file.
Cache size: 4.7 kB
Total read from cache: 140
To be retrieved from Scopus: 0
Saving cache file.
Scopus api was not called.
Publication info retrieved.



In [13]:
# Preview the first five publication records returned by the repeated request.
pubs.head(n=5)

Unnamed: 0_level_0,authors,cites_by_year,cites_start_year,journal,lcc,ncites,pcc,title,year
scopus_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
84941438062,[16483725200],"[0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, ...",1980,Journal fur die Reine und Angewandte Mathematik,0,7,0,Einbettungsprobleme und Galoisstruktur lokaler...,1980
84982135183,"[7005720845, 14007991900, 23103119100]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1980,Annals of Mathematics,0,24,0,Proof of the main conjecture in Vinogradov's m...,2016
84979231132,"[57190340363, 14061167300, 55727882800, 561674...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1980,Probability Surveys,0,4,0,Fractional gaussian fields: A survey,2016
28244482372,"[10043135600, 7003781103]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1980,Classical and Quantum Gravity,0,13,0,Inextendibility of expanding cosmological mode...,2005
50949108542,"[14007991900, 7102792816, 8115250500]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1980,Transactions of the American Mathematical Society,0,17,0,Maximal multilinear operators,2008
