In [45]:
import pandas as pd
import numpy as np
import os
import json
import requests


In [2]:
# Read the API credentials from a local JSON file so the key never appears in
# the notebook. The context manager closes the file handle deterministically
# (a bare `json.load(open(...))` leaves it open until garbage collection).
with open('env_keys.json') as keys_file:
    keys_json = json.load(keys_file)
scopus_key = keys_json['scopus_key']
# Header dict passed with the API requests made below.
req_headers = {
    'X-ELS-APIKey' : scopus_key
}


In [13]:
# Name of the author to search for.
firstname, lastname = 'john', 'smith'

# Fill the first/last name into the author-search query template.
scopus_query = 'AUTHFIRST({}) AND AUTHLASTNAME({}) AND SUBJAREA(ECON)'.format(firstname, lastname)

# Query-string parameters sent with the author search request.
author_search_params = {'httpAccept': 'application/json', 'query': scopus_query}
author_search_params

{'httpAccept': 'application/json',
 'query': 'AUTHFIRST(john) AND AUTHLASTNAME(smith) AND SUBJAREA(ECON)'}

In [14]:
# Run the author search. `requests` has no default timeout, so set one to keep
# a stalled connection from hanging the kernel forever.
response = requests.get(
    url='https://api.elsevier.com/content/search/author',
    headers=req_headers,
    params=author_search_params,
    timeout=30
)
# Fail here on an HTTP error (bad key, quota, server error) instead of later
# with a confusing error while digging through the JSON payload.
response.raise_for_status()

In [18]:
# Top-level keys of the 'search-results' object in the response body.
(
    response.json()
    .get('search-results')
    .keys()
)

dict_keys(['opensearch:totalResults', 'opensearch:startIndex', 'opensearch:itemsPerPage', 'opensearch:Query', 'link', 'entry'])

In [24]:
# First element of the 'entry' list inside 'search-results'.
(
    response.json()
    .get('search-results')
    .get('entry')[0]
)

{'@_fa': 'true',
 'link': [{'@_fa': 'true',
   '@ref': 'self',
   '@href': 'https://api.elsevier.com/content/author/author_id/55498586200'},
  {'@_fa': 'true',
   '@ref': 'search',
   '@href': 'https://api.elsevier.com/content/search/author?query=au-id%2855498586200%29'},
  {'@_fa': 'true',
   '@ref': 'scopus-citedby',
   '@href': 'https://www.scopus.com/author/citedby.uri?partnerID=HzOxMe3b&citedAuthorId=55498586200&origin=inward'},
  {'@_fa': 'true',
   '@ref': 'scopus-author',
   '@href': 'https://www.scopus.com/authid/detail.uri?partnerID=HzOxMe3b&authorId=55498586200&origin=inward'}],
 'prism:url': 'https://api.elsevier.com/content/author/author_id/55498586200',
 'dc:identifier': 'AUTHOR_ID:55498586200',
 'eid': '9-s2.0-55498586200',
 'preferred-name': {'surname': 'Smith',
  'given-name': 'Andrew John',
  'initials': 'A.J.'},
 'name-variant': [{'@_fa': 'true',
   'surname': 'Smith',
   'given-name': 'Andy',
   'initials': 'A.'},
  {'@_fa': 'true', 'surname': 'Smith', 'given-name':

In [39]:
def constructAliases(firstname, lastname, initials):
    """Build the name aliases for a single author name record.

    Parameters
    ----------
    firstname : str or None
        Given name(s).
    lastname : str or None
        Surname.
    initials : str or None
        Initials.

    Returns
    -------
    list of str
        ``['<firstname> <lastname>', '<initials> <lastname>']`` in that order.
        An alias is left out when its leading part is missing or empty, and
        the list is empty when ``lastname`` is missing or empty. This avoids a
        ``TypeError`` from concatenating ``None`` when a name record lacks a
        field.
    """
    if not lastname:
        return []
    # First Last, then Initials Last -- skipping any part that is absent.
    return [
        '{} {}'.format(leading_part, lastname)
        for leading_part in (firstname, initials)
        if leading_part
    ]


In [44]:
# Build {scopus_id: set of name aliases} from each search entry's preferred
# name and name variants, printing the name fields along the way.
aliases_dict = {}
author_entries = response.json().get('search-results').get('entry')
for i, author_object in enumerate(author_entries):
    # 'dc:identifier' looks like 'AUTHOR_ID:<digits>'; keep the part after the colon.
    scopus_id = author_object.get('dc:identifier').split(':')[1]
    print(scopus_id)
    # Accumulate straight into a set: duplicates collapse as they arrive and
    # there is no list-of-lists to flatten afterwards.
    author_aliases = set()

    author_object_preferred_name_obj = author_object.get('preferred-name')
    print('Preferred name fields: {}'.format(i))
    surname = author_object_preferred_name_obj.get('surname')
    givenname = author_object_preferred_name_obj.get('given-name')
    initials = author_object_preferred_name_obj.get('initials')
    print('\t{}'.format(surname))
    print('\t{}'.format(givenname))
    print('\t{}'.format(initials))
    author_aliases.update(constructAliases(givenname, surname, initials))

    author_object_name_variant_obj = author_object.get('name-variant')
    if author_object_name_variant_obj is None:
        print('nothing to see here')
    else:
        # NOTE(review): a lone variant delivered as a single object rather
        # than a list is wrapped so it is not silently dropped -- confirm
        # whether the API ever returns this shape.
        if isinstance(author_object_name_variant_obj, dict):
            author_object_name_variant_obj = [author_object_name_variant_obj]
        if isinstance(author_object_name_variant_obj, list):
            for j, name_variant_object in enumerate(author_object_name_variant_obj):
                variant_surname = name_variant_object.get('surname')
                variant_givenname = name_variant_object.get('given-name')
                variant_initials = name_variant_object.get('initials')
                print('Name variant fields: {}-{}'.format(i,j))
                print('\t{}'.format(variant_surname))
                print('\t{}'.format(variant_givenname))
                print('\t{}'.format(variant_initials))
                author_aliases.update(
                    constructAliases(variant_givenname, variant_surname, variant_initials)
                )

    aliases_dict[scopus_id] = author_aliases
    # Show the mapping accumulated so far after each author.
    print(aliases_dict)

55498586200
Preferred name fields: 0
	Smith
	Andrew John
	A.J.
Name variant fields: 0-0
	Smith
	Andy
	A.
Name variant fields: 0-1
	Smith
	A.
	A.
Name variant fields: 0-2
	Smith
	Andrew J.
	A.J.
{'55498586200': {'A.J. Smith', 'A. Smith', 'Andrew John Smith', 'Andrew J. Smith', 'Andy Smith'}}
7501693900
Preferred name fields: 1
	Smith
	Ben J.
	B.J.
Name variant fields: 1-0
	Smith
	Ben John
	B.J.
Name variant fields: 1-1
	Smith
	Ben
	B.
Name variant fields: 1-2
	Smith
	B. J.
	B.J.
{'55498586200': {'A.J. Smith', 'A. Smith', 'Andrew John Smith', 'Andrew J. Smith', 'Andy Smith'}, '7501693900': {'B.J. Smith', 'B. J. Smith', 'Ben J. Smith', 'Ben John Smith', 'B. Smith', 'Ben Smith'}}
55911705800
Preferred name fields: 2
	Russell-Smith
	Jeremy
	J.
Name variant fields: 2-0
	Russell-Smith
	J. C.Z.
	J.C.Z.
Name variant fields: 2-1
	Russell Smith
	Jr
	J.
Name variant fields: 2-2
	Russell Smith
	Jeremy
	J.
{'55498586200': {'A.J. Smith', 'A. Smith', 'Andrew John Smith', 'Andrew J. Smith', 'Andy Smith

In [25]:
# Field names available on the first entry of the search results.
(
    response.json()
    .get('search-results')
    .get('entry')[0]
    .keys()
)

dict_keys(['@_fa', 'link', 'prism:url', 'dc:identifier', 'eid', 'preferred-name', 'name-variant', 'document-count', 'subject-area', 'affiliation-current'])

In [51]:
import re

# Combine every '*_author_abstract_funding.csv' file in scopus_data/ into one frame.
path_prepend = 'scopus_data/'
# os.listdir returns entries in arbitrary order; sort so the row order of the
# combined frame is the same on every run.
filenames = sorted(os.listdir('scopus_data'))

journal_frames = []
for filename in filenames:
    # Anchored with `$` so look-alikes such as '*.csv.bak' are not picked up.
    if re.search(r'_author_abstract_funding\.csv$', filename):
        file_path = path_prepend + filename
        print(file_path)
        journal_authors_abstracts_df = pd.read_csv(file_path)
        journal_frames.append(journal_authors_abstracts_df)

# Concatenate once at the end: calling pd.concat inside the loop re-copies all
# previously loaded rows on every iteration. Row labels are kept as read from
# each file (no ignore_index), so labels repeat across files.
# pd.concat raises on an empty list, hence the fallback to an empty frame.
authors_abstracts_df = pd.concat(journal_frames) if journal_frames else pd.DataFrame()

print(len(authors_abstracts_df))
authors_abstracts_df

scopus_data/AER_author_abstract_funding.csv


  journal_authors_abstracts_df = pd.read_csv(file_path)


scopus_data/ALJ_author_abstract_funding.csv
scopus_data/ATB_author_abstract_funding.csv
scopus_data/ATX_author_abstract_funding.csv
scopus_data/ECA_author_abstract_funding.csv
scopus_data/ECX_author_abstract_funding.csv
scopus_data/JEM_author_abstract_funding.csv
scopus_data/JFE_author_abstract_funding.csv
scopus_data/JHR_author_abstract_funding.csv
scopus_data/JLE_author_abstract_funding.csv
scopus_data/JLO_author_abstract_funding.csv
scopus_data/JOF_author_abstract_funding.csv
scopus_data/JOL_author_abstract_funding.csv
scopus_data/JPE_author_abstract_funding.csv
scopus_data/QJE_author_abstract_funding.csv
scopus_data/RES_author_abstract_funding.csv
scopus_data/RFS_author_abstract_funding.csv
scopus_data/RJE_author_abstract_funding.csv
682578


Unnamed: 0,doi,sc_title,sc_issn,sc_pub_name,sc_vol,sc_issue,sc_page_range,sc_abstract_api_endpoint,sc_human_url,sc_pub_date,...,sc_author_id,sc_author_given_name,sc_author_last_name,sc_author_affil_id,sc_author_affil_indexed,sc_grant_text,sc_funding_text,sc_funding_agency,sc_author_indexed_name,sc_abstract_text
0,10.1257/aer.20201238,Team- Specific Human Capital and Team Performa...,28282,American Economic Review,111,12,3923-3962,https://api.elsevier.com/content/abstract/scop...,https://www.scopus.com/inward/citedby.uri?part...,2021-12-01,...,57388639400.0,Yiqun,Chen,60027561,University of Illinois at Chicago,SCOPUS FAILURE,* University of Illinois at Chicago (email: yq...,SCOPUS FAILURE,Chen Y.,© 2021 American Economic Association. All righ...
1,10.1257/aer.20181801,Prep School for Poor Kids: The Long- Run Impac...,28282,American Economic Review,111,12,3963-4001,https://api.elsevier.com/content/abstract/scop...,https://www.scopus.com/inward/citedby.uri?part...,2021-12-01,...,14631703900.0,Martha J.,Bailey,60027550,"Department of Economics, University of Califor...",SCOPUS FAILURE,"* Bailey: Department of Economics, University ...",SCOPUS FAILURE,Bailey M.J.,© 2021 American Economic Association. All righ...
2,10.9767/BCREC.17.1.12473.22-31,Team-Specific Human Capital and Team Performan...,28282,American Economic Review,111,12,3923-3962,https://api.elsevier.com/content/abstract/scop...,https://www.scopus.com/inward/citedby.uri?part...,2021-12-01,...,57388639400.0,Yiqun,Chen,60027561,University of Illinois at Chicago,I gratefully acknowledge support from the Leon...,* University of Illinois at Chicago (email: yq...,SCOPUS FAILURE,Chen Y.,© 2021 American Economic Association. All righ...
3,10.9767/BCREC.17.1.12366.32-45,M Equilibrium: A Theory of Beliefs and Choices...,28282,American Economic Review,111,12,4002-4045,https://api.elsevier.com/content/abstract/scop...,https://www.scopus.com/inward/citedby.uri?part...,2021-12-01,...,7003736000.0,Jacob K.,Goeree,60028333,"AGORA Center for Market Design, UNSW",We gratefully acknowledge funding from the Aus...,"* Goeree: AGORA Center for Market Design, UNSW...",SCOPUS FAILURE,Goeree J.K.,© 2021 American Economic Association. All righ...
4,10.9767/BCREC.17.1.12174.1-12,Sectoral Media Focus and Aggregate Fluctuations,28282,American Economic Review,111,12,3872-3922,https://api.elsevier.com/content/abstract/scop...,https://www.scopus.com/inward/citedby.uri?part...,2021-12-01,...,36666132400.0,Ryan,Chahrour,60031117,"Economics Department, Boston College",SCOPUS FAILURE,SCOPUS FAILURE,SCOPUS FAILURE,Chahrour R.,© 2021 American Economic Association. All righ...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2207,10.2307/2601057,The supply of charity services by nonprofit ho...,7416261,The Rand journal of economics,22,3,430-445,https://api.elsevier.com/content/abstract/scop...,https://www.scopus.com/inward/citedby.uri?part...,1991-01-01,...,7004310598.0,D. S.,Salkever,60005248,SCOPUS FAILURE,,,,Salkever D.,This article studies provision of charity care...
2208,10.2307/2601018,Uncertain litigation and liability insurance.,7416261,The Rand journal of economics,22,2,218-231,https://api.elsevier.com/content/abstract/scop...,https://www.scopus.com/inward/citedby.uri?part...,1991-01-01,...,6603642202.0,B.,Sarath,60006297,SCOPUS FAILURE,SCOPUS FAILURE,SCOPUS FAILURE,SCOPUS FAILURE,Sarath B.,Legal penalties and liability insurance seem t...
2209,10.2307/2601017,Medical malpractice: an empirical examination ...,7416261,The Rand journal of economics,22,2,199-217,https://api.elsevier.com/content/abstract/scop...,https://www.scopus.com/inward/citedby.uri?part...,1991-01-01,...,7005716141.0,H. S.,Farber,60022195,SCOPUS FAILURE,SCOPUS FAILURE,SCOPUS FAILURE,SCOPUS FAILURE,Farber H.,New data on medical malpractice claims against...
2210,10.2307/2601017,Medical malpractice: an empirical examination ...,7416261,The Rand journal of economics,22,2,199-217,https://api.elsevier.com/content/abstract/scop...,https://www.scopus.com/inward/citedby.uri?part...,1991-01-01,...,7404176354.0,M. J.,White,60022195,SCOPUS FAILURE,,,,White M.,New data on medical malpractice claims against...


In [None]:
# One row per distinct author id, keeping the first row seen for each.
# DataFrame has no `.unique(subset=...)` method; `drop_duplicates` is the
# frame-level equivalent. The author-id column in the loaded data is
# 'sc_author_id'.
unique_authors = authors_abstracts_df.drop_duplicates(subset=['sc_author_id'])