[API documentation](https://dev.elsevier.com/documentation/ScopusSearchAPI.wadl)


In [77]:
import getpass
import pickle
import re
import time

import pandas as pd
import requests

In [3]:
# to communicate with google spreadsheet...
import gspread
from gspread_dataframe import get_as_dataframe
from gspread_dataframe import set_with_dataframe
from google.oauth2 import service_account # based on google-auth library
import sddk

# Open a session against the sciencedata.dk cloud storage.
s = sddk.cloudSession("sciencedata.dk")

# Read the Google service-account key as a dict from sciencedata.dk
# (alternatively load it from local storage:
#  json.load(open("../../ServiceAccountsKey.json", "r"))).
file_data = s.read_file(
    "https://sciencedata.dk/files/ServiceAccountsKey.json",
    "dict",
)

# Build scoped credentials and establish the connection with Google Sheets.
credentials = service_account.Credentials.from_service_account_info(file_data)
gc = gspread.Client(
    auth=credentials.with_scopes(
        [
            'https://spreadsheets.google.com/feeds',
            'https://www.googleapis.com/auth/drive',
        ]
    )
)

# Spreadsheet to which the result tables of this notebook are written.
mops_data = gc.open_by_url(
    "https://docs.google.com/spreadsheets/d/1VbCIAJssHKV9hlRTwzVFfm40CGnHesq53KXjv2qy4OM/edit?usp=sharing"
)

endpoint variable has been configured to: https://sciencedata.dk/files/


In [5]:
# Ask for the Scopus API key interactively. getpass is used instead of input()
# so the typed key is not echoed into the (saved and shared) notebook output.
apikey = getpass.getpass("go to https://dev.elsevier.com/documentation/ScopusSearchAPI.wadl, generate your API key, copy it and insert it here: ")

In [6]:
# HTTP headers carrying the API key; passed along with every request below.
headers = {"X-ELS-APIKey" : apikey}

In [7]:

# Address of the Scopus Search API; the search expression is appended to
# `base_url` to form the full request URL.
api_address = "https://api.elsevier.com/content/search/scopus?"
base_url = f"{api_address}&query="
print(base_url)

https://api.elsevier.com/content/search/scopus?&query=


In [8]:
# Smoke test: a single-keyword query appended to the base URL.
query = "religion"
base_url + query

'https://api.elsevier.com/content/search/scopus?&query=religion'

In [9]:
# Send the request and show the total number of hits reported for the query.
requests.get(base_url + query, headers=headers).json()["search-results"]["opensearch:totalResults"]

'635772'

In [10]:
# First combined phrase query over the TITLE-ABS-KEY field group.
# NOTE(review): "Pauline liter" looks truncated (the following cell uses
# "Pauline literature") - confirm whether this was intended.
query= "TITLE-ABS-KEY ( \"Paul the Apostle\"  OR  \"Apostle Paul\"  OR  \"Paul of Tarsus\"  OR  \"Pauline liter\" )"
resp = requests.get(base_url + query, headers=headers)
resp.json()["search-results"]["opensearch:totalResults"]

'522'

In [11]:
# Broader variant of the combined query with additional phrasings for
# Paul's letters/epistles; the cell shows the resulting total hit count.
query= 'TITLE-ABS-KEY ( "Paul the Apostle"  OR  "Apostle Paul"  OR  "Paul of Tarsus"  OR  "Pauline literature"  OR  "Paul\'s epistles"  OR  "Paul\'s letters"  OR  "letters of Paul" )'
resp = requests.get(base_url + query, headers=headers)
resp.json()["search-results"]["opensearch:totalResults"]

'1075'

In [12]:
# Count the Scopus hits of every search phrase separately in the TITLE, ABS
# and KEY fields. The result is a list with one dict per phrase:
# {"phrase": ..., "TITLE": ..., "ABS": ..., "KEY": ...}.
phrases_data = []
phrases = ["paul the apostle",
           "apostle paul",
           "paul of tarsus",
           "pauline literature",
           "paul's epistle",
           "Paul's letter",
           "letter of Paul",
           "epistle of paul"]

for phrase in phrases:
    cats_data = {}
    cats_data["phrase"] = phrase
    for cat in ["TITLE", "ABS", "KEY"]:
        query = cat + " ( \"{0}\")".format(phrase)
        print(query)
        resp = requests.get(base_url + query, headers=headers)
        # Fail with a clear HTTP error (e.g. invalid key, exhausted quota)
        # instead of an opaque KeyError on "search-results" further down.
        resp.raise_for_status()
        cats_data[cat] = resp.json()["search-results"]["opensearch:totalResults"]
    phrases_data.append(cats_data)

TITLE ( "paul the apostle")
ABS ( "paul the apostle")
KEY ( "paul the apostle")
TITLE ( "apostle paul")
ABS ( "apostle paul")
KEY ( "apostle paul")
TITLE ( "paul of tarsus")
ABS ( "paul of tarsus")
KEY ( "paul of tarsus")
TITLE ( "pauline literature")
ABS ( "pauline literature")
KEY ( "pauline literature")
TITLE ( "paul's epistle")
ABS ( "paul's epistle")
KEY ( "paul's epistle")
TITLE ( "Paul's letter")
ABS ( "Paul's letter")
KEY ( "Paul's letter")
TITLE ( "letter of Paul")
ABS ( "letter of Paul")
KEY ( "letter of Paul")
TITLE ( "epistle of paul")
ABS ( "epistle of paul")
KEY ( "epistle of paul")


In [13]:
# One row per phrase with its hit counts in the TITLE, ABS and KEY fields.
scopus_phrases_overview = pd.DataFrame(phrases_data)
scopus_phrases_overview

Unnamed: 0,phrase,TITLE,ABS,KEY
0,paul the apostle,16,45,25
1,apostle paul,84,308,67
2,paul of tarsus,15,37,19
3,pauline literature,3,13,12
4,paul's epistle,28,67,4
5,Paul's letter,102,280,25
6,letter of Paul,37,73,7
7,epistle of paul,0,26,3


In [None]:
# Write the overview table to a new "scopus_phrases_overview" worksheet.
# NOTE(review): add_worksheet presumably fails if a worksheet with this
# title already exists - confirm before re-running this cell.
set_with_dataframe(mops_data.add_worksheet("scopus_phrases_overview", 1,1), scopus_phrases_overview)

In [14]:
# Wrap every phrase as ("phrase") and chain the groups with OR so that all
# phrases can be searched in a single query.
phrases_combined = "".join(["(\"", "\") OR (\"".join(phrases), "\")"])
print(phrases_combined)

("paul the apostle") OR ("apostle paul") OR ("paul of tarsus") OR ("pauline literature") OR ("paul's epistle") OR ("Paul's letter") OR ("letter of Paul") OR ("epistle of paul")


In [19]:
# Search all phrases at once across the TITLE-ABS-KEY field group and keep
# the parsed JSON of the first response for the paging set-up below.
query = "TITLE-ABS-KEY (" + phrases_combined + " )"
resp = requests.get(base_url + query, headers=headers)
# Surface HTTP errors (invalid key, quota exceeded, ...) here rather than as
# a KeyError when the JSON is inspected.
resp.raise_for_status()
resp_json = resp.json()

In [20]:
# Pull the paging metadata and the first page of hits out of the response.
items = resp_json["search-results"]["opensearch:totalResults"]  # total number of hits
pagelength = resp_json["search-results"]["opensearch:itemsPerPage"]  # hits returned per page
entries = resp_json["search-results"]["entry"]  # hits of the first page

In [21]:
# Start offsets of all result pages: 0, pagelength, 2 * pagelength, ... below
# the total hit count. Both values are cast with int() because they are taken
# directly from the JSON response (the totals shown above are strings).
indices = list(range(0, int(items), int(pagelength)))
print(indices)

[0, 25, 50, 75, 100, 125, 150, 175, 200, 225, 250, 275, 300, 325, 350, 375, 400, 425, 450, 475, 500, 525, 550, 575, 600, 625, 650, 675, 700, 725, 750, 775, 800, 825, 850, 875, 900, 925, 950, 975, 1000, 1025, 1050, 1075, 1100]


In [72]:
# Download all result pages in the COMPLETE view and collect their entries
# in one flat list.
entries = []
for i in indices:
    resp = requests.get(base_url + query + "&start={0}".format(str(i)) + "&view=COMPLETE", headers=headers)
    # Stop at the first failed request instead of hitting a KeyError on the
    # error payload and silently losing track of which page failed.
    resp.raise_for_status()
    resp_json = resp.json()
    # A page without an "entry" key contributes nothing instead of raising.
    entry = resp_json["search-results"].get("entry", [])
    entries.extend(entry)

In [73]:
# Persist the raw entries. The context manager guarantees that the file is
# flushed and closed as soon as the dump has finished.
with open("../data/entries_v3.pkl", "wb") as entries_file:
    pickle.dump(entries, entries_file)

# working with raw entries

In [28]:
# Optional: reload previously downloaded entries instead of querying the API again.
# NOTE(review): the dump cell above writes entries_v3.pkl while this line reads
# entries_v1.pkl - confirm which version is intended.
#entries = pickle.load(open("../data/entries_v1.pkl", "rb"))

In [94]:
# One row per Scopus entry; nested JSON values are kept as Python objects.
entries_df = pd.DataFrame(entries)

In [95]:
# Preview the first five rows.
entries_df.head(5)

Unnamed: 0,@_fa,link,prism:url,dc:identifier,eid,dc:title,dc:creator,prism:publicationName,prism:eIssn,prism:volume,...,openaccess,openaccessFlag,freetoread,freetoreadLabel,prism:issn,pii,fund-acr,fund-sponsor,prism:isbn,pubmed-id
0,True,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85133680470,2-s2.0-85133680470,An Unwonted Hagiographic Topos,Boicu D.,Religions,20771444,13,...,1,True,"{'value': [{'$': 'all'}, {'$': 'publisherfullg...","{'value': [{'$': 'All Open Access'}, {'$': 'Go...",,,,,,
1,True,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85132346905,2-s2.0-85132346905,Grounded Hope in God: Epiphany and Promise,Moser P.K.,Theology Today,20442556,79,...,0,False,,,405736.0,,,,,
2,True,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85132445095,2-s2.0-85132445095,Nosque Filio in haereditatem a Deo patre donam...,Alba López A.,Scripta Theologica,22546227,54,...,1,True,"{'value': [{'$': 'all'}, {'$': 'publisherhybri...","{'value': [{'$': 'All Open Access'}, {'$': 'Hy...",369764.0,,,,,
3,True,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85130551837,2-s2.0-85130551837,Recasting Paul as a Chauvinist within the West...,Wilson J.A.P.,Religions,20771444,13,...,1,True,"{'value': [{'$': 'all'}, {'$': 'publisherfullg...","{'value': [{'$': 'All Open Access'}, {'$': 'Go...",,,,,,
4,True,"[{'@_fa': 'true', '@ref': 'self', '@href': 'ht...",https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85125526138,2-s2.0-85125526138,Cruciform Onesimus? Considering How a Slave Wo...,Gupta N.,Expository Times,17455308,133,...,0,False,,,145246.0,,,,,


In [96]:
# Remove columns that are not needed for the analysis. Reassigning (instead
# of inplace=True) together with errors="ignore" keeps the cell re-runnable:
# a second execution no longer raises KeyError for already-dropped columns.
entries_df = entries_df.drop(
    columns=['@_fa', 'link', 'freetoread', 'freetoreadLabel'],
    errors="ignore",
)

In [99]:
# Replace the nested {"@total": ...} value with the plain integer author count.
# Values that are not dicts (already converted by an earlier run of this cell,
# or missing) are passed through unchanged, so the cell can be re-run safely.
entries_df['author-count'] = entries_df['author-count'].apply(
    lambda x: int(x["@total"]) if isinstance(x, dict) else x
)

In [105]:
def extract_affiliation_names(x):
    """Return the affiliation names contained in one ``affiliation`` value.

    Parameters
    ----------
    x : iterable of dict, or a missing value
        The affiliation records of a single entry; each record is expected
        to provide an ``"affilname"`` key.

    Returns
    -------
    list
        The ``"affilname"`` value of every record, in order. An empty list
        is returned when ``x`` is not an iterable of dicts (e.g. ``None`` or
        NaN) or when a record has no ``"affilname"`` key.
    """
    try:
        return [el["affilname"] for el in x]
    except (TypeError, KeyError):
        # Missing or malformed values yield an empty list, so every cell of
        # the resulting column holds a list (never None).
        return []
# Derive a flat list of affiliation names per entry from the nested "affiliation" column.
entries_df["affiliation_simple"] = entries_df["affiliation"].apply(extract_affiliation_names)

In [106]:
# Write the processed table to a new "entries_df_v3" worksheet.
# NOTE(review): add_worksheet presumably fails if a worksheet with this
# title already exists - confirm before re-running this cell.
set_with_dataframe(mops_data.add_worksheet("entries_df_v3", 1,1), entries_df)

In [107]:
# Inspect the available column names.
entries_df.columns

Index(['prism:url', 'dc:identifier', 'eid', 'dc:title', 'dc:creator',
       'prism:publicationName', 'prism:eIssn', 'prism:volume',
       'prism:issueIdentifier', 'prism:pageRange', 'prism:coverDate',
       'prism:coverDisplayDate', 'prism:doi', 'dc:description',
       'citedby-count', 'affiliation', 'prism:aggregationType', 'subtype',
       'subtypeDescription', 'author-count', 'author', 'authkeywords',
       'article-number', 'source-id', 'fund-no', 'openaccess',
       'openaccessFlag', 'prism:issn', 'pii', 'fund-acr', 'fund-sponsor',
       'prism:isbn', 'pubmed-id', 'affiliation_simple'],
      dtype='object')

* 'dc:title' - title
* 'dc:creator' - author
* 'prism:publicationName' - name of the journal or of a volume
* 'prism:coverDate' - date of publication
* 'dc:description' - abstract
* 'subtypeDescription' - type of the entry (i.e., Article, Book Chapter, etc.)
* 'author-count' - number of authors
* 'author' - nested data containing additional information about all authors -> needs to be explored in more detail
* 'affiliation_simple' - a list of all mentioned affiliations

entries_df.columns


In [None]:
# Inspect the available column names.
entries_df.columns


# Simple explorations

In [108]:
# Select the entries written by more than one author (relies on the integer
# "author-count" column computed above).
entries_df[entries_df["author-count"] > 1]

Unnamed: 0,prism:url,dc:identifier,eid,dc:title,dc:creator,prism:publicationName,prism:eIssn,prism:volume,prism:issueIdentifier,prism:pageRange,...,fund-no,openaccess,openaccessFlag,prism:issn,pii,fund-acr,fund-sponsor,prism:isbn,pubmed-id,affiliation_simple
10,https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85126848614,2-s2.0-85126848614,Discourse-Rhetorical Strategies of Pauline Epi...,Chidume S.N.,SAGE Open,21582440,12,1,,...,undefined,1,True,,,,,,,[University of Nigeria]
24,https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85127816783,2-s2.0-85127816783,Saint Paul’s Thorn in the Flesh: a Dermatologi...,Pascual A.M.,Scientia et Fides,23535636,10,1,9-27,...,undefined,1,True,23007648,,,,,,"[Hospital Universitari de Bellvitge, Universid..."
55,https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85099398166,2-s2.0-85099398166,How could the spirit not be secular? Dialogue ...,Rohmer C.,Etudes Theologiques et Religieuses,22729011,95,4,625-644,...,undefined,0,False,00142239,,,,,,[Faculté de Montpellier]
82,https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85111726701,2-s2.0-85111726701,"Onesimus, philemon’s runaway slave boy: A brie...",Manus C.U.,HTS Teologiese Studies / Theological Studies,20728050,77,1,,...,undefined,1,True,02599422,,,,,,"[University of Pretoria, National University o..."
89,https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:85105631267,2-s2.0-85105631267,Jesus as a socially (ir)responsible innovator:...,Woods C.,International Journal of Public Theology,15697320,15,1,118-142,...,undefined,0,False,18725171,,,,,,"[The University of Auckland Business School, F..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1039,https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:1542463682,2-s2.0-1542463682,The ethic of caring: the moral response to suf...,Hitchens E.,Christian scholar's review,,23,3,307-317,...,undefined,0,False,00172251,,,,,14628772,[Seattle Pacific University]
1045,https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:84953959083,2-s2.0-84953959083,Testimonium spiritus sancti—an example of bern...,Bell T.M.M.A.C.,Bijdragen,,53,1,62-72,...,undefined,0,False,00062278,,,,,,"[Universiteit van Amsterdam, Dutch Luther Cons..."
1064,https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:0020629376,2-s2.0-0020629376,Teaching Humanistic Medicine,Taegtmeyer H.,New England Journal of Medicine,15334406,309,14,860-862,...,undefined,0,False,00284793,,,,,6888478,[University of Texas Health Science Center at ...
1066,https://api.elsevier.com/content/abstract/scop...,SCOPUS_ID:0020051029,2-s2.0-0020051029,Death after trauma involving the maxillofacial...,Zachariades N.,Journal of Maxillofacial Surgery,,10,C,123-125,...,undefined,0,False,03010503,S0301050382800246,,,,6954224,[KAT Hospital]
