## Examples of retrieving data from Scopus using Scopus API and Elsapy  
Elsapy: https://github.com/ElsevierDev/elsapy (Requres Python 3.x)  
Elsevier Scopus APIs https://dev.elsevier.com/sc_apis.html

In [1]:
#pip install elsapy

In [1]:
from elsapy.elsclient import ElsClient
from elsapy.elssearch import ElsSearch
from elsapy.elsprofile import ElsAuthor
from elsapy.elsdoc import AbsDoc
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

import json

### 1. Define some functions that we will use later

In [2]:
def affiliation_id_serch(schoolname, client):
    """Search affiliation ID by name
    client - object of the ElsClient class  """
    
    school_srch = ElsSearch(' AFFIL(%s)'%schoolname,'affiliation')
    school_srch.execute(client)
    return school_srch.results

def search_auth_by_name(fstname, lstname, schoolid, client):
    """Find author's Scopus ID by first, last name and affiliation ID.
    In case of an error, perform search without affiliation ID and return first author in the serach result
    """
    auth_srch = ElsSearch('AUTHLASTNAME(%s)'%lstname + ' AUTHFIRST(%s)'%fstname + ' AF-ID(%s)'%schoolid,'author')
    auth_srch.execute(client)

    #print ("auth_srch has", len(auth_srch.results), "results.")
    authorfound = auth_srch.results[0]
    
    if 'error' in authorfound.keys():
        status = 'error'
        auth_srch = ElsSearch('AUTHLASTNAME(%s)'%lstname + ' AUTHFIRST(%s)'%fstname ,'author')
        auth_srch.execute(client)

        #print ("auth_srch has", len(auth_srch.results), "results.")
        authorfound = auth_srch.results[0]
        if 'error' in authorfound.keys():
            status = 'error'
            return status, {}
        if 'affiliation-current' in authorfound.keys():
            if authorfound['affiliation-current']['affiliation-id']!=schoolid:
                status = 'new-affil'
            else:
                status = 'success'
        else:
            print(fstname, lstname, ' --- can not find affiliation-current in keys')
            status = 'warning'
    else:
        status = 'success'
    
    return status, authorfound

def auth_metrics(auth_id, client):
    """Read author metrics for a given Scopus author ID auth_id
    client - object of the ElsClient class 
    reurns status, author data, total number of paers, citations and h-index """
    
    my_auth = ElsAuthor(uri = 'https://api.elsevier.com/content/author/author_id/'+auth_id) 

    if my_auth.read(client):
        status = 'success'
    else:
        status = 'error'
        return status, None, None, None, None
    
    my_auth.read_metrics(client)

    if my_auth._data==None:
        npapers, ncitation, hindex = None, None, None
        status = 'error'
    else:
        status = 'success'
        
        npapers = my_auth._data['coredata']['document-count']
        ncitation = my_auth._data['coredata']['citation-count']
        hindex = my_auth._data['h-index']
        
    return status,my_auth._data, npapers, ncitation, hindex

def author_pubs(author_id, client):
    """
    Obtain publication list for a given Scopus author ID author_id
    client - object of the ElsClient class
    returns publication list and full output of the ElsSearch search request
    """
    doc_srch = ElsSearch('AU-ID(%s)'%author_id,'scopus')
    doc_srch.execute(client, get_all = True)

  
    pubs = []
    for rslt in doc_srch.results:
               
        year = rslt['prism:coverDate'].split('-')[0]
        citedby = rslt['citedby-count']
        scopusid = rslt['dc:identifier'].split(':')[1]
        title = rslt['dc:title']
        jrnlname = rslt['prism:publicationName']
        pubs.append({'year':year, 'cited':citedby, 'scopusid': scopusid, 'title': title,'jrnlname': jrnlname})
 
    return pubs, doc_srch.results


## 2. Initialize client (object of the ElsClient class) with your apikey

In [3]:
apikey = '7762f59ce3b9cb9117c74958bab4202b' # insert a valid apikey

## Initialize client
client = ElsClient(apikey)
client.inst_token = '' # leave it blank unless you have it

## 3. Find a unique Scopus affiliation ID for a school of interest

In [3]:
school_srch_results = affiliation_id_serch('um6p', client)

print('Found ', len(school_srch_results), ' schools \n')

print('{:<15} {:>}'.format('Affiliation ID', '| Afiiliation Name'))
print('-'*40)

for school in school_srch_results:
    school_id =  school['dc:identifier'].split(':')[1]
    school_name = school['affiliation-name']
    print( '{:<15}  {:>}'.format(school_id, school_name))


NameError: name 'client' is not defined

##### Search resulted in 25 different affiliations. First one is the one we want

In [8]:
school = school_srch_results[0]

#keep Scopus ID in the school_id variable
school_id = school['dc:identifier'].split(':')[1]
school_name = school['affiliation-name']


Here is other information we already know about this school from the search result

In [6]:
school

{'@_fa': 'true',
 'link': [{'@_fa': 'true',
   '@ref': 'self',
   '@href': 'https://api.elsevier.com/content/affiliation/affiliation_id/60192082'},
  {'@_fa': 'true',
   '@ref': 'search',
   '@href': 'https://api.elsevier.com/content/search/scopus?query=af-id%2860192082%29'},
  {'@_fa': 'true',
   '@ref': 'scopus-affiliation',
   '@href': 'https://www.scopus.com/affil/profile.uri?afid=60192082&partnerID=HzOxMe3b&origin=inward'}],
 'prism:url': 'https://api.elsevier.com/content/affiliation/affiliation_id/60192082',
 'dc:identifier': 'AFFILIATION_ID:60192082',
 'eid': '10-s2.0-60192082',
 'affiliation-name': 'Mohammed VI Polytechnic University',
 'name-variant': [{'@_fa': 'true', '$': 'Mohammed Vi Polytechnic University'}],
 'document-count': '4575',
 'city': 'Ben Guerir',
 'country': 'Morocco',
 'parent-affiliation-id': '0'}

## 4. Find author by last and firsrt name and obtain his unique Scopus ID

In [27]:
first_name = 'Gérard'
last_name = 'Bruno'

auth_srch = ElsSearch('AUTHLASTNAME(%s)'%last_name + ' AUTHFIRST(%s)'%first_name,'author')
auth_srch.execute(client)

print ("Found ", len(auth_srch.results), " authors \n")
authorfound = auth_srch.results[0]

print('{:<6} {:<6} {:<12} {:<15} {:>}'.format('First name |', 'Last name |', 'Scopus ID |', 'Affil ID', '| Affil name'))
print('-'*80)
for author in auth_srch.results:
    #let's look on every author and print the name and affiliaiton stored in Scopus  
    author_id = author['dc:identifier'].split(':')[1]
    first_name_scopus = author['preferred-name']['given-name']
    last_name_scopus = author['preferred-name']['surname']
    affil_name = author['affiliation-current']['affiliation-name']
    affil_id = author['affiliation-current']['affiliation-id']
#     print(author)
    print('{:<12} {:<11} {:<14} {:<14} {:>}'.format(first_name_scopus, last_name_scopus, author_id, affil_id, affil_name))


Found  5  authors 

First name | Last name | Scopus ID |  Affil ID        | Affil name
--------------------------------------------------------------------------------
Bruno        Lebon       55148141500    60020623       Brunel University London
Gérard       Bruno       57197151789    60029640       European Commission
Gerard       Bruno       7202704772     60021160       Cleveland Clinic Foundation
Gérard       Bruno       23481439900    60123163       INRAE
Gerard J.    Bruno       22933342400    60029833       University of Oklahoma Health Sciences Center


In [28]:
# Specify the Scopus ID of the author you want to search for
scopus_id = '8963275500'  # Replace 'AUTHOR_ID_HERE' with the actual Scopus ID

# Perform the search using the Scopus ID
auth_srch = ElsSearch('AU-ID(%s)' % scopus_id, 'author')
auth_srch.execute(client)

print("Found ", len(auth_srch.results), " authors \n")

print('{:<12} {:<12} {:<12} {:<15} {:>}'.format('First name |', 'Last name |', 'Scopus ID |', 'Affil ID', '| Affil name'))
print('-'*80)
for author in auth_srch.results:
    author_id = author['dc:identifier'].split(':')[1]
    first_name_scopus = author['preferred-name']['given-name']
    last_name_scopus = author['preferred-name']['surname']
    affil_name = author['affiliation-current']['affiliation-name']
    affil_id = author['affiliation-current']['affiliation-id']
    print('{:<12} {:<12} {:<12} {:<15} {:>}'.format(first_name_scopus, last_name_scopus, author_id, affil_id, affil_name))

Found  1  authors 

First name | Last name |  Scopus ID |  Affil ID        | Affil name
--------------------------------------------------------------------------------
Ismaïl       Berrada      8963275500   60192082        Mohammed VI Polytechnic University


#### We are looking for author number 4 in the above list. 
#### We can select him manulay or specify a shool name or school affiliation ID in the search query

In [13]:
last_name = 'Morozov'
first_name = 'Yurii'
school_id = '60021508'

auth_srch = ElsSearch('AUTHLASTNAME(%s)'%last_name + ' AUTHFIRST(%s)'%first_name + ' AF-ID(%s)'%school_id, 'author')
auth_srch.execute(client)

print ("Found ", len(auth_srch.results), " authors ")
authorfound = auth_srch.results[0]

for author in auth_srch.results:
    #let's look on every author and print the name and affiliaiton stored in Scopus  
    author_id = author['dc:identifier'].split(':')[1]
    first_name_scopus = author['preferred-name']['given-name']
    last_name_scopus = author['preferred-name']['surname']
    affil_name = author['affiliation-current']['affiliation-name']
    affil_id = author['affiliation-current']['affiliation-id']
    
    print('{:<12} {:<11} {:<14} {:<14} {:>}'.format(first_name_scopus, last_name_scopus, author_id, affil_id, affil_name))

Found  1  authors 
Yurii V.     Morozov     55966498300    60021508       University of Notre Dame


This looks better.  
Keep in mind however, that if author's current affiliation ID is different from the one specified in the request, search will result in error. 

#### Lets look what other information about the author we already know

In [9]:
author

{'@_fa': 'true',
 'link': [{'@_fa': 'true',
   '@ref': 'self',
   '@href': 'https://api.elsevier.com/content/author/author_id/55966498300'},
  {'@_fa': 'true',
   '@ref': 'search',
   '@href': 'https://api.elsevier.com/content/search/author?query=au-id%2855966498300%29'},
  {'@_fa': 'true',
   '@ref': 'scopus-citedby',
   '@href': 'https://www.scopus.com/author/citedby.uri?partnerID=HzOxMe3b&citedAuthorId=55966498300&origin=inward'},
  {'@_fa': 'true',
   '@ref': 'scopus-author',
   '@href': 'https://www.scopus.com/authid/detail.uri?partnerID=HzOxMe3b&authorId=55966498300&origin=inward'}],
 'prism:url': 'https://api.elsevier.com/content/author/author_id/55966498300',
 'dc:identifier': 'AUTHOR_ID:55966498300',
 'eid': '9-s2.0-55966498300',
 'preferred-name': {'surname': 'Morozov',
  'given-name': 'Yurii V.',
  'initials': 'Y.V.'},
 'name-variant': [{'@_fa': 'true',
   'surname': 'Morozov',
   'given-name': 'Yurii',
   'initials': 'Y.'}],
 'document-count': '17',
 'subject-area': [{'@abb

#### As you can see, search result dictionary contains number of publications for a given author accesible via 'document-count' key


In [21]:
print("Author", author['preferred-name']['given-name'], author['preferred-name']['surname'], "has", author['document-count'], " publications")
print("Scopus ID:", author['dc:identifier'].split(':')[1])

Author Yurii V. Morozov has 17  publications
Scopus ID: 55966498300


In [22]:
first_name = 'Yurii'
last_name = 'Morozov'
school_id = '60021508'

search_auth_by_name(first_name, last_name, school_id, client)

('success',
 {'@_fa': 'true',
  'link': [{'@_fa': 'true',
    '@ref': 'self',
    '@href': 'https://api.elsevier.com/content/author/author_id/55966498300'},
   {'@_fa': 'true',
    '@ref': 'search',
    '@href': 'https://api.elsevier.com/content/search/author?query=au-id%2855966498300%29'},
   {'@_fa': 'true',
    '@ref': 'scopus-citedby',
    '@href': 'https://www.scopus.com/author/citedby.uri?partnerID=HzOxMe3b&citedAuthorId=55966498300&origin=inward'},
   {'@_fa': 'true',
    '@ref': 'scopus-author',
    '@href': 'https://www.scopus.com/authid/detail.uri?partnerID=HzOxMe3b&authorId=55966498300&origin=inward'}],
  'prism:url': 'https://api.elsevier.com/content/author/author_id/55966498300',
  'dc:identifier': 'AUTHOR_ID:55966498300',
  'eid': '9-s2.0-55966498300',
  'preferred-name': {'surname': 'Morozov',
   'given-name': 'Yurii V.',
   'initials': 'Y.V.'},
  'name-variant': [{'@_fa': 'true',
    'surname': 'Morozov',
    'given-name': 'Yurii',
    'initials': 'Y.'}],
  'document-co

#### In the following example we will try to find author's Scopus ID using his last and first names and affiliation ID. 
#### And in case of an error, perform a search only with first and last name. Use *search_auth_by_name()* function defined in the beginning.

In [23]:
fstname = 'Zachary'
lstname = 'Schultz'
school_id = '60021508'

status, author  = search_auth_by_name(fstname, lstname, school_id, client)

print('provided affiliation ID ', school_id)
print('Search status =', status)
print('Current affiliation = ', author['affiliation-current']['affiliation-name'], author['affiliation-current']['affiliation-id'])

print("Author",author['preferred-name']['given-name'], author['preferred-name']['surname'], "has", author['document-count'], " publications")
print("Scopus ID:", author['dc:identifier'].split(':')[1])

provided affiliation ID  60021508
Search status = success
Current affiliation =  University of Notre Dame 60021508
Author Zachary D. Schultz has 105  publications
Scopus ID: 6506722050


Satus 'new-affil' means that author was not found with provided affiliation id

## 5. Obtain number of publications, citation count, and h-index for a given author 

In [None]:
affil_name = author['affiliation-current']['affiliation-name']

In [19]:
author_scopus_id = '8963275500'
status, search_result, npubs, ncits, hindex = auth_metrics(author_scopus_id, client)

print(search_result['author-profile']['preferred-name']['given-name'], search_result['author-profile']['preferred-name']['surname'])
print("Number of publications %i."%npubs, "Citations %i."%ncits, " h-index %i."%hindex)

Ismaïl Berrada
Number of publications 75. Citations 448.  h-index 10.


##### As perviously dictionary search_result contains more information about the author such as affiliation history, name variants, journals author has publications, etc.  
##### Below is the list of dicitonary keys

In [25]:
search_result.keys()

dict_keys(['@status', '@_fa', 'coredata', 'affiliation-current', 'affiliation-history', 'subject-areas', 'author-profile', 'h-index'])

##### Among others, I found useful 'publication-range' in 'author-profile' providing interval of years when current author published papers

In [26]:
search_result['author-profile']['publication-range']

{'@end': '2020', '@start': '2013'}

## 6. Obtain list of publications for a given author using author's Scopus ID

In [15]:
author_scopus_id = '8963275500'
publications, full_result = author_pubs(author_scopus_id, client)

In [91]:
#publications

In [None]:
df = pd.DataFrame(publications)
df.head(5)

In [None]:
df= df[['title','year','cited']]
df

In [49]:
#df.to_csv('ttt.csv')

In [68]:
#df = pd.read_csv('scopus1.csv')
#df= df[['Title','Year','Cited by']]
# Transform the DataFrame to text
#text_list = df.apply(lambda row: f"The paper titled '{row['Title']}' from the year {row['Year']} was cited {row['Cited by']} times.", axis=1).tolist()
#text = "\n".join(text_list)
#text



In [80]:
# Transform the DataFrame to text
text_list = df.apply(lambda row: f"The paper titled '{row['title']}' from the year {row['year']} was cited {row['cited']} times.", axis=1).tolist()
text = "\n".join(text_list)
text

'The paper titled \'Addressing sustainable energy intermittence for green ammonia production\' from the year 2023 was cited 4 times.\nThe paper titled \'Extension of Geldart classification based on the magnitude of interparticle forces\' from the year 2023 was cited 0 times.\nThe paper titled \'Reaction mechanism of thermal decomposition of Phosphogypsum\' from the year 2023 was cited 0 times.\nThe paper titled \'An improved position reconstruction method for radioactive particle tracking\' from the year 2023 was cited 0 times.\nThe paper titled \'Quantification of interparticle forces in gas-agglomerated particles fluidized beds\' from the year 2023 was cited 1 times.\nThe paper titled \'High quality products from microwave catalytic pyrolysis of heavy oil and polyethylene\' from the year 2023 was cited 0 times.\nThe paper titled \'Temperature Distribution Assessment in Gas-Solid Reactive and Nonreactive Systems Heated by Microwaves\' from the year 2023 was cited 2 times.\nThe paper t

In [81]:
def count_tokens(text):
    return len(text.split())

#text = "This is a sample sentence to calculate the number of tokens."
print(count_tokens(text))

7870


In [82]:
#pip install spacy
#!python -m spacy download en_core_web_sm

In [83]:
import spacy

nlp = spacy.load("en_core_web_sm")

def count_tokens(text):
    doc = nlp(text)
    return len([token.text for token in doc])

#textt = "OpenAI's large language models (sometimes referred to as GPT's) process text using tokens, which are common sequences of characters found in a set of text. The models learn to understand the statistical relationships between these tokens, and excel at producing the next token in a sequence of tokens.You can use the tool below to understand how a piece of text might be tokenized by a language model, and the total count of tokens in that piece of text.It's important to note that the exact tokenization process varies between models. Newer models like GPT-3.5 and GPT-4 use a different tokenizer than our legacy GPT-3 and Codex models, and will produce different tokens for the same input text."
print(count_tokens(text))


10009


In [84]:
from transformers import GPT2Tokenizer
# Initialize the tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# Tokenize the text and count the number of tokens
tokens = tokenizer.encode(text, return_tensors='pt')
num_tokens = len(tokens[0])
print(f"Number of tokens: {num_tokens}")

Token indices sequence length is longer than the specified maximum sequence length for this model (11818 > 1024). Running this sequence through the model will result in indexing errors


Number of tokens: 11818


Variable *publications* is a list of papers, where for every paper corresponding dictionary contains publiaction year ('year'),  
number of citation ('cited'), scopus ID of this document ('scopusid'), title ('title') and journal where the paper was published ('journal')

In [11]:
for publ in publications:
    print('')
    print(publ['year'], publ['title'])
    print( 'cited ', publ['cited'], 'times',  ' scopus ID:', publ['scopusid'])
    print('published in:', publ['journal'])


2023 Machine Learning Based Recommender Systems for Crop Selection: A Systematic Literature Review
cited  0 times  scopus ID: 85168996421
published in: Studies in Computational Intelligence

2022 A Systematic National Stocktake of Crop Models in Morocco
cited  7 times  scopus ID: 85131224557
published in: Ecological Modelling

2021 A review of center of pressure (COP) variables to quantify standing balance in elderly people: Algorithms and open-access code*
cited  53 times  scopus ID: 85119482532
published in: Physiological Reports

2019 New security architecture using hybrid IDS for virtual private clouds
cited  2 times  scopus ID: 85078316999
published in: 2019 3rd International Conference on Intelligent Computing in Data Sciences, ICDS 2019

2015 A proposal for improving spoken dialog systems using context information fusion
cited  0 times  scopus ID: 84960533080
published in: 2015 18th International Conference on Information Fusion, Fusion 2015

2015 Modeling human-machine interac



If you need more detailes about a publication *full_result* conatains more information

In [12]:
len(full_result), type(full_result)

(10, list)

In [15]:
#full_result

In [13]:
full_result[0].keys()

dict_keys(['@_fa', 'link', 'prism:url', 'dc:identifier', 'eid', 'dc:title', 'dc:creator', 'prism:publicationName', 'prism:issn', 'prism:eIssn', 'prism:volume', 'prism:pageRange', 'prism:coverDate', 'prism:coverDisplayDate', 'prism:doi', 'citedby-count', 'affiliation', 'prism:aggregationType', 'subtype', 'subtypeDescription', 'source-id', 'openaccess', 'openaccessFlag', 'freetoread', 'freetoreadLabel'])

#### For every publucation full_result list contains things like issn, doi, type of the journal, etc. 
#### See full output for one of the publicaitons below

In [166]:
#full_result[0]

### 7. Obtain detailed information about a given publication using Scopus ID

In [311]:
publication_scopus_id = '85168806520'
scp_doc = AbsDoc(scp_id = publication_scopus_id)
if scp_doc.read(client):
    print("Publication record obtained successfully")
    result = scp_doc.data
else:
    print("Something went wrong ... ")
    result = ''

Publication record obtained successfully


#### result contains a lot of information about the paper, author list and their affiliations, abstract of the publication, reference list, etc.  
below is the abstract of the given paper

In [312]:
scp_doc.data['item']['bibrecord']['head']['abstracts']#reference (['head', 'item-info', 'tail'])

'© 2023 by the authors.Accurate and spatially distributed precipitation data are fundamental to effective water resource management. In Morocco, as in other arid and semi-arid regions, precipitation exhibits significant spatial and temporal variability. Indeed, there is an intra- and inter-annual variability and the northwest is rainier than the rest of the country. In the Bouregreg watershed, this irregularity, along with a sparse gauge network, poses a major challenge for water resource management. In this context, remote sensing data could provide a viable alternative. This study aims precisely to evaluate the performance of four gridded daily precipitation products: three IMERG-V06 datasets (GPM-F, GPM-L, and GPM-E) and a reanalysis product (ERA5). The evaluation is conducted using 11 rain gauge stations over a 20-year period (2000–2020) on various temporal scales (daily, monthly, seasonal, and annual) using a pixel-to-point approach, employing different classification and regressi

In [244]:
# Parse the JSON data
data = scp_doc.data

# Extract the authors' names
authors = []
author_groups = data["item"]["bibrecord"]["head"]["author-group"]
for author_group in author_groups:
    print(author_group)
    for author in author_group["author"]:
        full_name = f"{author['ce:given-name']} {author['ce:surname']}"
        authors.append(full_name)

# Extract the abstract
abstract = data["item"]["bibrecord"]["head"]["abstracts"]

# Print the authors' names and the abstract
print("Authors:", authors)
print("\nAbstract:", abstract)

affiliation


TypeError: string indices must be integers

In [89]:
#author_groups

In [162]:
# Extract the authors' information including name, organization, and affiliation
authors_info = []
author_groups = data["item"]["bibrecord"]["head"]["author-group"]
for author_group in author_groups:
    for author in author_group["author"]:
        full_name = f"{author['ce:given-name']} {author['ce:surname']}"
        organizations = [org["$"] for org in author_group["affiliation"]["organization"]]
        authors_info.append({
            "name": full_name,
            "organizations": organizations
        })

# Extract the abstract
abstract = data["item"]["bibrecord"]["head"]["abstracts"]

# Print the authors' names, organizations, and the abstract
for author_info in authors_info:
    print("Author:", author_info["name"])
    print("Organizations:", ", ".join(author_info["organizations"]))
print("\nAbstract:", abstract)

Author: Oumnia Ennaji
Organizations: Chair of Soil Science, University Mohammed VI Polytechnic
Author: Leonardus Vergütz
Organizations: Chair of Soil Science, University Mohammed VI Polytechnic
Author: Oumnia Ennaji
Organizations: African Genome Center, University Mohammed VI Polytechnic
Author: Achraf El Allali
Organizations: African Genome Center, University Mohammed VI Polytechnic

Abstract: © 2023 The AuthorsIn agriculture, precise fertilization and effective nutrient management are critical. Machine learning (ML) has recently been increasingly used to develop decision support tools for modern agricultural systems, including nutrient management, to improve yields while reducing expenses and environmental impact. ML based systems require huge amounts of data from different platforms to handle non-linear tasks and build predictive models that can improve agricultural productivity. This study reviews machine learning based techniques for estimating fertilizer and nutrient status that 

In [181]:
import pandas as pd

# Extract the authors' information including name, organization, and affiliation
authors_info = []
author_groups = data["item"]["bibrecord"]["head"]["author-group"]
for author_group in author_groups:
    for author in author_group["author"]:
        full_name = f"{author['ce:given-name']} {author['ce:surname']}"
        organizations = [org["$"] for org in author_group.get("affiliation", {}).get("organization", [])]  # Handling potential absence of affiliation or organization key
        authors_info.append({
            "name": full_name,
            "organizations": ", ".join(organizations)  # Join organizations with commas
        })

# Create a DataFrame from the authors_info list of dictionaries
df = pd.DataFrame(authors_info)

# Save the DataFrame to a CSV file
#df.to_csv('authors_info.csv', index=False)

# Extract the abstract
abstract = data["item"]["bibrecord"]["head"]["abstracts"]

# Print the authors' names, organizations, and the abstract
for index, author_info in df.iterrows():
    print("Author:", author_info["name"])
    print("Organizations:", author_info["organizations"])
print("\nAbstract:", abstract)


Unnamed: 0,name,organizations
0,Oumnia Ennaji,"Chair of Soil Science, University Mohammed VI ..."
1,Leonardus Vergütz,"Chair of Soil Science, University Mohammed VI ..."
2,Oumnia Ennaji,"African Genome Center, University Mohammed VI ..."
3,Achraf El Allali,"African Genome Center, University Mohammed VI ..."


In [153]:
#scp_doc.data['item']['bibrecord']['head']['author-group']

#### Save information about this publication to json file *publication_record.json*

In [134]:
with open('publication_record.json', 'w') as fp:
    json.dump(result, fp, indent=4) 

In [167]:
#####################################

In [26]:
author_scopus_id = '38861057100'
publications, full_result = author_pubs(author_scopus_id, client)
dff = pd.DataFrame(publications)
dff

Unnamed: 0,year,cited,scopusid,title,jrnlname
0,2024,1,85196673994,Species interactions and bacterial inoculation...,Environmental and Experimental Botany
1,2024,1,85194922437,Drought-tolerant rhizobacteria with predicted ...,Microbiological Research
2,2024,0,85192324913,Use of organo-mineral amendments and Lupinus a...,Environmental and Experimental Botany
3,2024,0,85196042809,Multifunctional agricultural inputs based on b...,Algal Research
4,2024,1,85189301682,Phosphate bio-solubilization and cadmium toler...,Environmental and Experimental Botany
...,...,...,...,...,...
57,2012,26,84858674150,Identification at the species and symbiovar le...,Systematic and Applied Microbiology
58,2011,16,80052648526,Nodule phosphorus requirement and O <inf>2</in...,Acta Agriculturae Scandinavica Section B: Soil...
59,2011,28,84876434387,"Effect of salinity on nodulation, nitrogen fix...",Symbiosis
60,2011,55,79960757384,Agro-physiological responses of Moroccan alfal...,Seed Science and Technology


In [27]:
dff.to_csv('Bargaz, Adnane.csv',index = False )

In [30]:
def get_authors_and_abstracts(scopusid):
    scp_doc = AbsDoc(scp_id = scopusid)
    if scp_doc.read(client):
        result = scp_doc.data
        # Extracting authors
        #authors_info = []
        #author_groups = result["item"]["bibrecord"]["head"]["author-group"]
        #for author_group in author_groups:
           # for author in author_group["author"]:
             #   full_name = f"{author['ce:given-name']} {author['ce:surname']}"
              #  authors_info.append(full_name)
        #authors = ', '.join(authors_info)
        # Extracting abstracts
        abstracts = result["item"]["bibrecord"]["head"]["abstracts"]
        return pd.Series([abstracts], index=['Abstracts'])
    else:
        return pd.Series([None, None], index=['Abstracts'])

# Applying the function to each row of the dataframe
dff[['Abstracts']] = dff['scopusid'].apply(lambda x: get_authors_and_abstracts(x))

KeyboardInterrupt: 

In [None]:
dff

In [None]:
# Transform the DataFrame to text
text_list = dff.apply(lambda row: f"The paper titled '{row['title']}' from the year {row['year']} was cited {row['cited']} times.", axis=1).tolist()
text = "\n".join(text_list)
text

In [216]:
#for t in dff['scopusid'].head(5):
scp_doc = AbsDoc(scp_id = '85163336572')
result = scp_doc.data
scp_doc

<elsapy.elsdoc.AbsDoc at 0x1f285e42280>

In [322]:
scp_doc = AbsDoc(scp_id = '85171596509')
if scp_doc.read(client):
    print("Publication record obtained successfully")
    result = scp_doc.data
else:
    print("Something went wrong ... ")
    result = ''

Publication record obtained successfully


In [323]:
article_title = result["item"]["bibrecord"]["head"]["citation-title"]
article_title

'Energy efficiency and hygrothermal performance of hemp clay walls for Moroccan residential buildings: An integrated lab-scale, in-situ and simulation-based assessment'

In [88]:
#result

In [324]:
with open('result.json', 'w') as file:
    json.dump(data, file, indent=4, sort_keys=True)

In [319]:
dz = pd.DataFrame()
for t in dff['scopusid']:
    scp_doc = AbsDoc(scp_id = t)
    if scp_doc.read(client):
        result = scp_doc.data
    else:
        print("Something went wrong ... ")
    #data = scp_doc.data
    print(t)
    #print(result)
    # Extract the article title
    article_title = result["item"]["bibrecord"]["head"]["citation-title"]

    # Extract authors' information including name and affiliation
    authors_info = []
    author_groups = result["item"]["bibrecord"]["head"]["author-group"]
    for author_group in author_groups:
        for author in author_group["author"]:
            full_name = f"{author['ce:given-name']} {author['ce:surname']}"
            #organizations = [org["$"] for org in author_group["affiliation"]["organization"]]
            authors_info.append({
                "Author": full_name
               # "Affiliation": ", ".join(organizations)
            })
    # Create a DataFrame
    df = pd.DataFrame(authors_info)

    # Add Scopus ID and Article Title columns
    #df["ScopusID"] = "your_scopus_id_here"  # Replace with the actual Scopus ID
    df["Article Title"] = article_title

    # Reorder columns
    df = df[[ "Article Title", "Author"]]
    # Print the DataFrame
    df = df.groupby('Article Title').agg({
        'Author': ', '.join
    }).reset_index()
    df['scopusid'] = t
    df['abstracts'] = result["item"]["bibrecord"]["head"]["abstracts"]
    # Concatenate the dataframes vertically (along rows)
    dz = pd.concat([df,dz])
    # Reset the index of the concatenated dataframe
    dz.reset_index(drop=True, inplace=True)
dz   

85163336572
85161355854
85149836777
85164146999


KeyboardInterrupt: 

In [318]:
#dz['abstracts']

In [328]:

# Extract the article title
article_title = data["item"]["bibrecord"]["head"]["citation-title"]

# Extract authors' information including name and affiliation
authors_info = []
author_groups = data["item"]["bibrecord"]["head"]["author-group"]
for author_group in author_groups:
    for author in author_group["author"]:
        full_name = f"{author['ce:given-name']} {author['ce:surname']}"
        #organizations = [org["$"] for org in author_group["affiliation"]["organization"]]
        authors_info.append({
            "Author": full_name
           # "Affiliation": ", ".join(organizations)
        })
# Create a DataFrame
df = pd.DataFrame(authors_info)

# Add Scopus ID and Article Title columns
#df["ScopusID"] = "your_scopus_id_here"  # Replace with the actual Scopus ID
df["Article Title"] = article_title

# Reorder columns
df = df[[ "Article Title", "Author"]]
# Print the DataFrame
df = df.groupby('Article Title').agg({
    'Author': ', '.join
}).reset_index()
df['scopusid'] = t
df

TypeError: list indices must be integers or slices, not str

In [327]:
import requests
import json


def fetch_scopus_data(start=0, count=25):
    base_url = "https://api.elsevier.com/content/search/scopus"
    
    headers = {
        "Accept": "application/json",
        "X-ELS-APIKey": '7762f59ce3b9cb9117c74958bab4202b'
    }
    
    params = {
        "query": "AFFIL(um6p)",
        "start": start,
        "count": count
    }
    
    response = requests.get(base_url, headers=headers, params=params)
    
    return response.json()

# total number of documents to fetch
total_documents = 200
count_per_request = 25  # you can adjust this number based on your preference or Scopus API limits


dz = pd.DataFrame(authors_info)
for start in range(0, total_documents, count_per_request):
    data = fetch_scopus_data(start=start, count=count_per_request)
    #print(json.dumps(data, indent=2))  # Pretty print the JSON data, or you can process/save data as needed
    # Assuming json_data contains your JSON object
    json_data = data

    entries = json_data['search-results']['entry']

    # Extracting the information
   
    data = []
    for entry in entries:
        title = entry.get('dc:title')
        authors = entry.get('dc:creator')
        source = entry.get('prism:publicationName')
        year = entry.get('prism:coverDate')
        citations = entry.get('citedby-count')
        Source_type = entry.get('subtypeDescription')
        openaccess = entry.get('openaccess')
        SCOPUS_ID = entry.get('dc:identifier')

        data.append([title, authors, source, year, citations,Source_type,openaccess,SCOPUS_ID])

    # Creating a pandas DataFrame
    columns = ['Document title', 'Authors', 'Source', 'Year', 'Citations','Source_type','openaccess','SCOPUS_ID']
    df = pd.DataFrame(data, columns=columns)
    dz = pd.concat([df,dz])
    # Reset the index of the concatenated dataframe
    dz.reset_index(drop=True, inplace=True)
    # Now, df contains the extracted data as a pandas DataFrame
dz['SCOPUS_ID'] = dz['SCOPUS_ID'].str.replace('SCOPUS_ID:', '', regex=False)
dz

Unnamed: 0,Document title,Authors,Source,Year,Citations,Source_type,openaccess,SCOPUS_ID,Author
0,Comparison of Phenol Adsorption Property and M...,Dehmani Y.,Water (Switzerland),2023-05-01,4,Article,1,85160597789,
1,Assessment of GPM Satellite Precipitation Perf...,Benkirane M.,Atmosphere,2023-05-01,1,Article,1,85160587631,
2,Large Field Screening for Resistance to Broomr...,En-nahli Y.,Plants,2023-05-01,0,Article,1,85160574283,
3,First Report of the Branched Broomrape (Phelip...,El Amri M.,Horticulturae,2023-05-01,0,Article,1,85160250452,
4,PCA-based detection of phosphorous deficiency ...,El-Mejjaouy Y.,PLoS ONE,2023-05-01,0,Article,1,85160157715,
...,...,...,...,...,...,...,...,...,...
202,,,,,,,,,Cristian Mauricio Vega Cuichán
203,,,,,,,,,Ésio de Castro Paes
204,,,,,,,,,Cleberson Ribeiro
205,,,,,,,,,Leonardus Vergütz


In [302]:
dz.to_csv('all_the data.csv')

In [288]:
import requests
import json

def fetch_scopus_data(start=0, count=25):
    base_url = "https://api.elsevier.com/content/search/scopus"
    
    headers = {
        "Accept": "application/json",
        "X-ELS-APIKey": '7762f59ce3b9cb9117c74958bab4202b'  # Make sure api_key is defined
    }
    
    params = {
        "query": "AFFIL(um6p)",
        "start": start,
        "count": count
    }
    
    response = requests.get(base_url, headers=headers, params=params)
    
    return response.json()


# total number of documents to fetch
total_documents = 200
count_per_request = 25  # you can adjust this number based on your preference or Scopus API limits

# List to accumulate data
all_data = []

for start in range(0, total_documents, count_per_request):
    data = fetch_scopus_data(start=start, count=count_per_request)
    all_data.append(data)  # Add fetched data to the list

# Save all data to a JSON file
#with open('scopus_data.json', 'w') as file:
   # json.dump(all_data, file, indent=2)


In [289]:
all_data

[{'search-results': {'opensearch:totalResults': '1371',
   'opensearch:startIndex': '0',
   'opensearch:itemsPerPage': '25',
   'opensearch:Query': {'@role': 'request',
    '@searchTerms': 'AFFIL(um6p)',
    '@startPage': '0'},
   'link': [{'@_fa': 'true',
     '@ref': 'self',
     '@href': 'https://api.elsevier.com/content/search/scopus?start=0&count=25&query=AFFIL%28um6p%29',
     '@type': 'application/json'},
    {'@_fa': 'true',
     '@ref': 'first',
     '@href': 'https://api.elsevier.com/content/search/scopus?start=0&count=25&query=AFFIL%28um6p%29',
     '@type': 'application/json'},
    {'@_fa': 'true',
     '@ref': 'next',
     '@href': 'https://api.elsevier.com/content/search/scopus?start=25&count=25&query=AFFIL%28um6p%29',
     '@type': 'application/json'},
    {'@_fa': 'true',
     '@ref': 'last',
     '@href': 'https://api.elsevier.com/content/search/scopus?start=1346&count=25&query=AFFIL%28um6p%29',
     '@type': 'application/json'}],
   'entry': [{'@_fa': 'true',
     'li

In [296]:
import pandas as pd

# Assuming json_data contains your JSON object
json_data = all_data

entries = json_data[0]['search-results']['entry']

# Extracting the information
data = []
for entry in entries:
    title = entry.get('dc:title')
    authors = entry.get('dc:creator')
    #source = entry.get('prism:publicationName')
    year = entry.get('prism:coverDate')[:4] if entry.get('prism:coverDate') else None
    citations = entry.get('citedby-count')
    
    data.append([title, authors, source, year, citations])

# Creating a pandas DataFrame
columns = ['Document title', 'Authors', 'Year', 'Citations']
df = pd.DataFrame(data, columns=columns)

# Now, df contains the extracted data as a pandas DataFrame
df

Unnamed: 0,Document title,Authors,Source,Year,Citations
0,Investigating the long-term stability of photo...,Essahili O.,Journal of Photochemistry and Photobiology A: ...,2024,0
1,Comparison of Optimal Sensor Placement Technic...,Mghazli M.O.,Lecture Notes in Civil Engineering,2024,0
2,Multi-Agent Deep Reinforcement Learning for co...,Knari A.,Ad Hoc Networks,2024,0
3,"Climate impact, institutional context, and nat...",Yoon H.,Journal of Business Venturing,2024,0
4,Effect of Bilayer Variation on the Properties ...,Aouadi K.,Mechanisms and Machine Science,2024,0
5,Energy efficiency and hygrothermal performance...,Es-sakali N.,Applied Energy,2023,0
6,Effect of alkaline leaching of phosphogypsum o...,Bounaga A.,Science of the Total Environment,2023,1
7,Chlorophyll performances as an indicator of co...,El Hayany B.,International Journal of Recycling of Organic ...,2023,0
8,Impact of agricultural input subsidy policy on...,Camara A.,Economic Modelling,2023,0
9,XRF online analyzer for measurements of P<inf>...,Ben Amar I.,Scientific Reports,2023,0


In [20]:
import requests

# Define your API key
api_key = '7762f59ce3b9cb9117c74958bab4202b'

# Define the Scopus article ID
article_id = '85168996421'

# Define the API endpoint URL
url = f'https://api.elsevier.com/content/subject/scopus?description={article_id}'

# Set the headers with your API key
headers = {
    'Accept': 'application/json',
    'X-ELS-APIKey': api_key,
}

# Make the GET request to the API
response = requests.get(url, headers=headers)

# Check if the request was successful
if response.status_code == 200:
    # Parse and use the response data
    data = response.json()
    print(data)
else:
    # Handle errors
    print(f"Error: {response.status_code} - {response.text}")


{'subject-classifications': {'error': 'No results found'}}
