In [1]:
from pybliometrics.scopus import ScopusSearch

In [2]:
# A bug seems to prevent the library to locate the API key.
# This might help.
! export PYB_CONFIG_FILE=~/.pybliometrics/config.ini

# Query string

In [3]:
# Define the search string to use. Any query that works on Advanced Search
# https://www.scopus.com/search/form.uri?display=advanced will also work here.
#query = "eDNA"
query = "eDNA AND metabarcoding AND marine AND diatom"

# Search Scopus

In [4]:
# ScopusSearch(query, 
#              refresh=False, 
#              view=None, 
#              verbose=False, 
#              download=True, 
#              integrity_fields=None, 
#              integrity_action='raise', 
#              subscriber=True, **kwds)
# https://pybliometrics.readthedocs.io/en/stable/classes/ScopusSearch.html

search = ScopusSearch(query, refresh=True, view="COMPLETE", verbose=True)

Downloading results for query "eDNA AND metabarcoding AND marine AND diatom":


100%|██████████| 15/15 [00:15<00:00,  1.02s/it]


# Explore the result

In [5]:
# More information about the atributes of the result can be found at
# https://dev.elsevier.com/sc_search_views.html 
# Title
print(search.results[0].title)

# Author keywords
keywords = []
for i in search.results[0].authkeywords.split("|"):
    keywords.append(i.strip())
print(keywords)

# Abstract
print(search.results[0].description)

# DOI
print(search.results[0].doi)

# eid
print(search.results[0].eid)

# pii
print(search.results[0].pii)

# Journal
print(search.results[0].publicationName)

# Date
print(search.results[0].coverDate)


Pelagic-benthic coupling of the microbial food web modifies nutrient cycles along a cascade-dammed river
['Beta diversity', 'Multi-trophic', 'Nutrient accumulation', 'Predator-prey', 'Reservoir']
Cascade dams disrupt the river continuum, altering hydrology, biodiversity and nutrient flux. Describing the diversity of multi-trophic microbiota and assessing microbial contributions to the ecosystem processes are prerequisites for the restoration of these aquatic systems. This study investigated the microbial food web structure along a cascade-dammed river, paying special attention to the multi-trophic relationships and the potential role of pelagic-benthic coupling in nutrient cycles. Our results revealed the discontinuity in bacterial and eukaryotic community composition, functional group proportion, as well as α-diversity due to fragmentation by damming. The high microbial dissimilarity along the river, with the total multi-trophic β-diversity was 0.84, was almost completely caused by sp

In [6]:
# Display the number of results found for this search string
search.get_results_size()

327

# Explore the result in Pandas

In [7]:
# Convert the search result to a Pandas dataframe 
import pandas as pd
df = pd.DataFrame(search.results)

In [8]:
df

Unnamed: 0,eid,doi,pii,pubmed_id,title,subtype,subtypeDescription,creator,afid,affilname,...,issueIdentifier,article_number,pageRange,description,authkeywords,citedby_count,openaccess,fund_acr,fund_no,fund_sponsor
0,2-s2.0-85114625330,10.1007/s11783-021-1484-5,,,Pelagic-benthic coupling of the microbial food...,ar,Article,Yang N.,60010851;100753814,Hohai University;Changjiang River Scientific R...,...,4,50,,"Cascade dams disrupt the river continuum, alte...",Beta diversity | Multi-trophic | Nutrient accu...,0,0,NNSFC,51779076,National Natural Science Foundation of China
1,2-s2.0-85121660542,10.1016/j.scitotenv.2021.152385,S0048969721074635,,A bacterial index to estimate lake trophic lev...,ar,Article,Pearman J.K.,60032987;60023517;60020376;60019354;60017311;6...,Griffith University;Northern Arizona Universit...,...,,152385,,Lakes and their catchments have been subjected...,Bacteria | Biomonitoring | Environmental DNA |...,0,0,AC,C05X1702,Auckland Council
2,2-s2.0-85121583318,10.1016/j.scitotenv.2021.152380,S0048969721074581,,Dynamics of microbiotic patterns reveal surfac...,ar,Article,Korbel K.L.,60028333;60019544;60017526;116662210,UNSW Sydney;Macquarie University;Western Austr...,...,,152380,,"Exchange between groundwater (GW), hyporheic z...",Connectivity | eDNA | Groundwater | Hyporheic ...,0,0,,undefined,Office of Water Science
3,2-s2.0-85118653253,10.1016/j.scitotenv.2021.151080,S0048969721061581,34678363,Wastewater constituents impact biofilm microbi...,ar,Article,Tamminen M.,60032398;60025858;60006876;60002612,University of Jyväskylä;ETH Zürich;Turun yliop...,...,,151080,,Microbial life in natural biofilms is dominate...,Bacterial community | Biofilm | Diatom | Micro...,0,1,EAWAG,undefined,Eidgenössische Anstalt für Wasserversorgung Ab...
4,2-s2.0-85119610672,10.1016/j.ecss.2021.107661,S0272771421005102,,Co-occurrence of Bacillariophyceae-based- and ...,ar,Article,Kang Y.,60068688;60007215,Gwangju Institute of Science and Technology;Ch...,...,,107661,,We examined microbial food webs in Seomjin Riv...,Bacillariophyceae | Cryptophyceae | Environmen...,0,1,MOF,NRF-2018-R1A6A1A-03024314,Ministry of Oceans and Fisheries
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
322,2-s2.0-84940730866,10.1016/B978-0-12-417015-5.00010-4,B9780124170155000104,,Freshwater conservation and biomonitoring of s...,ch,Book Chapter,Gray C.,60106782;60105356;60025988;60025779;60020661;6...,The James Hutton Institute;School of Biologica...,...,,,241-271,,,2,0,QMUL,undefined,Queen Mary University of London
323,2-s2.0-84925612836,10.1111/mec.13136,,25735209,Reconstructing long-term human impacts on plan...,ar,Article,Pansu J.,60104653;60102124;60019513;60008134;127035527,Universite Grenoble Alpes;Université Fédérale ...,...,7,,1485-1498,Paleoenvironmental studies are essential to un...,anthropocene | environmental DNA | landscape h...,72,0,,undefined,
324,2-s2.0-84929174860,10.1080/08927014.2015.1028923,,25877857,Early detection of eukaryotic communities from...,ar,Article,Pochon X.,60041509;60020376;60005686;60004424,Klaipeda University;Cawthron;The University of...,...,3,,241-251,Marine biofilms are precursors for colonizatio...,18S rRNA gene | Ciona savignyi | High-throughp...,38,0,NIWA,2013/14 SC,National Institute of Water and Atmospheric Re...
325,2-s2.0-84911415683,10.1086/BBLv227n2p93,,25411369,Next-generation environmental diversity survey...,re,Review,Pawlowski J.,60013528;60004718,Institut de Recherche et Coordination Acoustiq...,...,2,,93-106,Foraminifera are commonly defined as marine te...,,49,0,SNF,150817,Schweizerischer Nationalfonds zur F&amp;#x00F6...


In [9]:
# Find papers with specific keywords
import numpy as np

keyword = "zeta diversity"
df[df.authkeywords.str.contains(keyword).replace(np.nan, False)]

Unnamed: 0,eid,doi,pii,pubmed_id,title,subtype,subtypeDescription,creator,afid,affilname,...,issueIdentifier,article_number,pageRange,description,authkeywords,citedby_count,openaccess,fund_acr,fund_no,fund_sponsor


# Explore keywords

In [10]:
import sys
sys.setrecursionlimit(15000)

# Recursive flattening of "list of lists"
# https://stackabuse.com/python-how-to-flatten-list-of-lists/
def flatten_list(list_of_lists):
    if len(list_of_lists) == 0:
        return list_of_lists
    if isinstance(list_of_lists[0], list):
        return flatten_list(list_of_lists[0]) + flatten_list(list_of_lists[1:])
    return list_of_lists[:1] + flatten_list(list_of_lists[1:])

# Combine all keywords in a list of lists
regular_list = df.authkeywords.str.split("|").to_list()
# Flatten the list of lists
fl = flatten_list(regular_list)
# Remove leading and trailing white space from keywords
fl = [str(i).strip() for i in fl]

# Count the keywords
count = {}
for i in fl:
    i = str(i)
    try:
        count[i] += 1
    except:
        count[i] = 1

# Turn the dictionary of keywords into a Pandas dataframe
kw = pd.DataFrame.from_dict(count, orient='index')

# Display the 20 top keywords
kw.sort_values(0, ascending=False).head(20)

Unnamed: 0,0
Metabarcoding,58
,44
eDNA,35
Biomonitoring,33
environmental DNA,33
metabarcoding,32
Environmental DNA,25
biomonitoring,21
DNA metabarcoding,19
Biodiversity,18


# Explore abstracts in html format

In [11]:
# Write title, Elsevier ID, keywords and Abstract to an html file
#original_abstracts = open("original_abstracts.html", "w")

with open("original_abstracts_Scopus.html", "w") as file:
    for paper in search.results:
        title = "<h1>" + paper.title + "</h1>" + "\n"
        eid = "<p>" + paper.eid + "</p>" + "\n"
        try:
            doi = "<p><a href=\"https://doi.org/" + paper.doi + "\">" + "doi:" + paper.doi + "</a></p>" + "\n"
        except TypeError:
            doi = "<p>No DOI</p>"
        if paper.authkeywords is None:
            keywords = "<p>No keywords</p>"
        else:
            keywords = paper.authkeywords
        abstract = "<p>" + str(paper.description) + "</p>" + "\n"
        string = title + eid + doi + keywords + abstract
        
        file.write(string)

# Download fulltext results in XML format

In [None]:
import pybliometrics.scopus
import requests

# Option 1.

for paper in search.results:
    doi = paper.doi
    r = requests.get(f"https://api.elsevier.com/content/article/doi/{doi}?apiKey={pybliometrics.scopus.KEYS[0]}")
    print(str(r.content))

b'<service-error><status><statusCode>RESOURCE_NOT_FOUND</statusCode><statusText>The resource specified cannot be found.</statusText></status></service-error>'
b'<full-text-retrieval-response xmlns="http://www.elsevier.com/xml/svapi/article/dtd" xmlns:bk="http://www.elsevier.com/xml/bk/dtd" xmlns:cals="http://www.elsevier.com/xml/common/cals/dtd" xmlns:ce="http://www.elsevier.com/xml/common/dtd" xmlns:ja="http://www.elsevier.com/xml/ja/dtd" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:sa="http://www.elsevier.com/xml/common/struct-aff/dtd" xmlns:sb="http://www.elsevier.com/xml/common/struct-bib/dtd" xmlns:tb="http://www.elsevier.com/xml/common/table/dtd" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xocs="http://www.elsevier.com/xml/xocs/dtd" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:prism="http://prismstandard.org/namespaces/basic/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><coredata><prism:url>https://api.else