In [None]:
from pybliometrics.scopus import ScopusSearch

In [None]:
# A bug seems to prevent the library from locating the API key.
# Pointing PYB_CONFIG_FILE at the configuration file might help.
#
# `! export ...` would run in a throw-away subshell, so the variable would
# never reach the kernel process; set it on the kernel's own environment.
# NOTE(review): if pybliometrics reads this variable at import time, this cell
# has to run before the cell that imports pybliometrics — confirm.
import os

os.environ["PYB_CONFIG_FILE"] = os.path.expanduser("~/.pybliometrics/config.ini")

# Query string

In [None]:
# Search string sent to Scopus. Any query that works on the Advanced Search form
# (https://www.scopus.com/search/form.uri?display=advanced) will also work here.
query = "reproducible AND eDNA"

# Search Scopus

In [None]:
# Signature of the search class, for reference
# (https://pybliometrics.readthedocs.io/en/stable/classes/ScopusSearch.html):
#   ScopusSearch(query, refresh=False, view=None, verbose=False,
#                download=True, integrity_fields=None,
#                integrity_action='raise', subscriber=True, **kwds)

# Run the query; only refresh, view and verbose differ from the defaults
# listed above.
search = ScopusSearch(
    query,
    verbose=True,
    view="COMPLETE",
    refresh=True,
)

# Explore the result

In [None]:
# More information about the attributes of the result can be found at
# https://dev.elsevier.com/sc_search_views.html

# Look the result list up once instead of once per printed field, and make
# sure there is a first record before indexing into it (pybliometrics
# documents `results` as None when the query matched nothing).
results = search.results
if results:
    first = results[0]
    # Printed in this order:
    # title, author keywords, abstract, DOI, eid, pii, journal, date
    for field in (
        "title",
        "authkeywords",
        "description",
        "doi",
        "eid",
        "pii",
        "publicationName",
        "coverDate",
    ):
        print(getattr(first, field))
else:
    print("The search returned no results.")


In [None]:
# Show how many results Scopus reports for this search string
# (the value is rendered as the cell output).
search.get_results_size()

# Explore the result in Pandas

In [None]:
# Convert the search result to a Pandas dataframe.
# A single DataFrame constructor is enough; wrapping the frame in a second
# pd.DataFrame(...) call only rebuilt the same table.
import pandas as pd
df = pd.DataFrame(search.results)

In [None]:
# Show the search-result dataframe as the cell output
df

In [None]:
# Find papers with specific keywords
import numpy as np  # no longer needed by this cell; kept in case other cells rely on `np`

keyword = "zeta diversity"
# regex=False: match the keyword literally, so characters such as "(" or "+"
#              in a keyword are not interpreted as a regular expression.
# na=False:    papers without author keywords count as "no match" directly,
#              instead of patching the NaN values of the mask afterwards.
df[df.authkeywords.str.contains(keyword, regex=False, na=False)]

# Explore keywords

In [None]:
# Flattening of an arbitrarily nested "list of lists".
# Originally based on https://stackabuse.com/python-how-to-flatten-list-of-lists/
def flatten_list(list_of_lists):
    """Return a new flat list with the items of all nested lists, in order.

    Only ``list`` instances are expanded; every other item (strings, numbers,
    None, NaN, tuples, ...) is copied to the output unchanged.

    The traversal is iterative: an explicit stack of iterators replaces the
    recursion, so the function neither hits Python's recursion limit on long
    inputs nor copies the tail of the list for every element.

    Parameters
    ----------
    list_of_lists : list
        A list whose items may themselves be (nested) lists.

    Returns
    -------
    list
        The flattened items; an empty list for empty input.
    """
    flat = []
    # One iterator per nesting level currently being walked.
    pending = [iter(list_of_lists)]
    while pending:
        for item in pending[-1]:
            if isinstance(item, list):
                # Descend into the nested list; the outer iterator keeps its
                # position and is resumed once the nested one is exhausted.
                pending.append(iter(item))
                break
            flat.append(item)
        else:
            # Innermost iterator is exhausted: go back up one level.
            pending.pop()
    return flat

from collections import Counter

# Combine all keywords in a list of lists (one entry per paper; papers
# without author keywords contribute a missing value instead of a list)
regular_list = df.authkeywords.str.split("|").to_list()
# Flatten the list of lists
fl = flatten_list(regular_list)
# Keep real keywords only and remove leading and trailing white space.
# Missing values are skipped here; stringifying them would add a bogus
# "nan"/"None" keyword to the counts.
fl = [i.strip() for i in fl if isinstance(i, str)]

# Count the keywords
count = dict(Counter(fl))

# Turn the dictionary of keywords into a Pandas dataframe
# (keywords become the index, the counts end up in column 0)
kw = pd.DataFrame.from_dict(count, orient='index')

# Display the 20 most frequent keywords
kw.sort_values(0, ascending=False).head(20)

# Explore abstracts in html format

In [None]:
# Write title, Elsevier ID, DOI, keywords and abstract of every paper to an
# HTML file, one group of elements per paper.
import html

# The file is opened exactly once, inside a context manager, so the handle is
# always closed. UTF-8 is set explicitly so that non-ASCII characters in
# titles and abstracts do not depend on the platform's default encoding.
with open("original_abstracts.html", "w", encoding="utf-8") as file:
    # `or []`: nothing is written when the search returned no results.
    for paper in search.results or []:
        # Text taken from the records is escaped so that characters such as
        # "<" or "&" in a title or abstract cannot break the markup.
        title = "<h1>" + html.escape(paper.title or "No title") + "</h1>" + "\n"
        eid = "<p>" + html.escape(str(paper.eid)) + "</p>" + "\n"
        if paper.doi is None:
            doi = "<p>No DOI</p>" + "\n"
        else:
            safe_doi = html.escape(paper.doi)
            doi = "<p><a href=\"https://doi.org/" + safe_doi + "\">" + "doi:" + safe_doi + "</a></p>" + "\n"
        if paper.authkeywords is None:
            keywords = "<p>No keywords</p>" + "\n"
        else:
            keywords = "<p>" + html.escape(paper.authkeywords) + "</p>" + "\n"
        if paper.description is None:
            abstract = "<p>No abstract</p>" + "\n"
        else:
            abstract = "<p>" + html.escape(str(paper.description)) + "</p>" + "\n"
        string = title + eid + doi + keywords + abstract

        file.write(string)

# Download fulltext results in XML format

In [None]:
import pybliometrics.scopus
import requests

# Option 1.
# Fetch the article behind each DOI from the Elsevier article endpoint and
# print the response body.

# The API key is sent as a request header rather than as part of the URL, so
# it does not end up in URLs that get logged or printed.
api_headers = {"X-ELS-APIKey": pybliometrics.scopus.KEYS[0]}

# `or []`: nothing to download when the search returned no results.
for paper in search.results or []:
    doi = paper.doi
    if doi is None:
        # Without a DOI the request would go to ".../doi/None".
        continue
    r = requests.get(
        f"https://api.elsevier.com/content/article/doi/{doi}",
        headers=api_headers,
        timeout=30,  # do not hang the notebook on an unresponsive connection
    )
    if not r.ok:
        # Report the failure and carry on with the next paper.
        print(f"{doi}: request failed with HTTP status {r.status_code}")
        continue
    # Decode the payload instead of printing the bytes repr (b'...').
    # NOTE(review): assumes the service answers in UTF-8 — confirm.
    print(r.content.decode("utf-8", errors="replace"))