In [1]:
####################################################
## Demo of How to Extract Data from Scientometric APIs or Sources
##
## 1) rScopus (Publication-level and Author-level Data)
## 2) rAltmetric (Publication-level Data)
## 3) SciImago (Journal-level Data)
##
## Author: Christopher Meaney
## Date: March 2022
####################################################

In [2]:
## Options for how many rows/cols displayed when printing matrices/dataframes/etc. in Jupyter
options(repr.matrix.max.rows=100) 
options(repr.matrix.max.cols=50) 

In [3]:
## Package dependencies
library(rscopus)
library(rAltmetric)

"package 'rscopus' was built under R version 4.0.5"


In [4]:
#######################################################
## Quick Look at rscopus API
########################################################

In [5]:
##
## Set API key --- https://dev.elsevier.com/
##
api_key <- set_api_key("enter_your_api_key")
api_key

NULL

In [6]:
##
## Test queries
## 
## Note: you can obtain Scopus identifiers using: https://www.scopus.com/freelookup/form/author.uri
##

## Christopher Meaney scopusID
cm_id <- "35765360800"
## Rahim Moineddin scopusID
rm_id <- "6603577135"
## Eva Grunfeld scopusID
eg_id <- "7004206728"
## Peter Selby scopusID
ps_id <- "35465440100"

In [7]:
## Grab the papers associated with particular scopusID
papers <- author_df(au_id=ps_id, api_key=api_key, verbose=FALSE)
# str(papers)

In [8]:
dim(papers)

In [9]:
data.frame(scopus_vars=names(papers))

scopus_vars
<chr>
@_fa
prism:url
dc:identifier
eid
dc:title
dc:creator
prism:publicationName
prism:issn
prism:eIssn
prism:volume


In [10]:
##############################################
##
## Scopus abstract retrieval by DOI --- collects additional publication level information not included in author_df call above
##
##############################################

In [11]:
## eid corresponding to paper I collaborated on with Alan Monavari on obesity over time in CPCSSN data
test_eid <- "2-s2.0-85124776043"

In [12]:
abstract <- abstract_retrieval(id=test_eid, identifier="eid", http_end=NULL)
#str(abstract)

HTTP specified is:https://api.elsevier.com/content/abstract/eid/2-s2.0-85124776043




In [13]:
names(abstract)

In [14]:
names(abstract$content)

In [15]:
names(abstract[["content"]][["abstracts-retrieval-response"]])

In [16]:
## Author Info
str(abstract[["content"]][["abstracts-retrieval-response"]][["authors"]][["author"]])

List of 6
 $ :List of 11
  ..$ ce:given-name  : chr "Hamidreza"
  ..$ preferred-name :List of 4
  .. ..$ ce:given-name  : chr "Hamidreza"
  .. ..$ ce:initials    : chr "H."
  .. ..$ ce:surname     : chr "Goodarzynejad"
  .. ..$ ce:indexed-name: chr "Goodarzynejad H."
  ..$ @seq           : chr "1"
  ..$ ce:initials    : chr "H."
  ..$ @_fa           : chr "true"
  ..$ affiliation    :List of 2
  .. ..$ @id  : chr "60122754"
  .. ..$ @href: chr "https://api.elsevier.com/content/affiliation/affiliation_id/60122754"
  ..$ ce:degrees     : chr "MD PhD"
  ..$ ce:surname     : chr "Goodarzynejad"
  ..$ @auid          : chr "13609367900"
  ..$ author-url     : chr "https://api.elsevier.com/content/author/author_id/13609367900"
  ..$ ce:indexed-name: chr "Goodarzynejad H."
 $ :List of 11
  ..$ ce:given-name  : chr "Christopher"
  ..$ preferred-name :List of 4
  .. ..$ ce:given-name  : chr "Christopher"
  .. ..$ ce:initials    : chr "C."
  .. ..$ ce:surname     : chr "Meaney"
  .. ..$ ce:indexe

In [17]:
## Affiliation Info
str(abstract[["content"]][["abstracts-retrieval-response"]][["affiliation"]])

List of 4
 $ :List of 5
  ..$ affiliation-city   : chr "Tehran"
  ..$ @id                : chr "60122754"
  ..$ affilname          : chr "Tehran Heart Center"
  ..$ @href              : chr "https://api.elsevier.com/content/affiliation/affiliation_id/60122754"
  ..$ affiliation-country: chr "Iran"
 $ :List of 5
  ..$ affiliation-city   : chr "Toronto"
  ..$ @id                : chr "60021600"
  ..$ affilname          : chr "University of Toronto Faculty of Medicine"
  ..$ @href              : chr "https://api.elsevier.com/content/affiliation/affiliation_id/60021600"
  ..$ affiliation-country: chr "Canada"
 $ :List of 5
  ..$ affiliation-city   : chr "Guelph"
  ..$ @id                : chr "60015881"
  ..$ affilname          : chr "University of Guelph"
  ..$ @href              : chr "https://api.elsevier.com/content/affiliation/affiliation_id/60015881"
  ..$ affiliation-country: chr "Canada"
 $ :List of 5
  ..$ affiliation-city   : chr "Markham"
  ..$ @id                : chr "60009031

In [18]:
#######################################################################
##
## Note: There exist various APIs you can use to extract bibliometric information on peer-reviewed academic publications
##
## 1) rscopus: https://cran.r-project.org/web/packages/rscopus/index.html
## 2) easyPubmed: https://cran.r-project.org/web/packages/easyPubMed/index.html
## 3) wosr: https://cran.r-project.org/web/packages/wosr/index.html
##
#######################################################################

In [19]:
###############################################
##
## Leverage the document level indicator information ("doi","eid",etc.) to access Almetrics.com API
##
###############################################

In [20]:
## List of DOIs and publication titles
head(papers[,c("prism:doi","dc:title")], n=3)

Unnamed: 0_level_0,prism:doi,dc:title
Unnamed: 0_level_1,<chr>,<chr>
1,10.1016/j.drugalcdep.2022.109312,Evaluating the effectiveness of bupropion and varenicline for smoking cessation using an internet-based delivery system: A pragmatic randomized controlled trial (MATCH study)
2,10.1016/j.psc.2021.11.006,Coronavirus Disease 2019 and the Impact on Substance Use Disorder Treatments
3,10.1093/ntr/ntab235,A Measure of Illness Awareness in Individuals With Nicotine Dependence-Nicotine Use Awareness and Insight Scale


In [21]:
## An article I collaborated on with Milena Forte on child vaccinations during COVID-19
child_vax_covid <- altmetrics(doi="10.9778/cmajo.20210084")
child_vax_covid

Altmetrics on: "Routine childhood vaccination rates in an academic family health team before and during the first wave of the COVID-19 pandemic: a pre–post analysis of a retrospective chart review" with altmetric_id: 121551310 published in CMAJ Open.
                        stats
cited_by_posts_count       28
cited_by_tweeters_count    22
cited_by_accounts_count    22

In [22]:
## An article I collaborated on with Nav Persaud investigating effectiveness of diclectin for treatment nausea during pregnancy
diclectin <- altmetrics(doi="10.1371/journal.pone.0189978")
diclectin

Altmetrics on: "Doxylamine-pyridoxine for nausea and vomiting of pregnancy randomized placebo controlled trial: Prespecified analyses and reanalysis" with altmetric_id: 31816358 published in PLOS ONE.
                         stats
cited_by_posts_count       101
cited_by_msm_count          56
cited_by_feeds_count         4
cited_by_tweeters_count     33
cited_by_fbwalls_count       2
cited_by_wikipedia_count     1
cited_by_policies_count      1
cited_by_accounts_count     97

In [23]:
######################################################################
##
## Note: There exist various APIs one can engage with (via R) in order to extract ALtmetric/Webometric data
##
## 1) rAltmetric: https://cran.r-project.org/web/packages/rAltmetric/README.html
## 2) dimensionsR: https://github.com/massimoaria/dimensionsR
## 3) Plum Analytics (no R package): https://plumanalytics.com/announcing-plumx-widgets-and-our-open-data-api/
##
######################################################################

In [24]:
#######################################################
##
## Journal data from SciImago: 
## https://www.scimagojr.com/journalrank.php
##
#######################################################

In [25]:
sjr <- read.csv(url("https://www.scimagojr.com/journalrank.php?out=xls"), sep=";", header=TRUE)
#str(sjr)

In [26]:
dim(sjr)

In [27]:
data.frame(sjr_vars=names(sjr))

sjr_vars
<chr>
Rank
Sourceid
Title
Type
Issn
SJR
SJR.Best.Quartile
H.index
Total.Docs...2020.
Total.Docs...3years.


In [28]:
## Look at some journal titles
sjr[sjr$Title=="Nature Medicine", ]

Unnamed: 0_level_0,Rank,Sourceid,Title,Type,Issn,SJR,SJR.Best.Quartile,H.index,Total.Docs...2020.,Total.Docs...3years.,Total.Refs.,Total.Cites..3years.,Citable.Docs...3years.,Cites...Doc...2years.,Ref....Doc.,Country,Region,Publisher,Coverage,Categories
Unnamed: 0_level_1,<int>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<int>,<int>,<int>,<int>,<int>,<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
17,17,15819,Nature Medicine,journal,"1546170X, 10788956",19536,Q1,547,452,953,10601,22548,664,2352,2345,United Kingdom,Western Europe,Nature Publishing Group,1995-2020,"Biochemistry, Genetics and Molecular Biology (miscellaneous) (Q1); Medicine (miscellaneous) (Q1)"


In [29]:
##################################################################
##
## It's a bit weird because ISSN is stored as "," comma-separated string. 
## Need to parse these string data, represent SciImago dataframe in long format (expanding on ISSN). 
## Remove duplicate ISSN. 
## Then join with rScopus data on ISSN.
##
## note: I **think** the different ISSN in the "," comma-separated list is ISSN vs. e-ISSN, etc.
##
##################################################################

In [30]:
##################################################################
##
## Note: Could also consider various APIs or data sources for generating journal-level metrics...
##
## 1) Clarivate (Thompson-Reuters) JCR: https://clarivate.com/webofsciencegroup/solutions/journal-citation-reports/
## 2) Eigenfactor: http://www.eigenfactor.org/
##
##################################################################

In [31]:
############################
## System information
############################

In [32]:
Sys.time()

[1] "2022-03-24 15:26:42 EDT"

In [33]:
data.frame(Sys.info())

Unnamed: 0_level_0,Sys.info..
Unnamed: 0_level_1,<chr>
sysname,Windows
release,10 x64
version,build 19044
nodename,DESKTOP-F2PP5TP
machine,x86-64
login,ChristopherMeaney
user,ChristopherMeaney
effective_user,ChristopherMeaney


In [34]:
version

               _                           
platform       x86_64-w64-mingw32          
arch           x86_64                      
os             mingw32                     
system         x86_64, mingw32             
status                                     
major          4                           
minor          0.2                         
year           2020                        
month          06                          
day            22                          
svn rev        78730                       
language       R                           
version.string R version 4.0.2 (2020-06-22)
nickname       Taking Off Again            