diff --git a/inst/vign/cache/__packages b/inst/vign/cache/__packages new file mode 100644 index 0000000..f6de0d8 --- /dev/null +++ b/inst/vign/cache/__packages @@ -0,0 +1,11 @@ +base +devtools +ggplot2 +knitr +Rdocumentation +roxygen2 +rplos +sacbox +slidify +slidifyLibraries +testthat diff --git a/inst/vign/cache/facet1_255b7c0f39fa0b2a2bdc47d0305d39d5.RData b/inst/vign/cache/facet1_255b7c0f39fa0b2a2bdc47d0305d39d5.RData new file mode 100644 index 0000000..c70385b Binary files /dev/null and b/inst/vign/cache/facet1_255b7c0f39fa0b2a2bdc47d0305d39d5.RData differ diff --git a/inst/vign/cache/facet1_255b7c0f39fa0b2a2bdc47d0305d39d5.rdb b/inst/vign/cache/facet1_255b7c0f39fa0b2a2bdc47d0305d39d5.rdb new file mode 100644 index 0000000..e69de29 diff --git a/inst/vign/cache/facet1_255b7c0f39fa0b2a2bdc47d0305d39d5.rdx b/inst/vign/cache/facet1_255b7c0f39fa0b2a2bdc47d0305d39d5.rdx new file mode 100644 index 0000000..e6f320d Binary files /dev/null and b/inst/vign/cache/facet1_255b7c0f39fa0b2a2bdc47d0305d39d5.rdx differ diff --git a/inst/vign/cache/facet2_0a44bf510ecdaefd3256aa13473d9b70.RData b/inst/vign/cache/facet2_0a44bf510ecdaefd3256aa13473d9b70.RData new file mode 100644 index 0000000..b9a4069 Binary files /dev/null and b/inst/vign/cache/facet2_0a44bf510ecdaefd3256aa13473d9b70.RData differ diff --git a/inst/vign/cache/facet2_0a44bf510ecdaefd3256aa13473d9b70.rdb b/inst/vign/cache/facet2_0a44bf510ecdaefd3256aa13473d9b70.rdb new file mode 100644 index 0000000..e69de29 diff --git a/inst/vign/cache/facet2_0a44bf510ecdaefd3256aa13473d9b70.rdx b/inst/vign/cache/facet2_0a44bf510ecdaefd3256aa13473d9b70.rdx new file mode 100644 index 0000000..e6f320d Binary files /dev/null and b/inst/vign/cache/facet2_0a44bf510ecdaefd3256aa13473d9b70.rdx differ diff --git a/inst/vign/cache/facet3_0c85ace83332ed344204f5f6bee1e680.RData b/inst/vign/cache/facet3_0c85ace83332ed344204f5f6bee1e680.RData new file mode 100644 index 0000000..38bdfec Binary files /dev/null and 
b/inst/vign/cache/facet3_0c85ace83332ed344204f5f6bee1e680.RData differ diff --git a/inst/vign/cache/facet3_0c85ace83332ed344204f5f6bee1e680.rdb b/inst/vign/cache/facet3_0c85ace83332ed344204f5f6bee1e680.rdb new file mode 100644 index 0000000..e69de29 diff --git a/inst/vign/cache/facet3_0c85ace83332ed344204f5f6bee1e680.rdx b/inst/vign/cache/facet3_0c85ace83332ed344204f5f6bee1e680.rdx new file mode 100644 index 0000000..e6f320d Binary files /dev/null and b/inst/vign/cache/facet3_0c85ace83332ed344204f5f6bee1e680.rdx differ diff --git a/inst/vign/cache/high1_d9b394c8f3a4caa4dc0fb35052ae87e5.RData b/inst/vign/cache/high1_d9b394c8f3a4caa4dc0fb35052ae87e5.RData new file mode 100644 index 0000000..049a329 Binary files /dev/null and b/inst/vign/cache/high1_d9b394c8f3a4caa4dc0fb35052ae87e5.RData differ diff --git a/inst/vign/cache/high1_d9b394c8f3a4caa4dc0fb35052ae87e5.rdb b/inst/vign/cache/high1_d9b394c8f3a4caa4dc0fb35052ae87e5.rdb new file mode 100644 index 0000000..e69de29 diff --git a/inst/vign/cache/high1_d9b394c8f3a4caa4dc0fb35052ae87e5.rdx b/inst/vign/cache/high1_d9b394c8f3a4caa4dc0fb35052ae87e5.rdx new file mode 100644 index 0000000..e6f320d Binary files /dev/null and b/inst/vign/cache/high1_d9b394c8f3a4caa4dc0fb35052ae87e5.rdx differ diff --git a/inst/vign/cache/high2_54b5d4fd5c9d6b688e2f0a1413c74c40.RData b/inst/vign/cache/high2_54b5d4fd5c9d6b688e2f0a1413c74c40.RData new file mode 100644 index 0000000..9dc0923 Binary files /dev/null and b/inst/vign/cache/high2_54b5d4fd5c9d6b688e2f0a1413c74c40.RData differ diff --git a/inst/vign/cache/high2_54b5d4fd5c9d6b688e2f0a1413c74c40.rdb b/inst/vign/cache/high2_54b5d4fd5c9d6b688e2f0a1413c74c40.rdb new file mode 100644 index 0000000..e69de29 diff --git a/inst/vign/cache/high2_54b5d4fd5c9d6b688e2f0a1413c74c40.rdx b/inst/vign/cache/high2_54b5d4fd5c9d6b688e2f0a1413c74c40.rdx new file mode 100644 index 0000000..e6f320d Binary files /dev/null and b/inst/vign/cache/high2_54b5d4fd5c9d6b688e2f0a1413c74c40.rdx differ diff --git 
a/inst/vign/cache/high3_09e5c4fae283af2093c323a337c5a3f9.RData b/inst/vign/cache/high3_09e5c4fae283af2093c323a337c5a3f9.RData new file mode 100644 index 0000000..b0ebf4f Binary files /dev/null and b/inst/vign/cache/high3_09e5c4fae283af2093c323a337c5a3f9.RData differ diff --git a/inst/vign/cache/high3_09e5c4fae283af2093c323a337c5a3f9.rdb b/inst/vign/cache/high3_09e5c4fae283af2093c323a337c5a3f9.rdb new file mode 100644 index 0000000..e69de29 diff --git a/inst/vign/cache/high3_09e5c4fae283af2093c323a337c5a3f9.rdx b/inst/vign/cache/high3_09e5c4fae283af2093c323a337c5a3f9.rdx new file mode 100644 index 0000000..e6f320d Binary files /dev/null and b/inst/vign/cache/high3_09e5c4fae283af2093c323a337c5a3f9.rdx differ diff --git a/inst/vign/cache/plosabstract_e1567ca8c5a32a5d93cf62b37114c434.RData b/inst/vign/cache/plosabstract_e1567ca8c5a32a5d93cf62b37114c434.RData new file mode 100644 index 0000000..6b8bc10 Binary files /dev/null and b/inst/vign/cache/plosabstract_e1567ca8c5a32a5d93cf62b37114c434.RData differ diff --git a/inst/vign/cache/plosabstract_e1567ca8c5a32a5d93cf62b37114c434.rdb b/inst/vign/cache/plosabstract_e1567ca8c5a32a5d93cf62b37114c434.rdb new file mode 100644 index 0000000..e69de29 diff --git a/inst/vign/cache/plosabstract_e1567ca8c5a32a5d93cf62b37114c434.rdx b/inst/vign/cache/plosabstract_e1567ca8c5a32a5d93cf62b37114c434.rdx new file mode 100644 index 0000000..e6f320d Binary files /dev/null and b/inst/vign/cache/plosabstract_e1567ca8c5a32a5d93cf62b37114c434.rdx differ diff --git a/inst/vign/cache/plosauthor_2e194e1f4967fc91858d139841e474af.RData b/inst/vign/cache/plosauthor_2e194e1f4967fc91858d139841e474af.RData new file mode 100644 index 0000000..49f2ba7 Binary files /dev/null and b/inst/vign/cache/plosauthor_2e194e1f4967fc91858d139841e474af.RData differ diff --git a/inst/vign/cache/plosauthor_2e194e1f4967fc91858d139841e474af.rdb b/inst/vign/cache/plosauthor_2e194e1f4967fc91858d139841e474af.rdb new file mode 100644 index 0000000..e69de29 diff --git 
a/inst/vign/cache/plosauthor_2e194e1f4967fc91858d139841e474af.rdx b/inst/vign/cache/plosauthor_2e194e1f4967fc91858d139841e474af.rdx new file mode 100644 index 0000000..e6f320d Binary files /dev/null and b/inst/vign/cache/plosauthor_2e194e1f4967fc91858d139841e474af.rdx differ diff --git a/inst/vign/cache/plostitle_abfda9d90fd1a68afd14dbbe616d96b3.RData b/inst/vign/cache/plostitle_abfda9d90fd1a68afd14dbbe616d96b3.RData new file mode 100644 index 0000000..0bef06a Binary files /dev/null and b/inst/vign/cache/plostitle_abfda9d90fd1a68afd14dbbe616d96b3.RData differ diff --git a/inst/vign/cache/plostitle_abfda9d90fd1a68afd14dbbe616d96b3.rdb b/inst/vign/cache/plostitle_abfda9d90fd1a68afd14dbbe616d96b3.rdb new file mode 100644 index 0000000..e69de29 diff --git a/inst/vign/cache/plostitle_abfda9d90fd1a68afd14dbbe616d96b3.rdx b/inst/vign/cache/plostitle_abfda9d90fd1a68afd14dbbe616d96b3.rdx new file mode 100644 index 0000000..e6f320d Binary files /dev/null and b/inst/vign/cache/plostitle_abfda9d90fd1a68afd14dbbe616d96b3.rdx differ diff --git a/inst/vign/cache/plosword2_101803b4bb73a0b46dedfe7214318f60.RData b/inst/vign/cache/plosword2_101803b4bb73a0b46dedfe7214318f60.RData new file mode 100644 index 0000000..b89f83c Binary files /dev/null and b/inst/vign/cache/plosword2_101803b4bb73a0b46dedfe7214318f60.RData differ diff --git a/inst/vign/cache/plosword2_101803b4bb73a0b46dedfe7214318f60.rdb b/inst/vign/cache/plosword2_101803b4bb73a0b46dedfe7214318f60.rdb new file mode 100644 index 0000000..e69de29 diff --git a/inst/vign/cache/plosword2_101803b4bb73a0b46dedfe7214318f60.rdx b/inst/vign/cache/plosword2_101803b4bb73a0b46dedfe7214318f60.rdx new file mode 100644 index 0000000..e6f320d Binary files /dev/null and b/inst/vign/cache/plosword2_101803b4bb73a0b46dedfe7214318f60.rdx differ diff --git a/inst/vign/cache/plosword_ea0042bb24956f9299706aa1506eb6ce.RData b/inst/vign/cache/plosword_ea0042bb24956f9299706aa1506eb6ce.RData new file mode 100644 index 0000000..dc81ee5 Binary files 
/dev/null and b/inst/vign/cache/plosword_ea0042bb24956f9299706aa1506eb6ce.RData differ diff --git a/inst/vign/cache/plosword_ea0042bb24956f9299706aa1506eb6ce.rdb b/inst/vign/cache/plosword_ea0042bb24956f9299706aa1506eb6ce.rdb new file mode 100644 index 0000000..71316bd Binary files /dev/null and b/inst/vign/cache/plosword_ea0042bb24956f9299706aa1506eb6ce.rdb differ diff --git a/inst/vign/cache/plosword_ea0042bb24956f9299706aa1506eb6ce.rdx b/inst/vign/cache/plosword_ea0042bb24956f9299706aa1506eb6ce.rdx new file mode 100644 index 0000000..3192b18 Binary files /dev/null and b/inst/vign/cache/plosword_ea0042bb24956f9299706aa1506eb6ce.rdx differ diff --git a/inst/vign/cache/searchplos1_9e304ad557354c11a0649b4be8d57a36.RData b/inst/vign/cache/searchplos1_9e304ad557354c11a0649b4be8d57a36.RData new file mode 100644 index 0000000..2775c60 Binary files /dev/null and b/inst/vign/cache/searchplos1_9e304ad557354c11a0649b4be8d57a36.RData differ diff --git a/inst/vign/cache/searchplos1_9e304ad557354c11a0649b4be8d57a36.rdb b/inst/vign/cache/searchplos1_9e304ad557354c11a0649b4be8d57a36.rdb new file mode 100644 index 0000000..e69de29 diff --git a/inst/vign/cache/searchplos1_9e304ad557354c11a0649b4be8d57a36.rdx b/inst/vign/cache/searchplos1_9e304ad557354c11a0649b4be8d57a36.rdx new file mode 100644 index 0000000..e6f320d Binary files /dev/null and b/inst/vign/cache/searchplos1_9e304ad557354c11a0649b4be8d57a36.rdx differ diff --git a/inst/vign/cache/searchplos2_b2c2d255e60015b2c0383df9859ddb3f.RData b/inst/vign/cache/searchplos2_b2c2d255e60015b2c0383df9859ddb3f.RData new file mode 100644 index 0000000..72897d0 Binary files /dev/null and b/inst/vign/cache/searchplos2_b2c2d255e60015b2c0383df9859ddb3f.RData differ diff --git a/inst/vign/cache/searchplos2_b2c2d255e60015b2c0383df9859ddb3f.rdb b/inst/vign/cache/searchplos2_b2c2d255e60015b2c0383df9859ddb3f.rdb new file mode 100644 index 0000000..e69de29 diff --git a/inst/vign/cache/searchplos2_b2c2d255e60015b2c0383df9859ddb3f.rdx 
b/inst/vign/cache/searchplos2_b2c2d255e60015b2c0383df9859ddb3f.rdx new file mode 100644 index 0000000..e6f320d Binary files /dev/null and b/inst/vign/cache/searchplos2_b2c2d255e60015b2c0383df9859ddb3f.rdx differ diff --git a/inst/vign/cache/searchplos3_0b8abb153224a33db612c30320ff76d7.RData b/inst/vign/cache/searchplos3_0b8abb153224a33db612c30320ff76d7.RData new file mode 100644 index 0000000..fea9d76 Binary files /dev/null and b/inst/vign/cache/searchplos3_0b8abb153224a33db612c30320ff76d7.RData differ diff --git a/inst/vign/cache/searchplos3_0b8abb153224a33db612c30320ff76d7.rdb b/inst/vign/cache/searchplos3_0b8abb153224a33db612c30320ff76d7.rdb new file mode 100644 index 0000000..e69de29 diff --git a/inst/vign/cache/searchplos3_0b8abb153224a33db612c30320ff76d7.rdx b/inst/vign/cache/searchplos3_0b8abb153224a33db612c30320ff76d7.rdx new file mode 100644 index 0000000..e6f320d Binary files /dev/null and b/inst/vign/cache/searchplos3_0b8abb153224a33db612c30320ff76d7.rdx differ diff --git a/inst/vign/cache/searchplos4_e8e777407ff47d1af2cb51c0cc5d235a.RData b/inst/vign/cache/searchplos4_e8e777407ff47d1af2cb51c0cc5d235a.RData new file mode 100644 index 0000000..0a332d3 Binary files /dev/null and b/inst/vign/cache/searchplos4_e8e777407ff47d1af2cb51c0cc5d235a.RData differ diff --git a/inst/vign/cache/searchplos4_e8e777407ff47d1af2cb51c0cc5d235a.rdb b/inst/vign/cache/searchplos4_e8e777407ff47d1af2cb51c0cc5d235a.rdb new file mode 100644 index 0000000..e69de29 diff --git a/inst/vign/cache/searchplos4_e8e777407ff47d1af2cb51c0cc5d235a.rdx b/inst/vign/cache/searchplos4_e8e777407ff47d1af2cb51c0cc5d235a.rdx new file mode 100644 index 0000000..e6f320d Binary files /dev/null and b/inst/vign/cache/searchplos4_e8e777407ff47d1af2cb51c0cc5d235a.rdx differ diff --git a/inst/vign/cache/searchplos5_f2ca7a13e445edd60b4a56e5a077f2f1.RData b/inst/vign/cache/searchplos5_f2ca7a13e445edd60b4a56e5a077f2f1.RData new file mode 100644 index 0000000..841ce63 Binary files /dev/null and 
b/inst/vign/cache/searchplos5_f2ca7a13e445edd60b4a56e5a077f2f1.RData differ diff --git a/inst/vign/cache/searchplos5_f2ca7a13e445edd60b4a56e5a077f2f1.rdb b/inst/vign/cache/searchplos5_f2ca7a13e445edd60b4a56e5a077f2f1.rdb new file mode 100644 index 0000000..f3d09ed Binary files /dev/null and b/inst/vign/cache/searchplos5_f2ca7a13e445edd60b4a56e5a077f2f1.rdb differ diff --git a/inst/vign/cache/searchplos5_f2ca7a13e445edd60b4a56e5a077f2f1.rdx b/inst/vign/cache/searchplos5_f2ca7a13e445edd60b4a56e5a077f2f1.rdx new file mode 100644 index 0000000..76a2274 Binary files /dev/null and b/inst/vign/cache/searchplos5_f2ca7a13e445edd60b4a56e5a077f2f1.rdx differ diff --git a/inst/vign/cache/throughtime1_24a4318a8fba9a81955f48e1af7e59f6.RData b/inst/vign/cache/throughtime1_24a4318a8fba9a81955f48e1af7e59f6.RData new file mode 100644 index 0000000..1afed83 Binary files /dev/null and b/inst/vign/cache/throughtime1_24a4318a8fba9a81955f48e1af7e59f6.RData differ diff --git a/inst/vign/cache/throughtime1_24a4318a8fba9a81955f48e1af7e59f6.rdb b/inst/vign/cache/throughtime1_24a4318a8fba9a81955f48e1af7e59f6.rdb new file mode 100644 index 0000000..e69de29 diff --git a/inst/vign/cache/throughtime1_24a4318a8fba9a81955f48e1af7e59f6.rdx b/inst/vign/cache/throughtime1_24a4318a8fba9a81955f48e1af7e59f6.rdx new file mode 100644 index 0000000..e6f320d Binary files /dev/null and b/inst/vign/cache/throughtime1_24a4318a8fba9a81955f48e1af7e59f6.rdx differ diff --git a/inst/vign/cache/throughtime2_a7d7c45426d64295bc011d87039c940f.RData b/inst/vign/cache/throughtime2_a7d7c45426d64295bc011d87039c940f.RData new file mode 100644 index 0000000..6700feb Binary files /dev/null and b/inst/vign/cache/throughtime2_a7d7c45426d64295bc011d87039c940f.RData differ diff --git a/inst/vign/cache/throughtime2_a7d7c45426d64295bc011d87039c940f.rdb b/inst/vign/cache/throughtime2_a7d7c45426d64295bc011d87039c940f.rdb new file mode 100644 index 0000000..e69de29 diff --git 
a/inst/vign/cache/throughtime2_a7d7c45426d64295bc011d87039c940f.rdx b/inst/vign/cache/throughtime2_a7d7c45426d64295bc011d87039c940f.rdx new file mode 100644 index 0000000..e6f320d Binary files /dev/null and b/inst/vign/cache/throughtime2_a7d7c45426d64295bc011d87039c940f.rdx differ diff --git a/inst/vign/figure/plosword1plot.png b/inst/vign/figure/plosword1plot.png new file mode 100644 index 0000000..3b933c9 Binary files /dev/null and b/inst/vign/figure/plosword1plot.png differ diff --git a/inst/vign/figure/throughtime1.png b/inst/vign/figure/throughtime1.png new file mode 100644 index 0000000..c3bc9da Binary files /dev/null and b/inst/vign/figure/throughtime1.png differ diff --git a/inst/vign/rplos_vignette.Rmd b/inst/vign/rplos_vignette.Rmd new file mode 100644 index 0000000..c275db7 --- /dev/null +++ b/inst/vign/rplos_vignette.Rmd @@ -0,0 +1,171 @@ + + +rplos tutorial +===== + +The `rplos` package interacts with the API services of [PLoS](http://www.plos.org/) (Public Library of Science) Journals. In order to use `rplos`, you need to obtain [your own key](http://api.plos.org/registration/) to their API services. Instruction for obtaining and installing keys so they load automatically when you launch R are on our GitHub Wiki page [Installation and use of API keys](https://github.com/ropensci/rOpenSci/wiki/Installation-and-use-of-API-keys). + +This tutorial will go through three use cases to demonstrate the kinds +of things possible in `rplos`. 
+ +* Search across PLoS papers in various sections of papers +* Search for terms and visualize results as a histogram OR as a plot through time +* Text mining of scientific literature + +### Load package from CRAN + +```{r install, eval=FALSE} +install.packages("rplos") +``` + +```{r load, message=FALSE, warning=FALSE} +library(rplos) +``` + +### Search across PLoS papers in various sections of papers + +`searchplos` is a general search, and in this case searches for the term +**Helianthus** and returns the DOIs of matching papers + +```{r searchplos1, message=FALSE, warning=FALSE, cache=FALSE} +searchplos(q= "Helianthus", fl= "id", limit = 5) +``` + +Get only full article DOIs + +```{r searchplos2, message=FALSE, warning=FALSE, cache=FALSE} +searchplos(q="*:*", fl='id', fq='doc_type:full', start=0, limit=5) +``` + +Get DOIs for only PLoS One articles + +```{r searchplos3, message=FALSE, warning=FALSE, cache=FALSE} +searchplos(q="*:*", fl='id', fq='cross_published_journal_key:PLoSONE', start=0, limit=5) +``` + +Get DOIs for full article in PLoS One + +```{r searchplos4, message=FALSE, warning=FALSE, cache=FALSE} +searchplos(q="*:*", fl='id', + fq=list('cross_published_journal_key:PLoSONE', 'doc_type:full'), + start=0, limit=5) +``` + +Search for many terms + +```{r searchplos5, message=FALSE, warning=FALSE, cache=FALSE} +q <- c('ecology','evolution','science') +lapply(q, function(x) searchplos(x, limit=2)) +``` + +### Search on specific sections + +A suite of functions was created as light wrappers around `searchplos` as a shorthand to search specific sections of a paper. + +* `plosauthor` searches in authors +* `plosabstract` searches in abstracts +* `plostitle` searches in titles +* `plosfigtabcaps` searches in figure and table captions +* `plossubject` searches in subject areas + +`plosauthor` searches across authors, and in this case returns the authors of the matching papers. 
The `fl` parameter determines what is returned + +```{r plosauthor, message=FALSE, warning=FALSE, cache=FALSE} +plosauthor(q = "Eisen", fl = "author", limit = 5) +``` + +`plosabstract` searches across abstracts, and in this case returns the id and title of the matching papers + +```{r plosabstract, message=FALSE, warning=FALSE, cache=FALSE} +plosabstract(q = 'drosophila', fl='id,title', limit = 5) +``` + +`plostitle` searches across titles, and in this case returns the title and journal of the matching papers + +```{r plostitle, message=FALSE, warning=FALSE, cache=FALSE} +plostitle(q='drosophila', fl='title,journal', limit=5) +``` + +### Faceted search + +Facet by journal + +```{r facet1, message=FALSE, warning=FALSE, cache=FALSE} +facetplos(q='*:*', facet.field='journal') +``` + +Using `facet.query` to get counts + +```{r facet2, message=FALSE, warning=FALSE, cache=FALSE} +facetplos(q='*:*', facet.field='journal', facet.query='cell,bird') +``` + +Date faceting + +```{r facet3, message=FALSE, warning=FALSE, cache=FALSE} +facetplos(q='*:*', url=url, facet.date='publication_date', + facet.date.start='NOW/DAY-5DAYS', facet.date.end='NOW', facet.date.gap='+1DAY') +``` + +### Highlighted search + +Search for the term _alcohol_ in the abstracts of articles, return only 2 results + +```{r high1, message=FALSE, warning=FALSE, cache=FALSE} +highplos(q='alcohol', hl.fl = 'abstract', rows=2) +``` + +Search for the term _alcohol_ in the abstracts of articles, and return fragment size of 20 characters, return only 2 results + +```{r high2, message=FALSE, warning=FALSE, cache=FALSE} +highplos(q='alcohol', hl.fl='abstract', hl.fragsize=20, rows=2) +``` + +Search for the term _experiment_ across all sections of an article, return only the id (DOI) and title fields (via `fl`), search in full articles only (via `fq='doc_type:full'`), and return only 2 results + +```{r high3, message=FALSE, warning=FALSE, cache=FALSE} +highplos(q='everything:"experiment"', fl='id,title', fq='doc_type:full', + rows=2) 
+``` + +### Search for terms and visualize results as a histogram OR as a plot through time + +`plosword` allows you to search for 1 to K words and visualize the results +as a histogram, comparing number of matching papers for each word + +```{r plosword1, message=FALSE, warning=FALSE, cache=FALSE} +out <- plosword(list("monkey", "Helianthus", "sunflower", "protein", "whale"), + vis = "TRUE") +out$table +``` + +```{r plosword1plot, message=FALSE, warning=FALSE, cache=FALSE, fig.width=6, fig.height=4} +out$plot +``` + +You can also pass in curl options; in this case, get verbose information on the curl call. + +```{r plosword2, message=FALSE, warning=FALSE, cache=FALSE} +plosword('Helianthus', callopts=list(verbose=TRUE)) +``` + +### Visualize terms + +`plot_throughtime` allows you to search for up to 2 words and visualize the results as a line plot through time, comparing number of articles matching through time. Visualize with the ggplot2 package, only up to two terms for now. + +```{r throughtime1, message=FALSE, warning=FALSE, cache=FALSE, fig.width=6, fig.height=4} +plot_throughtime(terms = "phylogeny", limit = 200) +``` + +OR using google visualizations through the googleVis package, check it out yourself using, e.g., (not shown here) + +```{r gvis, eval=FALSE} +plot_throughtime(terms = list("drosophila", "flower"), limit = 200, gvis = TRUE) +``` + +...And a google visualization will render on your local browser and you +can play with three types of plots (point, histogram, line), all through +time. The plot is not shown here, but try it out for yourself!! \ No newline at end of file diff --git a/inst/vign/rplos_vignette.md b/inst/vign/rplos_vignette.md new file mode 100644 index 0000000..64a7a0f --- /dev/null +++ b/inst/vign/rplos_vignette.md @@ -0,0 +1,432 @@ + + +rplos tutorial +===== + +The `rplos` package interacts with the API services of [PLoS](http://www.plos.org/) (Public Library of Science) Journals. 
In order to use `rplos`, you need to obtain [your own key](http://api.plos.org/registration/) to their API services. Instruction for obtaining and installing keys so they load automatically when you launch R are on our GitHub Wiki page [Installation and use of API keys](https://github.com/ropensci/rOpenSci/wiki/Installation-and-use-of-API-keys). + +This tutorial will go through three use cases to demonstrate the kinds +of things possible in `rplos`. + +* Search across PLoS papers in various sections of papers +* Search for terms and visualize results as a histogram OR as a plot through time +* Text mining of scientific literature + +### Load package from CRAN + + +```r +install.packages("rplos") +``` + + + +```r +library(rplos) +``` + + +### Search across PLoS papers in various sections of papers + +`searchplos` is a general search, and in this case searches for the term +**Helianthus** and returns the DOI's of matching papers + + +```r +searchplos(q = "Helianthus", fl = "id", limit = 5) +``` + +``` +## id +## 1 10.1371/journal.pone.0057533 +## 2 10.1371/journal.pone.0045899 +## 3 10.1371/journal.pone.0037191 +## 4 10.1371/journal.pone.0051360 +## 5 10.1371/journal.pone.0070347 +``` + + +Get only full article DOIs + + +```r +searchplos(q = "*:*", fl = "id", fq = "doc_type:full", start = 0, limit = 5) +``` + +``` +## id +## 1 10.1371/journal.pntd.0001525 +## 2 10.1371/journal.pone.0049273 +## 3 10.1371/journal.pone.0031364 +## 4 10.1371/journal.pone.0005841 +## 5 10.1371/journal.pone.0005838 +``` + + +Get DOIs for only PLoS One articles + + +```r +searchplos(q = "*:*", fl = "id", fq = "cross_published_journal_key:PLoSONE", + start = 0, limit = 5) +``` + +``` +## id +## 1 10.1371/journal.pone.0049274/title +## 2 10.1371/journal.pone.0049274/abstract +## 3 10.1371/journal.pone.0049274/references +## 4 10.1371/journal.pone.0049274/body +## 5 10.1371/journal.pone.0049274/introduction +``` + + +Get DOIs for full article in PLoS One + + +```r +searchplos(q = "*:*", fl = 
"id", fq = list("cross_published_journal_key:PLoSONE", + "doc_type:full"), start = 0, limit = 5) +``` + +``` +## id +## 1 10.1371/journal.pone.0049273 +## 2 10.1371/journal.pone.0031364 +## 3 10.1371/journal.pone.0005841 +## 4 10.1371/journal.pone.0005838 +## 5 10.1371/journal.pone.0074814 +``` + + +Serch for many terms + + +```r +q <- c("ecology", "evolution", "science") +lapply(q, function(x) searchplos(x, limit = 2)) +``` + +``` +## [[1]] +## id +## 1 10.1371/journal.pone.0059813 +## 2 10.1371/journal.pone.0001248 +## +## [[2]] +## id +## 1 10.1371/journal.pbio.0050030 +## 2 10.1371/journal.pbio.0030245 +## +## [[3]] +## id +## 1 10.1371/journal.pbio.0020122 +## 2 10.1371/journal.pbio.1001166 +``` + + +### Search on specific sections + +A suite of functions were created as light wrappers around `searchplos` as a shorthand to search specific sections of a paper. + +* `plosauthor` searchers in authors +* `plosabstract` searches in abstracts +* `plostitle` searches in titles +* `plosfigtabcaps` searches in figure and table captions +* `plossubject` searches in subject areas + +`plosauthor` searches across authors, and in this case returns the authors of the matching papers. 
the fl parameter determines what is returned + + +```r +plosauthor(q = "Eisen", fl = "author", limit = 5) +``` + +``` +## author +## 1 Jonathan A Eisen +## 2 Jonathan A Eisen +## 3 Jonathan A Eisen +## 4 Jonathan A Eisen +## 5 Jonathan A Eisen +``` + + +`plosabstract` searches across abstracts, and in this case returns the id and title of the matching papers + + +```r +plosabstract(q = "drosophila", fl = "id,title", limit = 5) +``` + +``` +## id +## 1 10.1371/journal.pbio.0040198 +## 2 10.1371/journal.pbio.0030246 +## 3 10.1371/journal.pone.0012421 +## 4 10.1371/journal.pbio.1000318 +## 5 10.1371/journal.pbio.0030389 +## title +## 1 All for All +## 2 School Students as Drosophila Experimenters +## 3 Host Range and Specificity of the Drosophila C Virus +## 4 Genomic Responses to Abnormal Gene Dosage: The X Chromosome Improved on a Common Strategy +## 5 New Environments Set the Stage for Changing Tastes in Mates +``` + + +`plostitle` searches across titles, and in this case returns the title and journal of the matching papers + + +```r +plostitle(q = "drosophila", fl = "title,journal", limit = 5) +``` + +``` +## journal +## 1 PLoS Computational Biology +## 2 PLoS Biology +## 3 PLoS Genetics +## 4 PLoS ONE +## 5 PLoS Biology +## title +## 1 Parametric Alignment of Drosophila Genomes +## 2 School Students as Drosophila Experimenters +## 3 Phenotypic Plasticity of the Drosophila Transcriptome +## 4 A Tripartite Synapse Model in Drosophila +## 5 Expression in Aneuploid Drosophila S2 Cells +``` + + +### Faceted search + +Facet by journal + + +```r +facetplos(q = "*:*", facet.field = "journal") +``` + +``` +## $facet_queries +## NULL +## +## $facet_fields +## $facet_fields$journal +## X1 X2 +## 1 plos one 704650 +## 2 plos genetics 34218 +## 3 plos pathogens 29964 +## 4 plos computational biology 25417 +## 5 plos biology 24257 +## 6 plos neglected tropical diseases 19520 +## 7 plos medicine 17232 +## 8 plos clinical trials 521 +## 9 plos medicin 9 +## 10 plos collections 5 
+## +## +## $facet_dates +## NULL +## +## $facet_ranges +## NULL +``` + + +Using `facet.query` to get counts + + +```r +facetplos(q = "*:*", facet.field = "journal", facet.query = "cell,bird") +``` + +``` +## $facet_queries +## term value +## 1 cell 83275 +## 2 bird 8353 +## +## $facet_fields +## $facet_fields$journal +## X1 X2 +## 1 plos one 704650 +## 2 plos genetics 34218 +## 3 plos pathogens 29964 +## 4 plos computational biology 25417 +## 5 plos biology 24257 +## 6 plos neglected tropical diseases 19520 +## 7 plos medicine 17232 +## 8 plos clinical trials 521 +## 9 plos medicin 9 +## 10 plos collections 5 +## +## +## $facet_dates +## NULL +## +## $facet_ranges +## NULL +``` + + +Date faceting + + +```r +facetplos(q = "*:*", url = url, facet.date = "publication_date", facet.date.start = "NOW/DAY-5DAYS", + facet.date.end = "NOW", facet.date.gap = "+1DAY") +``` + +``` +## $facet_queries +## NULL +## +## $facet_fields +## NULL +## +## $facet_dates +## $facet_dates$publication_date +## date value +## 1 2014-02-12T00:00:00Z 2508 +## 2 2014-02-13T00:00:00Z 2334 +## 3 2014-02-14T00:00:00Z 968 +## 4 2014-02-15T00:00:00Z 0 +## 5 2014-02-16T00:00:00Z 0 +## 6 2014-02-17T00:00:00Z 0 +## +## +## $facet_ranges +## NULL +``` + + +### Highlighted search + +Search for the term _alcohol_ in the abstracts of articles, return only 10 results + + +```r +highplos(q = "alcohol", hl.fl = "abstract", rows = 2) +``` + +``` +## $`10.1371/journal.pmed.0040151` +## $`10.1371/journal.pmed.0040151`$abstract +## [1] "Background: Alcohol consumption causes an estimated 4% of the global disease burden, prompting" +## +## +## $`10.1371/journal.pone.0027752` +## $`10.1371/journal.pone.0027752`$abstract +## [1] "Background: The negative influences of alcohol on TB management with regard to delays in seeking" +``` + + +Search for the term _alcohol_ in the abstracts of articles, and return fragment size of 20 characters, return only 5 results + + +```r +highplos(q = "alcohol", hl.fl = "abstract", 
hl.fragsize = 20, rows = 2) +``` + +``` +## $`10.1371/journal.pmed.0040151` +## $`10.1371/journal.pmed.0040151`$abstract +## [1] "Background: Alcohol" +## +## +## $`10.1371/journal.pone.0027752` +## $`10.1371/journal.pone.0027752`$abstract +## [1] " of alcohol on TB management" +``` + + +Search for the term _experiment_ across all sections of an article, return id (DOI) and title fl only, search in full articles only (via `fq='doc_type:full'`), and return only 10 results + + +```r +highplos(q = "everything:\"experiment\"", fl = "id,title", fq = "doc_type:full", + rows = 2) +``` + +``` +## $`10.1371/journal.pone.0039681` +## $`10.1371/journal.pone.0039681`$everything +## [1] " Selection of Transcriptomics Experiments Improves Guilt-by-Association Analyses Transcriptomics Experiment" +## +## +## $`10.1371/journal.pone.0051016` +## $`10.1371/journal.pone.0051016`$everything +## [1] " Evolutionary Biology Breeding Experience Might Be a Major Determinant of Breeding Probability in Long-Lived" +``` + + +### Search for terms and visualize results as a histogram OR as a plot through time + +`plosword` allows you to search for 1 to K words and visualize the results +as a histogram, comparing number of matching papers for each word + + +```r +out <- plosword(list("monkey", "Helianthus", "sunflower", "protein", "whale"), + vis = "TRUE") +out$table +``` + +``` +## No_Articles Term +## 1 6894 monkey +## 2 230 Helianthus +## 3 607 sunflower +## 4 73707 protein +## 5 797 whale +``` + + + +```r +out$plot +``` + +![plot of chunk plosword1plot](figure/plosword1plot.png) + + +You can also pass in curl options, in this case get verbose information on the curl call. + + +```r +plosword("Helianthus", callopts = list(verbose = TRUE)) +``` + +``` +## Number of articles with search term +## 230 +``` + + +### Visualize terms + +`plot_througtime` allows you to search for up to 2 words and visualize the results as a line plot through time, comparing number of articles matching through time. 
Visualize with the ggplot2 package, only up to two terms for now. + + +```r +plot_throughtime(terms = "phylogeny", limit = 200) +``` + +![plot of chunk throughtime1](figure/throughtime1.png) + + +OR using google visualizations through the googleVis package, check it your self using, e.g. (not shown here) + + +```r +plot_throughtime(terms = list("drosophila", "flower"), limit = 200, gvis = TRUE) +``` + + +...And a google visualization will render on your local browser and you +can play with three types of plots (point, histogram, line), all through +time. The plot is not shown here, but try it out for yourself!! diff --git a/vignettes/figure/plosword1plot.png b/vignettes/figure/plosword1plot.png new file mode 100644 index 0000000..3b933c9 Binary files /dev/null and b/vignettes/figure/plosword1plot.png differ diff --git a/vignettes/figure/throughtime1.png b/vignettes/figure/throughtime1.png new file mode 100644 index 0000000..c3bc9da Binary files /dev/null and b/vignettes/figure/throughtime1.png differ diff --git a/vignettes/margins.sty b/vignettes/margins.sty new file mode 100644 index 0000000..9e8253b --- /dev/null +++ b/vignettes/margins.sty @@ -0,0 +1 @@ +\usepackage[vmargin=1in,hmargin=1in]{geometry} \ No newline at end of file diff --git a/vignettes/rplos_vignette.Rmd b/vignettes/rplos_vignette.Rmd new file mode 100644 index 0000000..64a7a0f --- /dev/null +++ b/vignettes/rplos_vignette.Rmd @@ -0,0 +1,432 @@ + + +rplos tutorial +===== + +The `rplos` package interacts with the API services of [PLoS](http://www.plos.org/) (Public Library of Science) Journals. In order to use `rplos`, you need to obtain [your own key](http://api.plos.org/registration/) to their API services. Instruction for obtaining and installing keys so they load automatically when you launch R are on our GitHub Wiki page [Installation and use of API keys](https://github.com/ropensci/rOpenSci/wiki/Installation-and-use-of-API-keys). 
+ +This tutorial will go through three use cases to demonstrate the kinds +of things possible in `rplos`. + +* Search across PLoS papers in various sections of papers +* Search for terms and visualize results as a histogram OR as a plot through time +* Text mining of scientific literature + +### Load package from CRAN + + +```r +install.packages("rplos") +``` + + + +```r +library(rplos) +``` + + +### Search across PLoS papers in various sections of papers + +`searchplos` is a general search, and in this case searches for the term +**Helianthus** and returns the DOI's of matching papers + + +```r +searchplos(q = "Helianthus", fl = "id", limit = 5) +``` + +``` +## id +## 1 10.1371/journal.pone.0057533 +## 2 10.1371/journal.pone.0045899 +## 3 10.1371/journal.pone.0037191 +## 4 10.1371/journal.pone.0051360 +## 5 10.1371/journal.pone.0070347 +``` + + +Get only full article DOIs + + +```r +searchplos(q = "*:*", fl = "id", fq = "doc_type:full", start = 0, limit = 5) +``` + +``` +## id +## 1 10.1371/journal.pntd.0001525 +## 2 10.1371/journal.pone.0049273 +## 3 10.1371/journal.pone.0031364 +## 4 10.1371/journal.pone.0005841 +## 5 10.1371/journal.pone.0005838 +``` + + +Get DOIs for only PLoS One articles + + +```r +searchplos(q = "*:*", fl = "id", fq = "cross_published_journal_key:PLoSONE", + start = 0, limit = 5) +``` + +``` +## id +## 1 10.1371/journal.pone.0049274/title +## 2 10.1371/journal.pone.0049274/abstract +## 3 10.1371/journal.pone.0049274/references +## 4 10.1371/journal.pone.0049274/body +## 5 10.1371/journal.pone.0049274/introduction +``` + + +Get DOIs for full article in PLoS One + + +```r +searchplos(q = "*:*", fl = "id", fq = list("cross_published_journal_key:PLoSONE", + "doc_type:full"), start = 0, limit = 5) +``` + +``` +## id +## 1 10.1371/journal.pone.0049273 +## 2 10.1371/journal.pone.0031364 +## 3 10.1371/journal.pone.0005841 +## 4 10.1371/journal.pone.0005838 +## 5 10.1371/journal.pone.0074814 +``` + + +Serch for many terms + + +```r +q <- 
c("ecology", "evolution", "science") +lapply(q, function(x) searchplos(x, limit = 2)) +``` + +``` +## [[1]] +## id +## 1 10.1371/journal.pone.0059813 +## 2 10.1371/journal.pone.0001248 +## +## [[2]] +## id +## 1 10.1371/journal.pbio.0050030 +## 2 10.1371/journal.pbio.0030245 +## +## [[3]] +## id +## 1 10.1371/journal.pbio.0020122 +## 2 10.1371/journal.pbio.1001166 +``` + + +### Search on specific sections + +A suite of functions were created as light wrappers around `searchplos` as a shorthand to search specific sections of a paper. + +* `plosauthor` searchers in authors +* `plosabstract` searches in abstracts +* `plostitle` searches in titles +* `plosfigtabcaps` searches in figure and table captions +* `plossubject` searches in subject areas + +`plosauthor` searches across authors, and in this case returns the authors of the matching papers. the fl parameter determines what is returned + + +```r +plosauthor(q = "Eisen", fl = "author", limit = 5) +``` + +``` +## author +## 1 Jonathan A Eisen +## 2 Jonathan A Eisen +## 3 Jonathan A Eisen +## 4 Jonathan A Eisen +## 5 Jonathan A Eisen +``` + + +`plosabstract` searches across abstracts, and in this case returns the id and title of the matching papers + + +```r +plosabstract(q = "drosophila", fl = "id,title", limit = 5) +``` + +``` +## id +## 1 10.1371/journal.pbio.0040198 +## 2 10.1371/journal.pbio.0030246 +## 3 10.1371/journal.pone.0012421 +## 4 10.1371/journal.pbio.1000318 +## 5 10.1371/journal.pbio.0030389 +## title +## 1 All for All +## 2 School Students as Drosophila Experimenters +## 3 Host Range and Specificity of the Drosophila C Virus +## 4 Genomic Responses to Abnormal Gene Dosage: The X Chromosome Improved on a Common Strategy +## 5 New Environments Set the Stage for Changing Tastes in Mates +``` + + +`plostitle` searches across titles, and in this case returns the title and journal of the matching papers + + +```r +plostitle(q = "drosophila", fl = "title,journal", limit = 5) +``` + +``` +## journal +## 1 
PLoS Computational Biology +## 2 PLoS Biology +## 3 PLoS Genetics +## 4 PLoS ONE +## 5 PLoS Biology +## title +## 1 Parametric Alignment of Drosophila Genomes +## 2 School Students as Drosophila Experimenters +## 3 Phenotypic Plasticity of the Drosophila Transcriptome +## 4 A Tripartite Synapse Model in Drosophila +## 5 Expression in Aneuploid Drosophila S2 Cells +``` + + +### Faceted search + +Facet by journal + + +```r +facetplos(q = "*:*", facet.field = "journal") +``` + +``` +## $facet_queries +## NULL +## +## $facet_fields +## $facet_fields$journal +## X1 X2 +## 1 plos one 704650 +## 2 plos genetics 34218 +## 3 plos pathogens 29964 +## 4 plos computational biology 25417 +## 5 plos biology 24257 +## 6 plos neglected tropical diseases 19520 +## 7 plos medicine 17232 +## 8 plos clinical trials 521 +## 9 plos medicin 9 +## 10 plos collections 5 +## +## +## $facet_dates +## NULL +## +## $facet_ranges +## NULL +``` + + +Using `facet.query` to get counts + + +```r +facetplos(q = "*:*", facet.field = "journal", facet.query = "cell,bird") +``` + +``` +## $facet_queries +## term value +## 1 cell 83275 +## 2 bird 8353 +## +## $facet_fields +## $facet_fields$journal +## X1 X2 +## 1 plos one 704650 +## 2 plos genetics 34218 +## 3 plos pathogens 29964 +## 4 plos computational biology 25417 +## 5 plos biology 24257 +## 6 plos neglected tropical diseases 19520 +## 7 plos medicine 17232 +## 8 plos clinical trials 521 +## 9 plos medicin 9 +## 10 plos collections 5 +## +## +## $facet_dates +## NULL +## +## $facet_ranges +## NULL +``` + + +Date faceting + + +```r +facetplos(q = "*:*", url = url, facet.date = "publication_date", facet.date.start = "NOW/DAY-5DAYS", + facet.date.end = "NOW", facet.date.gap = "+1DAY") +``` + +``` +## $facet_queries +## NULL +## +## $facet_fields +## NULL +## +## $facet_dates +## $facet_dates$publication_date +## date value +## 1 2014-02-12T00:00:00Z 2508 +## 2 2014-02-13T00:00:00Z 2334 +## 3 2014-02-14T00:00:00Z 968 +## 4 2014-02-15T00:00:00Z 0 +## 5 
2014-02-16T00:00:00Z 0 +## 6 2014-02-17T00:00:00Z 0 +## +## +## $facet_ranges +## NULL +``` + + +### Highlighted search + +Search for the term _alcohol_ in the abstracts of articles, return only 10 results + + +```r +highplos(q = "alcohol", hl.fl = "abstract", rows = 2) +``` + +``` +## $`10.1371/journal.pmed.0040151` +## $`10.1371/journal.pmed.0040151`$abstract +## [1] "Background: Alcohol consumption causes an estimated 4% of the global disease burden, prompting" +## +## +## $`10.1371/journal.pone.0027752` +## $`10.1371/journal.pone.0027752`$abstract +## [1] "Background: The negative influences of alcohol on TB management with regard to delays in seeking" +``` + + +Search for the term _alcohol_ in the abstracts of articles, and return fragment size of 20 characters, return only 5 results + + +```r +highplos(q = "alcohol", hl.fl = "abstract", hl.fragsize = 20, rows = 2) +``` + +``` +## $`10.1371/journal.pmed.0040151` +## $`10.1371/journal.pmed.0040151`$abstract +## [1] "Background: Alcohol" +## +## +## $`10.1371/journal.pone.0027752` +## $`10.1371/journal.pone.0027752`$abstract +## [1] " of alcohol on TB management" +``` + + +Search for the term _experiment_ across all sections of an article, return id (DOI) and title fl only, search in full articles only (via `fq='doc_type:full'`), and return only 10 results + + +```r +highplos(q = "everything:\"experiment\"", fl = "id,title", fq = "doc_type:full", + rows = 2) +``` + +``` +## $`10.1371/journal.pone.0039681` +## $`10.1371/journal.pone.0039681`$everything +## [1] " Selection of Transcriptomics Experiments Improves Guilt-by-Association Analyses Transcriptomics Experiment" +## +## +## $`10.1371/journal.pone.0051016` +## $`10.1371/journal.pone.0051016`$everything +## [1] " Evolutionary Biology Breeding Experience Might Be a Major Determinant of Breeding Probability in Long-Lived" +``` + + +### Search for terms and visualize results as a histogram OR as a plot through time + +`plosword` allows you to search for 1 to K 
words and visualize the results +as a histogram, comparing number of matching papers for each word + + +```r +out <- plosword(list("monkey", "Helianthus", "sunflower", "protein", "whale"), + vis = "TRUE") +out$table +``` + +``` +## No_Articles Term +## 1 6894 monkey +## 2 230 Helianthus +## 3 607 sunflower +## 4 73707 protein +## 5 797 whale +``` + + + +```r +out$plot +``` + +![plot of chunk plosword1plot](figure/plosword1plot.png) + + +You can also pass in curl options, in this case get verbose information on the curl call. + + +```r +plosword("Helianthus", callopts = list(verbose = TRUE)) +``` + +``` +## Number of articles with search term +## 230 +``` + + +### Visualize terms + +`plot_througtime` allows you to search for up to 2 words and visualize the results as a line plot through time, comparing number of articles matching through time. Visualize with the ggplot2 package, only up to two terms for now. + + +```r +plot_throughtime(terms = "phylogeny", limit = 200) +``` + +![plot of chunk throughtime1](figure/throughtime1.png) + + +OR using google visualizations through the googleVis package, check it your self using, e.g. (not shown here) + + +```r +plot_throughtime(terms = list("drosophila", "flower"), limit = 200, gvis = TRUE) +``` + + +...And a google visualization will render on your local browser and you +can play with three types of plots (point, histogram, line), all through +time. The plot is not shown here, but try it out for yourself!! diff --git a/vignettes/rplos_vignette.html b/vignettes/rplos_vignette.html new file mode 100644 index 0000000..617661c --- /dev/null +++ b/vignettes/rplos_vignette.html @@ -0,0 +1,281 @@ + + + + + + + + + + \usepackage[vmargin=1in,hmargin=1in]{geometry} + + + + +

rplos tutorial

+

The rplos package interacts with the API services of PLoS (Public Library of Science) Journals. In order to use rplos, you need to obtain your own key to their API services. Instructions for obtaining and installing keys so that they load automatically when you launch R are on our GitHub Wiki page Installation and use of API keys.

+

This tutorial will go through three use cases to demonstrate the kinds of things possible in rplos.

+ +

Load package from CRAN

+
install.packages("rplos")
+
library(rplos)
+

Search across PLoS papers in various sections of papers

+

searchplos is a general search, and in this case searches for the term Helianthus and returns the DOIs of matching papers

+
searchplos(q = "Helianthus", fl = "id", limit = 5)
+
##                             id
+## 1 10.1371/journal.pone.0057533
+## 2 10.1371/journal.pone.0045899
+## 3 10.1371/journal.pone.0037191
+## 4 10.1371/journal.pone.0051360
+## 5 10.1371/journal.pone.0070347
+

Get only full article DOIs

+
searchplos(q = "*:*", fl = "id", fq = "doc_type:full", start = 0, limit = 5)
+
##                             id
+## 1 10.1371/journal.pntd.0001525
+## 2 10.1371/journal.pone.0049273
+## 3 10.1371/journal.pone.0031364
+## 4 10.1371/journal.pone.0005841
+## 5 10.1371/journal.pone.0005838
+

Get DOIs for only PLoS One articles

+
searchplos(q = "*:*", fl = "id", fq = "cross_published_journal_key:PLoSONE", 
+    start = 0, limit = 5)
+
##                                          id
+## 1        10.1371/journal.pone.0049274/title
+## 2     10.1371/journal.pone.0049274/abstract
+## 3   10.1371/journal.pone.0049274/references
+## 4         10.1371/journal.pone.0049274/body
+## 5 10.1371/journal.pone.0049274/introduction
+

Get DOIs for full article in PLoS One

+
searchplos(q = "*:*", fl = "id", fq = list("cross_published_journal_key:PLoSONE", 
+    "doc_type:full"), start = 0, limit = 5)
+
##                             id
+## 1 10.1371/journal.pone.0049273
+## 2 10.1371/journal.pone.0031364
+## 3 10.1371/journal.pone.0005841
+## 4 10.1371/journal.pone.0005838
+## 5 10.1371/journal.pone.0074814
+

Search for many terms

+
q <- c("ecology", "evolution", "science")
+lapply(q, function(x) searchplos(x, limit = 2))
+
## [[1]]
+##                             id
+## 1 10.1371/journal.pone.0059813
+## 2 10.1371/journal.pone.0001248
+## 
+## [[2]]
+##                             id
+## 1 10.1371/journal.pbio.0050030
+## 2 10.1371/journal.pbio.0030245
+## 
+## [[3]]
+##                             id
+## 1 10.1371/journal.pbio.0020122
+## 2 10.1371/journal.pbio.1001166
+

Search on specific sections

+

A suite of functions was created as light wrappers around searchplos, as shorthand for searching specific sections of a paper.

+ +

plosauthor searches across authors, and in this case returns the authors of the matching papers. The fl parameter determines what is returned.

+
plosauthor(q = "Eisen", fl = "author", limit = 5)
+
##             author
+## 1 Jonathan A Eisen
+## 2 Jonathan A Eisen
+## 3 Jonathan A Eisen
+## 4 Jonathan A Eisen
+## 5 Jonathan A Eisen
+

plosabstract searches across abstracts, and in this case returns the id and title of the matching papers

+
plosabstract(q = "drosophila", fl = "id,title", limit = 5)
+
##                             id
+## 1 10.1371/journal.pbio.0040198
+## 2 10.1371/journal.pbio.0030246
+## 3 10.1371/journal.pone.0012421
+## 4 10.1371/journal.pbio.1000318
+## 5 10.1371/journal.pbio.0030389
+##                                                                                       title
+## 1                                                                               All for All
+## 2                                               School Students as Drosophila Experimenters
+## 3                                      Host Range and Specificity of the Drosophila C Virus
+## 4 Genomic Responses to Abnormal Gene Dosage: The X Chromosome Improved on a Common Strategy
+## 5                               New Environments Set the Stage for Changing Tastes in Mates
+

plostitle searches across titles, and in this case returns the title and journal of the matching papers

+
plostitle(q = "drosophila", fl = "title,journal", limit = 5)
+
##                      journal
+## 1 PLoS Computational Biology
+## 2               PLoS Biology
+## 3              PLoS Genetics
+## 4                   PLoS ONE
+## 5               PLoS Biology
+##                                                   title
+## 1            Parametric Alignment of Drosophila Genomes
+## 2           School Students as Drosophila Experimenters
+## 3 Phenotypic Plasticity of the Drosophila Transcriptome
+## 4              A Tripartite Synapse Model in Drosophila
+## 5           Expression in Aneuploid Drosophila S2 Cells
+ +

Facet by journal

+
facetplos(q = "*:*", facet.field = "journal")
+
## $facet_queries
+## NULL
+## 
+## $facet_fields
+## $facet_fields$journal
+##                                  X1     X2
+## 1                          plos one 704650
+## 2                     plos genetics  34218
+## 3                    plos pathogens  29964
+## 4        plos computational biology  25417
+## 5                      plos biology  24257
+## 6  plos neglected tropical diseases  19520
+## 7                     plos medicine  17232
+## 8              plos clinical trials    521
+## 9                      plos medicin      9
+## 10                 plos collections      5
+## 
+## 
+## $facet_dates
+## NULL
+## 
+## $facet_ranges
+## NULL
+

Using facet.query to get counts

+
facetplos(q = "*:*", facet.field = "journal", facet.query = "cell,bird")
+
## $facet_queries
+##   term value
+## 1 cell 83275
+## 2 bird  8353
+## 
+## $facet_fields
+## $facet_fields$journal
+##                                  X1     X2
+## 1                          plos one 704650
+## 2                     plos genetics  34218
+## 3                    plos pathogens  29964
+## 4        plos computational biology  25417
+## 5                      plos biology  24257
+## 6  plos neglected tropical diseases  19520
+## 7                     plos medicine  17232
+## 8              plos clinical trials    521
+## 9                      plos medicin      9
+## 10                 plos collections      5
+## 
+## 
+## $facet_dates
+## NULL
+## 
+## $facet_ranges
+## NULL
+

Date faceting

+
facetplos(q = "*:*", url = url, facet.date = "publication_date", facet.date.start = "NOW/DAY-5DAYS", 
+    facet.date.end = "NOW", facet.date.gap = "+1DAY")
+
## $facet_queries
+## NULL
+## 
+## $facet_fields
+## NULL
+## 
+## $facet_dates
+## $facet_dates$publication_date
+##                   date value
+## 1 2014-02-12T00:00:00Z  2508
+## 2 2014-02-13T00:00:00Z  2334
+## 3 2014-02-14T00:00:00Z   968
+## 4 2014-02-15T00:00:00Z     0
+## 5 2014-02-16T00:00:00Z     0
+## 6 2014-02-17T00:00:00Z     0
+## 
+## 
+## $facet_ranges
+## NULL
+ +

Search for the term alcohol in the abstracts of articles, and return only 2 results

+
highplos(q = "alcohol", hl.fl = "abstract", rows = 2)
+
## $`10.1371/journal.pmed.0040151`
+## $`10.1371/journal.pmed.0040151`$abstract
+## [1] "Background: <em>Alcohol</em> consumption causes an estimated 4% of the global disease burden, prompting"
+## 
+## 
+## $`10.1371/journal.pone.0027752`
+## $`10.1371/journal.pone.0027752`$abstract
+## [1] "Background: The negative influences of <em>alcohol</em> on TB management with regard to delays in seeking"
+

Search for the term alcohol in the abstracts of articles, return fragments of 20 characters, and return only 2 results

+
highplos(q = "alcohol", hl.fl = "abstract", hl.fragsize = 20, rows = 2)
+
## $`10.1371/journal.pmed.0040151`
+## $`10.1371/journal.pmed.0040151`$abstract
+## [1] "Background: <em>Alcohol</em>"
+## 
+## 
+## $`10.1371/journal.pone.0027752`
+## $`10.1371/journal.pone.0027752`$abstract
+## [1] " of <em>alcohol</em> on TB management"
+

Search for the term experiment across all sections of an article, return only the id (DOI) and title fields (via fl), search in full articles only (via fq='doc_type:full'), and return only 2 results

+
highplos(q = "everything:\"experiment\"", fl = "id,title", fq = "doc_type:full", 
+    rows = 2)
+
## $`10.1371/journal.pone.0039681`
+## $`10.1371/journal.pone.0039681`$everything
+## [1] " Selection of Transcriptomics <em>Experiments</em> Improves Guilt-by-Association Analyses Transcriptomics <em>Experiment</em>"
+## 
+## 
+## $`10.1371/journal.pone.0051016`
+## $`10.1371/journal.pone.0051016`$everything
+## [1] "  Evolutionary Biology     Breeding <em>Experience</em> Might Be a Major Determinant of Breeding Probability in Long-Lived"
+

Search for terms and visualize results as a histogram OR as a plot through time

+

plosword allows you to search for 1 to K words and visualize the results as a histogram, comparing number of matching papers for each word

+
out <- plosword(list("monkey", "Helianthus", "sunflower", "protein", "whale"), 
+    vis = "TRUE")
+out$table
+
##   No_Articles       Term
+## 1        6894     monkey
+## 2         230 Helianthus
+## 3         607  sunflower
+## 4       73707    protein
+## 5         797      whale
+
out$plot
+
+plot of chunk plosword1plot

plot of chunk plosword1plot

+
+

You can also pass in curl options, in this case get verbose information on the curl call.

+
plosword("Helianthus", callopts = list(verbose = TRUE))
+
## Number of articles with search term 
+##                                 230
+

Visualize terms

+

plot_throughtime allows you to search for up to 2 words and visualize the results as a line plot through time, comparing the number of matching articles through time. Visualize with the ggplot2 package, only up to two terms for now.

+
plot_throughtime(terms = "phylogeny", limit = 200)
+
+plot of chunk throughtime1

plot of chunk throughtime1

+
+

OR use Google visualizations through the googleVis package; check it out yourself using, e.g., the following (not shown here)

+
plot_throughtime(terms = list("drosophila", "flower"), limit = 200, gvis = TRUE)
+

...And a Google visualization will render in your local browser and you can play with three types of plots (point, histogram, line), all through time. The plot is not shown here, but try it out for yourself!!

+ + diff --git a/vignettes/rplos_vignette.md b/vignettes/rplos_vignette.md new file mode 100644 index 0000000..64a7a0f --- /dev/null +++ b/vignettes/rplos_vignette.md @@ -0,0 +1,432 @@ + + +rplos tutorial +===== + +The `rplos` package interacts with the API services of [PLoS](http://www.plos.org/) (Public Library of Science) Journals. In order to use `rplos`, you need to obtain [your own key](http://api.plos.org/registration/) to their API services. Instruction for obtaining and installing keys so they load automatically when you launch R are on our GitHub Wiki page [Installation and use of API keys](https://github.com/ropensci/rOpenSci/wiki/Installation-and-use-of-API-keys). + +This tutorial will go through three use cases to demonstrate the kinds +of things possible in `rplos`. + +* Search across PLoS papers in various sections of papers +* Search for terms and visualize results as a histogram OR as a plot through time +* Text mining of scientific literature + +### Load package from CRAN + + +```r +install.packages("rplos") +``` + + + +```r +library(rplos) +``` + + +### Search across PLoS papers in various sections of papers + +`searchplos` is a general search, and in this case searches for the term +**Helianthus** and returns the DOI's of matching papers + + +```r +searchplos(q = "Helianthus", fl = "id", limit = 5) +``` + +``` +## id +## 1 10.1371/journal.pone.0057533 +## 2 10.1371/journal.pone.0045899 +## 3 10.1371/journal.pone.0037191 +## 4 10.1371/journal.pone.0051360 +## 5 10.1371/journal.pone.0070347 +``` + + +Get only full article DOIs + + +```r +searchplos(q = "*:*", fl = "id", fq = "doc_type:full", start = 0, limit = 5) +``` + +``` +## id +## 1 10.1371/journal.pntd.0001525 +## 2 10.1371/journal.pone.0049273 +## 3 10.1371/journal.pone.0031364 +## 4 10.1371/journal.pone.0005841 +## 5 10.1371/journal.pone.0005838 +``` + + +Get DOIs for only PLoS One articles + + +```r +searchplos(q = "*:*", fl = "id", fq = "cross_published_journal_key:PLoSONE", + start = 0, 
limit = 5) +``` + +``` +## id +## 1 10.1371/journal.pone.0049274/title +## 2 10.1371/journal.pone.0049274/abstract +## 3 10.1371/journal.pone.0049274/references +## 4 10.1371/journal.pone.0049274/body +## 5 10.1371/journal.pone.0049274/introduction +``` + + +Get DOIs for full article in PLoS One + + +```r +searchplos(q = "*:*", fl = "id", fq = list("cross_published_journal_key:PLoSONE", + "doc_type:full"), start = 0, limit = 5) +``` + +``` +## id +## 1 10.1371/journal.pone.0049273 +## 2 10.1371/journal.pone.0031364 +## 3 10.1371/journal.pone.0005841 +## 4 10.1371/journal.pone.0005838 +## 5 10.1371/journal.pone.0074814 +``` + + +Serch for many terms + + +```r +q <- c("ecology", "evolution", "science") +lapply(q, function(x) searchplos(x, limit = 2)) +``` + +``` +## [[1]] +## id +## 1 10.1371/journal.pone.0059813 +## 2 10.1371/journal.pone.0001248 +## +## [[2]] +## id +## 1 10.1371/journal.pbio.0050030 +## 2 10.1371/journal.pbio.0030245 +## +## [[3]] +## id +## 1 10.1371/journal.pbio.0020122 +## 2 10.1371/journal.pbio.1001166 +``` + + +### Search on specific sections + +A suite of functions were created as light wrappers around `searchplos` as a shorthand to search specific sections of a paper. + +* `plosauthor` searchers in authors +* `plosabstract` searches in abstracts +* `plostitle` searches in titles +* `plosfigtabcaps` searches in figure and table captions +* `plossubject` searches in subject areas + +`plosauthor` searches across authors, and in this case returns the authors of the matching papers. 
the fl parameter determines what is returned + + +```r +plosauthor(q = "Eisen", fl = "author", limit = 5) +``` + +``` +## author +## 1 Jonathan A Eisen +## 2 Jonathan A Eisen +## 3 Jonathan A Eisen +## 4 Jonathan A Eisen +## 5 Jonathan A Eisen +``` + + +`plosabstract` searches across abstracts, and in this case returns the id and title of the matching papers + + +```r +plosabstract(q = "drosophila", fl = "id,title", limit = 5) +``` + +``` +## id +## 1 10.1371/journal.pbio.0040198 +## 2 10.1371/journal.pbio.0030246 +## 3 10.1371/journal.pone.0012421 +## 4 10.1371/journal.pbio.1000318 +## 5 10.1371/journal.pbio.0030389 +## title +## 1 All for All +## 2 School Students as Drosophila Experimenters +## 3 Host Range and Specificity of the Drosophila C Virus +## 4 Genomic Responses to Abnormal Gene Dosage: The X Chromosome Improved on a Common Strategy +## 5 New Environments Set the Stage for Changing Tastes in Mates +``` + + +`plostitle` searches across titles, and in this case returns the title and journal of the matching papers + + +```r +plostitle(q = "drosophila", fl = "title,journal", limit = 5) +``` + +``` +## journal +## 1 PLoS Computational Biology +## 2 PLoS Biology +## 3 PLoS Genetics +## 4 PLoS ONE +## 5 PLoS Biology +## title +## 1 Parametric Alignment of Drosophila Genomes +## 2 School Students as Drosophila Experimenters +## 3 Phenotypic Plasticity of the Drosophila Transcriptome +## 4 A Tripartite Synapse Model in Drosophila +## 5 Expression in Aneuploid Drosophila S2 Cells +``` + + +### Faceted search + +Facet by journal + + +```r +facetplos(q = "*:*", facet.field = "journal") +``` + +``` +## $facet_queries +## NULL +## +## $facet_fields +## $facet_fields$journal +## X1 X2 +## 1 plos one 704650 +## 2 plos genetics 34218 +## 3 plos pathogens 29964 +## 4 plos computational biology 25417 +## 5 plos biology 24257 +## 6 plos neglected tropical diseases 19520 +## 7 plos medicine 17232 +## 8 plos clinical trials 521 +## 9 plos medicin 9 +## 10 plos collections 5 
+## +## +## $facet_dates +## NULL +## +## $facet_ranges +## NULL +``` + + +Using `facet.query` to get counts + + +```r +facetplos(q = "*:*", facet.field = "journal", facet.query = "cell,bird") +``` + +``` +## $facet_queries +## term value +## 1 cell 83275 +## 2 bird 8353 +## +## $facet_fields +## $facet_fields$journal +## X1 X2 +## 1 plos one 704650 +## 2 plos genetics 34218 +## 3 plos pathogens 29964 +## 4 plos computational biology 25417 +## 5 plos biology 24257 +## 6 plos neglected tropical diseases 19520 +## 7 plos medicine 17232 +## 8 plos clinical trials 521 +## 9 plos medicin 9 +## 10 plos collections 5 +## +## +## $facet_dates +## NULL +## +## $facet_ranges +## NULL +``` + + +Date faceting + + +```r +facetplos(q = "*:*", url = url, facet.date = "publication_date", facet.date.start = "NOW/DAY-5DAYS", + facet.date.end = "NOW", facet.date.gap = "+1DAY") +``` + +``` +## $facet_queries +## NULL +## +## $facet_fields +## NULL +## +## $facet_dates +## $facet_dates$publication_date +## date value +## 1 2014-02-12T00:00:00Z 2508 +## 2 2014-02-13T00:00:00Z 2334 +## 3 2014-02-14T00:00:00Z 968 +## 4 2014-02-15T00:00:00Z 0 +## 5 2014-02-16T00:00:00Z 0 +## 6 2014-02-17T00:00:00Z 0 +## +## +## $facet_ranges +## NULL +``` + + +### Highlighted search + +Search for the term _alcohol_ in the abstracts of articles, return only 10 results + + +```r +highplos(q = "alcohol", hl.fl = "abstract", rows = 2) +``` + +``` +## $`10.1371/journal.pmed.0040151` +## $`10.1371/journal.pmed.0040151`$abstract +## [1] "Background: Alcohol consumption causes an estimated 4% of the global disease burden, prompting" +## +## +## $`10.1371/journal.pone.0027752` +## $`10.1371/journal.pone.0027752`$abstract +## [1] "Background: The negative influences of alcohol on TB management with regard to delays in seeking" +``` + + +Search for the term _alcohol_ in the abstracts of articles, and return fragment size of 20 characters, return only 5 results + + +```r +highplos(q = "alcohol", hl.fl = "abstract", 
hl.fragsize = 20, rows = 2) +``` + +``` +## $`10.1371/journal.pmed.0040151` +## $`10.1371/journal.pmed.0040151`$abstract +## [1] "Background: Alcohol" +## +## +## $`10.1371/journal.pone.0027752` +## $`10.1371/journal.pone.0027752`$abstract +## [1] " of alcohol on TB management" +``` + + +Search for the term _experiment_ across all sections of an article, return id (DOI) and title fl only, search in full articles only (via `fq='doc_type:full'`), and return only 10 results + + +```r +highplos(q = "everything:\"experiment\"", fl = "id,title", fq = "doc_type:full", + rows = 2) +``` + +``` +## $`10.1371/journal.pone.0039681` +## $`10.1371/journal.pone.0039681`$everything +## [1] " Selection of Transcriptomics Experiments Improves Guilt-by-Association Analyses Transcriptomics Experiment" +## +## +## $`10.1371/journal.pone.0051016` +## $`10.1371/journal.pone.0051016`$everything +## [1] " Evolutionary Biology Breeding Experience Might Be a Major Determinant of Breeding Probability in Long-Lived" +``` + + +### Search for terms and visualize results as a histogram OR as a plot through time + +`plosword` allows you to search for 1 to K words and visualize the results +as a histogram, comparing number of matching papers for each word + + +```r +out <- plosword(list("monkey", "Helianthus", "sunflower", "protein", "whale"), + vis = "TRUE") +out$table +``` + +``` +## No_Articles Term +## 1 6894 monkey +## 2 230 Helianthus +## 3 607 sunflower +## 4 73707 protein +## 5 797 whale +``` + + + +```r +out$plot +``` + +![plot of chunk plosword1plot](figure/plosword1plot.png) + + +You can also pass in curl options, in this case get verbose information on the curl call. + + +```r +plosword("Helianthus", callopts = list(verbose = TRUE)) +``` + +``` +## Number of articles with search term +## 230 +``` + + +### Visualize terms + +`plot_througtime` allows you to search for up to 2 words and visualize the results as a line plot through time, comparing number of articles matching through time. 
Visualize with the ggplot2 package, only up to two terms for now. + + +```r +plot_throughtime(terms = "phylogeny", limit = 200) +``` + +![plot of chunk throughtime1](figure/throughtime1.png) + + +OR using google visualizations through the googleVis package, check it your self using, e.g. (not shown here) + + +```r +plot_throughtime(terms = list("drosophila", "flower"), limit = 200, gvis = TRUE) +``` + + +...And a google visualization will render on your local browser and you +can play with three types of plots (point, histogram, line), all through +time. The plot is not shown here, but try it out for yourself!! diff --git a/vignettes/rplos_vignette.pdf b/vignettes/rplos_vignette.pdf new file mode 100644 index 0000000..252775b Binary files /dev/null and b/vignettes/rplos_vignette.pdf differ