From b39d1ce92a0fedfcad70a0c46b34101dc490afe0 Mon Sep 17 00:00:00 2001 From: Scott Chamberlain Date: Mon, 30 Apr 2018 13:33:57 -0700 Subject: [PATCH] use markdown docs, fix #10 - not really fixed, but as good as we can do related, export a function for user to get the all dataset --- DESCRIPTION | 1 + NAMESPACE | 1 + R/eol.R | 81 ++++++++++++++++++++++++++++++++---------- man/eol.Rd | 58 ++++++++++++++++++------------ man/flora_europaea.Rd | 18 +++++----- man/gisd.Rd | 2 +- man/griis.Rd | 4 +-- man/originr-package.Rd | 10 +++--- 8 files changed, 116 insertions(+), 59 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 61c7a4e..57cd410 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,6 +16,7 @@ Authors@R: c( License: MIT + file LICENSE URL: https://github.com/ropensci/originr BugReports: https://github.com/ropensci/originr/issues +Roxygen: list(markdown = TRUE) Imports: crul (>= 0.5.2), jsonlite (>= 1.5), diff --git a/NAMESPACE b/NAMESPACE index 7a83ac6..e427f89 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand export(eol) +export(eol_invasive_data) export(flora_europaea) export(gisd) export(griis) diff --git a/R/eol.R b/R/eol.R index 34355b9..15aaf08 100644 --- a/R/eol.R +++ b/R/eol.R @@ -19,6 +19,16 @@ #' @param ... curl options passed on to \code{\link[crul]{HttpClient}} #' #' @details +#' `eol_invasive_data()` gives you the entire data.frame from +#' the "dataset=all", while `eol()` lets you search on a vector of names +#' against any of the datasets +#' +#' IMPORTANT: note that setting `dataset="all"` will give you surprising results. +#' EOL does not include information on which of the invasive datasets (i.e., gisd100, +#' gisd, isc, daisie, i3n, or mineps) the taxon is found in, and sometimes e.g., if +#' taxon X is in GISD, you might not find it in "all", weird. I don't know +#' why that's happening, but it shouldn't happen. 
+#' #' IMPORTANT: When you get a returned NaN for a taxon, that means it's not on #' the invasive list in question. If the taxon is found, a taxon identifier #' is returned. @@ -27,40 +37,37 @@ #' pull down all data before we can search for your species. Note there is no #' parameter in this API method for searching by taxon name. #' -#' This function is vectorized, so you can pass a single name or a vector +#' `eol()` is vectorized, so you can pass a single name or a vector #' of names. #' #' It's possible to return JSON or XML with the EOL API. However, this function -#' only returns JSON for now. +#' only returns JSON. #' #' Options for the dataset parameter are -#' \itemize{ -#' \item all - All datasets -#' \item gisd100 - 100 of the World's Worst Invasive Alien Species +#' +#' - all - All datasets +#' - gisd100 - 100 of the World's Worst Invasive Alien Species #' (Global Invasive Species Database) http://eol.org/collections/54500 -#' \item gisd - Global Invasive Species Database 2013 +#' - gisd - Global Invasive Species Database 2013 #' http://eol.org/collections/54983 -#' \item isc - Centre for Agriculture and Biosciences International Invasive +#' - isc - Centre for Agriculture and Biosciences International Invasive #' Species Compendium (ISC) http://eol.org/collections/55180 -#' \item daisie - Delivering Alien Invasive Species Inventories for Europe +#' - daisie - Delivering Alien Invasive Species Inventories for Europe #' (DAISIE) Species List http://eol.org/collections/55179 -#' \item i3n - IABIN Invasives Information Network (I3N) Species +#' - i3n - IABIN Invasives Information Network (I3N) Species #' http://eol.org/collections/55176 -#' \item mineps - Marine Invaders of the NE Pacific +#' - mineps - Marine Invaders of the NE Pacific #' Species http://eol.org/collections/55331 -#' } #' #' Datasets are not updated that often. 
Here's last updated dates for some of #' the datasets as of 2014-08-25 #' -#' \itemize{ -#' \item gisd100 updated 6 mos ago -#' \item gisd updated 1 yr ago -#' \item isc updated 1 yr ago -#' \item daisie updated 1 yr ago -#' \item i3n updated 1 yr ago -#' \item mineps updated 1 yr ago -#' } +#' - gisd100 updated 6 mos ago +#' - gisd updated 1 yr ago +#' - isc updated 1 yr ago +#' - daisie updated 1 yr ago +#' - i3n updated 1 yr ago +#' - mineps updated 1 yr ago #' #' @return A list of data.frame's/strings with results, with each element #' named by the input elements to the name parameter. @@ -163,3 +170,39 @@ getmatches <- function(x, y, z) { z[matched, ] } } + +#' @export +#' @rdname eol +eol_invasive_data <- function(...) { + args <- orc(list(per_page = 500, filter = 'taxa')) + path <- "/api/collections/1.0/55367.json" + cli <- crul::HttpClient$new(url = 'http://eol.org', + opts = list(...)) + tt <- cli$get(path, query = args) + tt$raise_for_status() + res <- jsonlite::fromJSON(tt$parse("UTF-8"), FALSE) + data_init <- res$collection_items + message(sprintf("Getting data for %s names...", res$total_items)) + + pages_get <- pages_left(res) + + if (!is.null(pages_get)) { + out <- list() + for (i in pages_get) { + args <- orc(list(page = i, per_page = 500, filter = 'taxa')) + tt <- cli$get(path, query = args) + tt$raise_for_status() + res <- jsonlite::fromJSON(tt$parse("UTF-8"), FALSE) + out[[i]] <- res$collection_items + } + res2 <- orc(out) + dat_all <- do.call(c, list(data_init, do.call(c, res2))) + dat_all <- lapply(dat_all, "[", c("name", "object_id")) + dat <- todf(dat_all) + } else { + dat_all <- lapply(data_init, "[", c("name","object_id")) + dat <- todf(dat_all) + } + return(dat) +} + diff --git a/man/eol.Rd b/man/eol.Rd index a168d46..84fb76a 100644 --- a/man/eol.Rd +++ b/man/eol.Rd @@ -2,10 +2,13 @@ % Please edit documentation in R/eol.R \name{eol} \alias{eol} +\alias{eol_invasive_data} \title{Search for presence of taxonomic names in EOL invasive species 
databases.} \usage{ eol(name, dataset = "all", searchby = grep, page = NULL, per_page = 500, key = NULL, messages = TRUE, count = FALSE, ...) + +eol_invasive_data(...) } \arguments{ \item{name}{A taxonomic name, or a vector of names.} @@ -23,7 +26,7 @@ allows you to fetch more pages of results if there are more than 30 matches \item{key}{Your EOL API key; loads from .Rprofile.} -\item{messages}{(logical) If \code{TRUE} the actual taxon queried is printed +\item{messages}{(logical) If \code{TRUE} the actual taxon queried is printed on the console.} \item{count}{(logical) If TRUE, give back a count of number of taxa listed @@ -39,6 +42,16 @@ named by the input elements to the name parameter. See Details for important information. } \details{ +\code{eol_invasive_data()} gives you the entire data.frame from +the "dataset=all", while \code{eol()} lets you search on a vector of names +against any of the datasets + +IMPORTANT: note that setting \code{dataset="all"} will give you surprising results. +EOL does not include information on which of the invasive datasets (i.e., gisd100, +gisd, isc, daisie, i3n, or mineps) the taxon is found in, and sometimes e.g., if +taxon X is in GISD, you might not find it in "all", weird. I don't know +why that's happening, but it shouldn't happen. + IMPORTANT: When you get a returned NaN for a taxon, that means it's not on the invasive list in question. If the taxon is found, a taxon identifier is returned. @@ -47,39 +60,38 @@ Beware that some datasets are quite large, and may take 30 sec to a minute to pull down all data before we can search for your species. Note there is no parameter in this API method for searching by taxon name. -This function is vectorized, so you can pass a single name or a vector +\code{eol()} is vectorized, so you can pass a single name or a vector of names. It's possible to return JSON or XML with the EOL API. However, this function -only returns JSON for now. +only returns JSON. 
Options for the dataset parameter are \itemize{ - \item all - All datasets - \item gisd100 - 100 of the World's Worst Invasive Alien Species - (Global Invasive Species Database) http://eol.org/collections/54500 - \item gisd - Global Invasive Species Database 2013 - http://eol.org/collections/54983 - \item isc - Centre for Agriculture and Biosciences International Invasive - Species Compendium (ISC) http://eol.org/collections/55180 - \item daisie - Delivering Alien Invasive Species Inventories for Europe - (DAISIE) Species List http://eol.org/collections/55179 - \item i3n - IABIN Invasives Information Network (I3N) Species - http://eol.org/collections/55176 - \item mineps - Marine Invaders of the NE Pacific - Species http://eol.org/collections/55331 +\item all - All datasets +\item gisd100 - 100 of the World's Worst Invasive Alien Species +(Global Invasive Species Database) http://eol.org/collections/54500 +\item gisd - Global Invasive Species Database 2013 +http://eol.org/collections/54983 +\item isc - Centre for Agriculture and Biosciences International Invasive +Species Compendium (ISC) http://eol.org/collections/55180 +\item daisie - Delivering Alien Invasive Species Inventories for Europe +(DAISIE) Species List http://eol.org/collections/55179 +\item i3n - IABIN Invasives Information Network (I3N) Species +http://eol.org/collections/55176 +\item mineps - Marine Invaders of the NE Pacific +Species http://eol.org/collections/55331 } Datasets are not updated that often. 
Here's last updated dates for some of the datasets as of 2014-08-25 - \itemize{ - \item gisd100 updated 6 mos ago - \item gisd updated 1 yr ago - \item isc updated 1 yr ago - \item daisie updated 1 yr ago - \item i3n updated 1 yr ago - \item mineps updated 1 yr ago +\item gisd100 updated 6 mos ago +\item gisd updated 1 yr ago +\item isc updated 1 yr ago +\item daisie updated 1 yr ago +\item i3n updated 1 yr ago +\item mineps updated 1 yr ago } } \examples{ diff --git a/man/flora_europaea.Rd b/man/flora_europaea.Rd index e7892a1..dc9fc18 100644 --- a/man/flora_europaea.Rd +++ b/man/flora_europaea.Rd @@ -7,33 +7,33 @@ flora_europaea(sp, messages = TRUE, ...) } \arguments{ -\item{sp}{character; a vector of length one with a single scientific +\item{sp}{character; a vector of length one with a single scientific species names in the form of \code{c("Genus species")}.} -\item{messages}{logical; If \code{TRUE} (default), informative messages +\item{messages}{logical; If \code{TRUE} (default), informative messages printed} \item{...}{curl options passed on to \code{\link[crul]{HttpClient}}} } \value{ -A list of vectors containing the countries where the species is +A list of vectors containing the countries where the species is native, exotic, ... } \description{ -This function check the status (native or exotic) of a species +This function check the status (native or exotic) of a species in each of the eu countries. -For that end, it checks Flora Europaea (http://rbg-web2.rbge.org.uk/FE/fe.html) +For that end, it checks Flora Europaea (http://rbg-web2.rbge.org.uk/FE/fe.html) and scrapes the data from there. Note that the webpage contains more information. -As expected, the function is as good as the database is. I think for -native species is robust but new exotic species are not added as to my -knowledge the database is not updated anymore. The database is not able to +As expected, the function is as good as the database is. 
I think for +native species is robust but new exotic species are not added as to my +knowledge the database is not updated anymore. The database is not able to recognize species synonyms. -See \url{http://rbg-web2.rbge.org.uk/FE/data/countries} for explanation +See \url{http://rbg-web2.rbge.org.uk/FE/data/countries} for explanation of the database codes. } \examples{ diff --git a/man/gisd.Rd b/man/gisd.Rd index d211e75..9840d41 100644 --- a/man/gisd.Rd +++ b/man/gisd.Rd @@ -15,7 +15,7 @@ values "Invasive", "Not in GISD". I recomend to check first the not simplified version (default), which contains raw information about the level of invasiveness.} -\item{messages}{logical; If \code{TRUE} (default), informative messages +\item{messages}{logical; If \code{TRUE} (default), informative messages printed.} \item{...}{curl options passed on to \code{\link[crul]{HttpClient}}} diff --git a/man/griis.Rd b/man/griis.Rd index e9b77cb..a550490 100644 --- a/man/griis.Rd +++ b/man/griis.Rd @@ -32,7 +32,7 @@ the results. Default to NULL: return all records.} } \value{ A data.frame with species names, country where recorded, - origin and source among other fields. +origin and source among other fields. } \description{ This retrieves information from GRIIS (http://www.griis.org/) @@ -41,7 +41,7 @@ function is as good as the database is. } \note{ It seems as 'name' overrides 'kindom', which means records from a - a plant species will be returned even if kindom is set to animalia. +a plant species will be returned even if kindom is set to animalia. 
} \examples{ \dontrun{ diff --git a/man/originr-package.Rd b/man/originr-package.Rd index a223488..c8a7443 100644 --- a/man/originr-package.Rd +++ b/man/originr-package.Rd @@ -11,11 +11,11 @@ originr - Species Origin Data \section{Data sources in the package}{ \itemize{ - \item Encyclopedia of Life (http://eol.org) - \item Flora Europaea (http://rbg-web2.rbge.org.uk/FE/fe.html) - \item Global Invasive Species Database (http://www.iucngisd.org/gisd) - \item Native Species Resolver (http://bien.nceas.ucsb.edu/bien/tools/nsr/nsr-ws/) - \item Integrated Taxonomic Information Service (http://www.itis.gov/) +\item Encyclopedia of Life (http://eol.org) +\item Flora Europaea (http://rbg-web2.rbge.org.uk/FE/fe.html) +\item Global Invasive Species Database (http://www.iucngisd.org/gisd) +\item Native Species Resolver (http://bien.nceas.ucsb.edu/bien/tools/nsr/nsr-ws/) +\item Integrated Taxonomic Information Service (http://www.itis.gov/) } }