Skip to content

Commit

Permalink
added nbn_search and get_nbnid fxns #332
Browse files Browse the repository at this point in the history
  • Loading branch information
sckott committed Oct 1, 2014
1 parent 42b3eb2 commit 9529b86
Show file tree
Hide file tree
Showing 5 changed files with 263 additions and 0 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ export(get_gbifid)
export(get_genes)
export(get_genes_avail)
export(get_ids)
export(get_nbnid)
export(get_seqs)
export(get_tpsid)
export(get_tsn)
Expand Down Expand Up @@ -139,6 +140,7 @@ export(iucn_getname)
export(iucn_status)
export(iucn_summary)
export(names_list)
export(nbn_search)
export(ncbi_children)
export(ncbi_get_taxon_summary)
export(ncbi_getbyid)
Expand Down
105 changes: 105 additions & 0 deletions R/get_nbnid.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#' Get the UK National Biodiversity Network ID from taxonomic names.
#'
#' @export
#'
#' @param name character; scientific name.
#' @param ask logical; should get_nbnid be run in interactive mode?
#' If TRUE and more than one ID is found for the species, the user is asked for
#' input. If FALSE NA is returned for multiple matches.
#' @param verbose logical; If TRUE the actual taxon queried is printed on the
#' console.
#' @param rec_only (logical) If \code{TRUE} ids of recommended names are returned (i.e.
#' synonyms are removed). Defaults to \code{FALSE}. Remember, the id of a synonym is a
#' taxa with 'recommended' name status.
#' @param rank (character) If given, we attempt to limit the results to those taxa with the
#' matching rank.
#' @param ... Further args passed on to \code{nbn_search}
#'
#' @return A vector of unique identifiers. If a taxon is not found NA.
#' If more than one ID is found the function asks for user input.
#'
#' @seealso \code{\link[taxize]{get_tsn}}, \code{\link[taxize]{get_uid}},
#' \code{\link[taxize]{get_tpsid}}, \code{\link[taxize]{get_eolid}}
#'
#' @author Scott Chamberlain, \email{myrmecocystus@@gmail.com}
#'
#' @examples \donttest{
#' get_nbnid(name='Poa annua')
#' get_nbnid(name='Poa annua', rec_only=TRUE)
#' get_nbnid(name='Poa annua', rank='Species')
#' get_nbnid(name='Poa annua', rec_only=TRUE, rank='Species')
#' get_nbnid(name='Pinus contorta')
#'
#' # The NBN service handles common names too
#' get_nbnid(name='red-winged blackbird')
#'
#' # When not found
#' get_nbnid(name="uaudnadndj")
#' get_nbnid(c("Chironomus riparius", "uaudnadndj"))
#' }

get_nbnid <- function(name, ask = TRUE, verbose = TRUE, rec_only = FALSE, rank = NULL, ...){
fun <- function(name, ask, verbose) {
mssg(verbose, "\nRetrieving data for taxon '", name, "'\n")
df <- nbn_search(q = name, all = TRUE, ...)$data
if(is.null(df)) df <- data.frame(NULL)

rank_taken <- NA
if(nrow(df)==0){
mssg(verbose, "Not found. Consider checking the spelling or alternate classification")
id <- NA
} else
{
if(rec_only) df <- df[ df$nameStatus == 'Recommended', ]
if(!is.null(rank)) df <- df[ df$rank == rank, ]
df <- df[,c('ptaxonVersionKey','searchMatchTitle','rank','nameStatus')]
names(df)[1] <- 'nbnid'
id <- df$nbnid
rank_taken <- as.character(df$rank)
}

# not found on NBN
if(length(id) == 0){
mssg(verbose, "Not found. Consider checking the spelling or alternate classification")
id <- NA
}
# more than one found -> user input
if(length(id) > 1){
if(ask){
rownames(df) <- 1:nrow(df)
# prompt
message("\n\n")
message("\nMore than one nbnid found for taxon '", name, "'!\n
Enter rownumber of taxon (other inputs will return 'NA'):\n")
print(df)
take <- scan(n = 1, quiet = TRUE, what = 'raw')

if(length(take) == 0)
take <- 'notake'
if(take %in% seq_len(nrow(df))){
take <- as.numeric(take)
message("Input accepted, took nbnid '", as.character(df$nbnid[take]), "'.\n")
id <- as.character(df$nbnid[take])
rank_taken <- as.character(df$rank[take])
} else {
id <- NA
mssg(verbose, "\nReturned 'NA'!\n\n")
}
} else{
id <- NA
}
}
return( c(id=id, rank=rank_taken) )
}
name <- as.character(name)
out <- lapply(name, fun, ask=ask, verbose=verbose)
ids <- sapply(out, "[[", "id")
class(ids) <- "nbnid"
if(!is.na(ids[1])){
urls <- taxize_compact(sapply(out, function(z){
if(!is.na(z[['id']])) sprintf('https://data.nbn.org.uk/Taxa/%s', z[['id']])
}))
attr(ids, 'uri') <- unlist(urls)
}
return(ids)
}
50 changes: 50 additions & 0 deletions R/nbn_search.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#' Search UK National Biodiversity Network database
#'
#' @export
#'
#' @param q (character) The query terms(s)
#' @param prefered (logical) Restrict search to preferred or any
#' @param order (character) The order in which we should sort the results. Default: asc
#' @param sort (character) Sort the results or not.
#' @param start (integer/numeric) The page that the user wants to start displaying the results at.
#' Default: 0
#' @param rows (integer/numeric) The number of rows to show in each page of search results.
#' Default: 25
#' @param taxonOutputGroupKey (character) Vector of taxon output groups.
#' @param all (logical) Get all results, overrides rows parameter if TRUE. Default: FALSE
#' @param ... Further args passed on to \code{\link[httr]{GET}}.
#'
#' @author Scott Chamberlain, \email{myrmecocystus@@gmail.com}
#'
#' @examples \donttest{
#' nbn_search(q = "blackbird")
#' nbn_search(q = "blackbird", start = TRUE)
#' nbn_search(q = "blackbird", all = TRUE)
#' nbn_search(q = "blackbird", taxonOutputGroupKey = "NHMSYS0000080039")
#'
#' # debug curl stuff
#' library('httr')
#' nbn_search(q = "blackbird", config = verbose())
#' }
nbn_search <- function(q, prefered = FALSE, order = 'asc', sort = NULL, start = 0,
rows = 25, taxonOutputGroupKey = NULL, all = FALSE, ...)
{
url <- "https://data.nbn.org.uk/api/search/taxa"
args <- taxize_compact(list(q = q, prefered = prefered, order = order, sort = sort, start = start,
rows = rows, taxonOutputGroupKey = taxonOutputGroupKey))
if(all){
args$rows <- 0
num <- nbn_GET(url, args)$meta$numFound
args$rows <- num
nbn_GET(url, args, ...)
} else { nbn_GET(url, args, ...) }
}

nbn_GET <- function(url, args, ...){
res <- GET(url, query = args, ...)
stop_for_status(res)
tt <- content(res, as = "text")
json <- jsonlite::fromJSON(tt, FALSE)
dat <- do.call(rbind.fill, lapply(json$results, data.frame, stringsAsFactors = FALSE))
list(meta=data.frame(json$header, stringsAsFactors = FALSE), data=dat)
}
58 changes: 58 additions & 0 deletions man/get_nbnid.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
% Generated by roxygen2 (4.0.2): do not edit by hand
\name{get_nbnid}
\alias{get_nbnid}
\title{Get the UK National Biodiversity Network ID from taxonomic names.}
\usage{
get_nbnid(name, ask = TRUE, verbose = TRUE, rec_only = FALSE,
rank = NULL, ...)
}
\arguments{
\item{name}{character; scientific name.}

\item{ask}{logical; should get_nbnid be run in interactive mode?
If TRUE and more than one ID is found for the species, the user is asked for
input. If FALSE NA is returned for multiple matches.}

\item{verbose}{logical; If TRUE the actual taxon queried is printed on the
console.}

\item{rec_only}{(logical) If \code{TRUE} ids of recommended names are returned (i.e.
synonyms are removed). Defaults to \code{FALSE}. Remember, the id of a synonym is a
taxa with 'recommended' name status.}

\item{rank}{(character) If given, we attempt to limit the results to those taxa with the
matching rank.}

\item{...}{Further args passed on to \code{nbn_search}}
}
\value{
A vector of unique identifiers. If a taxon is not found NA.
If more than one ID is found the function asks for user input.
}
\description{
Get the UK National Biodiversity Network ID from taxonomic names.
}
\examples{
\donttest{
get_nbnid(name='Poa annua')
get_nbnid(name='Poa annua', rec_only=TRUE)
get_nbnid(name='Poa annua', rank='Species')
get_nbnid(name='Poa annua', rec_only=TRUE, rank='Species')
get_nbnid(name='Pinus contorta')

# The NBN service handles common names too
get_nbnid(name='red-winged blackbird')

# When not found
get_nbnid(name="uaudnadndj")
get_nbnid(c("Chironomus riparius", "uaudnadndj"))
}
}
\author{
Scott Chamberlain, \email{myrmecocystus@gmail.com}
}
\seealso{
\code{\link[taxize]{get_tsn}}, \code{\link[taxize]{get_uid}},
\code{\link[taxize]{get_tpsid}}, \code{\link[taxize]{get_eolid}}
}

48 changes: 48 additions & 0 deletions man/nbn_search.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
% Generated by roxygen2 (4.0.2): do not edit by hand
\name{nbn_search}
\alias{nbn_search}
\title{Search UK National Biodiversity Network database}
\usage{
nbn_search(q, prefered = FALSE, order = "asc", sort = NULL, start = 0,
rows = 25, taxonOutputGroupKey = NULL, all = FALSE, ...)
}
\arguments{
\item{q}{(character) The query terms(s)}

\item{prefered}{(logical) Restrict search to preferred or any}

\item{order}{(character) The order in which we should sort the results. Default: asc}

\item{sort}{(character) Sort the results or not.}

\item{start}{(integer/numeric) The page that the user wants to start displaying the results at.
Default: 0}

\item{rows}{(integer/numeric) The number of rows to show in each page of search results.
Default: 25}

\item{taxonOutputGroupKey}{(character) Vector of taxon output groups.}

\item{all}{(logical) Get all results, overrides rows parameter if TRUE. Default: FALSE}

\item{...}{Further args passed on to \code{\link[httr]{GET}}.}
}
\description{
Search UK National Biodiversity Network database
}
\examples{
\donttest{
nbn_search(q = "blackbird")
nbn_search(q = "blackbird", start = TRUE)
nbn_search(q = "blackbird", all = TRUE)
nbn_search(q = "blackbird", taxonOutputGroupKey = "NHMSYS0000080039")

# debug curl stuff
library('httr')
nbn_search(q = "blackbird", config = verbose())
}
}
\author{
Scott Chamberlain, \email{myrmecocystus@gmail.com}
}

0 comments on commit 9529b86

Please sign in to comment.