fix #1 fix #2 fix two issues with summarizer fxn

ropensci-archive · Nov 30, 2017 · 405b0b4 · 405b0b4
1 parent e6518a1
commit 405b0b4
Show file tree

Hide file tree

Showing 6 changed files with 63 additions and 374 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,5 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(print,tv_summary)
 S3method(tv_clean_ids,character)
 S3method(tv_clean_ids,data.frame)
 S3method(tv_clean_ids,default)

diff --git a/R/do_tax_clean.R b/R/do_tax_clean.R
@@ -1,43 +1,41 @@
 db_options <- c("bold", "col", "eol", "gbif", "iucn", "natserv", "nbn", 
   "tol", "tropicos", "itis", "ncbi", "worms")
 
-
 do_nms <- function(x, tax_names = NULL, tax_ids = NULL, db = NULL) {
-    if (!is.null(tax_names)) {
-        xx <- x[[tax_names]] 
-        if (is.null(xx)) stop("'tax_names' field not found or empty")
-        # get ID for each name
-        # get classifications for each ID
-    } else {
-        xx <- x[[tax_ids]]
-        if (is.null(xx)) stop("'tax_ids' field not found or empty")
-        # get classifications for each ID
-        taxize::classification()
-    }
-
-    xx_uniq <- unique(xx)
-    if (!db %in$% db_options) stop("'db' not in set of db options, see help")
-
+  if (!is.null(tax_names)) {
+      xx <- x[[tax_names]] 
+      if (is.null(xx)) stop("'tax_names' field not found or empty")
+      # get ID for each name
+      # get classifications for each ID
+  } else {
+      xx <- x[[tax_ids]]
+      if (is.null(xx)) stop("'tax_ids' field not found or empty")
+      # get classifications for each ID
+      taxize::classification()
+  }
+
+  xx_uniq <- unique(xx)
+  if (!db %in$% db_options) stop("'db' not in set of db options, see help")
 }
 
 do_ids <- function(x, col = NULL, ids = NULL, db = NULL) {
-    if (!db %in% db_options) stop("'db' not in set of db options, see help")
-    if (!is.null(col)) {
-        ids <- x[[col]] 
-        if (is.null(ids)) stop("'col' field not found or empty")
-    }
-    
-    # get uniq ids
-    ids_u <- unique(ids)
-    # get classifications for each ID
-    ## chunk by 50 per request in attemp to avoid server timeouts/etc
-    rws <- seq_len(length(ids))
-    chunk_size <- 50
-    chks <- split(rws, ceiling(seq_along(rws) / chunk_size))
-    cls <- list()
-    for (i in seq_along(chks)) {
-        cls[[i]] <- taxize::classification(ids_u[chks[[i]]], db = db)
-    }
-    cls <- unlist(cls, recursive = FALSE)
-    return(rbind(structure(cls, class = "classification")))
+  if (!db %in% db_options) stop("'db' not in set of db options, see help")
+  if (!is.null(col)) {
+      ids <- x[[col]] 
+      if (is.null(ids)) stop("'col' field not found or empty")
+  }
+
+  # get uniq ids
+  ids_u <- unique(ids)
+  # get classifications for each ID
+  ## chunk by 50 per request in attemp to avoid server timeouts/etc
+  rws <- seq_len(length(ids))
+  chunk_size <- 50
+  chks <- split(rws, ceiling(seq_along(rws) / chunk_size))
+  cls <- list()
+  for (i in seq_along(chks)) {
+      cls[[i]] <- taxize::classification(ids_u[chks[[i]]], db = db)
+  }
+  cls <- unlist(cls, recursive = FALSE)
+  return(rbind(structure(cls, class = "classification")))
 }
diff --git a/R/tv_summarise.R b/R/tv_summarise.R
@@ -10,13 +10,16 @@
 #'    data.table = FALSE))
 #' out <- tv_clean_ids(x, ids = dat$id, db = "ncbi")
 #' (res <- tv_summarise(out))
+#' res$summary
 #' res$by_rank
 #' res$by_rank_name
 #' res$by_within_rank
 tv_summarise <- function(x) {
 	# must be a data.frame
 	assert(x, "data.frame")
   x <- tbl_df(x)
+  # summary data
+  sumdat <- length(unique(x$query))
 	# by rank
   rank <- x %>% 
     group_by(rank) %>% 
@@ -37,15 +40,24 @@ tv_summarise <- function(x) {
     group_by(rank) %>% 
     select(-percent) %>% 
     mutate(percent = round((count / sum(count)) * 100)) %>% 
-    nest() %>% 
-    unlist(recursive = FALSE) %>% 
-    unname
+    nest()
+  within_rank <- stats::setNames(within_rank$data, within_rank$rank)
 
   # compile output
   out <- list(
+    summary = list(spp = sumdat),
     by_rank = rank, 
     by_rank_name = rank_name,
     by_within_rank = within_rank
   )
 	return(structure(out, class = "tv_summary"))
 }
+
+#' @export
+print.tv_summary <- function(x, ...) {
+  cat("<tv_summary>", sep = "\n")
+  cat(sprintf(" no. taxa: %s", x$summary$spp), sep = "\n")
+  cat(sprintf(" by rank: N (%s)", NROW(x$by_rank)), sep = "\n")
+  cat(sprintf(" by rank name: N (%s)", NROW(x$by_rank_name)), sep = "\n")
+  cat(sprintf(" within ranks: N (%s)", length(x$by_within_rank)), sep = "\n")
+}
diff --git a/README.Rmd b/README.Rmd
@@ -1,6 +1,14 @@
 taxview
 =======
 
+```{r echo=FALSE}
+knitr::opts_chunk$set(
+  comment = "#>",
+  collapse = TRUE,
+  warning = FALSE
+)
+```
+
 taxonomy based data explorer
 
 ## install