Skip to content
This repository has been archived by the owner on Sep 9, 2022. It is now read-only.

Commit

Permalink
fix #1 fix #2 fix two issues with summarizer fxn
Browse files Browse the repository at this point in the history
  • Loading branch information
sckott committed Nov 30, 2017
1 parent e6518a1 commit 405b0b4
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 374 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

S3method(print,tv_summary)
S3method(tv_clean_ids,character)
S3method(tv_clean_ids,data.frame)
S3method(tv_clean_ids,default)
Expand Down
68 changes: 33 additions & 35 deletions R/do_tax_clean.R
@@ -1,43 +1,41 @@
db_options <- c("bold", "col", "eol", "gbif", "iucn", "natserv", "nbn",
"tol", "tropicos", "itis", "ncbi", "worms")


do_nms <- function(x, tax_names = NULL, tax_ids = NULL, db = NULL) {
if (!is.null(tax_names)) {
xx <- x[[tax_names]]
if (is.null(xx)) stop("'tax_names' field not found or empty")
# get ID for each name
# get classifications for each ID
} else {
xx <- x[[tax_ids]]
if (is.null(xx)) stop("'tax_ids' field not found or empty")
# get classifications for each ID
taxize::classification()
}

xx_uniq <- unique(xx)
if (!db %in$% db_options) stop("'db' not in set of db options, see help")

if (!is.null(tax_names)) {
xx <- x[[tax_names]]
if (is.null(xx)) stop("'tax_names' field not found or empty")
# get ID for each name
# get classifications for each ID
} else {
xx <- x[[tax_ids]]
if (is.null(xx)) stop("'tax_ids' field not found or empty")
# get classifications for each ID
taxize::classification()
}

xx_uniq <- unique(xx)
if (!db %in$% db_options) stop("'db' not in set of db options, see help")
}

do_ids <- function(x, col = NULL, ids = NULL, db = NULL) {
if (!db %in% db_options) stop("'db' not in set of db options, see help")
if (!is.null(col)) {
ids <- x[[col]]
if (is.null(ids)) stop("'col' field not found or empty")
}
# get uniq ids
ids_u <- unique(ids)
# get classifications for each ID
## chunk by 50 per request in attemp to avoid server timeouts/etc
rws <- seq_len(length(ids))
chunk_size <- 50
chks <- split(rws, ceiling(seq_along(rws) / chunk_size))
cls <- list()
for (i in seq_along(chks)) {
cls[[i]] <- taxize::classification(ids_u[chks[[i]]], db = db)
}
cls <- unlist(cls, recursive = FALSE)
return(rbind(structure(cls, class = "classification")))
if (!db %in% db_options) stop("'db' not in set of db options, see help")
if (!is.null(col)) {
ids <- x[[col]]
if (is.null(ids)) stop("'col' field not found or empty")
}

# get uniq ids
ids_u <- unique(ids)
# get classifications for each ID
## chunk by 50 per request in attemp to avoid server timeouts/etc
rws <- seq_len(length(ids))
chunk_size <- 50
chks <- split(rws, ceiling(seq_along(rws) / chunk_size))
cls <- list()
for (i in seq_along(chks)) {
cls[[i]] <- taxize::classification(ids_u[chks[[i]]], db = db)
}
cls <- unlist(cls, recursive = FALSE)
return(rbind(structure(cls, class = "classification")))
}
18 changes: 15 additions & 3 deletions R/tv_summarise.R
Expand Up @@ -10,13 +10,16 @@
#' data.table = FALSE))
#' out <- tv_clean_ids(x, ids = dat$id, db = "ncbi")
#' (res <- tv_summarise(out))
#' res$summary
#' res$by_rank
#' res$by_rank_name
#' res$by_within_rank
tv_summarise <- function(x) {
# must be a data.frame
assert(x, "data.frame")
x <- tbl_df(x)
# summary data
sumdat <- length(unique(x$query))
# by rank
rank <- x %>%
group_by(rank) %>%
Expand All @@ -37,15 +40,24 @@ tv_summarise <- function(x) {
group_by(rank) %>%
select(-percent) %>%
mutate(percent = round((count / sum(count)) * 100)) %>%
nest() %>%
unlist(recursive = FALSE) %>%
unname
nest()
within_rank <- stats::setNames(within_rank$data, within_rank$rank)

# compile output
out <- list(
summary = list(spp = sumdat),
by_rank = rank,
by_rank_name = rank_name,
by_within_rank = within_rank
)
return(structure(out, class = "tv_summary"))
}

#' @export
print.tv_summary <- function(x, ...) {
cat("<tv_summary>", sep = "\n")
cat(sprintf(" no. taxa: %s", x$summary$spp), sep = "\n")
cat(sprintf(" by rank: N (%s)", NROW(x$by_rank)), sep = "\n")
cat(sprintf(" by rank name: N (%s)", NROW(x$by_rank_name)), sep = "\n")
cat(sprintf(" within ranks: N (%s)", length(x$by_within_rank)), sep = "\n")
}
8 changes: 8 additions & 0 deletions README.Rmd
@@ -1,6 +1,14 @@
taxview
=======

```{r echo=FALSE}
knitr::opts_chunk$set(
comment = "#>",
collapse = TRUE,
warning = FALSE
)
```

taxonomy based data explorer

## install
Expand Down

0 comments on commit 405b0b4

Please sign in to comment.