Skip to content

Commit

Permalink
cleaned up classification cbind and rbind generic extensions, #332
Browse files Browse the repository at this point in the history
  • Loading branch information
sckott committed Oct 2, 2014
1 parent f12978a commit 41868d8
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 36 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ Imports:
data.table,
vegan,
assertthat,
bold,
bold
Suggests:
testthat,
roxygen2,
Expand Down
64 changes: 36 additions & 28 deletions R/classification.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,13 @@
#'
#' @return A named list of data.frames with the taxonomic classifcation of
#' every supplied taxa.
#' @note If IDs are supplied directly (not from the \code{get_*} functions) you
#' must specify the type of ID. There is a timeout of 1/3 seconds between
#' querries to NCBI.
#' @details If IDs are supplied directly (not from the \code{get_*} functions) you
#' must specify the type of ID. There is a timeout of 1/3 seconds between
#' querries to NCBI.
#'
#' BEWARE: Right now, NBN doesn't return the queried taxon in the classification. But you can
#' attach it yourself quite easily of course. This behavior is different from the other data
#' sources.
#'
#' @seealso \code{\link[taxize]{get_tsn}}, \code{\link[taxize]{get_uid}},
#' \code{\link[taxize]{get_eolid}}, \code{\link[taxize]{get_colid}},
Expand Down Expand Up @@ -77,13 +81,15 @@
#' # rbind and cbind on class classification (from a call to get_colid, get_tsn, etc.
#' # - other than get_ids)
#' cl_col <- classification(get_colid(c("Puma concolor","Accipiter striatus")))
#' cl_uid <- classification(get_uid(c("Puma concolor","Accipiter striatus")))
#' cl_tsn <- classification(get_tsn(c("Puma concolor","Accipiter striatus")))
#' rbind(cl_col)
#' rbind(cl_uid)
#' rbind(cl_tsn)
#' cbind(cl_col)
#'
#' cl_uid <- classification(get_uid(c("Puma concolor","Accipiter striatus")))
#' rbind(cl_uid)
#' cbind(cl_uid)
#'
#' cl_tsn <- classification(get_tsn(c("Puma concolor","Accipiter striatus")))
#' rbind(cl_tsn)
#' cbind(cl_tsn)
#'
#' tsns <- get_tsn(c("Puma concolor","Accipiter striatus"))
Expand Down Expand Up @@ -397,7 +403,10 @@ cbind.classification <- function(x)
}
input <- x
input <- input[sapply(input, class) %in% "data.frame"]
do.call(rbind.fill, lapply(input, gethiernames))
tmp <- do.call(rbind.fill, lapply(input, gethiernames))
tmp$query <- names(x)
tmp$db <- attr(x, "db")
tmp
}

#' @method rbind classification
Expand All @@ -408,9 +417,11 @@ rbind.classification <- function(x)
input <- x
db <- attr(input, "db")
x <- input[vapply(x, class, "") %in% "data.frame"]
for(i in seq_along(x)){
x[[i]]$query <- names(x[i])
}
df <- do.call(rbind.fill, x)
df <- data.frame(source = db, taxonid = gsub("\\.[0-9]+", "", row.names(df)), df)
row.names(df) <- NULL
df$db <- db
return( df )
}

Expand All @@ -430,15 +441,19 @@ cbind.classification_ids <- function(...)
names(values) <- tolower(x[,'rank'])
return( values )
}
do.call(rbind.fill, lapply(input, function(x){
tmp <- lapply(x, gethiernames)
do.call(rbind.fill, tmp)
dat <- do.call(rbind.fill, lapply(input, function(h){
tmp <- lapply(h, gethiernames)
tmp <- do.call(rbind.fill, tmp)
tmp$query <- names(h)
tmp$db <- attr(h, "db")
tmp
})
)
# # sort columns by rank order
# rank_ref$ranks[names(df) %in% tolower(rank_ref$ranks)]
# grep(names(values)[[2]], tolower(rank_ref$ranks))
# torank <- sapply(rank_ref[grep(downto, rank_ref$ranks):nrow(rank_ref),"ranks"], function(x) strsplit(x, ",")[[1]][[1]], USE.NAMES=F)
move_col(tt=dat, y=c('query','db'))
}

move_col <- function(tt, y){
tt[ c(names(tt)[ - sapply(y, function(m) grep(m, names(tt))) ], y) ]
}

#' @method rbind classification_ids
Expand Down Expand Up @@ -471,18 +486,11 @@ rbind.classification_ids <- function(...)
tmp <- do.call(rbind, lapply(df, "[[", i))
source2 <- gsub("\\.[0-9]+", "", row.names(tmp))
row.names(tmp) <- NULL
names(tmp)[1] <- "taxonid"
tmp <- data.frame(source = source2, tmp)
names(tmp)[1] <- "query"
tmp <- data.frame(db = source2, tmp)
get[[i]] <- tmp
}

if(length(get) == 1)
get[[1]]
else
get
# source2 <- gsub("\\.[0-9]+", "", row.names(df))
# row.names(df) <- NULL
# names(df)[1] <- "taxonid"
# df <- data.frame(source = source2, df)
# return( res )
tt <- if(length(get) == 1) get[[1]] else get
move_col(tt, c('query','db'))
}
21 changes: 14 additions & 7 deletions man/classification.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,14 @@ A named list of data.frames with the taxonomic classifcation of
\description{
Retrieve the taxonomic hierarchy for a given taxon ID.
}
\note{
\details{
If IDs are supplied directly (not from the \code{get_*} functions) you
must specify the type of ID. There is a timeout of 1/3 seconds between
querries to NCBI.
must specify the type of ID. There is a timeout of 1/3 seconds between
querries to NCBI.
BEWARE: Right now, NBN doesn't return the queried taxon in the classification. But you can
attach it yourself quite easily of course. This behavior is different from the other data
sources.
}
\examples{
\donttest{
Expand Down Expand Up @@ -129,13 +133,15 @@ cbind(cl)
# rbind and cbind on class classification (from a call to get_colid, get_tsn, etc.
# - other than get_ids)
cl_col <- classification(get_colid(c("Puma concolor","Accipiter striatus")))
cl_uid <- classification(get_uid(c("Puma concolor","Accipiter striatus")))
cl_tsn <- classification(get_tsn(c("Puma concolor","Accipiter striatus")))
rbind(cl_col)
rbind(cl_uid)
rbind(cl_tsn)
cbind(cl_col)

cl_uid <- classification(get_uid(c("Puma concolor","Accipiter striatus")))
rbind(cl_uid)
cbind(cl_uid)

cl_tsn <- classification(get_tsn(c("Puma concolor","Accipiter striatus")))
rbind(cl_tsn)
cbind(cl_tsn)

tsns <- get_tsn(c("Puma concolor","Accipiter striatus"))
Expand All @@ -144,6 +150,7 @@ cbind(cl_tsns)

# NBN data
res <- classification(c("Alopias vulpinus","Pinus sylvestris"), db = 'nbn')
rbind(res)
cbind(res)
}
}
Expand Down

0 comments on commit 41868d8

Please sign in to comment.