# Post-image of patch e2570ca ("handle large numbers of txids",
# Dominic Bennett, 2018-10-30) to R/stage1.R, with the batching fixed so that
# a final, partial batch of txids is no longer dropped.

#' Search the taxonomy database for all IDs below the given txids
#'
#' Builds a 'txid<ID>[Subtree]' search term from `ps[['txid']]` and runs it
#' through `search_and_cache()` with `rentrez::entrez_search`. When more than
#' 10 txids are supplied they are searched in batches of 10 and the results
#' are combined.
#'
#' @param ps parameters list; its `txid` element holds the IDs to search for
#'   and the list itself is passed on to `search_and_cache()`
#' @param retmax maximum number of IDs requested per search
#' @return vector of ids
#' @family run-private
txids_get <- function(ps, retmax = 1E4) {
  # Run one search for a set of txids (joined with ' OR '), requesting
  # further pages when the reported count exceeds retmax.
  .get <- function(txid) {
    trm <- paste0(paste0('txid', txid, '[Subtree]'), collapse = ' OR ')
    args <- list(db = 'taxonomy', term = trm, retmax = retmax)
    srch_rs <- search_and_cache(func = rentrez::entrez_search,
                                args = args, fnm = 'search', ps = ps)
    txcnt <- srch_rs[['count']]
    txids <- srch_rs[['ids']]
    if (txcnt <= retmax) {
      return(txids)
    }
    # retstart is a 0-based offset, so the last valid offset is txcnt - 1;
    # stopping there avoids an empty request when txcnt is an exact multiple
    # of retmax. The first page (offset 0) is already held in txids.
    ret_strts <- seq(from = retmax, to = txcnt - 1, by = retmax)
    for (ret_strt in ret_strts) {
      args <- list(db = 'taxonomy', term = trm, retmax = retmax,
                   retstart = ret_strt)
      srch_rs <- search_and_cache(func = rentrez::entrez_search,
                                  args = args, fnm = 'search', ps = ps)
      txids <- c(txids, srch_rs[['ids']])
    }
    txids
  }
  qry_txids <- ps[['txid']]
  ntxids <- length(qry_txids)
  btch_sz <- 10
  if (ntxids <= btch_sz) {
    return(.get(qry_txids))
  }
  # One start index per batch; the end index is clamped to ntxids so that the
  # last batch carries the remainder when ntxids is not a multiple of btch_sz.
  strts <- seq(from = 1, to = ntxids, by = btch_sz)
  btchs <- lapply(strts, function(i1) {
    i2 <- min(i1 + btch_sz - 1, ntxids)
    .get(qry_txids[i1:i2])
  })
  # Separate batches can return the same ID (e.g. overlapping subtrees),
  # which a single OR query would report only once.
  unique(unlist(btchs))
}