
Commit 2866909
Merge pull request #87 from krlmlr/feature/iter-all
- table data iterator gains new members `next_paged()`, `get_schema()` and `get_rows_fetched()` (#87, @krlmlr).
krlmlr committed Feb 24, 2016
2 parents 94e5c3b + 45ee37c commit 2866909
Showing 7 changed files with 76 additions and 19 deletions.
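
Taken together, the diff below gives the table data iterator five members. A minimal usage sketch (not part of the commit; the project, dataset and table names are placeholders for a table you can read):

```r
library(bigrquery)

# Placeholders: substitute a project, dataset and table you have access to.
iter <- list_tabledata_iter("my-project", "my_dataset", "my_table")

iter$get_schema()                # schema of the table, known before any fetch
chunk <- iter$next_(100)         # one chunk of up to 100 rows
more  <- iter$next_paged(25000)  # 25000 rows, fetched in pages behind the scenes

iter$get_rows_fetched()          # rows retrieved so far, across both calls
iter$is_complete()               # TRUE once the whole table has been read
```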
DESCRIPTION (2 changes: 1 addition & 1 deletion)
@@ -18,4 +18,4 @@ Suggests:
     testthat
 License: GPL-3
 LazyData: true
-RoxygenNote: 5.0.1.9000
+RoxygenNote: 5.0.1
NEWS.md (4 changes: 3 additions & 1 deletion)
@@ -1,5 +1,7 @@
 # Version 0.1.0.9000

+* Computation of the SQL data type that corresponds to a given R object is now more robust against unknown classes. (#95, @krlmlr)
+
 * A data frame with full schema information is returned for zero-row results. (#88, @krlmlr)

 * New `exists_table()`. (#91, @krlmlr)
@@ -10,6 +12,6 @@

 * New `format_dataset()` and `format_table()`. (#81, @krlmlr)

-* New `list_tabledata_iter()` that allows fetching a table in chunks of varying size. (#77, @krlmlr)
+* New `list_tabledata_iter()` that allows fetching a table in chunks of varying size. (#77, #87, @krlmlr)

 * Add support for API keys via the `BIGRQUERY_API_KEY` environment variable. (#49)
R/bigrquery.r (11 changes: 11 additions & 0 deletions)
@@ -4,6 +4,17 @@
 #' please see the project development page: \url{github.com/rstats-db/bigrquery}.
 #' The most important method to get started with is \code{\link{query_exec}}.
 #'
+#' @section Package options:
+#' \describe{
+#' \item{\code{bigrquery.quiet}}{Verbose output during processing? The default
+#'   value, \code{NA}, turns on verbose output for queries that run longer than
+#'   two seconds. Use \code{TRUE} for immediate verbose output, \code{FALSE}
+#'   for quiet operation.}
+#'
+#' \item{\code{bigrquery.page.size}}{Default page size for fetching data,
+#'   defaults to 1e4.}
+#' }
+#'
 #' @name bigrquery
 #' @aliases bigquery
 #' @docType package
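
As a quick illustration of the two options documented above (my example, not part of the commit), both can be set per session before any fetch:

```r
# Immediate progress output and a smaller page size; the documented
# defaults are NA and 1e4, respectively.
options(bigrquery.quiet = FALSE, bigrquery.page.size = 1000)

getOption("bigrquery.page.size")  # picked up by the fetching functions below
```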
R/tabledata.r (51 changes: 40 additions & 11 deletions)
@@ -52,7 +52,8 @@ list_tabledata <- function(project, dataset, table, page_size = 1e4,
 #' @export
 list_tabledata_callback <- function(project, dataset, table, callback,
                                     table_info = NULL,
-                                    page_size = 1e4, max_pages = 10,
+                                    page_size = getOption("bigrquery.page.size"),
+                                    max_pages = 10,
                                     warn = TRUE,
                                     quiet = getOption("bigrquery.quiet")) {
   assert_that(is.string(project), is.string(dataset), is.string(table))
@@ -93,10 +94,6 @@ list_tabledata_callback <- function(project, dataset, table, callback,
   invisible(TRUE)
 }

-#' @description
-#' \code{list_tabledata_iter} returns a named list with components \code{next_}
-#' (a function that fetches rows) and \code{is_complete} (a function that checks
-#' if all rows have been fetched).
 #' @rdname list_tabledata
 #' @export
 list_tabledata_iter <- function(project, dataset, table, table_info = NULL) {
@@ -108,7 +105,7 @@ list_tabledata_iter <- function(project, dataset, table, table_info = NULL) {
                  table)

   last_response <- NULL
-  rows_fetched <- 0L
+  rows_fetched <- 0

   next_ <- function(n) {
     query <- list(maxResults = n)
@@ -117,9 +114,7 @@ list_tabledata_iter <- function(project, dataset, table, table_info = NULL) {
     response <- bq_get(url, query = query)

     data <- extract_data(response$rows, schema)
-    if (!is.null(data)) {
-      rows_fetched <<- rows_fetched + nrow(data)
-    }
+    rows_fetched <<- rows_fetched + nrow(data)

     # Record only page token and total number of rows to reduce memory consumption
     last_response <<- response[c("pageToken", "totalRows")]
@@ -128,10 +123,44 @@ list_tabledata_iter <- function(project, dataset, table, table_info = NULL) {
   }

   is_complete <- function() {
-    !is.null(last_response) && rows_fetched >= as.integer(last_response$totalRows)
+    !is.null(last_response) && rows_fetched >= as.numeric(last_response$totalRows)
   }

+  next_paged <- function(n, page_size = getOption("bigrquery.page.size")) {
+    target_rows_fetched <- rows_fetched + n
+
+    ret <- list()
+    repeat {
+      next_n <- min(page_size, target_rows_fetched - rows_fetched)
+      chunk <- next_(next_n)
+
+      # This has O(n^2) aggregated run time, but fetching large data from
+      # BigQuery will be slow for other reasons
+      ret <- c(ret, list(chunk))
+
+      if (is_complete() || rows_fetched >= target_rows_fetched) {
+        break
+      }
+    }
+    do.call(rbind, ret)
+  }
+
+  get_schema <- function() {
+    schema
+  }
+
+  get_rows_fetched <- function() {
+    rows_fetched
+  }
+
-  list(next_ = next_, is_complete = is_complete)
+  #' @description
+  #' \code{list_tabledata_iter} returns a named list with functions \code{next_}
+  #' (fetches one chunk of rows), \code{next_paged} (fetches arbitrarily many
+  #' rows using a specified page size), \code{is_complete} (checks if all rows
+  #' have been fetched), \code{get_schema} (returns the schema of the table),
+  #' and \code{get_rows_fetched} (returns the number of rows already fetched).
+  list(next_ = next_, next_paged = next_paged, is_complete = is_complete,
+       get_schema = get_schema, get_rows_fetched = get_rows_fetched)
 }

 #Types can be loaded into R, record is not supported yet.
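
The paging arithmetic in `next_paged()` can be checked in isolation. The sketch below (not from the commit; `fake_next_`, `next_paged_demo` and `total_rows` are made-up names) replays the same loop against a local stand-in for `next_`:

```r
# Stand-in state for one iterator: 7 rows available, none fetched yet.
rows_fetched <- 0
total_rows <- 7

fake_next_ <- function(n) {
  # Mimics next_(): serves up to n rows from a local "table".
  n <- min(n, total_rows - rows_fetched)
  chunk <- data.frame(x = rows_fetched + seq_len(n))
  rows_fetched <<- rows_fetched + n
  chunk
}

next_paged_demo <- function(n, page_size = 3) {
  # Same loop as next_paged(): request pages until n rows or end of table.
  target_rows_fetched <- rows_fetched + n
  ret <- list()
  repeat {
    next_n <- min(page_size, target_rows_fetched - rows_fetched)
    ret <- c(ret, list(fake_next_(next_n)))
    if (rows_fetched >= total_rows || rows_fetched >= target_rows_fetched) {
      break
    }
  }
  do.call(rbind, ret)
}

nrow(next_paged_demo(7))  # 7: fetched as pages of 3, 3 and 1 rows
```

With `n = 7` and `page_size = 3` the loop issues requests for 3, 3 and 1 rows; the repeated `c(ret, list(chunk))` copy is what the in-diff comment flags as O(n^2) in the aggregate, a deliberate trade-off since API latency dominates anyway.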
R/zzz.r (3 changes: 2 additions & 1 deletion)
@@ -1,7 +1,8 @@
 .onLoad <- function(libname, pkgname) {
   op <- options()
   defaults <- list(
-    bigrquery.quiet = NA
+    bigrquery.quiet = NA,
+    bigrquery.page.size = 1e4
   )
   toset <- !(names(defaults) %in% names(op))
   if (any(toset)) options(defaults[toset])
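
For reference, the `toset` idiom above fills in only those options the user has not already set, so a value set before the package loads wins. A small sketch of the same pattern in a plain session (my example, not part of the commit):

```r
# Pretend the user set a page size before the package was loaded.
options(bigrquery.page.size = 500)

# The .onLoad() pattern: apply defaults without clobbering user settings.
op <- options()
defaults <- list(bigrquery.quiet = NA, bigrquery.page.size = 1e4)
toset <- !(names(defaults) %in% names(op))
if (any(toset)) options(defaults[toset])

getOption("bigrquery.page.size")  # 500: the user's value is kept
getOption("bigrquery.quiet")      # NA: the missing default was filled in
```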
man/bigrquery.Rd (12 changes: 12 additions & 0 deletions)

Generated file; diff not rendered by default.

man/list_tabledata.Rd (12 changes: 7 additions & 5 deletions)

Generated file; diff not rendered by default.
