Skip to content
Permalink
master
Switch branches/tags
Go to file
 
 
Cannot retrieve contributors at this time
#' Use dplyr verbs with a remote database table
#'
#' All data manipulation on SQL tbls are lazy: they will not actually
#' run the query or retrieve the data unless you ask for it: they all return
#' a new `tbl_dbi` object. Use [compute()] to run the query and save the
#' results in a temporary in the database, or use [collect()] to retrieve the
#' results to R. You can see the query with [show_query()].
#'
#' @details
#' For best performance, the database should have an index on the variables
#' that you are grouping by. Use [explain()] to check that the database is using
#' the indexes that you expect.
#'
#' There is one verb that is not lazy: [do()] is eager because it must pull
#' the data into R.
#'
#' @param src A `DBIConnection` object produced by `DBI::dbConnect()`.
#' @param from Either a string (giving a table name),
#' a fully qualified table name created by [in_schema()]
#' or a literal [sql()] string.
#' @param ... Passed on to [tbl_sql()]
#' @export
#' @examples
#' library(dplyr)
#'
#' # Connect to a temporary in-memory SQLite database
#' con <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
#'
#' # Add some data
#' copy_to(con, mtcars)
#' DBI::dbListTables(con)
#'
#' # To retrieve a single table from a source, use `tbl()`
#' con %>% tbl("mtcars")
#'
#' # Use `in_schema()` for fully qualified table names
#' con %>% tbl(in_schema("temp", "mtcars")) %>% head(1)
#'
#' # You can also use pass raw SQL if you want a more sophisticated query
#' con %>% tbl(sql("SELECT * FROM mtcars WHERE cyl = 8"))
#'
#' # If you just want a temporary in-memory database, use src_memdb()
#' src2 <- src_memdb()
#'
#' # To show off the full features of dplyr's database integration,
#' # we'll use the Lahman database. lahman_sqlite() takes care of
#' # creating the database.
#'
#' if (requireNamespace("Lahman", quietly = TRUE)) {
#' batting <- copy_to(con, Lahman::Batting)
#' batting
#'
#' # Basic data manipulation verbs work in the same way as with a tibble
#' batting %>% filter(yearID > 2005, G > 130)
#' batting %>% select(playerID:lgID)
#' batting %>% arrange(playerID, desc(yearID))
#' batting %>% summarise(G = mean(G), n = n())
#'
#' # There are a few exceptions. For example, databases give integer results
#' # when dividing one integer by another. Multiply by 1 to fix the problem
#' batting %>%
#' select(playerID:lgID, AB, R, G) %>%
#' mutate(
#' R_per_game1 = R / G,
#' R_per_game2 = R * 1.0 / G
#' )
#'
#' # All operations are lazy: they don't do anything until you request the
#' # data, either by `print()`ing it (which shows the first ten rows),
#' # or by `collect()`ing the results locally.
#' system.time(recent <- filter(batting, yearID > 2010))
#' system.time(collect(recent))
#'
#' # You can see the query that dplyr creates with show_query()
#' batting %>%
#' filter(G > 0) %>%
#' group_by(playerID) %>%
#' summarise(n = n()) %>%
#' show_query()
#' }
#' @importFrom dplyr tbl
#' @aliases tbl_dbi
tbl.src_dbi <- function(src, from, ...) {
subclass <- class(src$con)[[1]] # prefix added by dplyr::make_tbl
tbl_sql(c(subclass, "dbi"), src = src, from = from, ...)
}
#' Database src
#'
#' @description
#' `r lifecycle::badge("superseded")`
#'
#' Since can generate a `tbl()` directly from a DBI connection we no longer
#' recommend using `src_dbi()`.
#'
#' @param con An object that inherits from [DBI::DBIConnection-class],
#' typically generated by [DBI::dbConnect]
#' @param auto_disconnect Should the connection be automatically closed when
#' the src is deleted? Set to `TRUE` if you initialize the connection
#' the call to `src_dbi()`. Pass `NA` to auto-disconnect but print a message
#' when this happens.
#' @return An S3 object with class `src_dbi`, `src_sql`, `src`.
#' @keywords internal
#' @export
src_dbi <- function(con, auto_disconnect = FALSE) {
# Avoid registering disconnector if con can't be evaluated
force(con)
# stopifnot(is(con, "DBIConnection"))
if (is_false(auto_disconnect)) {
disco <- NULL
} else {
disco <- db_disconnector(con, quiet = is_true(auto_disconnect))
}
subclass <- paste0("src_", class(con)[[1]])
structure(
list(
con = con,
disco = disco
),
class = c(subclass, "src_dbi", "src_sql", "src")
)
}
setOldClass(c("src_dbi", "src_sql", "src"))
# Creates an environment that disconnects the database when it's GC'd
db_disconnector <- function(con, quiet = FALSE) {
reg.finalizer(environment(), function(...) {
if (!quiet) {
message("Auto-disconnecting ", class(con)[[1]])
}
dbDisconnect(con)
})
environment()
}