Permalink
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
136 lines (130 sloc) 4.51 KB
#' dplyr backend for any DBI-compatible database
#'
#' @description
#' `src_dbi()` is a general dplyr backend that connects to any
#' DBI driver. `src_memdb()` connects to a temporary in-memory SQLite
#' database, which is useful for testing and experimenting.
#'
#' You can generate a `tbl()` directly from the DBI connection, or
#' go via `src_dbi()`.
#'
#' @details
#' All data manipulation verbs on SQL tbls are lazy: they will not actually
#' run the query or retrieve the data unless you ask for it. Instead, they all
#' return a new `tbl_dbi` object. Use [compute()] to run the query and save
#' the results in a temporary table in the database, or use [collect()] to
#' retrieve the results to R. You can see the query with [show_query()].
#'
#' For best performance, the database should have an index on the variables
#' that you are grouping by. Use [explain()] to check that the database is using
#' the indexes that you expect.
#'
#' There is one exception: [do()] is not lazy since it must pull the data
#' into R.
#'
#' @param con An object that inherits from [DBI::DBIConnection-class],
#' typically generated by [DBI::dbConnect]
#' @param auto_disconnect Should the connection be automatically closed when
#' the src is deleted? Set to `TRUE` if you initialize the connection in
#' the call to `src_dbi()`. Pass `NA` to auto-disconnect but print a message
#' when this happens.
#' @return An S3 object with class `src_dbi`, `src_sql`, `src`.
#' @export
#' @examples
#' # Basic connection using DBI -------------------------------------------
#' library(dplyr)
#'
#' con <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
#' src <- src_dbi(con, auto_disconnect = TRUE)
#'
#' # Add some data
#' copy_to(src, mtcars)
#' src
#' DBI::dbListTables(con)
#'
#' # To retrieve a single table from a source, use `tbl()`
#' src %>% tbl("mtcars")
#'
#' # You can also pass raw SQL if you want a more sophisticated query
#' src %>% tbl(sql("SELECT * FROM mtcars WHERE cyl = 8"))
#'
#' # Alternatively, you can use the `src_sqlite()` helper
#' src2 <- src_sqlite(":memory:", create = TRUE)
#'
#' # If you just want a temporary in-memory database, use src_memdb()
#' src3 <- src_memdb()
#'
#' # To show off the full features of dplyr's database integration,
#' # we'll use the Lahman database. lahman_sqlite() takes care of
#' # creating the database.
#'
#' if (has_lahman("sqlite")) {
#' lahman_p <- lahman_sqlite()
#' batting <- lahman_p %>% tbl("Batting")
#' batting
#'
#' # Basic data manipulation verbs work in the same way as with a tibble
#' batting %>% filter(yearID > 2005, G > 130)
#' batting %>% select(playerID:lgID)
#' batting %>% arrange(playerID, desc(yearID))
#' batting %>% summarise(G = mean(G), n = n())
#'
#' # There are a few exceptions. For example, databases give integer results
#' # when dividing one integer by another. Multiply by 1.0 to fix the problem
#' batting %>%
#' select(playerID:lgID, AB, R, G) %>%
#' mutate(
#' R_per_game1 = R / G,
#' R_per_game2 = R * 1.0 / G
#' )
#'
#' # All operations are lazy: they don't do anything until you request the
#' # data, either by `print()`ing it (which shows the first ten rows),
#' # or by `collect()`ing the results locally.
#' system.time(recent <- filter(batting, yearID > 2010))
#' system.time(collect(recent))
#'
#' # You can see the query that dplyr creates with show_query()
#' batting %>%
#' filter(G > 0) %>%
#' group_by(playerID) %>%
#' summarise(n = n()) %>%
#' show_query()
#' }
src_dbi <- function(con, auto_disconnect = FALSE) {
  # Evaluate `con` up front. Arguments are lazy, so without this the
  # disconnector below could be registered before `con` has ever been
  # evaluated; if the expression creating the connection then fails, a
  # finalizer would be left behind for a connection that never existed.
  force(con)

  # stopifnot(is(con, "DBIConnection"))

  # `auto_disconnect` is tri-state (see the parameter documentation):
  #   FALSE      -> no disconnector is created
  #   TRUE       -> disconnector that closes the connection quietly
  #   otherwise  -> (e.g. NA) disconnector that prints a message when it fires
  if (is_false(auto_disconnect)) {
    disco <- NULL
  } else {
    disco <- db_disconnector(con, quiet = is_true(auto_disconnect))
  }

  # The first class of the connection gives the backend-specific subclass,
  # e.g. a connection of class "Foo" produces a "src_Foo" source.
  subclass <- paste0("src_", class(con)[[1]])

  # `disco` is stored on the src so that the disconnector environment stays
  # reachable for as long as the src object itself is.
  structure(
    list(
      con = con,
      disco = disco
    ),
    class = c(subclass, "src_dbi", "src_sql", "src")
  )
}
# Declare the S3 class chain carried by `src_dbi()` objects to the S4 system
# via `setOldClass()`. NOTE(review): presumably needed so S4 generics or slots
# elsewhere in the package can refer to these classes -- not visible here.
setOldClass(c("src_dbi", "src_sql", "src"))
#' @export
#' @aliases tbl_dbi
#' @rdname src_dbi
#' @param src Either a `src_dbi` or `DBIConnection`
#' @param from Either a string (giving a table name) or literal [sql()].
#' @param ... Needed for compatibility with generic; currently ignored.
tbl.src_dbi <- function(src, from, ...) {
  # Build the subclass vector from the connection's primary class plus the
  # generic "dbi" entry. Names are passed bare: the prefix is added by
  # dplyr::make_tbl.
  con_class <- class(src$con)[[1]]
  tbl_classes <- c(con_class, "dbi")

  tbl_sql(tbl_classes, src = src, from = from)
}
# Returns this call's own environment with a finalizer attached to it: once
# that environment is garbage collected, the finalizer disconnects `con`
# (announcing it with a message unless `quiet` is TRUE).
db_disconnector <- function(con, quiet = FALSE) {
  # No extra locals are created on purpose: the function environment is the
  # returned object, so it should hold nothing but `con` and `quiet`.
  reg.finalizer(
    e = environment(),
    f = function(...) {
      if (!quiet) message("Auto-disconnecting ", class(con)[[1]])
      dbDisconnect(con)
    }
  )

  environment()
}