diff --git a/.github/workflows/custom/after-install/action.yml b/.github/workflows/custom/after-install/action.yml new file mode 100644 index 000000000..937993668 --- /dev/null +++ b/.github/workflows/custom/after-install/action.yml @@ -0,0 +1,24 @@ +name: 'Custom steps to run after R packages are installed' + +runs: + using: "composite" + steps: + # Must happen after R is installed... + - name: Define R CMD check error condition + run: | + if (packageVersion("arrow") < "8.0.0") { + message("Setting RCMDCHECK_ERROR_ON = error") + cat('RCMDCHECK_ERROR_ON="error"\n', file = Sys.getenv("GITHUB_ENV"), append = TRUE) + } else if (getRversion() < "3.5") { + message("Setting RCMDCHECK_ERROR_ON = warning") + cat('RCMDCHECK_ERROR_ON="warning"\n', file = Sys.getenv("GITHUB_ENV"), append = TRUE) + } + shell: Rscript {0} + + - name: Define _R_CHECK_FORCE_SUGGESTS_ + run: | + if (getRversion() < "3.5") { + message("Setting _R_CHECK_FORCE_SUGGESTS_") + cat('_R_CHECK_FORCE_SUGGESTS_=false\n', file = Sys.getenv("GITHUB_ENV"), append = TRUE) + } + shell: Rscript {0} diff --git a/DESCRIPTION b/DESCRIPTION index e4aceb9b1..46cf13dc8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -18,10 +18,11 @@ Depends: Imports: blob (>= 1.2.0), callr, - DBI (>= 1.1.3), + DBI (>= 1.1.3.9004), desc, hms (>= 0.5.0), lubridate, + magrittr, methods, palmerpenguins, R6, @@ -31,10 +32,12 @@ Imports: vctrs, withr Suggests: + arrow, clipr, dblog (>= 0.0.0.9008), debugme, devtools, + dplyr, knitr, lintr, rmarkdown, @@ -42,7 +45,7 @@ Suggests: VignetteBuilder: knitr Remotes: - r-dbi/dblog + r-dbi/DBI Additional_repositories: https://r-dbi.r-universe.dev Config/autostyle/scope: line_breaks Config/autostyle/strict: false @@ -102,6 +105,15 @@ Collate: 'spec-meta-get-rows-affected.R' 'spec-transaction-begin-commit-rollback.R' 'spec-transaction-with-transaction.R' + 'spec-arrow-send-query-arrow.R' + 'spec-arrow-fetch-arrow.R' + 'spec-arrow-get-query-arrow.R' + 'spec-arrow-read-table-arrow.R' + 
'spec-arrow-write-table-arrow.R' + 'spec-arrow-create-table-arrow.R' + 'spec-arrow-append-table-arrow.R' + 'spec-arrow-bind.R' + 'spec-arrow-roundtrip.R' 'spec-driver-get-info.R' 'spec-connection-get-info.R' 'spec-sql-list-fields.R' @@ -112,6 +124,7 @@ Collate: 'spec-result.R' 'spec-sql.R' 'spec-meta.R' + 'spec-arrow.R' 'spec-transaction.R' 'spec-compliance.R' 'spec-stress-connection.R' @@ -126,6 +139,7 @@ Collate: 'test-sql.R' 'test-meta.R' 'test-transaction.R' + 'test-arrow.R' 'test-compliance.R' 'test-stress.R' 'tweaks.R' diff --git a/NAMESPACE b/NAMESPACE index 5c3fd7899..decce6c99 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,6 +7,7 @@ export(get_default_context) export(make_context) export(set_default_context) export(test_all) +export(test_arrow) export(test_compliance) export(test_connection) export(test_driver) @@ -18,50 +19,11 @@ export(test_sql) export(test_stress) export(test_transaction) export(tweaks) +import(DBI) import(testthat) -importFrom(DBI,Id) -importFrom(DBI,SQL) -importFrom(DBI,dbAppendTable) -importFrom(DBI,dbBegin) -importFrom(DBI,dbBind) -importFrom(DBI,dbBreak) -importFrom(DBI,dbCallProc) -importFrom(DBI,dbClearResult) -importFrom(DBI,dbColumnInfo) -importFrom(DBI,dbCommit) -importFrom(DBI,dbConnect) -importFrom(DBI,dbCreateTable) -importFrom(DBI,dbDataType) -importFrom(DBI,dbDisconnect) -importFrom(DBI,dbExecute) -importFrom(DBI,dbExistsTable) -importFrom(DBI,dbFetch) -importFrom(DBI,dbGetDBIVersion) -importFrom(DBI,dbGetInfo) -importFrom(DBI,dbGetQuery) -importFrom(DBI,dbGetRowCount) -importFrom(DBI,dbGetRowsAffected) -importFrom(DBI,dbGetStatement) -importFrom(DBI,dbHasCompleted) -importFrom(DBI,dbIsValid) -importFrom(DBI,dbListConnections) -importFrom(DBI,dbListFields) -importFrom(DBI,dbListObjects) -importFrom(DBI,dbListTables) -importFrom(DBI,dbQuoteIdentifier) -importFrom(DBI,dbQuoteLiteral) -importFrom(DBI,dbQuoteString) -importFrom(DBI,dbReadTable) -importFrom(DBI,dbRemoveTable) -importFrom(DBI,dbRollback) 
-importFrom(DBI,dbSendQuery) -importFrom(DBI,dbSendStatement) -importFrom(DBI,dbSetDataMappings) -importFrom(DBI,dbUnquoteIdentifier) -importFrom(DBI,dbWithTransaction) -importFrom(DBI,dbWriteTable) importFrom(callr,r) importFrom(lubridate,with_tz) +importFrom(magrittr,"%>%") importFrom(methods,extends) importFrom(methods,findMethod) importFrom(methods,getClass) diff --git a/R/dbi.R b/R/dbi.R index 05ec05c1a..7af38276e 100644 --- a/R/dbi.R +++ b/R/dbi.R @@ -36,13 +36,13 @@ dbi_generics <- function(version) { if (version < "1.7.4") { generics <- setdiff(generics, c( - "dbGetStream", - "dbAppendStream", - "dbStream", - "dbWriteStream", - "dbSendQueryStream", - "dbStreamTable", - "dbCreateFromStream" + "dbGetQueryArrow", + "dbAppendTableArrow", + "dbFetchArrow", + "dbWriteTableArrow", + "dbSendQueryArrow", + "dbReadTableArrow", + "dbCreateTableArrow" )) } diff --git a/R/expectations.R b/R/expectations.R index 5e206813c..912ff677d 100644 --- a/R/expectations.R +++ b/R/expectations.R @@ -80,6 +80,10 @@ expect_equal_df <- function(actual, expected) { expect_identical(actual, expected) } +expect_equal_arrow <- function(actual, expected) { + expect_equal_df(as.data.frame(actual), as.data.frame(expected)) +} + skip_if_not_dbitest <- function(ctx, version) { if (as.package_version(ctx$tweaks$dbitest_version) < version) { skip(paste0("tweak: dbitest_version: required: ", version, ", available: ", ctx$tweaks$dbitest_version)) diff --git a/R/generics.R b/R/generics.R index d9507e362..77e0cb245 100644 --- a/R/generics.R +++ b/R/generics.R @@ -2,8 +2,8 @@ all_dbi_generics <- function() { c( "Id", - "dbAppendStream", "dbAppendTable", + "dbAppendTableArrow", "dbBegin", "dbBind", "dbCanConnect", @@ -11,19 +11,20 @@ all_dbi_generics <- function() { "dbColumnInfo", "dbCommit", "dbConnect", - "dbCreateFromStream", "dbCreateTable", + "dbCreateTableArrow", "dbDataType", "dbDisconnect", "dbExecute", "dbExistsTable", "dbFetch", + "dbFetchArrow", "dbGetInfo", "dbGetQuery", + 
"dbGetQueryArrow", "dbGetRowCount", "dbGetRowsAffected", "dbGetStatement", - "dbGetStream", "dbHasCompleted", "dbIsReadOnly", "dbIsValid", @@ -34,17 +35,16 @@ all_dbi_generics <- function() { "dbQuoteLiteral", "dbQuoteString", "dbReadTable", + "dbReadTableArrow", "dbRemoveTable", "dbRollback", "dbSendQuery", - "dbSendQueryStream", + "dbSendQueryArrow", "dbSendStatement", - "dbStream", - "dbStreamTable", "dbUnquoteIdentifier", "dbWithTransaction", - "dbWriteStream", "dbWriteTable", + "dbWriteTableArrow", NULL ) } diff --git a/R/import-dbi.R b/R/import-dbi.R index 0554de2a2..4a9999178 100644 --- a/R/import-dbi.R +++ b/R/import-dbi.R @@ -1,13 +1,2 @@ -# The imports below were generated using the following call: -# @import.gen::importFrom("DBI") -#' @importFrom DBI dbAppendTable dbBegin dbBind dbBreak dbCallProc dbClearResult dbColumnInfo -#' @importFrom DBI dbCommit dbConnect dbCreateTable dbDataType dbDisconnect -#' @importFrom DBI dbExecute dbExistsTable dbFetch dbGetDBIVersion -#' @importFrom DBI dbGetInfo dbGetQuery dbGetRowCount dbGetRowsAffected -#' @importFrom DBI dbGetStatement dbHasCompleted dbIsValid -#' @importFrom DBI dbListConnections dbListFields dbListObjects dbListTables -#' @importFrom DBI dbQuoteIdentifier dbQuoteLiteral dbQuoteString dbReadTable dbRemoveTable -#' @importFrom DBI dbRollback dbSendQuery dbSendStatement dbSetDataMappings -#' @importFrom DBI dbUnquoteIdentifier dbWithTransaction dbWriteTable -#' @importFrom DBI Id SQL +#' @import DBI NULL diff --git a/R/import-testthat.R b/R/import-testthat.R index 2821440d0..1735522a3 100644 --- a/R/import-testthat.R +++ b/R/import-testthat.R @@ -7,4 +7,5 @@ NULL #' @importFrom methods findMethod getClasses getClass extends #' @importFrom stats setNames #' @importFrom utils head +#' @importFrom magrittr %>% NULL diff --git a/R/spec-.R b/R/spec-.R index 1162e14a3..ffc743790 100644 --- a/R/spec-.R +++ b/R/spec-.R @@ -1,7 +1,7 @@ # reverse order # Script to create new spec files from subspec names read 
from clipboard: -# xclip -out -se c | sed 's/,//' | for i in $(cat); do f=$(echo $i | sed 's/_/-/g;s/$/.R/'); echo "$i <- list(" > R/$f; echo ")" >> R/$f; echo "#' @include $f"; done | tac +# pbpaste | gsed 's/,//' | for i in $(cat); do f=$(echo $i | gsed 's/_/-/g;s/$/.R/'); echo "$i <- list(" > R/$f; echo ")" >> R/$f; echo "#' @include $f"; done | tac | pbcopy # # Example input: # test_xxx_1, @@ -17,6 +17,7 @@ ##### Aggregators #' @include spec-compliance.R #' @include spec-transaction.R +#' @include spec-arrow.R #' @include spec-meta.R #' @include spec-sql.R #' @include spec-result.R @@ -28,6 +29,16 @@ #' @include spec-sql-list-fields.R #' @include spec-connection-get-info.R #' @include spec-driver-get-info.R +##### Arrow +#' @include spec-arrow-roundtrip.R +#' @include spec-arrow-bind.R +#' @include spec-arrow-append-table-arrow.R +#' @include spec-arrow-create-table-arrow.R +#' @include spec-arrow-write-table-arrow.R +#' @include spec-arrow-read-table-arrow.R +#' @include spec-arrow-get-query-arrow.R +#' @include spec-arrow-fetch-arrow.R +#' @include spec-arrow-send-query-arrow.R ##### Method specs #' @include spec-transaction-with-transaction.R #' @include spec-transaction-begin-commit-rollback.R diff --git a/R/spec-all.R b/R/spec-all.R index 69f482b05..615c55308 100644 --- a/R/spec-all.R +++ b/R/spec-all.R @@ -6,6 +6,7 @@ spec_all <- c( spec_sql, spec_meta, spec_transaction, + spec_arrow, spec_compliance, spec_stress ) diff --git a/R/spec-arrow-append-table-arrow.R b/R/spec-arrow-append-table-arrow.R new file mode 100644 index 000000000..9414ca35f --- /dev/null +++ b/R/spec-arrow-append-table-arrow.R @@ -0,0 +1,594 @@ +#' spec_arrow_append_table_arrow +#' @family Arrow specifications +#' @usage NULL +#' @format NULL +#' @keywords NULL +spec_arrow_append_table_arrow <- list( + arrow_append_table_arrow_formals = function() { + # + expect_equal(names(formals(dbAppendTableArrow)), c("conn", "name", "value", "...")) + }, + + arrow_append_table_arrow_return = 
function(con, table_name) { + skip("Failed in SQLite") + + #' @return + #' `dbAppendTableArrow()` returns a + test_in <- stream_frame(trivial_df()) + dbCreateTableArrow(con, table_name, test_in) + ret <- dbAppendTableArrow(con, table_name, test_in) + + #' scalar + expect_equal(length(ret), 1) + #' numeric. + expect_true(is.numeric(ret)) + }, + + #' + arrow_append_table_arrow_missing = function(con, table_name) { + #' @section Failure modes: + #' If the table does not exist, + expect_false(dbExistsTable(con, table_name)) + expect_error(dbAppendTableArrow(con, table_name, stream_frame(a = 2L))) + }, + + arrow_append_table_arrow_invalid_value = function(con, table_name) { + #' or the new data in `values` is not a data frame or has different column names, + #' an error is raised; the remote table remains unchanged. + test_in <- trivial_df() + dbCreateTableArrow(con, table_name, test_in %>% stream_frame()) + expect_error(dbAppendTableArrow(con, table_name, test_in %>% stream_frame() %>% unclass())) + + test_out <- check_df(dbReadTable(con, table_name)) + expect_equal_arrow(test_out, test_in[0, , drop = FALSE]) + }, + + arrow_append_table_arrow_append_incompatible = function(con, table_name) { + test_in <- trivial_df() + dbCreateTableArrow(con, table_name, test_in %>% stream_frame()) + dbAppendTableArrow(con, table_name, test_in %>% stream_frame()) + expect_error(dbAppendTableArrow(con, table_name, stream_frame(b = 2L))) + + test_out <- check_df(dbReadTable(con, table_name)) + expect_equal_arrow(test_out, test_in) + }, + + #' + arrow_append_table_arrow_closed_connection = function(ctx, closed_con) { + #' An error is raised when calling this method for a closed + expect_error(dbAppendTableArrow(closed_con, "test", stream_frame(a = 1))) + }, + + arrow_append_table_arrow_invalid_connection = function(ctx, invalid_con) { + #' or invalid connection. 
+ expect_error(dbAppendTableArrow(invalid_con, "test", stream_frame(a = 1))) + }, + + arrow_append_table_arrow_error = function(con, table_name) { + #' An error is also raised + test_in <- stream_frame(a = 1L) + #' if `name` cannot be processed with [dbQuoteIdentifier()] + expect_error(dbAppendTableArrow(con, NA, test_in)) + #' or if this results in a non-scalar. + expect_error(dbAppendTableArrow(con, c("test", "test"), test_in)) + }, + + #' + arrow_append_table_arrow_roundtrip_keywords = function(con) { + skip("Requires dbBind() on RMariaDB") + + #' @section Specification: + #' SQL keywords can be used freely in table names, column names, and data. + tbl_in <- data.frame( + select = "unique", from = "join", where = "order", + stringsAsFactors = FALSE + ) + test_arrow_roundtrip(use_append = TRUE, con, tbl_in, name = "exists") + }, + + arrow_append_table_arrow_roundtrip_quotes = function(ctx, con, table_name) { + skip("Requires dbBind() on RMariaDB") + + #' Quotes, commas, spaces, and other special characters such as newlines and tabs, + #' can also be used in the data, + tbl_in <- data.frame( + as.character(dbQuoteString(con, "")), + as.character(dbQuoteIdentifier(con, "")), + "with space", + "a,b", "a\nb", "a\tb", "a\rb", "a\bb", + "a\\Nb", "a\\tb", "a\\rb", "a\\bb", "a\\Zb", + stringsAsFactors = FALSE + ) + + names(tbl_in) <- letters[seq_along(tbl_in)] + test_arrow_roundtrip(con, tbl_in, use_append = TRUE) + }, + + arrow_append_table_arrow_roundtrip_quotes_table_names = function(ctx, con) { + #' and, if the database supports non-syntactic identifiers, + #' also for table names + if (isTRUE(ctx$tweaks$strict_identifier)) { + skip("tweak: strict_identifier") + } + + table_names <- c( + as.character(dbQuoteIdentifier(con, "")), + as.character(dbQuoteString(con, "")), + "with space", + "a,b", "a\nb", "a\tb", "a\rb", "a\bb", + "a\\Nb", "a\\tb", "a\\rb", "a\\bb", "a\\Zb" + ) + + tbl_in <- data.frame(trivial_df()) + + for (table_name in table_names) { + 
test_arrow_roundtrip_one(con, tbl_in, use_append = TRUE, .add_na = FALSE) + } + }, + + arrow_append_table_arrow_roundtrip_quotes_column_names = function(ctx, con) { + #' and column names. + if (isTRUE(ctx$tweaks$strict_identifier)) { + skip("tweak: strict_identifier") + } + + column_names <- c( + as.character(dbQuoteIdentifier(con, "")), + as.character(dbQuoteString(con, "")), + "with space", + "a,b", "a\nb", "a\tb", "a\rb", "a\bb", + "a\\nb", "a\\tb", "a\\rb", "a\\bb", "a\\zb" + ) + + tbl_in <- data.frame(trivial_df(length(column_names), column_names)) + + test_arrow_roundtrip_one(con, tbl_in, use_append = TRUE, .add_na = FALSE) + }, + + #' + arrow_append_table_arrow_roundtrip_integer = function(con) { + #' The following data types must be supported at least, + #' and be read identically with [dbReadTable()]: + #' - integer + tbl_in <- data.frame(a = c(1:5)) + test_arrow_roundtrip(use_append = TRUE, con, tbl_in) + }, + + arrow_append_table_arrow_roundtrip_numeric = function(con) { + #' - numeric + tbl_in <- data.frame(a = c(seq(1, 3, by = 0.5))) + test_arrow_roundtrip(use_append = TRUE, con, tbl_in) + #' (the behavior for `Inf` and `NaN` is not specified) + }, + + arrow_append_table_arrow_roundtrip_logical = function(ctx, con) { + skip("Fails in adbc") + + #' - logical + tbl_in <- data.frame(a = c(TRUE, FALSE, NA)) + tbl_exp <- tbl_in + tbl_exp$a <- ctx$tweaks$logical_return(tbl_exp$a) + test_arrow_roundtrip(use_append = TRUE, con, tbl_in, tbl_exp) + }, + + arrow_append_table_arrow_roundtrip_null = function(con) { + #' - `NA` as NULL + tbl_in <- data.frame(a = NA) + test_arrow_roundtrip( + use_append = TRUE, + con, tbl_in, + transform = function(tbl_out) { + tbl_out$a <- as.logical(tbl_out$a) # Plain NA is of type logical + tbl_out + } + ) + }, + + #' - 64-bit values (using `"bigint"` as field type); the result can be + arrow_append_table_arrow_roundtrip_64_bit_numeric = function(ctx, con) { + tbl_in <- data.frame(a = c(-1e14, 1e15)) + test_arrow_roundtrip( + 
use_append = TRUE, + con, tbl_in, + transform = function(tbl_out) { + #' - converted to a numeric, which may lose precision, + tbl_out$a <- as.numeric(tbl_out$a) + tbl_out + } + ) + }, + # + arrow_append_table_arrow_roundtrip_64_bit_character = function(ctx, con) { + skip("Failed in SQLite") + + tbl_in <- data.frame(a = c(-1e14, 1e15)) + tbl_exp <- tbl_in + tbl_exp$a <- format(tbl_exp$a, scientific = FALSE) + test_arrow_roundtrip( + use_append = TRUE, + con, tbl_in, tbl_exp, + transform = function(tbl_out) { + #' - converted to a character vector, which gives the full decimal + #' representation + tbl_out$a <- as.character(tbl_out$a) + tbl_out + } + ) + }, + # + arrow_append_table_arrow_roundtrip_64_bit_roundtrip = function(con, table_name) { + skip("Requires dbBind() on RMariaDB") + + tbl_in <- data.frame(a = c(-1e14, 1e15)) + dbWriteTable(con, table_name, tbl_in, field.types = c(a = "BIGINT")) + tbl_out <- dbReadTable(con, table_name) + #' - written to another table and read again unchanged + test_arrow_roundtrip(use_append = TRUE, con, tbl_out, tbl_expected = tbl_out) + }, + + arrow_append_table_arrow_roundtrip_character = function(con) { + skip("Requires dbBind() on RMariaDB") + + #' - character (in both UTF-8 + tbl_in <- data.frame( + id = seq_along(get_texts()), + a = get_texts(), + stringsAsFactors = FALSE + ) + test_arrow_roundtrip(use_append = TRUE, con, tbl_in) + }, + + arrow_append_table_arrow_roundtrip_character_native = function(con) { + skip("Requires dbBind() on RMariaDB") + + #' and native encodings), + tbl_in <- data.frame( + a = c(enc2native(get_texts())), + stringsAsFactors = FALSE + ) + test_arrow_roundtrip(use_append = TRUE, con, tbl_in) + }, + + arrow_append_table_arrow_roundtrip_character_empty = function(con) { + #' supporting empty strings + tbl_in <- data.frame( + a = c("", "a"), + stringsAsFactors = FALSE + ) + test_arrow_roundtrip(use_append = TRUE, con, tbl_in) + }, + + arrow_append_table_arrow_roundtrip_character_empty_after = 
function(con) { + #' (before and after non-empty strings) + tbl_in <- data.frame( + a = c("a", ""), + stringsAsFactors = FALSE + ) + test_arrow_roundtrip(use_append = TRUE, con, tbl_in) + }, + + arrow_append_table_arrow_roundtrip_factor = function(con) { + skip("Failed in SQLite") + + #' - factor (returned as character, + tbl_in <- data.frame( + a = factor(get_texts()) + ) + tbl_exp <- tbl_in + tbl_exp$a <- as.character(tbl_exp$a) + #' with a warning) + suppressWarnings( + expect_warning( + test_arrow_roundtrip(use_append = TRUE, con, tbl_in, tbl_exp) + ) + ) + }, + + arrow_append_table_arrow_roundtrip_raw = function(ctx, con) { + skip("Failed in SQLite") + + #' - list of raw + #' (if supported by the database) + if (isTRUE(ctx$tweaks$omit_blob_tests)) { + skip("tweak: omit_blob_tests") + } + + tbl_in <- data.frame(id = 1L, a = I(list(as.raw(0:10)))) + tbl_exp <- tbl_in + tbl_exp$a <- blob::as_blob(unclass(tbl_in$a)) + test_arrow_roundtrip( + use_append = TRUE, + con, tbl_in, tbl_exp, + transform = function(tbl_out) { + tbl_out$a <- blob::as_blob(tbl_out$a) + tbl_out + } + ) + }, + + arrow_append_table_arrow_roundtrip_blob = function(ctx, con) { + skip("Failed in SQLite") + + #' - objects of type [blob::blob] + #' (if supported by the database) + if (isTRUE(ctx$tweaks$omit_blob_tests)) { + skip("tweak: omit_blob_tests") + } + + tbl_in <- data.frame(id = 1L, a = blob::blob(as.raw(0:10))) + test_arrow_roundtrip( + use_append = TRUE, + con, tbl_in, + transform = function(tbl_out) { + tbl_out$a <- blob::as_blob(tbl_out$a) + tbl_out + } + ) + }, + + arrow_append_table_arrow_roundtrip_date = function(ctx, con) { + #' - date + #' (if supported by the database; + if (!isTRUE(ctx$tweaks$date_typed)) { + skip("tweak: !date_typed") + } + + #' returned as `Date`) + tbl_in <- data.frame(a = as_numeric_date(c(Sys.Date() + 1:5))) + test_arrow_roundtrip( + use_append = TRUE, + con, tbl_in, + transform = function(tbl_out) { + expect_type(unclass(tbl_out$a), "double") + tbl_out + } 
+ ) + }, + + arrow_append_table_arrow_roundtrip_date_extended = function(ctx, con) { + #' also for dates prior to 1970 or 1900 or after 2038 + if (!isTRUE(ctx$tweaks$date_typed)) { + skip("tweak: !date_typed") + } + + tbl_in <- data.frame(a = as_numeric_date(c( + "1811-11-11", + "1899-12-31", + "1900-01-01", + "1950-05-05", + "1969-12-31", + "1970-01-01", + "2037-01-01", + "2038-01-01", + "2040-01-01", + "2999-09-09" + ))) + test_arrow_roundtrip( + use_append = TRUE, + con, tbl_in, + transform = function(tbl_out) { + expect_type(unclass(tbl_out$a), "double") + tbl_out + } + ) + }, + + arrow_append_table_arrow_roundtrip_time = function(ctx, con) { + #' - time + #' (if supported by the database; + if (!isTRUE(ctx$tweaks$time_typed)) { + skip("tweak: !time_typed") + } + + tbl_in <- data.frame(a = hms::hms(minutes = 1:5)) + tbl_in$b <- .difftime(as.numeric(tbl_in$a) / 60, "mins") + + tbl_exp <- tbl_in + tbl_exp$a <- hms::as_hms(tbl_exp$a) + tbl_exp$b <- hms::as_hms(tbl_exp$b) + + test_arrow_roundtrip( + con, tbl_in, tbl_exp, + transform = function(tbl_out) { + #' returned as objects that inherit from `difftime`) + expect_s3_class(tbl_out$a, "difftime") + expect_s3_class(tbl_out$b, "difftime") + tbl_out$a <- hms::as_hms(tbl_out$a) + tbl_out$b <- hms::as_hms(tbl_out$b) + tbl_out + } + ) + }, + + arrow_append_table_arrow_roundtrip_timestamp = function(ctx, con) { + skip("Fails in adbc") + + #' - timestamp + #' (if supported by the database; + if (!isTRUE(ctx$tweaks$timestamp_typed)) { + skip("tweak: !timestamp_typed") + } + + #' returned as `POSIXct` + local <- round(Sys.time()) + + c( + 1, 60, 3600, 86400, + 86400 * 90, 86400 * 180, 86400 * 270, + 1e9, 5e9 + ) + attr(local, "tzone") <- "" + tbl_in <- data.frame(id = seq_along(local)) + tbl_in$local <- local + tbl_in$gmt <- lubridate::with_tz(local, tzone = "GMT") + tbl_in$pst8pdt <- lubridate::with_tz(local, tzone = "PST8PDT") + tbl_in$utc <- lubridate::with_tz(local, tzone = "UTC") + + #' respecting the time zone but 
not necessarily preserving the + #' input time zone), + test_arrow_roundtrip( + use_append = TRUE, + con, tbl_in, + transform = function(out) { + dates <- vapply(out, inherits, "POSIXt", FUN.VALUE = logical(1L)) + tz <- toupper(names(out)) + tz[tz == "LOCAL"] <- "" + out[dates] <- Map(lubridate::with_tz, out[dates], tz[dates]) + out + } + ) + }, + + arrow_append_table_arrow_roundtrip_timestamp_extended = function(ctx, con) { + skip("Fails in adbc") + + #' also for timestamps prior to 1970 or 1900 or after 2038 + if (!isTRUE(ctx$tweaks$timestamp_typed)) { + skip("tweak: !timestamp_typed") + } + + local <- as.POSIXct(c( + "1811-11-11", + "1899-12-31", + "1900-01-01", + "1950-05-05", + "1969-12-31", + "1970-01-01", + "2037-01-01", + "2038-01-01", + "2040-01-01", + "2999-09-09" + )) + + attr(local, "tzone") <- "" + tbl_in <- data.frame(id = seq_along(local)) + tbl_in$local <- local + tbl_in$gmt <- lubridate::with_tz(local, tzone = "GMT") + tbl_in$pst8pdt <- lubridate::with_tz(local, tzone = "PST8PDT") + tbl_in$utc <- lubridate::with_tz(local, tzone = "UTC") + + #' respecting the time zone but not necessarily preserving the + #' input time zone) + test_arrow_roundtrip( + use_append = TRUE, + con, tbl_in, + transform = function(out) { + dates <- vapply(out, inherits, "POSIXt", FUN.VALUE = logical(1L)) + tz <- toupper(names(out)) + tz[tz == "LOCAL"] <- "" + out[dates] <- Map(lubridate::with_tz, out[dates], tz[dates]) + out + } + ) + }, + + #' + arrow_append_table_arrow_roundtrip_mixed = function(con) { + #' Mixing column types in the same table is supported. 
+ data <- list("a", 1L, 1.5) + data <- lapply(data, c, NA) + expanded <- expand.grid(a = data, b = data, c = data) + tbl_in_list <- lapply( + seq_len(nrow(expanded)), + function(i) { + data.frame(lapply(expanded[i, ], unlist, recursive = FALSE)) + } + ) + + lapply(tbl_in_list, test_arrow_roundtrip, con = con) + }, + + arrow_append_table_arrow_name = function(ctx, con) { + #' @section Specification: + #' The `name` argument is processed as follows, + #' to support databases that allow non-syntactic names for their objects: + if (isTRUE(ctx$tweaks$strict_identifier)) { + table_names <- "a" + } else { + table_names <- c("a", "with spaces", "with,comma") + } + + for (table_name in table_names) { + test_in <- trivial_df() + + local_remove_test_table(con, table_name) + #' - If an unquoted table name as string: `dbAppendTableArrow()` will do the quoting, + dbCreateTableArrow(con, table_name, test_in %>% stream_frame()) + dbAppendTableArrow(con, table_name, test_in %>% stream_frame()) + test_out <- check_df(dbReadTable(con, dbQuoteIdentifier(con, table_name))) + expect_equal_arrow(test_out, test_in) + #' perhaps by calling `dbQuoteIdentifier(conn, x = name)` + } + }, + + arrow_append_table_arrow_name_quoted = function(ctx, con) { + #' - If the result of a call to [dbQuoteIdentifier()]: no more quoting is done + if (as.package_version(ctx$tweaks$dbitest_version) < "1.7.2") { + skip(paste0("tweak: dbitest_version: ", ctx$tweaks$dbitest_version)) + } + + #' to support databases that allow non-syntactic names for their objects: + if (isTRUE(ctx$tweaks$strict_identifier)) { + table_names <- "a" + } else { + table_names <- c("a", "with spaces", "with,comma") + } + + for (table_name in table_names) { + test_in <- trivial_df() + + local_remove_test_table(con, table_name) + dbCreateTableArrow(con, dbQuoteIdentifier(con, table_name), test_in %>% stream_frame()) + dbAppendTableArrow(con, dbQuoteIdentifier(con, table_name), test_in %>% stream_frame()) + test_out <- 
check_df(dbReadTable(con, table_name)) + expect_equal_arrow(test_out, test_in) + } + }, + + #' + arrow_append_table_arrow_value_df = function(con, table_name) { + #' @section Specification: + #' The `value` argument must be a data frame + test_in <- trivial_df() + dbCreateTableArrow(con, table_name, test_in %>% stream_frame()) + dbAppendTableArrow(con, table_name, test_in %>% stream_frame()) + + test_out <- check_df(dbReadTable(con, table_name)) + expect_equal_arrow(test_out, test_in) + }, + + arrow_append_table_arrow_value_subset = function(ctx, con, table_name) { + #' with a subset of the columns of the existing table. + test_in <- trivial_df(3, letters[1:3]) + dbCreateTableArrow(con, table_name, test_in %>% stream_frame()) + dbAppendTableArrow(con, table_name, test_in %>% stream_frame(.select = c(2))) + + test_out <- check_df(dbReadTable(con, table_name)) + + test_in[c(1, 3)] <- NA_real_ + expect_equal_arrow(test_out, test_in) + }, + + arrow_append_table_arrow_value_shuffle = function(ctx, con, table_name) { + #' The order of the columns does not matter. 
+ test_in <- trivial_df(3, letters[1:3]) + dbCreateTableArrow(con, table_name, test_in %>% stream_frame()) + dbAppendTableArrow(con, table_name, test_in %>% stream_frame(.select = c(2, 3, 1))) + + test_out <- check_df(dbReadTable(con, table_name)) + expect_equal_arrow(test_out, test_in) + }, + + # + arrow_append_table_arrow_value_shuffle_subset = function(ctx, con, table_name) { + test_in <- trivial_df(4, letters[1:4]) + dbCreateTableArrow(con, table_name, test_in %>% stream_frame()) + dbAppendTableArrow(con, table_name, test_in %>% stream_frame(.select = c(4, 1, 3))) + + test_out <- check_df(dbReadTable(con, table_name)) + test_in[2] <- NA_real_ + expect_equal_arrow(test_out, test_in) + }, + + # + NULL +) diff --git a/R/spec-arrow-bind.R b/R/spec-arrow-bind.R new file mode 100644 index 000000000..d88b88624 --- /dev/null +++ b/R/spec-arrow-bind.R @@ -0,0 +1,2 @@ +# FIXME: Adapt tests from spec_meta_bind +spec_arrow_bind <- list() diff --git a/R/spec-arrow-create-table-arrow.R b/R/spec-arrow-create-table-arrow.R new file mode 100644 index 000000000..540ca816e --- /dev/null +++ b/R/spec-arrow-create-table-arrow.R @@ -0,0 +1,213 @@ +#' spec_arrow_create_table_arrow +#' @family Arrow specifications +#' @usage NULL +#' @format NULL +#' @keywords NULL +spec_arrow_create_table_arrow <- list( + arrow_create_table_arrow_formals = function() { + skip("Failed in SQLite") + + # + expect_equal(names(formals(dbCreateTableArrow)), c("conn", "name", "value", "...", "temporary")) + }, + + arrow_create_table_arrow_return = function(con, table_name) { + #' @return + #' `dbCreateTableArrow()` returns `TRUE`, invisibly. + expect_invisible_true(dbCreateTableArrow(con, table_name, stream_frame(trivial_df()))) + }, + + #' + arrow_create_table_arrow_overwrite = function(con, table_name) { + #' @section Failure modes: + #' If the table exists, an error is raised; the remote table remains unchanged. 
+ test_in <- trivial_df() + + dbCreateTableArrow(con, table_name, test_in %>% stream_frame()) + dbAppendTableArrow(con, table_name, test_in %>% stream_frame()) + expect_error(dbCreateTableArrow(con, table_name, stream_frame(b = 1L))) + + test_out <- check_df(dbReadTable(con, table_name)) + expect_equal_df(test_out, test_in) + }, + + #' + arrow_create_table_arrow_closed_connection = function(ctx, closed_con) { + #' An error is raised when calling this method for a closed + expect_error(dbCreateTableArrow(closed_con, "test", stream_frame(a = 1))) + }, + + arrow_create_table_arrow_invalid_connection = function(ctx, invalid_con) { + #' or invalid connection. + expect_error(dbCreateTableArrow(invalid_con, "test", stream_frame(a = 1))) + }, + + arrow_create_table_arrow_error = function(ctx, con, table_name) { + #' An error is also raised + test_in <- stream_frame(a = 1L) + #' if `name` cannot be processed with [dbQuoteIdentifier()] + expect_error(dbCreateTableArrow(con, NA, test_in)) + #' or if this results in a non-scalar. + expect_error(dbCreateTableArrow(con, c(table_name, table_name), test_in)) + + #' Invalid values for the `temporary` argument + #' (non-scalars, + expect_error(dbCreateTableArrow(con, table_name, test_in, temporary = c(TRUE, FALSE))) + #' unsupported data types, + expect_error(dbCreateTableArrow(con, table_name, fields = 1L)) + expect_error(dbCreateTableArrow(con, table_name, test_in, temporary = 1L)) + #' `NA`, + expect_error(dbCreateTableArrow(con, table_name, fields = NA)) + expect_error(dbCreateTableArrow(con, table_name, test_in, temporary = NA)) + #' incompatible values, + expect_error(dbCreateTableArrow(con, table_name, test_in, fields = letters)) + #' duplicate names) + expect_error(dbCreateTableArrow(con, table_name, fields = c(a = "INTEGER", a = "INTEGER"))) + + #' also raise an error. 
+ }, + + #' @section Additional arguments: + #' The following arguments are not part of the `dbCreateTableArrow()` generic + #' (to improve compatibility across backends) + #' but are part of the DBI specification: + #' - `temporary` (default: `FALSE`) + #' + #' They must be provided as named arguments. + #' See the "Specification" and "Value" sections for details on their usage. + + arrow_create_table_arrow_name = function(ctx, con) { + #' @section Specification: + #' The `name` argument is processed as follows, + #' to support databases that allow non-syntactic names for their objects: + if (isTRUE(ctx$tweaks$strict_identifier)) { + table_names <- "a" + } else { + table_names <- c("a", "with spaces", "with,comma") + } + + for (table_name in table_names) { + test_in <- trivial_df() + + local_remove_test_table(con, table_name) + #' - If an unquoted table name as string: `dbCreateTableArrow()` will do the quoting, + dbCreateTableArrow(con, table_name, test_in %>% stream_frame()) + test_out <- check_df(dbReadTable(con, dbQuoteIdentifier(con, table_name))) + expect_equal_df(test_out, test_in[0, , drop = FALSE]) + #' perhaps by calling `dbQuoteIdentifier(conn, x = name)` + } + }, + + arrow_create_table_arrow_name_quoted = function(ctx, con) { + #' - If the result of a call to [dbQuoteIdentifier()]: no more quoting is done + if (as.package_version(ctx$tweaks$dbitest_version) < "1.7.2") { + skip(paste0("tweak: dbitest_version: ", ctx$tweaks$dbitest_version)) + } + + if (isTRUE(ctx$tweaks$strict_identifier)) { + table_names <- "a" + } else { + table_names <- c("a", "with spaces", "with,comma") + } + + for (table_name in table_names) { + test_in <- trivial_df() + + local_remove_test_table(con, table_name) + dbCreateTableArrow(con, dbQuoteIdentifier(con, table_name), test_in %>% stream_frame()) + test_out <- check_df(dbReadTable(con, table_name)) + expect_equal_df(test_out, test_in[0, , drop = FALSE]) + } + }, + + #' + create_temporary_table = function(ctx, con, table_name 
= "dbit03") { + #' If the `temporary` argument is `TRUE`, the table is not available in a + #' second connection and is gone after reconnecting. + #' Not all backends support this argument. + if (!isTRUE(ctx$tweaks$temporary_tables)) { + skip("tweak: temporary_tables") + } + + penguins <- get_penguins(ctx) + dbCreateTableArrow(con, table_name, stream_frame(penguins), temporary = TRUE) + penguins_out <- check_df(dbReadTable(con, table_name)) + expect_equal_df(penguins_out, penguins[0, , drop = FALSE]) + + con2 <- local_connection(ctx) + expect_error(dbReadTable(con2, table_name)) + }, + # second stage + create_temporary_table = function(con) { + table_name <- "dbit03" + expect_error(dbReadTable(con, table_name)) + }, + + arrow_create_table_arrow_visible_in_other_connection = function(ctx, local_con) { + skip("Fails in adbc") + + #' A regular, non-temporary table is visible in a second connection, + penguins <- get_penguins(ctx) + + table_name <- "dbit04" + dbCreateTableArrow(local_con, table_name, stream_frame(penguins)) + penguins_out <- check_df(dbReadTable(local_con, table_name)) + expect_equal_df(penguins_out, penguins[0, , drop = FALSE]) + + con2 <- local_connection(ctx) + expect_equal_df(dbReadTable(con2, table_name), penguins[0, , drop = FALSE]) + }, + # second stage + arrow_create_table_arrow_visible_in_other_connection = function(ctx, con) { + skip("Fails in adbc") + + penguins <- get_penguins(ctx) + + table_name <- "dbit04" + + #' in a pre-existing connection, + expect_equal_df(check_df(dbReadTable(con, table_name)), penguins[0, , drop = FALSE]) + }, + # third stage + arrow_create_table_arrow_visible_in_other_connection = function(ctx, local_con, table_name = "dbit04") { + skip("Fails in adbc") + + penguins <- get_penguins(ctx) + + #' and after reconnecting to the database. 
+ expect_equal_df(check_df(dbReadTable(local_con, table_name)), penguins[0, , drop = FALSE]) + }, + + #' + arrow_create_table_arrow_roundtrip_keywords = function(ctx, con) { + #' SQL keywords can be used freely in table names, column names, and data. + tbl_in <- data.frame( + select = "unique", from = "join", where = "order", + stringsAsFactors = FALSE + ) + test_table_roundtrip(con, tbl_in, name = "exists", use_append = TRUE) + }, + + arrow_create_table_arrow_roundtrip_quotes = function(ctx, con) { + #' Quotes, commas, and spaces can also be used for table names and column names, + #' if the database supports non-syntactic identifiers. + if (isTRUE(ctx$tweaks$strict_identifier)) { + skip("tweak: strict_identifier") + } + + table_names <- c( + as.character(dbQuoteIdentifier(con, "")), + as.character(dbQuoteString(con, "")), + "with space", + "," + ) + + for (table_name in table_names) { + tbl_in <- data.frame(trivial_df(4, table_names)) + + test_table_roundtrip(con, tbl_in, use_append = TRUE) + } + }, + # + NULL +) diff --git a/R/spec-arrow-fetch-arrow.R b/R/spec-arrow-fetch-arrow.R new file mode 100644 index 000000000..dbf27ae7e --- /dev/null +++ b/R/spec-arrow-fetch-arrow.R @@ -0,0 +1,101 @@ +#' spec_arrow_fetch_arrow +#' @family Arrow specifications +#' @usage NULL +#' @format NULL +#' @keywords NULL +spec_arrow_fetch_arrow <- list( + arrow_fetch_arrow_formals = function() { + # + expect_equal(names(formals(dbFetchArrow)), c("res", "...")) + }, + + arrow_fetch_arrow_atomic = function(con) { + #' @return + #' `dbFetchArrow()` always returns an object coercible to a [data.frame] + #' with as many rows as records were fetched and as many + #' columns as fields in the result set, + #' even if the result is a single value + query <- trivial_query() + res <- local_result(dbSendQueryArrow(con, query)) + rows <- check_arrow(dbFetchArrow(res)) + expect_equal(rows, data.frame(a = 1.5)) + }, + + arrow_fetch_arrow_one_row = function(con) { + #' or has one + query <- 
trivial_query(3, letters[1:3]) + result <- trivial_df(3, letters[1:3]) + res <- local_result(dbSendQueryArrow(con, query)) + rows <- check_arrow(dbFetchArrow(res)) + expect_identical(rows, result) + }, + + arrow_fetch_arrow_zero_rows = function(con) { + skip("Causes segfault in adbc") + + #' or zero rows. + query <- + "SELECT * FROM (SELECT 1 as a, 2 as b, 3 as c) AS x WHERE (1 = 0)" + res <- local_result(dbSendQueryArrow(con, query)) + rows <- check_arrow(dbFetchArrow(res)) + expect_identical(class(rows), "data.frame") + }, + + #' + arrow_fetch_arrow_closed = function(con) { + skip("Fails in adbc") + + #' @section Failure modes: + #' An attempt to fetch from a closed result set raises an error. + query <- trivial_query() + + res <- dbSendQueryArrow(con, query) + dbClearResult(res) + + expect_error(dbFetchArrow(res)) + }, + + arrow_fetch_arrow_multi_row_single_column = function(ctx, con) { + #' @section Specification: + #' Fetching multi-row queries with one + query <- trivial_query(3, .ctx = ctx, .order_by = "a") + result <- trivial_df(3) + + res <- local_result(dbSendQueryArrow(con, query)) + rows <- check_arrow(dbFetchArrow(res)) + expect_identical(rows, result) + }, + + arrow_fetch_arrow_multi_row_multi_column = function(ctx, con) { + #' or more columns by default returns the entire result. + query <- sql_union( + .ctx = ctx, paste("SELECT", 1:5 + 0.5, "AS a,", 4:0 + 0.5, "AS b"), .order_by = "a" + ) + + res <- local_result(dbSendQueryArrow(con, query)) + rows <- check_arrow(dbFetchArrow(res)) + expect_identical(rows, data.frame(a = 1:5 + 0.5, b = 4:0 + 0.5)) + }, + + arrow_fetch_arrow_record_batch_reader = function(ctx, con) { + #' The object returned by `dbFetchArrow()` can also be passed to + #' [arrow::as_record_batch_reader()] to create an Arrow + #' RecordBatchReader object that can be used to read the result set + #' in batches. 
+ query <- trivial_query(25, .ctx = ctx, .order_by = "a") + result <- trivial_df(25) + + res <- local_result(dbSendQueryArrow(con, query)) + stream <- dbFetchArrow(res) + + rbr <- arrow::as_record_batch_reader(stream) + + #' The chunk size is implementation-specific. + # Arrow returns a tibble when it shouldn't + out <- as.data.frame(as.data.frame(rbr$read_next_batch())) + expect_equal(out, head(result, nrow(out))) + }, + + # + NULL +) diff --git a/R/spec-arrow-get-query-arrow.R b/R/spec-arrow-get-query-arrow.R new file mode 100644 index 000000000..140d076c3 --- /dev/null +++ b/R/spec-arrow-get-query-arrow.R @@ -0,0 +1,146 @@ +#' spec_arrow_get_query_arrow +#' @family Arrow specifications +#' @usage NULL +#' @format NULL +#' @keywords NULL +spec_arrow_get_query_arrow <- list( + arrow_get_query_arrow_formals = function() { + # + expect_equal(names(formals(dbGetQueryArrow)), c("conn", "statement", "...")) + }, + + arrow_get_query_arrow_atomic = function(con) { + #' @return + #' `dbGetQueryArrow()` always returns an object coercible to a [data.frame] + #' with as many rows as records were fetched and as many + #' columns as fields in the result set, + #' even if the result is a single value + query <- trivial_query() + + rows <- check_arrow(dbGetQueryArrow(con, query)) + expect_equal(rows, data.frame(a = 1.5)) + }, + + arrow_get_query_arrow_one_row = function(con) { + #' or has one + query <- trivial_query(3, letters[1:3]) + result <- trivial_df(3, letters[1:3]) + + rows <- check_arrow(dbGetQueryArrow(con, query)) + expect_identical(rows, result) + }, + + arrow_get_query_arrow_zero_rows = function(con) { + skip("Causes segfault in adbc") + + #' or zero rows. + # Not all SQL dialects seem to support the query used here. 
+ query <- + "SELECT * FROM (SELECT 1 as a, 2 as b, 3 as c) AS x WHERE (1 = 0)" + + rows <- check_arrow(dbGetQueryArrow(con, query)) + expect_identical(names(rows), letters[1:3]) + expect_identical(dim(rows), c(0L, 3L)) + }, + + #' + arrow_get_query_arrow_closed_connection = function(ctx, closed_con) { + #' @section Failure modes: + #' An error is raised when issuing a query over a closed + expect_error(dbGetQueryArrow(closed_con, trivial_query())) + }, + + arrow_get_query_arrow_invalid_connection = function(ctx, invalid_con) { + #' or invalid connection, + expect_error(dbGetQueryArrow(invalid_con, trivial_query())) + }, + + arrow_get_query_arrow_syntax_error = function(con) { + #' if the syntax of the query is invalid, + expect_error(dbGetQueryArrow(con, "SELLECT")) + }, + + arrow_get_query_arrow_non_string = function(con) { + #' or if the query is not a non-`NA` string. + expect_error(dbGetQueryArrow(con, character())) + expect_error(dbGetQueryArrow(con, letters)) + expect_error(dbGetQueryArrow(con, NA_character_)) + }, + + arrow_get_query_arrow_record_batch_reader = function(ctx, con) { + #' The object returned by `dbGetQueryArrow()` can also be passed to + #' [arrow::as_record_batch_reader()] to create an Arrow + #' RecordBatchReader object that can be used to read the result set + #' in batches. + query <- trivial_query(25, .ctx = ctx, .order_by = "a") + result <- trivial_df(25) + + stream <- dbGetQueryArrow(con, query) + + rbr <- arrow::as_record_batch_reader(stream) + + #' The chunk size is implementation-specific. 
+ # Arrow returns a tibble when it shouldn't + out <- as.data.frame(as.data.frame(rbr$read_next_batch())) + expect_equal(out, head(result, nrow(out))) + }, + + # #' @section Additional arguments: + # #' The following arguments are not part of the `dbGetQueryArrow()` generic + # #' (to improve compatibility across backends) + # #' but are part of the DBI specification: + # #' - `params` (default: `NULL`) + # #' - `immediate` (default: `NULL`) + # #' + # #' They must be provided as named arguments. + # #' See the "Specification" and "Value" sections for details on their usage. + # + # #' + # arrow_get_query_arrow_params = function(ctx, con) { + # #' The `param` argument allows passing query parameters, see [dbBind()] for details. + # placeholder_funs <- get_placeholder_funs(ctx) + # + # for (placeholder_fun in placeholder_funs) { + # placeholder <- placeholder_fun(1) + # query <- paste0("SELECT ", placeholder, " + 1.0 AS a") + # values <- trivial_values(3) - 1 + # params <- stats::setNames(list(values), names(placeholder)) + # ret <- dbGetQueryArrow(con, query, params = params) + # expect_equal(ret, trivial_df(3), info = placeholder) + # } + # }, + # + # arrow_get_query_arrow_immediate = function(con, table_name) { + # #' @section Specification for the `immediate` argument: + # #' + # #' The `immediate` argument supports distinguishing between "direct" + # #' and "prepared" APIs offered by many database drivers. + # #' Passing `immediate = TRUE` leads to immediate execution of the + # #' query or statement, via the "direct" API (if supported by the driver). + # #' The default `NULL` means that the backend should choose whatever API + # #' makes the most sense for the database, and (if relevant) tries the + # #' other API if the first attempt fails. A successful second attempt + # #' should result in a message that suggests passing the correct + # #' `immediate` argument. + # #' Examples for possible behaviors: + # #' 1. 
DBI backend defaults to `immediate = TRUE` internally + # #' 1. A query without parameters is passed: query is executed + # #' 1. A query with parameters is passed: + # #' 1. `params` not given: rejected immediately by the database + # #' because of a syntax error in the query, the backend tries + # #' `immediate = FALSE` (and gives a message) + # #' 1. `params` given: query is executed using `immediate = FALSE` + # #' 1. DBI backend defaults to `immediate = FALSE` internally + # #' 1. A query without parameters is passed: + # #' 1. simple query: query is executed + # #' 1. "special" query (such as setting a config options): fails, + # #' the backend tries `immediate = TRUE` (and gives a message) + # #' 1. A query with parameters is passed: + # #' 1. `params` not given: waiting for parameters via [dbBind()] + # #' 1. `params` given: query is executed + # res <- expect_visible(dbGetQueryArrow(con, trivial_query(), immediate = TRUE)) + # expect_s3_class(res, "data.frame") + # }, + # + NULL +) diff --git a/R/spec-arrow-read-table-arrow.R b/R/spec-arrow-read-table-arrow.R new file mode 100644 index 000000000..6c83a9cbb --- /dev/null +++ b/R/spec-arrow-read-table-arrow.R @@ -0,0 +1,99 @@ +#' spec_arrow_read_table_arrow +#' @family Arrow specifications +#' @usage NULL +#' @format NULL +#' @keywords NULL +spec_arrow_read_table_arrow <- list( + arrow_read_table_arrow_formals = function() { + # + expect_equal(names(formals(dbReadTableArrow)), c("conn", "name", "...")) + }, + + arrow_read_table_arrow = function(ctx, con, table_name) { + # Failed on duckdb + skip_if_not_dbitest(ctx, "1.7.4") + + #' @return + #' `dbReadTableArrow()` returns a data frame that contains the complete data + #' from the remote table, effectively the result of calling [dbGetQuery()] + #' with `SELECT * FROM <name>`. 
+ penguins_in <- get_penguins(ctx) + dbWriteTable(con, table_name, penguins_in) + penguins_out <- check_arrow(dbReadTableArrow(con, table_name)) + + expect_equal_df(penguins_out, penguins_in) + }, + + #' + arrow_read_table_arrow_missing = function(con, table_name) { + #' @section Failure modes: + #' An error is raised if the table does not exist. + expect_error(dbReadTableArrow(con, table_name)) + }, + + arrow_read_table_arrow_empty = function(ctx, con, table_name) { + skip("Causes segfault in adbc and duckdb") + + #' @return + #' An empty table is returned as a data frame with zero rows. + penguins_in <- get_penguins(ctx)[integer(), ] + dbWriteTable(con, table_name, penguins_in) + penguins_out <- check_arrow(dbReadTableArrow(con, table_name)) + + expect_equal(nrow(penguins_out), 0L) + expect_equal_df(penguins_out, penguins_in) + }, + + #' + arrow_read_table_arrow_closed_connection = function(ctx, con, table_name) { + #' @section Failure modes: + #' An error is raised when calling this method for a closed + dbWriteTable(con, table_name, data.frame(a = 1)) + con2 <- local_closed_connection(ctx = ctx) + expect_error(dbReadTableArrow(con2, table_name)) + }, + + arrow_read_table_arrow_invalid_connection = function(ctx, con, table_name) { + #' or invalid connection. + dbWriteTable(con, table_name, data.frame(a = 1)) + con2 <- local_invalid_connection(ctx) + expect_error(dbReadTableArrow(con2, table_name)) + }, + + arrow_read_table_arrow_error = function(ctx, con, table_name) { + #' An error is raised + dbWriteTable(con, table_name, data.frame(a = 1L)) + #' if `name` cannot be processed with [dbQuoteIdentifier()] + expect_error(dbReadTableArrow(con, NA)) + #' or if this results in a non-scalar. 
+ expect_error(dbReadTableArrow(con, c(table_name, table_name))) + }, + + arrow_read_table_arrow_name = function(ctx, con) { + #' @section Specification: + #' The `name` argument is processed as follows, + #' to support databases that allow non-syntactic names for their objects: + if (isTRUE(ctx$tweaks$strict_identifier)) { + table_names <- "a" + } else { + table_names <- c("a", "with spaces", "with,comma") + } + + for (table_name in table_names) { + local_remove_test_table(con, table_name) + test_in <- data.frame(a = 1L) + dbWriteTable(con, table_name, test_in) + + #' - If an unquoted table name as string: `dbReadTableArrow()` will do the + #' quoting, + test_out <- check_arrow(dbReadTableArrow(con, table_name)) + expect_equal_df(test_out, test_in) + #' perhaps by calling `dbQuoteIdentifier(conn, x = name)` + #' - If the result of a call to [dbQuoteIdentifier()]: no more quoting is done + test_out <- check_arrow(dbReadTableArrow(con, dbQuoteIdentifier(con, table_name))) + expect_equal_df(test_out, test_in) + } + }, + # + NULL +) diff --git a/R/spec-arrow-roundtrip.R b/R/spec-arrow-roundtrip.R new file mode 100644 index 000000000..90667154c --- /dev/null +++ b/R/spec-arrow-roundtrip.R @@ -0,0 +1,2 @@ +# FIXME: Adapt tests from spec_result_roundtrip +spec_arrow_roundtrip <- list() diff --git a/R/spec-arrow-send-query-arrow.R b/R/spec-arrow-send-query-arrow.R new file mode 100644 index 000000000..dd9587cdd --- /dev/null +++ b/R/spec-arrow-send-query-arrow.R @@ -0,0 +1,128 @@ +#' spec_arrow_send_query_arrow +#' @family Arrow specifications +#' @usage NULL +#' @format NULL +#' @keywords NULL +spec_arrow_send_query_arrow <- list( + arrow_send_query_formals = function() { + # + expect_equal(names(formals(dbSendQueryArrow)), c("conn", "statement", "...")) + }, + + arrow_send_query_trivial = function(con) { + #' @return + #' `dbSendQueryArrow()` returns + res <- expect_visible(dbSendQueryArrow(con, trivial_query())) + #' an S4 object that inherits from [DBIResultArrow-class]. 
+ expect_s4_class(res, "DBIResultArrow") + #' The result set can be used with [dbFetchArrow()] to extract records. + expect_equal(check_arrow(dbFetchArrow(res))[[1]], 1.5) + #' Once you have finished using a result, make sure to clear it + #' with [dbClearResult()]. + dbClearResult(res) + }, + + #' + arrow_send_query_closed_connection = function(ctx, closed_con) { + #' @section Failure modes: + #' An error is raised when issuing a query over a closed + expect_error(dbSendQueryArrow(closed_con, trivial_query())) + }, + + arrow_send_query_invalid_connection = function(ctx, invalid_con) { + #' or invalid connection, + expect_error(dbSendQueryArrow(invalid_con, trivial_query())) + }, + + arrow_send_query_non_string = function(con) { + #' or if the query is not a non-`NA` string. + expect_error(dbSendQueryArrow(con, character())) + expect_error(dbSendQueryArrow(con, letters)) + expect_error(dbSendQueryArrow(con, NA_character_)) + }, + + # FIXME: Enable + + # arrow_send_query_syntax_error = function(con) { + # #' An error is also raised if the syntax of the query is invalid + # #' and all query parameters are given (by passing the `params` argument) + # #' or the `immediate` argument is set to `TRUE`. + # #' + # #' @section Failure modes: + # expect_error(dbSendQueryArrow(con, "SELLECT", params = list())) + # expect_error(dbSendQueryArrow(con, "SELLECT", immediate = TRUE)) + # }, + # #' @section Additional arguments: + # #' The following arguments are not part of the `dbSendQueryArrow()` generic + # #' (to improve compatibility across backends) + # #' but are part of the DBI specification: + # #' - `params` (default: `NULL`) + # #' - `immediate` (default: `NULL`) + # #' + # #' They must be provided as named arguments. + # #' See the "Specification" sections for details on their usage. + # + # arrow_send_query_result_valid = function(con) { + # #' @section Specification: + # #' No warnings occur under normal conditions. 
+ # expect_warning(res <- dbSendQueryArrow(con, trivial_query()), NA) + # #' When done, the DBIResult object must be cleared with a call to + # #' [dbClearResult()]. + # dbClearResult(res) + # }, + # # + # arrow_send_query_stale_warning = function(ctx) { + # #' Failure to clear the result set leads to a warning + # #' when the connection is closed. + # con <- connect(ctx) + # on.exit(dbDisconnect(con)) + # expect_warning(dbSendQueryArrow(con, trivial_query()), NA) + # + # expect_warning({ + # dbDisconnect(con) + # gc() + # }) + # on.exit(NULL) + # }, + # + # #' + # arrow_send_query_only_one_result_set = function(con) { + # #' If the backend supports only one open result set per connection, + # res1 <- dbSendQueryArrow(con, trivial_query()) + # #' issuing a second query invalidates an already open result set + # #' and raises a warning. + # expect_warning(res2 <- dbSendQueryArrow(con, "SELECT 2")) + # expect_false(dbIsValid(res1)) + # #' The newly opened result set is valid + # expect_true(dbIsValid(res2)) + # #' and must be cleared with `dbClearResult()`. + # dbClearResult(res2) + # }, + # + # #' + # arrow_send_query_params = function(ctx, con) { + # #' The `param` argument allows passing query parameters, see [dbBind()] for details. 
+ # placeholder_funs <- get_placeholder_funs(ctx) + # + # for (placeholder_fun in placeholder_funs) { + # placeholder <- placeholder_fun(1) + # query <- paste0("SELECT ", placeholder, " + 1.0 AS a") + # values <- trivial_values(3) - 1 + # params <- stats::setNames(list(values), names(placeholder)) + # rs <- dbSendQueryArrow(con, query, params = params) + # ret <- dbFetch(rs) + # expect_equal(ret, trivial_df(3), info = placeholder) + # dbClearResult(rs) + # } + # }, + # + # arrow_send_query_immediate = function(con, table_name) { + # #' @inheritSection spec_result_get_query Specification for the `immediate` argument + # res <- expect_visible(dbSendQueryArrow(con, trivial_query(), immediate = TRUE)) + # expect_s4_class(res, "DBIResult") + # expect_error(dbGetRowsAffected(res), NA) + # dbClearResult(res) + # }, + # + NULL +) diff --git a/R/spec-arrow-write-table-arrow.R b/R/spec-arrow-write-table-arrow.R new file mode 100644 index 000000000..6fa2645d3 --- /dev/null +++ b/R/spec-arrow-write-table-arrow.R @@ -0,0 +1,780 @@ +#' spec_arrow_write_table_arrow +#' @family Arrow specifications +#' @usage NULL +#' @format NULL +#' @keywords NULL +#' @importFrom lubridate with_tz +spec_arrow_write_table_arrow <- list( + arrow_write_table_arrow_formals = function() { + # + expect_equal(names(formals(dbWriteTableArrow)), c("conn", "name", "value", "...")) + }, + + arrow_write_table_arrow_return = function(con, table_name) { + #' @return + #' `dbWriteTableArrow()` returns `TRUE`, invisibly. 
+ expect_invisible_true(dbWriteTableArrow(con, table_name, stream_frame(a = 1L))) + }, + + #' + arrow_write_table_arrow_error_overwrite = function(con, table_name) { + skip("Failed in SQLite") + + #' @section Failure modes: + #' If the table exists, and both `append` and `overwrite` arguments are unset, + test_in <- data.frame(a = 1L) + dbWriteTableArrow(con, table_name, test_in %>% stream_frame()) + expect_error(dbWriteTableArrow(con, table_name, stream_frame(a = 2L))) + + test_out <- check_df(dbReadTable(con, table_name)) + expect_equal_df(test_out, test_in) + }, + + arrow_write_table_arrow_append_incompatible = function(con, table_name) { + #' or `append = TRUE` and the data frame with the new data has different + #' column names, + #' an error is raised; the remote table remains unchanged. + test_in <- data.frame(a = 1L) + dbWriteTableArrow(con, table_name, test_in %>% stream_frame()) + expect_error(dbWriteTableArrow(con, table_name, stream_frame(b = 2L), append = TRUE)) + + test_out <- check_df(dbReadTable(con, table_name)) + expect_equal_df(test_out, test_in) + }, + + #' + arrow_write_table_arrow_closed_connection = function(ctx, closed_con) { + #' An error is raised when calling this method for a closed + expect_error(dbWriteTableArrow(closed_con, "test", stream_frame(a = 1))) + }, + + arrow_write_table_arrow_invalid_connection = function(ctx, invalid_con) { + #' or invalid connection. + expect_error(dbWriteTableArrow(invalid_con, "test", stream_frame(a = 1))) + }, + + arrow_write_table_arrow_error = function(ctx, con, table_name) { + skip("Failed in SQLite") + + #' An error is also raised + test_in <- stream_frame(a = 1L) + #' if `name` cannot be processed with [dbQuoteIdentifier()] + expect_error(dbWriteTableArrow(con, NA, test_in %>% stream_frame())) + #' or if this results in a non-scalar. 
+ expect_error(dbWriteTableArrow(con, c(table_name, table_name), test_in %>% stream_frame())) + + #' Invalid values for the additional arguments + #' `overwrite`, `append`, and `temporary` + #' (non-scalars, + expect_error(dbWriteTableArrow(con, table_name, test_in %>% stream_frame(), overwrite = c(TRUE, FALSE))) + expect_error(dbWriteTableArrow(con, table_name, test_in %>% stream_frame(), append = c(TRUE, FALSE))) + expect_error(dbWriteTableArrow(con, table_name, test_in %>% stream_frame(), temporary = c(TRUE, FALSE))) + #' unsupported data types, + expect_error(dbWriteTableArrow(con, table_name, test_in %>% stream_frame(), overwrite = 1L)) + expect_error(dbWriteTableArrow(con, table_name, test_in %>% stream_frame(), append = 1L)) + expect_error(dbWriteTableArrow(con, table_name, test_in %>% stream_frame(), temporary = 1L)) + #' `NA`, + expect_error(dbWriteTableArrow(con, table_name, test_in %>% stream_frame(), overwrite = NA)) + expect_error(dbWriteTableArrow(con, table_name, test_in %>% stream_frame(), append = NA)) + expect_error(dbWriteTableArrow(con, table_name, test_in %>% stream_frame(), temporary = NA)) + #' incompatible values, + expect_error(dbWriteTableArrow(con, table_name, test_in %>% stream_frame(), overwrite = TRUE, append = TRUE)) + expect_error(dbWriteTableArrow(con, table_name, test_in %>% stream_frame(), append = TRUE)) + #' duplicate + expect_error(dbWriteTableArrow(con, table_name, test_in %>% stream_frame())) + #' or missing names, + expect_error(dbWriteTableArrow(con, table_name, test_in %>% stream_frame())) + + #' incompatible columns) + dbWriteTableArrow(con, table_name, test_in %>% stream_frame()) + expect_error(dbWriteTableArrow(con, table_name, stream_frame(b = 2L, c = 3L), append = TRUE)) + + #' also raise an error. 
+ }, + + #' @section Additional arguments: + #' The following arguments are not part of the `dbWriteTableArrow()` generic + #' (to improve compatibility across backends) + #' but are part of the DBI specification: + #' - `overwrite` (default: `FALSE`) + #' - `append` (default: `FALSE`) + #' - `temporary` (default: `FALSE`) + #' + #' They must be provided as named arguments. + #' See the "Specification" and "Value" sections for details on their usage. + + arrow_write_table_arrow_name = function(ctx, con) { + #' @section Specification: + #' The `name` argument is processed as follows, + #' to support databases that allow non-syntactic names for their objects: + if (isTRUE(ctx$tweaks$strict_identifier)) { + table_names <- "a" + } else { + table_names <- c("a", "with spaces", "with,comma") + } + + for (table_name in table_names) { + test_in <- data.frame(a = 1) + local_remove_test_table(con, table_name) + #' - If an unquoted table name as string: `dbWriteTableArrow()` will do the quoting, + dbWriteTableArrow(con, table_name, test_in %>% stream_frame()) + test_out <- check_df(dbReadTable(con, dbQuoteIdentifier(con, table_name))) + expect_equal_df(test_out, test_in) + #' perhaps by calling `dbQuoteIdentifier(conn, x = name)` + } + }, + + arrow_write_table_arrow_name_quoted = function(ctx, con) { + #' - If the result of a call to [dbQuoteIdentifier()]: no more quoting is done + if (as.package_version(ctx$tweaks$dbitest_version) < "1.7.2") { + skip(paste0("tweak: dbitest_version: ", ctx$tweaks$dbitest_version)) + } + + if (isTRUE(ctx$tweaks$strict_identifier)) { + table_names <- "a" + } else { + table_names <- c("a", "with spaces", "with,comma") + } + + for (table_name in table_names) { + test_in <- data.frame(a = 1) + + local_remove_test_table(con, table_name) + dbWriteTableArrow(con, dbQuoteIdentifier(con, table_name), test_in %>% stream_frame()) + test_out <- check_df(dbReadTable(con, table_name)) + expect_equal_df(test_out, test_in) + } + }, + + #' + 
arrow_write_table_arrow_value_df = function(con, table_name) { + #' The `value` argument must be a data frame + test_in <- trivial_df() + dbWriteTableArrow(con, table_name, test_in %>% stream_frame()) + + test_out <- check_df(dbReadTable(con, table_name)) + expect_equal_df(test_out, test_in) + }, + + arrow_write_table_arrow_value_subset = function(ctx, con, table_name) { + #' with a subset of the columns of the existing table if `append = TRUE`. + test_in <- trivial_df(3, letters[1:3]) + dbCreateTable(con, table_name, test_in) + dbWriteTableArrow(con, table_name, test_in[2] %>% stream_frame(), append = TRUE) + + test_out <- check_df(dbReadTable(con, table_name)) + + test_in[c(1, 3)] <- NA_real_ + expect_equal_df(test_out, test_in) + }, + + arrow_write_table_arrow_value_shuffle = function(ctx, con, table_name) { + #' The order of the columns does not matter with `append = TRUE`. + test_in <- trivial_df(3, letters[1:3]) + dbCreateTable(con, table_name, test_in) + dbWriteTableArrow(con, table_name, test_in[c(2, 3, 1)] %>% stream_frame(), append = TRUE) + + test_out <- check_df(dbReadTable(con, table_name)) + expect_equal_df(test_out, test_in) + }, + + # + arrow_write_table_arrow_value_shuffle_subset = function(ctx, con, table_name) { + test_in <- trivial_df(4, letters[1:4]) + dbCreateTable(con, table_name, test_in) + dbWriteTableArrow(con, table_name, test_in[c(4, 1, 3)] %>% stream_frame(), append = TRUE) + + test_out <- check_df(dbReadTable(con, table_name)) + + test_in[2] <- NA_real_ + expect_equal_df(test_out, test_in) + }, + + #' + arrow_write_table_arrow_overwrite = function(ctx, con, table_name) { + skip("Requires dbBind() on RMariaDB") + + #' If the `overwrite` argument is `TRUE`, an existing table of the same name + #' will be overwritten. 
+ penguins <- get_penguins(ctx) + dbWriteTableArrow(con, table_name, penguins) + expect_error( + dbWriteTableArrow(con, table_name, penguins[1, ] %>% stream_frame(), overwrite = TRUE), + NA + ) + penguins_out <- check_df(dbReadTable(con, table_name)) + expect_equal_df(penguins_out, penguins[1, ]) + }, + + arrow_write_table_arrow_overwrite_missing = function(ctx, con, table_name) { + skip("Requires dbBind() on RMariaDB") + + #' This argument doesn't change behavior if the table does not exist yet. + penguins_in <- get_penguins(ctx) + expect_error( + dbWriteTableArrow(con, table_name, penguins_in[1, ] %>% stream_frame(), overwrite = TRUE), + NA + ) + penguins_out <- check_df(dbReadTable(con, table_name)) + expect_equal_df(penguins_out, penguins_in[1, ]) + }, + + #' + arrow_write_table_arrow_append = function(ctx, con, table_name) { + skip("Requires dbBind() on RMariaDB") + + #' If the `append` argument is `TRUE`, the rows in an existing table are + #' preserved, and the new data are appended. + penguins <- get_penguins(ctx) + dbWriteTableArrow(con, table_name, penguins) + expect_error(dbWriteTableArrow(con, table_name, penguins[1, ] %>% stream_frame(), append = TRUE), NA) + penguins_out <- check_df(dbReadTable(con, table_name)) + expect_equal_df(penguins_out, rbind(penguins, penguins[1, ])) + }, + + arrow_write_table_arrow_append_new = function(ctx, con, table_name) { + skip("Failed in SQLite") + + #' If the table doesn't exist yet, it is created. + penguins <- get_penguins(ctx) + expect_error(dbWriteTableArrow(con, table_name, penguins[1, ] %>% stream_frame(), append = TRUE), NA) + penguins_out <- check_df(dbReadTable(con, table_name)) + expect_equal_df(penguins_out, penguins[1, ]) + }, + + #' + arrow_write_table_arrow_temporary = function(ctx, con, table_name = "dbit08") { + skip("Failed in SQLite") + + #' If the `temporary` argument is `TRUE`, the table is not available in a + #' second connection and is gone after reconnecting. 
+ #' Not all backends support this argument. + if (!isTRUE(ctx$tweaks$temporary_tables)) { + skip("tweak: temporary_tables") + } + + penguins <- get_penguins(ctx) + dbWriteTableArrow(con, table_name, penguins %>% stream_frame(), temporary = TRUE) + penguins_out <- check_df(dbReadTable(con, table_name)) + expect_equal_df(penguins_out, penguins) + + con2 <- local_connection(ctx) + expect_error(dbReadTable(con2, table_name)) + }, + # second stage + arrow_write_table_arrow_temporary = function(ctx, con) { + skip("Failed in SQLite") + + if (!isTRUE(ctx$tweaks$temporary_tables)) { + skip("tweak: temporary_tables") + } + + table_name <- "dbit08" + expect_error(dbReadTable(con, table_name)) + }, + + arrow_write_table_arrow_visible_in_other_connection = function(ctx, local_con) { + skip("Failed in SQLite") + + #' A regular, non-temporary table is visible in a second connection, + penguins30 <- get_penguins(ctx) + + table_name <- "dbit09" + + dbWriteTableArrow(local_con, table_name, penguins30 %>% stream_frame()) + penguins_out <- check_df(dbReadTable(local_con, table_name)) + expect_equal_df(penguins_out, penguins30) + + con2 <- local_connection(ctx) + expect_equal_df(dbReadTable(con2, table_name), penguins30) + }, + # second stage + arrow_write_table_arrow_visible_in_other_connection = function(ctx, con) { + skip("Failed in SQLite") + + #' in a pre-existing connection, + penguins30 <- get_penguins(ctx) + + table_name <- "dbit09" + + expect_equal_df(check_df(dbReadTable(con, table_name)), penguins30) + }, + # third stage + arrow_write_table_arrow_visible_in_other_connection = function(ctx, local_con, table_name = "dbit09") { + skip("Failed in SQLite") + + #' and after reconnecting to the database. 
+ penguins30 <- get_penguins(ctx) + + expect_equal_df(check_df(dbReadTable(local_con, table_name)), penguins30) + }, + + #' + arrow_write_table_arrow_roundtrip_keywords = function(ctx, con) { + skip("Requires dbBind() on RMariaDB") + + #' SQL keywords can be used freely in table names, column names, and data. + tbl_in <- data.frame( + select = "unique", from = "join", where = "order", + stringsAsFactors = FALSE + ) + test_arrow_roundtrip(con, tbl_in, name = "exists") + }, + + arrow_write_table_arrow_roundtrip_quotes = function(ctx, con, table_name) { + skip("Requires dbBind() on RMariaDB") + + #' Quotes, commas, spaces, and other special characters such as newlines and tabs, + #' can also be used in the data, + tbl_in <- data.frame( + as.character(dbQuoteString(con, "")), + as.character(dbQuoteIdentifier(con, "")), + "with space", + "a,b", "a\nb", "a\tb", "a\rb", "a\bb", + "a\\Nb", "a\\tb", "a\\rb", "a\\bb", "a\\Zb", + stringsAsFactors = FALSE + ) + + names(tbl_in) <- letters[seq_along(tbl_in)] + test_arrow_roundtrip(con, tbl_in) + }, + + arrow_write_table_arrow_roundtrip_quotes_table_names = function(ctx, con) { + #' and, if the database supports non-syntactic identifiers, + #' also for table names + if (isTRUE(ctx$tweaks$strict_identifier)) { + skip("tweak: strict_identifier") + } + + table_names <- c( + as.character(dbQuoteIdentifier(con, "")), + as.character(dbQuoteString(con, "")), + "with space", + "a,b", "a\nb", "a\tb", "a\rb", "a\bb", + "a\\Nb", "a\\tb", "a\\rb", "a\\bb", "a\\Zb" + ) + + tbl_in <- trivial_df() + + for (table_name in table_names) { + test_arrow_roundtrip_one(con, tbl_in, .add_na = "none") + } + }, + + arrow_write_table_arrow_roundtrip_quotes_column_names = function(ctx, con) { + skip("Failed in SQLite") + + #' and column names. 
+ if (as.package_version(ctx$tweaks$dbitest_version) < "1.7.2") { + skip(paste0("tweak: dbitest_version: ", ctx$tweaks$dbitest_version)) + } + + if (isTRUE(ctx$tweaks$strict_identifier)) { + skip("tweak: strict_identifier") + } + + column_names <- c( + as.character(dbQuoteIdentifier(con, "")), + as.character(dbQuoteString(con, "")), + "with space", + "a,b", "a\nb", "a\tb", "a\rb", "a\bb", + "a\\nb", "a\\tb", "a\\rb", "a\\bb", "a\\zb" + ) + + tbl_in <- trivial_df(length(column_names), column_names) + + test_arrow_roundtrip_one(con, tbl_in, .add_na = "none") + }, + + #' + arrow_write_table_arrow_roundtrip_integer = function(ctx, con) { + #' The following data types must be supported at least, + #' and be read identically with [dbReadTable()]: + #' - integer + tbl_in <- data.frame(a = c(1:5)) + test_arrow_roundtrip(con, tbl_in) + }, + + arrow_write_table_arrow_roundtrip_numeric = function(ctx, con) { + #' - numeric + tbl_in <- data.frame(a = c(seq(1, 3, by = 0.5))) + test_arrow_roundtrip(con, tbl_in) + #' (the behavior for `Inf` and `NaN` is not specified) + }, + + arrow_write_table_arrow_roundtrip_logical = function(ctx, con) { + skip("Fails in adbc") + + #' - logical + tbl_in <- data.frame(a = c(TRUE, FALSE, NA)) + tbl_exp <- tbl_in + tbl_exp$a <- ctx$tweaks$logical_return(tbl_exp$a) + test_arrow_roundtrip(con, tbl_in, tbl_exp) + }, + + arrow_write_table_arrow_roundtrip_null = function(ctx, con) { + #' - `NA` as NULL + tbl_in <- data.frame(a = NA) + test_arrow_roundtrip( + con, tbl_in, + transform = function(tbl_out) { + tbl_out$a <- as.logical(tbl_out$a) # Plain NA is of type logical + tbl_out + } + ) + }, + + #' - 64-bit values (using `"bigint"` as field type); the result can be + arrow_write_table_arrow_roundtrip_64_bit_numeric = function(ctx, con) { + tbl_in <- data.frame(a = c(-1e14, 1e15)) + test_arrow_roundtrip( + con, tbl_in, + transform = function(tbl_out) { + #' - converted to a numeric, which may lose precision, + tbl_out$a <- as.numeric(tbl_out$a) + 
tbl_out + } + ) + }, + # + arrow_write_table_arrow_roundtrip_64_bit_character = function(ctx, con) { + skip("Failed in SQLite") + + tbl_in <- data.frame(a = c(-1e14, 1e15)) + tbl_exp <- tbl_in + tbl_exp$a <- format(tbl_exp$a, scientific = FALSE) + test_arrow_roundtrip( + con, tbl_in, tbl_exp, + transform = function(tbl_out) { + #' - converted to a character vector, which gives the full decimal + #' representation + tbl_out$a <- as.character(tbl_out$a) + tbl_out + } + ) + }, + # + arrow_write_table_arrow_roundtrip_64_bit_roundtrip = function(con, table_name) { + skip("Failed in SQLite") + + tbl_in <- data.frame(a = c(-1e14, 1e15)) + dbWriteTableArrow(con, table_name, tbl_in, field.types = c(a = "BIGINT")) + tbl_out <- dbReadTable(con, table_name) + #' - written to another table and read again unchanged + test_arrow_roundtrip(con, tbl_out, tbl_expected = tbl_out) + }, + + arrow_write_table_arrow_roundtrip_character = function(ctx, con) { + skip("Requires dbBind() on RMariaDB") + + #' - character (in both UTF-8 + tbl_in <- data.frame( + id = seq_along(get_texts()), + a = get_texts(), + stringsAsFactors = FALSE + ) + test_arrow_roundtrip(con, tbl_in) + }, + + arrow_write_table_arrow_roundtrip_character_native = function(ctx, con) { + skip("Requires dbBind() on RMariaDB") + + #' and native encodings), + tbl_in <- data.frame( + a = c(enc2native(get_texts())), + stringsAsFactors = FALSE + ) + test_arrow_roundtrip(con, tbl_in) + }, + + arrow_write_table_arrow_roundtrip_character_empty = function(ctx, con) { + #' supporting empty strings + tbl_in <- data.frame( + a = c("", "a"), + stringsAsFactors = FALSE + ) + test_arrow_roundtrip(con, tbl_in) + }, + + arrow_write_table_arrow_roundtrip_character_empty_after = function(ctx, con) { + #' before and after a non-empty string + tbl_in <- data.frame( + a = c("a", ""), + stringsAsFactors = FALSE + ) + test_arrow_roundtrip(con, tbl_in) + }, + + arrow_write_table_arrow_roundtrip_factor = function(ctx, con) { + skip("Failed in SQLite") 
+ + #' - factor (returned as character) + tbl_in <- data.frame( + a = factor(get_texts()) + ) + tbl_exp <- tbl_in + tbl_exp$a <- as.character(tbl_exp$a) + test_arrow_roundtrip(con, tbl_in, tbl_exp) + }, + + arrow_write_table_arrow_roundtrip_raw = function(ctx, con) { + skip("Failed in SQLite") + + #' - list of raw + #' (if supported by the database) + if (isTRUE(ctx$tweaks$omit_blob_tests)) { + skip("tweak: omit_blob_tests") + } + + tbl_in <- data.frame(id = 1L, a = I(list(as.raw(0:10)))) + tbl_exp <- tbl_in + tbl_exp$a <- blob::as_blob(unclass(tbl_in$a)) + test_arrow_roundtrip( + con, tbl_in, tbl_exp, + transform = function(tbl_out) { + tbl_out$a <- blob::as_blob(tbl_out$a) + tbl_out + } + ) + }, + + arrow_write_table_arrow_roundtrip_blob = function(ctx, con) { + skip("Failed in SQLite") + + #' - objects of type [blob::blob] + #' (if supported by the database) + if (isTRUE(ctx$tweaks$omit_blob_tests)) { + skip("tweak: omit_blob_tests") + } + + tbl_in <- data.frame(id = 1L, a = blob::blob(as.raw(0:10))) + test_arrow_roundtrip( + con, tbl_in, + transform = function(tbl_out) { + tbl_out$a <- blob::as_blob(tbl_out$a) + tbl_out + } + ) + }, + + arrow_write_table_arrow_roundtrip_date = function(ctx, con) { + #' - date + #' (if supported by the database; + if (!isTRUE(ctx$tweaks$date_typed)) { + skip("tweak: !date_typed") + } + + #' returned as `Date`), + tbl_in <- data.frame(a = as_numeric_date(c(Sys.Date() + 1:5))) + test_arrow_roundtrip( + con, tbl_in, + transform = function(tbl_out) { + expect_type(unclass(tbl_out$a), "double") + tbl_out + } + ) + }, + + arrow_write_table_arrow_roundtrip_date_extended = function(ctx, con) { + #' also for dates prior to 1970 or 1900 or after 2038 + if (!isTRUE(ctx$tweaks$date_typed)) { + skip("tweak: !date_typed") + } + + tbl_in <- data.frame(a = as_numeric_date(c( + "1811-11-11", + "1899-12-31", + "1900-01-01", + "1950-05-05", + "1969-12-31", + "1970-01-01", + "2037-01-01", + "2038-01-01", + "2040-01-01", + "2999-09-09" + ))) + 
test_arrow_roundtrip( + con, tbl_in, + transform = function(tbl_out) { + expect_type(unclass(tbl_out$a), "double") + tbl_out + } + ) + }, + + arrow_write_table_arrow_roundtrip_time = function(ctx, con) { + #' - time + #' (if supported by the database; + if (!isTRUE(ctx$tweaks$time_typed)) { + skip("tweak: !time_typed") + } + + tbl_in <- data.frame(a = hms::hms(minutes = 1:5)) + tbl_in$b <- .difftime(as.numeric(tbl_in$a) / 60, "mins") + + tbl_exp <- tbl_in + tbl_exp$a <- hms::as_hms(tbl_exp$a) + tbl_exp$b <- hms::as_hms(tbl_exp$b) + + test_arrow_roundtrip( + con, tbl_in, tbl_exp, + transform = function(tbl_out) { + #' returned as objects that inherit from `difftime`) + expect_s3_class(tbl_out$a, "difftime") + expect_s3_class(tbl_out$b, "difftime") + tbl_out$a <- hms::as_hms(tbl_out$a) + tbl_out$b <- hms::as_hms(tbl_out$b) + tbl_out + } + ) + }, + + arrow_write_table_arrow_roundtrip_timestamp = function(ctx, con) { + skip("Fails in adbc") + + #' - timestamp + #' (if supported by the database; + if (!isTRUE(ctx$tweaks$timestamp_typed)) { + skip("tweak: !timestamp_typed") + } + + #' returned as `POSIXct` + local <- round(Sys.time()) + + c( + 1, 60, 3600, 86400, + 86400 * 90, 86400 * 180, 86400 * 270, + 1e9, 5e9 + ) + attr(local, "tzone") <- "" + tbl_in <- data.frame(id = seq_along(local)) + tbl_in$local <- local + tbl_in$gmt <- lubridate::with_tz(local, tzone = "GMT") + tbl_in$pst8pdt <- lubridate::with_tz(local, tzone = "PST8PDT") + tbl_in$utc <- lubridate::with_tz(local, tzone = "UTC") + + #' respecting the time zone but not necessarily preserving the + #' input time zone), + test_arrow_roundtrip( + con, tbl_in, + transform = function(out) { + dates <- vapply(out, inherits, "POSIXt", FUN.VALUE = logical(1L)) + tz <- toupper(names(out)) + tz[tz == "LOCAL"] <- "" + out[dates] <- Map(lubridate::with_tz, out[dates], tz[dates]) + out + } + ) + }, + + arrow_write_table_arrow_roundtrip_timestamp_extended = function(ctx, con) { + skip("Fails in adbc") + + #' also for 
timestamps prior to 1970 or 1900 or after 2038 + if (!isTRUE(ctx$tweaks$timestamp_typed)) { + skip("tweak: !timestamp_typed") + } + + local <- as.POSIXct(c( + "1811-11-11", + "1899-12-31", + "1900-01-01", + "1950-05-05", + "1969-12-31", + "1970-01-01", + "2037-01-01", + "2038-01-01", + "2040-01-01", + "2999-09-09" + )) + + attr(local, "tzone") <- "" + tbl_in <- data.frame(id = seq_along(local)) + tbl_in$local <- local + tbl_in$gmt <- lubridate::with_tz(local, tzone = "GMT") + tbl_in$pst8pdt <- lubridate::with_tz(local, tzone = "PST8PDT") + tbl_in$utc <- lubridate::with_tz(local, tzone = "UTC") + + #' respecting the time zone but not necessarily preserving the + #' input time zone) + test_arrow_roundtrip( + con, tbl_in, + transform = function(out) { + dates <- vapply(out, inherits, "POSIXt", FUN.VALUE = logical(1L)) + tz <- toupper(names(out)) + tz[tz == "LOCAL"] <- "" + out[dates] <- Map(lubridate::with_tz, out[dates], tz[dates]) + out + } + ) + }, + + #' + arrow_write_table_arrow_roundtrip_mixed = function(ctx, con) { + #' Mixing column types in the same table is supported. + data <- list("a", 1L, 1.5) + data <- lapply(data, c, NA) + expanded <- expand.grid(a = data, b = data, c = data) + tbl_in_list <- lapply( + seq_len(nrow(expanded)), + function(i) { + as.data.frame(lapply(expanded[i, ], unlist, recursive = FALSE)) + } + ) + + lapply(tbl_in_list, test_arrow_roundtrip, con = con) + }, + + # + NULL +) + +test_arrow_roundtrip <- function(...) 
{ + test_arrow_roundtrip_one(..., .add_na = "none") + test_arrow_roundtrip_one(..., .add_na = "above") + test_arrow_roundtrip_one(..., .add_na = "below") +} + +test_arrow_roundtrip_one <- function(con, tbl_in, tbl_expected = tbl_in, transform = identity, + name = NULL, use_append = FALSE, .add_na = "none") { + # Need data frames here because streams can be collected only once + stopifnot(is.data.frame(tbl_in)) + stopifnot(is.data.frame(tbl_expected)) + + force(tbl_expected) + if (.add_na == "above") { + tbl_in <- stream_add_na_above(tbl_in) + tbl_expected <- stream_add_na_above(tbl_expected) + } else if (.add_na == "below") { + tbl_in <- stream_add_na_below(tbl_in) + tbl_expected <- stream_add_na_below(tbl_expected) + } + + if (is.null(name)) { + name <- random_table_name() + } + + local_remove_test_table(con, name = name) + + if (use_append) { + dbCreateTableArrow(con, name, tbl_in %>% stream_frame()) + dbAppendTableArrow(con, name, tbl_in %>% stream_frame()) + } else { + dbWriteTableArrow(con, name, tbl_in %>% stream_frame()) + } + + tbl_read <- check_df(dbReadTable(con, name, check.names = FALSE)) + tbl_out <- transform(tbl_read) + expect_equal_df(tbl_out, tbl_expected) +} + +stream_add_na_above <- function(tbl) { + idx <- c(NA, seq_len(nrow(tbl))) + tbl <- tbl[idx, , drop = FALSE] + unrowname(tbl) +} + +stream_add_na_below <- function(tbl) { + idx <- c(seq_len(nrow(tbl)), NA) + tbl <- tbl[idx, , drop = FALSE] + unrowname(tbl) +} diff --git a/R/spec-arrow.R b/R/spec-arrow.R new file mode 100644 index 000000000..2f952e4a1 --- /dev/null +++ b/R/spec-arrow.R @@ -0,0 +1,27 @@ +#' @format NULL +spec_arrow <- c( + spec_arrow_send_query_arrow, + spec_arrow_fetch_arrow, + spec_arrow_get_query_arrow, + spec_arrow_read_table_arrow, + spec_arrow_write_table_arrow, + spec_arrow_create_table_arrow, + spec_arrow_append_table_arrow, + spec_arrow_bind, + spec_arrow_roundtrip, + # + NULL +) + +stream_frame <- function(..., .select = NULL) { + data <- data.frame(..., 
stringsAsFactors = FALSE) + out <- arrow::as_record_batch_reader(data) + + if (!is.null(.select)) { + out <- + out %>% + dplyr::select({{ .select }}) + } + + out +} diff --git a/R/spec-result-clear-result.R b/R/spec-result-clear-result.R index 984c60f70..76ffbfdac 100644 --- a/R/spec-result-clear-result.R +++ b/R/spec-result-clear-result.R @@ -1,5 +1,6 @@ #' spec_result_clear_result #' @family result specifications +#' @family Arrow specifications #' @usage NULL #' @format NULL #' @keywords NULL @@ -12,33 +13,53 @@ spec_result_clear_result <- list( clear_result_return_query = function(con) { #' @return #' `dbClearResult()` returns `TRUE`, invisibly, for result sets obtained from - #' both `dbSendQuery()` + #' `dbSendQuery()`, res <- dbSendQuery(con, trivial_query()) expect_invisible_true(dbClearResult(res)) }, clear_result_return_statement = function(ctx, con, table_name) { - #' and `dbSendStatement()`. + #' `dbSendStatement()`, res <- dbSendStatement(con, ctx$tweaks$create_table_as(table_name, "SELECT 1")) expect_invisible_true(dbClearResult(res)) }, + clear_result_return_query_arrow = function(ctx, con, table_name) { + # Failed on SQL Server + skip_if_not_dbitest(ctx, "1.7.4") + + #' or `dbSendQueryArrow()`, + res <- dbSendQueryArrow(con, ctx$tweaks$create_table_as(table_name, "SELECT 1")) + expect_invisible_true(dbClearResult(res)) + }, + #' cannot_clear_result_twice_query = function(con) { #' @section Failure modes: #' An attempt to close an already closed result set issues a warning + #' for `dbSendQuery()`, res <- dbSendQuery(con, trivial_query()) dbClearResult(res) expect_warning(expect_invisible_true(dbClearResult(res))) }, cannot_clear_result_twice_statement = function(ctx, con, table_name) { - #' in both cases. 
+ #' `dbSendStatement()`, res <- dbSendStatement(con, ctx$tweaks$create_table_as(table_name, "SELECT 1")) dbClearResult(res) expect_warning(expect_invisible_true(dbClearResult(res))) }, + cannot_clear_result_twice_query_arrow = function(ctx, con, table_name) { + # Failed on SQL Server + skip_if_not_dbitest(ctx, "1.7.4") + + #' and `dbSendQueryArrow()`, + res <- dbSendQueryArrow(con, ctx$tweaks$create_table_as(table_name, "SELECT 1")) + dbClearResult(res) + expect_warning(expect_invisible_true(dbClearResult(res))) + }, + #' @section Specification: #' `dbClearResult()` frees all resources associated with retrieving #' the result of a query or update operation. diff --git a/R/test-all.R b/R/test-all.R index 18b255435..ad90223ab 100644 --- a/R/test-all.R +++ b/R/test-all.R @@ -30,6 +30,7 @@ test_all <- function(skip = NULL, run_only = NULL, ctx = get_default_context()) if (run_all || Sys.getenv("DBITEST_ONLY_SQL") != "") test_sql(skip = skip, run_only = run_only, ctx = ctx) if (run_all || Sys.getenv("DBITEST_ONLY_META") != "") test_meta(skip = skip, run_only = run_only, ctx = ctx) if (run_all || Sys.getenv("DBITEST_ONLY_TRANSACTION") != "") test_transaction(skip = skip, run_only = run_only, ctx = ctx) + if (run_all || Sys.getenv("DBITEST_ONLY_ARROW") != "") test_arrow(skip = skip, run_only = run_only, ctx = ctx) if (run_all || Sys.getenv("DBITEST_ONLY_COMPLIANCE") != "") test_compliance(skip = skip, run_only = run_only, ctx = ctx) # stress tests are not tested by default (#92) invisible() diff --git a/R/test-arrow.R b/R/test-arrow.R new file mode 100644 index 000000000..5abc597c6 --- /dev/null +++ b/R/test-arrow.R @@ -0,0 +1,18 @@ +#' @name test_all +#' @aliases NULL +#' @section Tests: +#' [test_arrow()]: +#' Test Arrow methods +NULL + +#' Test Arrow methods +#' +#' @inheritParams test_all +#' @include test-transaction.R +#' @family tests +#' @export +test_arrow <- function(skip = NULL, run_only = NULL, ctx = get_default_context()) { + test_suite <- "Arrow" + + 
run_tests(ctx, spec_arrow, skip, run_only, test_suite) +} diff --git a/R/test-compliance.R b/R/test-compliance.R index 702a4effe..f97e1f31e 100644 --- a/R/test-compliance.R +++ b/R/test-compliance.R @@ -8,7 +8,7 @@ NULL #' Test full compliance to DBI #' #' @inheritParams test_all -#' @include test-transaction.R +#' @include test-arrow.R #' @family tests #' @export test_compliance <- function(skip = NULL, run_only = NULL, ctx = get_default_context()) { diff --git a/R/utils.R b/R/utils.R index 8870df3b9..8adad3437 100644 --- a/R/utils.R +++ b/R/utils.R @@ -51,9 +51,10 @@ local_remove_test_table <- function(con, name, frame = rlang::caller_env()) { get_penguins <- function(ctx) { datasets_penguins <- unrowname(palmerpenguins::penguins[c(1, 153, 277), ]) - if (!isTRUE(ctx$tweaks$strict_identifier)) { - names(datasets_penguins) <- gsub("_", ".", names(datasets_penguins), fixed = TRUE) - } + # FIXME: better handling of DBI backends that do support factors + datasets_penguins$species <- as.character(datasets_penguins$species) + datasets_penguins$island <- as.character(datasets_penguins$island) + datasets_penguins$sex <- as.character(datasets_penguins$sex) as.data.frame(datasets_penguins) } @@ -93,3 +94,8 @@ check_df <- function(df) { df } + +check_arrow <- function(stream) { + # Arrow returns a tibble when it shouldn't + check_df(as.data.frame(as.data.frame(stream))) +} diff --git a/_pkgdown.yml b/_pkgdown.yml index ab0227458..c76fbb5e9 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -59,6 +59,11 @@ reference: - test_transaction - starts_with("spec_transaction") +- title: Arrow functions + contents: + - test_arrow + - starts_with("spec_arrow") + - title: Full compliance to DBI contents: - test_compliance diff --git a/man/spec_arrow_append_table_arrow.Rd b/man/spec_arrow_append_table_arrow.Rd new file mode 100644 index 000000000..785bba167 --- /dev/null +++ b/man/spec_arrow_append_table_arrow.Rd @@ -0,0 +1,107 @@ +% Generated by roxygen2: do not edit by hand +% Please edit 
documentation in R/spec-arrow-append-table-arrow.R +\docType{data} +\name{spec_arrow_append_table_arrow} +\alias{spec_arrow_append_table_arrow} +\title{spec_arrow_append_table_arrow} +\value{ +\code{dbAppendTableArrow()} returns a +scalar +numeric. +} +\description{ +spec_arrow_append_table_arrow +} +\section{Failure modes}{ + +If the table does not exist, +or the new data in \code{values} is not a data frame or has different column names, +an error is raised; the remote table remains unchanged. + +An error is raised when calling this method for a closed +or invalid connection. +An error is also raised +if \code{name} cannot be processed with \code{\link[=dbQuoteIdentifier]{dbQuoteIdentifier()}} +or if this results in a non-scalar. +} + +\section{Specification}{ + +SQL keywords can be used freely in table names, column names, and data. +Quotes, commas, spaces, and other special characters such as newlines and tabs, +can also be used in the data, +and, if the database supports non-syntactic identifiers, +also for table names +and column names. 
+ +The following data types must be supported at least, +and be read identically with \code{\link[=dbReadTable]{dbReadTable()}}: +\itemize{ +\item integer +\item numeric +(the behavior for \code{Inf} and \code{NaN} is not specified) +\item logical +\item \code{NA} as NULL +\item 64-bit values (using \code{"bigint"} as field type); the result can be +\itemize{ +\item converted to a numeric, which may lose precision, +\item converted to a character vector, which gives the full decimal +representation +\item written to another table and read again unchanged +} +\item character (in both UTF-8 +and native encodings), +supporting empty strings +(before and after non-empty strings) +\item factor (returned as character, +with a warning) +\item list of raw +(if supported by the database) +\item objects of type \link[blob:blob]{blob::blob} +(if supported by the database) +\item date +(if supported by the database; +returned as \code{Date}) +also for dates prior to 1970 or 1900 or after 2038 +\item time +(if supported by the database; +returned as objects that inherit from \code{difftime}) +\item timestamp +(if supported by the database; +returned as \code{POSIXct} +respecting the time zone but not necessarily preserving the +input time zone), +also for timestamps prior to 1970 or 1900 or after 2038 +respecting the time zone but not necessarily preserving the +input time zone) +} + +Mixing column types in the same table is supported. 
+ + +The \code{name} argument is processed as follows, +to support databases that allow non-syntactic names for their objects: +\itemize{ +\item If an unquoted table name as string: \code{dbAppendTableArrow()} will do the quoting, +perhaps by calling \code{dbQuoteIdentifier(conn, x = name)} +\item If the result of a call to \code{\link[=dbQuoteIdentifier]{dbQuoteIdentifier()}}: no more quoting is done +to support databases that allow non-syntactic names for their objects: +} + + +The \code{value} argument must be a data frame +with a subset of the columns of the existing table. +The order of the columns does not matter. +} + +\seealso{ +Other Arrow specifications: +\code{\link{spec_arrow_create_table_arrow}}, +\code{\link{spec_arrow_fetch_arrow}}, +\code{\link{spec_arrow_get_query_arrow}}, +\code{\link{spec_arrow_read_table_arrow}}, +\code{\link{spec_arrow_send_query_arrow}}, +\code{\link{spec_arrow_write_table_arrow}}, +\code{\link{spec_result_clear_result}} +} +\concept{Arrow specifications} diff --git a/man/spec_arrow_create_table_arrow.Rd b/man/spec_arrow_create_table_arrow.Rd new file mode 100644 index 000000000..8ee35ee1e --- /dev/null +++ b/man/spec_arrow_create_table_arrow.Rd @@ -0,0 +1,76 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spec-arrow-create-table-arrow.R +\docType{data} +\name{spec_arrow_create_table_arrow} +\alias{spec_arrow_create_table_arrow} +\title{spec_arrow_create_table_arrow} +\value{ +\code{dbCreateTableArrow()} returns \code{TRUE}, invisibly. +} +\description{ +spec_arrow_create_table_arrow +} +\section{Failure modes}{ + +If the table exists, an error is raised; the remote table remains unchanged. + +An error is raised when calling this method for a closed +or invalid connection. +An error is also raised +if \code{name} cannot be processed with \code{\link[=dbQuoteIdentifier]{dbQuoteIdentifier()}} +or if this results in a non-scalar. 
+Invalid values for the \code{temporary} argument +(non-scalars, +unsupported data types, +\code{NA}, +incompatible values, +duplicate names) +also raise an error. +} + +\section{Additional arguments}{ + +The following arguments are not part of the \code{dbCreateTableArrow()} generic +(to improve compatibility across backends) +but are part of the DBI specification: +\itemize{ +\item \code{temporary} (default: \code{FALSE}) +} + +They must be provided as named arguments. +See the "Specification" and "Value" sections for details on their usage. +} + +\section{Specification}{ + +The \code{name} argument is processed as follows, +to support databases that allow non-syntactic names for their objects: +\itemize{ +\item If an unquoted table name as string: \code{dbCreateTableArrow()} will do the quoting, +perhaps by calling \code{dbQuoteIdentifier(conn, x = name)} +\item If the result of a call to \code{\link[=dbQuoteIdentifier]{dbQuoteIdentifier()}}: no more quoting is done +} + +If the \code{temporary} argument is \code{TRUE}, the table is not available in a +second connection and is gone after reconnecting. +Not all backends support this argument. +A regular, non-temporary table is visible in a second connection, +in a pre-existing connection, +and after reconnecting to the database. + +SQL keywords can be used freely in table names, column names, and data. +Quotes, commas, and spaces can also be used for table names and column names, +if the database supports non-syntactic identifiers. 
+} + +\seealso{ +Other Arrow specifications: +\code{\link{spec_arrow_append_table_arrow}}, +\code{\link{spec_arrow_fetch_arrow}}, +\code{\link{spec_arrow_get_query_arrow}}, +\code{\link{spec_arrow_read_table_arrow}}, +\code{\link{spec_arrow_send_query_arrow}}, +\code{\link{spec_arrow_write_table_arrow}}, +\code{\link{spec_result_clear_result}} +} +\concept{Arrow specifications} diff --git a/man/spec_arrow_fetch_arrow.Rd b/man/spec_arrow_fetch_arrow.Rd new file mode 100644 index 000000000..7e2bafb2f --- /dev/null +++ b/man/spec_arrow_fetch_arrow.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spec-arrow-fetch-arrow.R +\docType{data} +\name{spec_arrow_fetch_arrow} +\alias{spec_arrow_fetch_arrow} +\title{spec_arrow_fetch_arrow} +\value{ +\code{dbFetchArrow()} always returns an object coercible to a \link{data.frame} +with as many rows as records were fetched and as many +columns as fields in the result set, +even if the result is a single value +or has one +or zero rows. +} +\description{ +spec_arrow_fetch_arrow +} +\section{Failure modes}{ + +An attempt to fetch from a closed result set raises an error. +} + +\section{Specification}{ + +Fetching multi-row queries with one +or more columns by default returns the entire result. +The object returned by \code{dbFetchArrow()} can also be passed to +\code{\link[arrow:as_record_batch_reader]{arrow::as_record_batch_reader()}} to create an Arrow +RecordBatchReader object that can be used to read the result set +in batches. +The chunk size is implementation-specific. 
+} + +\seealso{ +Other Arrow specifications: +\code{\link{spec_arrow_append_table_arrow}}, +\code{\link{spec_arrow_create_table_arrow}}, +\code{\link{spec_arrow_get_query_arrow}}, +\code{\link{spec_arrow_read_table_arrow}}, +\code{\link{spec_arrow_send_query_arrow}}, +\code{\link{spec_arrow_write_table_arrow}}, +\code{\link{spec_result_clear_result}} +} +\concept{Arrow specifications} diff --git a/man/spec_arrow_get_query_arrow.Rd b/man/spec_arrow_get_query_arrow.Rd new file mode 100644 index 000000000..ef4188f01 --- /dev/null +++ b/man/spec_arrow_get_query_arrow.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spec-arrow-get-query-arrow.R +\docType{data} +\name{spec_arrow_get_query_arrow} +\alias{spec_arrow_get_query_arrow} +\title{spec_arrow_get_query_arrow} +\value{ +\code{dbGetQueryArrow()} always returns an object coercible to a \link{data.frame} +with as many rows as records were fetched and as many +columns as fields in the result set, +even if the result is a single value +or has one +or zero rows. +} +\description{ +spec_arrow_get_query_arrow +} +\section{Failure modes}{ + +An error is raised when issuing a query over a closed +or invalid connection, +if the syntax of the query is invalid, +or if the query is not a non-\code{NA} string. +The object returned by \code{dbGetQueryArrow()} can also be passed to +\code{\link[arrow:as_record_batch_reader]{arrow::as_record_batch_reader()}} to create an Arrow +RecordBatchReader object that can be used to read the result set +in batches. +The chunk size is implementation-specific. 
+} + +\seealso{ +Other Arrow specifications: +\code{\link{spec_arrow_append_table_arrow}}, +\code{\link{spec_arrow_create_table_arrow}}, +\code{\link{spec_arrow_fetch_arrow}}, +\code{\link{spec_arrow_read_table_arrow}}, +\code{\link{spec_arrow_send_query_arrow}}, +\code{\link{spec_arrow_write_table_arrow}}, +\code{\link{spec_result_clear_result}} +} +\concept{Arrow specifications} diff --git a/man/spec_arrow_read_table_arrow.Rd b/man/spec_arrow_read_table_arrow.Rd new file mode 100644 index 000000000..fb522d6a0 --- /dev/null +++ b/man/spec_arrow_read_table_arrow.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spec-arrow-read-table-arrow.R +\docType{data} +\name{spec_arrow_read_table_arrow} +\alias{spec_arrow_read_table_arrow} +\title{spec_arrow_read_table_arrow} +\value{ +\code{dbReadTableArrow()} returns a data frame that contains the complete data +from the remote table, effectively the result of calling \code{\link[=dbGetQuery]{dbGetQuery()}} +with \verb{SELECT * FROM }. + +An empty table is returned as a data frame with zero rows. +} +\description{ +spec_arrow_read_table_arrow +} +\section{Failure modes}{ + +An error is raised if the table does not exist. + + +An error is raised when calling this method for a closed +or invalid connection. +An error is raised +if \code{name} cannot be processed with \code{\link[=dbQuoteIdentifier]{dbQuoteIdentifier()}} +or if this results in a non-scalar. 
+} + +\section{Specification}{ + +The \code{name} argument is processed as follows, +to support databases that allow non-syntactic names for their objects: +\itemize{ +\item If an unquoted table name as string: \code{dbReadTableArrow()} will do the +quoting, +perhaps by calling \code{dbQuoteIdentifier(conn, x = name)} +\item If the result of a call to \code{\link[=dbQuoteIdentifier]{dbQuoteIdentifier()}}: no more quoting is done +} +} + +\seealso{ +Other Arrow specifications: +\code{\link{spec_arrow_append_table_arrow}}, +\code{\link{spec_arrow_create_table_arrow}}, +\code{\link{spec_arrow_fetch_arrow}}, +\code{\link{spec_arrow_get_query_arrow}}, +\code{\link{spec_arrow_send_query_arrow}}, +\code{\link{spec_arrow_write_table_arrow}}, +\code{\link{spec_result_clear_result}} +} +\concept{Arrow specifications} diff --git a/man/spec_arrow_send_query_arrow.Rd b/man/spec_arrow_send_query_arrow.Rd new file mode 100644 index 000000000..2f9c62f2f --- /dev/null +++ b/man/spec_arrow_send_query_arrow.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spec-arrow-send-query-arrow.R +\docType{data} +\name{spec_arrow_send_query_arrow} +\alias{spec_arrow_send_query_arrow} +\title{spec_arrow_send_query_arrow} +\value{ +\code{dbSendQueryArrow()} returns +an S4 object that inherits from \linkS4class{DBIResultArrow}. +The result set can be used with \code{\link[=dbFetchArrow]{dbFetchArrow()}} to extract records. +Once you have finished using a result, make sure to clear it +with \code{\link[=dbClearResult]{dbClearResult()}}. +} +\description{ +spec_arrow_send_query_arrow +} +\section{Failure modes}{ + +An error is raised when issuing a query over a closed +or invalid connection, +or if the query is not a non-\code{NA} string. 
+} + +\seealso{ +Other Arrow specifications: +\code{\link{spec_arrow_append_table_arrow}}, +\code{\link{spec_arrow_create_table_arrow}}, +\code{\link{spec_arrow_fetch_arrow}}, +\code{\link{spec_arrow_get_query_arrow}}, +\code{\link{spec_arrow_read_table_arrow}}, +\code{\link{spec_arrow_write_table_arrow}}, +\code{\link{spec_result_clear_result}} +} +\concept{Arrow specifications} diff --git a/man/spec_arrow_write_table_arrow.Rd b/man/spec_arrow_write_table_arrow.Rd new file mode 100644 index 000000000..6429e2817 --- /dev/null +++ b/man/spec_arrow_write_table_arrow.Rd @@ -0,0 +1,142 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spec-arrow-write-table-arrow.R +\docType{data} +\name{spec_arrow_write_table_arrow} +\alias{spec_arrow_write_table_arrow} +\title{spec_arrow_write_table_arrow} +\value{ +\code{dbWriteTableArrow()} returns \code{TRUE}, invisibly. +} +\description{ +spec_arrow_write_table_arrow +} +\section{Failure modes}{ + +If the table exists, and both \code{append} and \code{overwrite} arguments are unset, +or \code{append = TRUE} and the data frame with the new data has different +column names, +an error is raised; the remote table remains unchanged. + +An error is raised when calling this method for a closed +or invalid connection. +An error is also raised +if \code{name} cannot be processed with \code{\link[=dbQuoteIdentifier]{dbQuoteIdentifier()}} +or if this results in a non-scalar. +Invalid values for the additional arguments +\code{overwrite}, \code{append}, and \code{temporary} +(non-scalars, +unsupported data types, +\code{NA}, +incompatible values, +duplicate +or missing names, +incompatible columns) +also raise an error. 
+} + +\section{Additional arguments}{ + +The following arguments are not part of the \code{dbWriteTableArrow()} generic +(to improve compatibility across backends) +but are part of the DBI specification: +\itemize{ +\item \code{overwrite} (default: \code{FALSE}) +\item \code{append} (default: \code{FALSE}) +\item \code{temporary} (default: \code{FALSE}) +} + +They must be provided as named arguments. +See the "Specification" and "Value" sections for details on their usage. +} + +\section{Specification}{ + +The \code{name} argument is processed as follows, +to support databases that allow non-syntactic names for their objects: +\itemize{ +\item If an unquoted table name as string: \code{dbWriteTableArrow()} will do the quoting, +perhaps by calling \code{dbQuoteIdentifier(conn, x = name)} +\item If the result of a call to \code{\link[=dbQuoteIdentifier]{dbQuoteIdentifier()}}: no more quoting is done +} + +The \code{value} argument must be a data frame +with a subset of the columns of the existing table if \code{append = TRUE}. +The order of the columns does not matter with \code{append = TRUE}. + +If the \code{overwrite} argument is \code{TRUE}, an existing table of the same name +will be overwritten. +This argument doesn't change behavior if the table does not exist yet. + +If the \code{append} argument is \code{TRUE}, the rows in an existing table are +preserved, and the new data are appended. +If the table doesn't exist yet, it is created. + +If the \code{temporary} argument is \code{TRUE}, the table is not available in a +second connection and is gone after reconnecting. +Not all backends support this argument. +A regular, non-temporary table is visible in a second connection, +in a pre-existing connection, +and after reconnecting to the database. + +SQL keywords can be used freely in table names, column names, and data. 
+Quotes, commas, spaces, and other special characters such as newlines and tabs, +can also be used in the data, +and, if the database supports non-syntactic identifiers, +also for table names +and column names. + +The following data types must be supported at least, +and be read identically with \code{\link[=dbReadTable]{dbReadTable()}}: +\itemize{ +\item integer +\item numeric +(the behavior for \code{Inf} and \code{NaN} is not specified) +\item logical +\item \code{NA} as NULL +\item 64-bit values (using \code{"bigint"} as field type); the result can be +\itemize{ +\item converted to a numeric, which may lose precision, +\item converted to a character vector, which gives the full decimal +representation +\item written to another table and read again unchanged +} +\item character (in both UTF-8 +and native encodings), +supporting empty strings +before and after a non-empty string +\item factor (returned as character) +\item list of raw +(if supported by the database) +\item objects of type \link[blob:blob]{blob::blob} +(if supported by the database) +\item date +(if supported by the database; +returned as \code{Date}), +also for dates prior to 1970 or 1900 or after 2038 +\item time +(if supported by the database; +returned as objects that inherit from \code{difftime}) +\item timestamp +(if supported by the database; +returned as \code{POSIXct} +respecting the time zone but not necessarily preserving the +input time zone), +also for timestamps prior to 1970 or 1900 or after 2038 +respecting the time zone but not necessarily preserving the +input time zone) +} + +Mixing column types in the same table is supported. 
+} + +\seealso{ +Other Arrow specifications: +\code{\link{spec_arrow_append_table_arrow}}, +\code{\link{spec_arrow_create_table_arrow}}, +\code{\link{spec_arrow_fetch_arrow}}, +\code{\link{spec_arrow_get_query_arrow}}, +\code{\link{spec_arrow_read_table_arrow}}, +\code{\link{spec_arrow_send_query_arrow}}, +\code{\link{spec_result_clear_result}} +} +\concept{Arrow specifications} diff --git a/man/spec_result_clear_result.Rd b/man/spec_result_clear_result.Rd index 485640641..a17dd8012 100644 --- a/man/spec_result_clear_result.Rd +++ b/man/spec_result_clear_result.Rd @@ -6,8 +6,9 @@ \title{spec_result_clear_result} \value{ \code{dbClearResult()} returns \code{TRUE}, invisibly, for result sets obtained from -both \code{dbSendQuery()} -and \code{dbSendStatement()}. +\code{dbSendQuery()}, +\code{dbSendStatement()}, +or \code{dbSendQueryArrow()}. } \description{ spec_result_clear_result @@ -15,7 +16,9 @@ spec_result_clear_result \section{Failure modes}{ An attempt to close an already closed result set issues a warning -in both cases. 
+for \code{dbSendQuery()}, +\code{dbSendStatement()}, +and \code{dbSendQueryArrow()}. } \section{Specification}{ @@ -35,5 +38,15 @@ Other result specifications: \code{\link{spec_result_roundtrip}}, \code{\link{spec_result_send_query}}, \code{\link{spec_result_send_statement}} + +Other Arrow specifications: +\code{\link{spec_arrow_append_table_arrow}}, +\code{\link{spec_arrow_create_table_arrow}}, +\code{\link{spec_arrow_fetch_arrow}}, +\code{\link{spec_arrow_get_query_arrow}}, +\code{\link{spec_arrow_read_table_arrow}}, +\code{\link{spec_arrow_send_query_arrow}}, +\code{\link{spec_arrow_write_table_arrow}} } +\concept{Arrow specifications} \concept{result specifications} diff --git a/man/test_all.Rd b/man/test_all.Rd index a0fb74b4a..58087679c 100644 --- a/man/test_all.Rd +++ b/man/test_all.Rd @@ -1,7 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/test-all.R, R/test-getting-started.R, % R/test-driver.R, R/test-connection.R, R/test-result.R, R/test-sql.R, -% R/test-meta.R, R/test-transaction.R, R/test-compliance.R, R/test-stress.R +% R/test-meta.R, R/test-transaction.R, R/test-arrow.R, R/test-compliance.R, +% R/test-stress.R \name{test_all} \alias{test_all} \alias{test_some} @@ -76,6 +77,10 @@ Test metadata functions Test transaction functions +\code{\link[=test_arrow]{test_arrow()}}: +Test Arrow methods + + \code{\link[=test_compliance]{test_compliance()}}: Test full compliance to DBI diff --git a/man/test_arrow.Rd b/man/test_arrow.Rd new file mode 100644 index 000000000..1f35521d3 --- /dev/null +++ b/man/test_arrow.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/test-arrow.R +\name{test_arrow} +\alias{test_arrow} +\title{Test Arrow methods} +\usage{ +test_arrow(skip = NULL, run_only = NULL, ctx = get_default_context()) +} +\arguments{ +\item{skip}{\verb{[character()]}\cr A vector of regular expressions to match +against test names; skip test if matching any. 
+The regular expressions are matched against the entire test name.} + +\item{run_only}{\verb{[character()]}\cr A vector of regular expressions to match +against test names; run only these tests. +The regular expressions are matched against the entire test name.} + +\item{ctx}{\verb{[DBItest_context]}\cr A test context as created by +\code{\link[=make_context]{make_context()}}.} +} +\description{ +Test Arrow methods +} +\seealso{ +Other tests: +\code{\link{test_compliance}()}, +\code{\link{test_connection}()}, +\code{\link{test_driver}()}, +\code{\link{test_getting_started}()}, +\code{\link{test_meta}()}, +\code{\link{test_result}()}, +\code{\link{test_sql}()}, +\code{\link{test_stress}()}, +\code{\link{test_transaction}()} +} +\concept{tests} diff --git a/man/test_compliance.Rd b/man/test_compliance.Rd index df609a01f..20b579ff6 100644 --- a/man/test_compliance.Rd +++ b/man/test_compliance.Rd @@ -23,6 +23,7 @@ Test full compliance to DBI } \seealso{ Other tests: +\code{\link{test_arrow}()}, \code{\link{test_connection}()}, \code{\link{test_driver}()}, \code{\link{test_getting_started}()}, diff --git a/man/test_connection.Rd b/man/test_connection.Rd index 3906aaadc..92638b251 100644 --- a/man/test_connection.Rd +++ b/man/test_connection.Rd @@ -23,6 +23,7 @@ Test the "Connection" class } \seealso{ Other tests: +\code{\link{test_arrow}()}, \code{\link{test_compliance}()}, \code{\link{test_driver}()}, \code{\link{test_getting_started}()}, diff --git a/man/test_driver.Rd b/man/test_driver.Rd index fe0ef2df1..f19ea02e6 100644 --- a/man/test_driver.Rd +++ b/man/test_driver.Rd @@ -23,6 +23,7 @@ Test the "Driver" class } \seealso{ Other tests: +\code{\link{test_arrow}()}, \code{\link{test_compliance}()}, \code{\link{test_connection}()}, \code{\link{test_getting_started}()}, diff --git a/man/test_getting_started.Rd b/man/test_getting_started.Rd index d8fa42723..2417173eb 100644 --- a/man/test_getting_started.Rd +++ b/man/test_getting_started.Rd @@ -24,6 +24,7 @@ and 
test-first development right from the start. } \seealso{ Other tests: +\code{\link{test_arrow}()}, \code{\link{test_compliance}()}, \code{\link{test_connection}()}, \code{\link{test_driver}()}, diff --git a/man/test_meta.Rd b/man/test_meta.Rd index cf17c82c9..9aec163d6 100644 --- a/man/test_meta.Rd +++ b/man/test_meta.Rd @@ -23,6 +23,7 @@ Test metadata functions } \seealso{ Other tests: +\code{\link{test_arrow}()}, \code{\link{test_compliance}()}, \code{\link{test_connection}()}, \code{\link{test_driver}()}, diff --git a/man/test_result.Rd b/man/test_result.Rd index c580ad070..4fc3f87e6 100644 --- a/man/test_result.Rd +++ b/man/test_result.Rd @@ -23,6 +23,7 @@ Test the "Result" class } \seealso{ Other tests: +\code{\link{test_arrow}()}, \code{\link{test_compliance}()}, \code{\link{test_connection}()}, \code{\link{test_driver}()}, diff --git a/man/test_sql.Rd b/man/test_sql.Rd index 914e26d1f..b872ecac2 100644 --- a/man/test_sql.Rd +++ b/man/test_sql.Rd @@ -23,6 +23,7 @@ Test SQL methods } \seealso{ Other tests: +\code{\link{test_arrow}()}, \code{\link{test_compliance}()}, \code{\link{test_connection}()}, \code{\link{test_driver}()}, diff --git a/man/test_stress.Rd b/man/test_stress.Rd index 1fd5e9200..510464dbe 100644 --- a/man/test_stress.Rd +++ b/man/test_stress.Rd @@ -19,6 +19,7 @@ Stress tests } \seealso{ Other tests: +\code{\link{test_arrow}()}, \code{\link{test_compliance}()}, \code{\link{test_connection}()}, \code{\link{test_driver}()}, diff --git a/man/test_transaction.Rd b/man/test_transaction.Rd index 19e478da0..3848f98b0 100644 --- a/man/test_transaction.Rd +++ b/man/test_transaction.Rd @@ -23,6 +23,7 @@ Test transaction functions } \seealso{ Other tests: +\code{\link{test_arrow}()}, \code{\link{test_compliance}()}, \code{\link{test_connection}()}, \code{\link{test_driver}()}, diff --git a/tests/testthat/test-consistency.R b/tests/testthat/test-consistency.R index ec41d6a10..d36e64d38 100644 --- a/tests/testthat/test-consistency.R +++ 
b/tests/testthat/test-consistency.R @@ -21,8 +21,12 @@ test_that("no duplicate spec names expect known exceptions", { "temporary_table", "list_objects", "table_visible_in_other_connection", + "arrow_write_table_arrow_temporary", + "arrow_write_table_arrow_visible_in_other_connection", + "arrow_create_table_arrow_visible_in_other_connection", "begin_write_disconnect", - "begin_write_commit" + "begin_write_commit", + NULL ))] dupe_names <- unique(all_names[duplicated(all_names)])