Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: refactor dbListTables() et al. #413

Merged
merged 1 commit into from
Apr 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions R/dbExistsTable_PqConnection_character.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
#' @usage NULL
dbExistsTable_PqConnection_character <- function(conn, name, ...) {
stopifnot(length(name) == 1L)
name <- dbQuoteIdentifier(conn, name)

# Convert to identifier
id <- dbUnquoteIdentifier(conn, name)[[1]]
# use (Un)QuoteIdentifier roundtrip instead of Id(table = name)
# so that quoted names (possibly incl. schema) can be passed to `name` e.g.
# name = dbQuoteIdentifier(conn, Id(schema = "sname", table = "tname"))
quoted <- dbQuoteIdentifier(conn, name)
id <- dbUnquoteIdentifier(conn, quoted)[[1]]
exists_table(conn, id)
}

Expand Down
2 changes: 1 addition & 1 deletion R/dbListFields_PqConnection_Id.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#' @rdname postgres-tables
#' @usage NULL
dbListFields_PqConnection_Id <- function(conn, name, ...) {
list_fields(conn, name)
list_fields(conn, id = name)
}

#' @rdname postgres-tables
Expand Down
25 changes: 19 additions & 6 deletions R/dbListObjects_PqConnection_ANY.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,13 @@ dbListObjects_PqConnection_ANY <- function(conn, prefix = NULL, ...) {
null_varchar <- "NULL::text"
}
query <- paste0(
"SELECT ", null_varchar, " AS schema, table_name AS table FROM INFORMATION_SCHEMA.tables\n",
"WHERE (table_schema = ANY(current_schemas(true))) AND (table_schema <> 'pg_catalog')\n",
"SELECT ", null_varchar, " AS schema, table_name AS table FROM ( \n",
list_tables(conn = conn, order_by = "table_type, table_name"),
") as table_query \n",
"UNION ALL\n",
"SELECT DISTINCT table_schema AS schema, ", null_varchar, " AS table FROM INFORMATION_SCHEMA.tables"
"SELECT DISTINCT table_schema AS schema, ", null_varchar, " AS table FROM ( \n",
list_tables(conn = conn, where_schema = "true"),
") as schema_query;"
)
} else {
if (!is.list(prefix)) prefix <- list(prefix)
Expand All @@ -27,10 +30,20 @@ dbListObjects_PqConnection_ANY <- function(conn, prefix = NULL, ...) {
schemas <- vcapply(prefix[is_prefix], function(x) x@name[["schema"]])
if (length(schemas) > 0) {
schema_strings <- dbQuoteString(conn, schemas)
where_schema <-
paste0(
"table_schema IN (",
paste(schema_strings, collapse = ", "),
") \n"
)
query <- paste0(
"SELECT table_schema AS schema, table_name AS table FROM INFORMATION_SCHEMA.tables\n",
"WHERE ",
"(table_schema IN (", paste(schema_strings, collapse = ", "), "))"
"SELECT table_schema AS schema, table_name AS table FROM ( \n",
list_tables(
conn = conn,
where_schema = where_schema,
order_by = "table_type, table_name"
),
") as table_query"
)
}
}
Expand Down
9 changes: 3 additions & 6 deletions R/dbListTables_PqConnection.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
#' @rdname postgres-tables
#' @usage NULL
dbListTables_PqConnection <- function(conn, ...) {
query <- paste0(
"SELECT table_name FROM INFORMATION_SCHEMA.tables ",
"WHERE ",
"(table_schema = ANY(current_schemas(true))) AND (table_schema <> 'pg_catalog')"
)
dbGetQuery(conn, query)[[1]]
query <- list_tables(conn = conn, order_by = "table_type, table_name")

dbGetQuery(conn, query)[["table_name"]]
}

#' @rdname postgres-tables
Expand Down
114 changes: 81 additions & 33 deletions R/tables.R
Original file line number Diff line number Diff line change
Expand Up @@ -120,26 +120,72 @@ db_append_table <- function(conn, name, value, copy, warn) {
nrow(value)
}

list_tables <- function(conn, where_schema = NULL, where_table = NULL, order_by = NULL) {

query <- paste0(
# information_schema.table docs: https://www.postgresql.org/docs/current/infoschema-tables.html
"SELECT table_schema, table_name \n",
"FROM information_schema.tables \n",
"WHERE TRUE \n" # dummy clause to be able to add additional ones with `AND`
)

if (is.null(where_schema)) {
# `true` in `current_schemas(true)` is necessary to get temporary tables
query <- paste0(
query,
" AND (table_schema = ANY(current_schemas(true))) \n",
" AND (table_schema <> 'pg_catalog') \n"
)
} else {
query <- paste0(query, " AND ", where_schema)
}

if (!is.null(where_table)) query <- paste0(query, " AND ", where_table)

if (!is.null(order_by)) query <- paste0(query, "ORDER BY ", order_by)

query
}

exists_table <- function(conn, id) {
name <- id@name
stopifnot("table" %in% names(name))
table_name <- dbQuoteString(conn, name[["table"]])
where_table <- paste0("table_name = ", table_name, " \n")

if ("schema" %in% names(name)) {
schema_name <- dbQuoteString(conn, name[["schema"]])
where_schema <- paste0("table_schema = ", schema_name, " \n")
} else {
where_schema <- NULL
}
query <- paste0(
"SELECT COUNT(*) FROM ",
find_table(conn, name)
"SELECT EXISTS ( \n",
list_tables(conn, where_schema = where_schema, where_table = where_table),
")"
)

dbGetQuery(conn, query)[[1]] >= 1
dbGetQuery(conn, query)[[1]]
}

find_table <- function(conn, id, inf_table = "tables", only_first = FALSE) {
list_fields <- function(conn, id) {
name <- id@name

is_redshift <- is(conn, "RedshiftConnection")

if ("schema" %in% names(id)) {
# get relevant schema
if ("schema" %in% names(name)) {
# either the user provides the schema
query <- paste0(
"(SELECT 1 AS nr, ",
dbQuoteString(conn, id[["schema"]]), "::varchar",
dbQuoteString(conn, name[["schema"]]), "::varchar",
" AS table_schema) t"
)

# only_first not necessary,
# as there cannot be multiple tables with the same name in a single schema
only_first <- FALSE

# or we have to look the table up in the schemas on the search path
} else if (is_redshift) {
# A variant of the Postgres version that uses CTEs and generate_series()
# instead of generate_subscripts(), the latter is not supported on Redshift
Expand All @@ -158,25 +204,31 @@ find_table <- function(conn, id, inf_table = "tables", only_first = FALSE) {
)
only_first <- FALSE
Copy link
Contributor Author

@dpprdan dpprdan Nov 29, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This override was already present in find_table() (but find_table() was generally called with only_first = TRUE by list_fields()).

I think this is a remainder of the first two commits in #326, where indeed only_first isn't necessary, because that used current_schema() (not "schema_s_").

It should be only_first = TRUE now though because there can be multiple schemas on the search path on Redshift as well (I think).

} else {
# https://stackoverflow.com/a/8767450/946850
# Get `current_schemas()` in search_path order
# so $user and temp tables take precedence over the public schema (by default)
# https://www.postgresql.org/docs/current/ddl-schemas.html#DDL-SCHEMAS-PATH
# https://www.postgresql.org/docs/current/runtime-config-client.html#GUC-SEARCH-PATH
# How to unnest `current_schemas(true)` array with element number (works since v9.4):
# https://stackoverflow.com/a/8767450/2114932
query <- paste0(
"(SELECT nr, schemas[nr] AS table_schema FROM ",
"(SELECT *, generate_subscripts(schemas, 1) AS nr FROM ",
"(SELECT current_schemas(true) AS schemas) ",
"t) ",
"tt WHERE schemas[nr] <> 'pg_catalog') ",
"ttt"
"(",
"SELECT * FROM unnest(current_schemas(true)) WITH ORDINALITY AS tbl(table_schema, nr) \n",
"WHERE table_schema != 'pg_catalog'",
") schemas_on_path"
)
only_first <- TRUE
}

table <- dbQuoteString(conn, id[["table"]])
# join columns info
table <- dbQuoteString(conn, name[["table"]])
query <- paste0(
query, " ",
"INNER JOIN INFORMATION_SCHEMA.", inf_table, " USING (table_schema) ",
"INNER JOIN INFORMATION_SCHEMA.COLUMNS USING (table_schema) ",
"WHERE table_name = ", table
)

if (only_first) {
# we can only detect duplicate table names after we know in which schemas they are
# https://stackoverflow.com/a/31814584/946850
query <- paste0(
"(SELECT *, rank() OVER (ORDER BY nr) AS rnr ",
Expand All @@ -185,7 +237,19 @@ find_table <- function(conn, id, inf_table = "tables", only_first = FALSE) {
)
}

query
query <- paste0(
"SELECT column_name FROM ",
query, " ",
"ORDER BY ordinal_position"
)

fields <- dbGetQuery(conn, query)[[1]]

if (length(fields) == 0) {
stop("Table ", dbQuoteIdentifier(conn, id), " not found.", call. = FALSE)
}

fields
}

find_temp_schema <- function(conn, fail_if_missing = TRUE) {
Expand Down Expand Up @@ -214,19 +278,3 @@ find_temp_schema <- function(conn, fail_if_missing = TRUE) {
return(connection_get_temp_schema(conn@ptr))
}
}

list_fields <- function(conn, id) {
name <- id@name

query <- find_table(conn, name, "columns", only_first = TRUE)
query <- paste0(
"SELECT column_name FROM ",
query, " ",
"ORDER BY ordinal_position"
)
fields <- dbGetQuery(conn, query)[[1]]
if (length(fields) == 0) {
stop("Table ", dbQuoteIdentifier(conn, name), " not found.", call. = FALSE)
}
fields
}
Loading