Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add dialect argument for ExecuteSQL #1646

Merged
merged 4 commits into from
Apr 30, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# version 0.9-9

* `st_read` gains an argument `dialect` to choose the dialect used by `ExecuteSQL`; #1646

* `st_write` gains an argument `config_options` to set GDAL config options; #1618

* fix regression in `sf_project` when `keep = TRUE`; #1635
Expand Down
4 changes: 2 additions & 2 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,8 @@ CPL_get_layers <- function(datasource, options, do_count = FALSE) {
.Call('_sf_CPL_get_layers', PACKAGE = 'sf', datasource, options, do_count)
}

CPL_read_ogr <- function(datasource, layer, query, options, quiet, toTypeUser, fid_column_name, drivers, wkt_filter, promote_to_multi = TRUE, int64_as_string = FALSE, dsn_exists = TRUE, dsn_isdb = FALSE, width = 80L) {
.Call('_sf_CPL_read_ogr', PACKAGE = 'sf', datasource, layer, query, options, quiet, toTypeUser, fid_column_name, drivers, wkt_filter, promote_to_multi, int64_as_string, dsn_exists, dsn_isdb, width)
CPL_read_ogr <- function(datasource, layer, query, options, quiet, toTypeUser, fid_column_name, drivers, wkt_filter, dialect, promote_to_multi = TRUE, int64_as_string = FALSE, dsn_exists = TRUE, dsn_isdb = FALSE, width = 80L) {
.Call('_sf_CPL_read_ogr', PACKAGE = 'sf', datasource, layer, query, options, quiet, toTypeUser, fid_column_name, drivers, wkt_filter, dialect, promote_to_multi, int64_as_string, dsn_exists, dsn_isdb, width)
}

CPL_gdalinfo <- function(obj, options, oo) {
Expand Down
45 changes: 33 additions & 12 deletions R/read.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ set_utf8 = function(x) {
#' of LineString and MultiLineString, or of Polygon and MultiPolygon, convert
#' all to the Multi variety; defaults to \code{TRUE}
#' @param stringsAsFactors logical; should character vectors be
#' converted to factors? Default for \code{read_sf} or R version >= 4.1.0 is
#' converted to factors? Default for \code{read_sf} or R version >= 4.1.0 is
#' \code{FALSE}, for \code{st_read} and R version < 4.1.0 equal to
#' \code{default.stringsAsFactors()}
#' @param int64_as_string logical; if TRUE, Int64 attributes are returned as
Expand Down Expand Up @@ -80,12 +80,17 @@ set_utf8 = function(x) {
#' For \code{query} with a character \code{dsn} the query text is handed to
#' 'ExecuteSQL' on the GDAL/OGR data set and will result in the creation of a
#' new layer (and \code{layer} is ignored). See 'OGRSQL'
#' \url{https://gdal.org/user/ogr_sql_dialect.html} for details. Please note that the
#' 'FID' special field is driver-dependent, and may be either 0-based (e.g. ESRI
#' Shapefile), 1-based (e.g. MapInfo) or arbitrary (e.g. OSM). Other features of
#' OGRSQL are also likely to be driver dependent. The available layer names may
#' be obtained with
#' \link{st_layers}. Care will be required to properly escape the use of some layer names.
#' \url{https://gdal.org/user/ogr_sql_dialect.html} for details. The parameter
#' \code{dialect} can be used to select the 'dialect' used by 'ExecuteSQL'. See
#' \href{https://gdal.org/api/gdaldataset_cpp.html#_CPPv4N11GDALDataset10ExecuteSQLEPKcP11OGRGeometryPKc}{here}
#' and \href{https://gdal.org/user/sql_sqlite_dialect.html}{here} for more
#' details. See \url{https://github.com/r-spatial/sf/pull/1646} for simple
#' examples of spatial queries using the \code{SQLite} dialect. Please note that
#' the 'FID' special field is driver-dependent, and may be either 0-based (e.g.
#' ESRI Shapefile), 1-based (e.g. MapInfo) or arbitrary (e.g. OSM). Other
#' features of OGRSQL are also likely to be driver dependent. The available
#' layer names may be obtained with \link{st_layers}. Care will be required to
#' properly escape the use of some layer names.
#'
#' @return object of class \link{sf} when a layer was successfully read; in case
#' argument \code{layer} is missing and data source \code{dsn} does not
Expand Down Expand Up @@ -122,6 +127,21 @@ set_utf8 = function(x) {
#' wkt = st_as_text(st_geometry(nc[1,]))
#' # filter by (bbox overlaps of) first feature geometry:
#' read_sf(system.file("gpkg/nc.gpkg", package="sf"), wkt_filter = wkt)
#' # if you select the SQLite dialect, then you can use several spatial
#' # functions when building the query:
#' nc_sqlite = st_read(
#' system.file("shape/nc.shp", package="sf"),
#' query = "
#' SELECT GEOMETRY, ST_Area(ST_Transform(GEOMETRY, 32119)) AS AREA_m2
#' FROM nc
#' WHERE ST_Intersects(
#' ST_Transform(GEOMETRY, 32119),
#' GeomFromText('POINT (573193 199429)', 32119)
#' )
#' ",
#' dialect = "SQLite"
#' )
#' nc_sqlite
#' @export
st_read = function(dsn, layer, ...) {
  # S3 generic: the method is selected from the class of `dsn`
  UseMethod("st_read")
}

Expand All @@ -141,7 +161,7 @@ st_read.default = function(dsn, layer, ...) {
}

process_cpl_read_ogr = function(x, quiet = FALSE, ..., check_ring_dir = FALSE,
stringsAsFactors = ifelse(as_tibble, FALSE, sf_stringsAsFactors()),
stringsAsFactors = ifelse(as_tibble, FALSE, sf_stringsAsFactors()),
geometry_column = 1, as_tibble = FALSE) {

which.geom = which(vapply(x, function(f) inherits(f, "sfc"), TRUE))
Expand All @@ -151,7 +171,7 @@ process_cpl_read_ogr = function(x, quiet = FALSE, ..., check_ring_dir = FALSE,

# in case no geometry is present:
if (length(which.geom) == 0) {
if (! quiet)
if (! quiet)
warning("no simple feature geometries present: returning a data.frame or tbl_df", call. = FALSE)
x = if (!as_tibble) {
if (any(sapply(x, is.list)))
Expand Down Expand Up @@ -202,15 +222,16 @@ process_cpl_read_ogr = function(x, quiet = FALSE, ..., check_ring_dir = FALSE,
#' @param fid_column_name character; name of column to write feature IDs to; defaults to not doing this
#' @param drivers character; limited set of driver short names to be tried (default: try all)
#' @param wkt_filter character; WKT representation of a spatial filter (may be used as bounding box, selecting overlapping geometries); see examples
#' @param dialect character; the SQL dialect passed to 'ExecuteSQL' when running the \code{query} (e.g. \code{"SQLite"}); if empty (the default), no dialect is passed
#' @note The use of \code{system.file} in examples makes sure that examples run regardless of where R is installed:
#' typical users will not use \code{system.file} but give the file name directly, either with full path or relative
#' to the current working directory (see \link{getwd}). "Shapefiles" consist of several files with the same basename
#' that reside in the same directory, only one of them having extension \code{.shp}.
#' @export
st_read.character = function(dsn, layer, ..., query = NA, options = NULL, quiet = FALSE, geometry_column = 1L,
st_read.character = function(dsn, layer, ..., query = NA, options = NULL, quiet = FALSE, geometry_column = 1L,
type = 0, promote_to_multi = TRUE, stringsAsFactors = sf_stringsAsFactors(),
int64_as_string = FALSE, check_ring_dir = FALSE, fid_column_name = character(0),
drivers = character(0), wkt_filter = character(0)) {
drivers = character(0), wkt_filter = character(0), dialect = character(0)) {

layer = if (missing(layer))
character(0)
Expand All @@ -228,7 +249,7 @@ st_read.character = function(dsn, layer, ..., query = NA, options = NULL, quiet
stop("`promote_to_multi' should have length one, and applies to all geometry columns")

x = CPL_read_ogr(dsn, layer, query, as.character(options), quiet, type, fid_column_name,
drivers, wkt_filter, promote_to_multi, int64_as_string, dsn_exists, dsn_isdb, getOption("width"))
drivers, wkt_filter, dialect, promote_to_multi, int64_as_string, dsn_exists, dsn_isdb, getOption("width"))
process_cpl_read_ogr(x, quiet, check_ring_dir = check_ring_dir,
stringsAsFactors = stringsAsFactors, geometry_column = geometry_column, ...)
}
Expand Down
39 changes: 31 additions & 8 deletions man/st_read.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 5 additions & 4 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -305,8 +305,8 @@ BEGIN_RCPP
END_RCPP
}
// CPL_read_ogr
Rcpp::List CPL_read_ogr(Rcpp::CharacterVector datasource, Rcpp::CharacterVector layer, Rcpp::CharacterVector query, Rcpp::CharacterVector options, bool quiet, Rcpp::NumericVector toTypeUser, Rcpp::CharacterVector fid_column_name, Rcpp::CharacterVector drivers, Rcpp::CharacterVector wkt_filter, bool promote_to_multi, bool int64_as_string, bool dsn_exists, bool dsn_isdb, int width);
RcppExport SEXP _sf_CPL_read_ogr(SEXP datasourceSEXP, SEXP layerSEXP, SEXP querySEXP, SEXP optionsSEXP, SEXP quietSEXP, SEXP toTypeUserSEXP, SEXP fid_column_nameSEXP, SEXP driversSEXP, SEXP wkt_filterSEXP, SEXP promote_to_multiSEXP, SEXP int64_as_stringSEXP, SEXP dsn_existsSEXP, SEXP dsn_isdbSEXP, SEXP widthSEXP) {
Rcpp::List CPL_read_ogr(Rcpp::CharacterVector datasource, Rcpp::CharacterVector layer, Rcpp::CharacterVector query, Rcpp::CharacterVector options, bool quiet, Rcpp::NumericVector toTypeUser, Rcpp::CharacterVector fid_column_name, Rcpp::CharacterVector drivers, Rcpp::CharacterVector wkt_filter, Rcpp::CharacterVector dialect, bool promote_to_multi, bool int64_as_string, bool dsn_exists, bool dsn_isdb, int width);
RcppExport SEXP _sf_CPL_read_ogr(SEXP datasourceSEXP, SEXP layerSEXP, SEXP querySEXP, SEXP optionsSEXP, SEXP quietSEXP, SEXP toTypeUserSEXP, SEXP fid_column_nameSEXP, SEXP driversSEXP, SEXP wkt_filterSEXP, SEXP dialectSEXP, SEXP promote_to_multiSEXP, SEXP int64_as_stringSEXP, SEXP dsn_existsSEXP, SEXP dsn_isdbSEXP, SEXP widthSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Expand All @@ -319,12 +319,13 @@ BEGIN_RCPP
Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type fid_column_name(fid_column_nameSEXP);
Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type drivers(driversSEXP);
Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type wkt_filter(wkt_filterSEXP);
Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type dialect(dialectSEXP);
Rcpp::traits::input_parameter< bool >::type promote_to_multi(promote_to_multiSEXP);
Rcpp::traits::input_parameter< bool >::type int64_as_string(int64_as_stringSEXP);
Rcpp::traits::input_parameter< bool >::type dsn_exists(dsn_existsSEXP);
Rcpp::traits::input_parameter< bool >::type dsn_isdb(dsn_isdbSEXP);
Rcpp::traits::input_parameter< int >::type width(widthSEXP);
rcpp_result_gen = Rcpp::wrap(CPL_read_ogr(datasource, layer, query, options, quiet, toTypeUser, fid_column_name, drivers, wkt_filter, promote_to_multi, int64_as_string, dsn_exists, dsn_isdb, width));
rcpp_result_gen = Rcpp::wrap(CPL_read_ogr(datasource, layer, query, options, quiet, toTypeUser, fid_column_name, drivers, wkt_filter, dialect, promote_to_multi, int64_as_string, dsn_exists, dsn_isdb, width));
return rcpp_result_gen;
END_RCPP
}
Expand Down Expand Up @@ -1274,7 +1275,7 @@ static const R_CallMethodDef CallEntries[] = {
{"_sf_CPL_gdal_segmentize", (DL_FUNC) &_sf_CPL_gdal_segmentize, 2},
{"_sf_CPL_gdal_linestring_sample", (DL_FUNC) &_sf_CPL_gdal_linestring_sample, 2},
{"_sf_CPL_get_layers", (DL_FUNC) &_sf_CPL_get_layers, 3},
{"_sf_CPL_read_ogr", (DL_FUNC) &_sf_CPL_read_ogr, 14},
{"_sf_CPL_read_ogr", (DL_FUNC) &_sf_CPL_read_ogr, 15},
{"_sf_CPL_gdalinfo", (DL_FUNC) &_sf_CPL_gdalinfo, 3},
{"_sf_CPL_gdalwarp", (DL_FUNC) &_sf_CPL_gdalwarp, 6},
{"_sf_CPL_gdalrasterize", (DL_FUNC) &_sf_CPL_gdalrasterize, 7},
Expand Down
9 changes: 7 additions & 2 deletions src/gdal_read.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,7 @@ Rcpp::List CPL_read_ogr(Rcpp::CharacterVector datasource, Rcpp::CharacterVector
Rcpp::CharacterVector options, bool quiet, Rcpp::NumericVector toTypeUser,
Rcpp::CharacterVector fid_column_name, Rcpp::CharacterVector drivers,
Rcpp::CharacterVector wkt_filter,
Rcpp::CharacterVector dialect,
bool promote_to_multi = true, bool int64_as_string = false,
bool dsn_exists = true,
bool dsn_isdb = false,
Expand All @@ -490,7 +491,7 @@ Rcpp::List CPL_read_ogr(Rcpp::CharacterVector datasource, Rcpp::CharacterVector
std::vector <char *> open_options = create_options(options, quiet);
std::vector <char *> drivers_v = create_options(drivers, quiet);
GDALDataset *poDS;
poDS = (GDALDataset *) GDALOpenEx( datasource[0], GDAL_OF_VECTOR | GDAL_OF_READONLY,
poDS = (GDALDataset *) GDALOpenEx( datasource[0], GDAL_OF_VECTOR | GDAL_OF_READONLY,
drivers.size() ? drivers_v.data() : NULL, open_options.data(), NULL );
if( poDS == NULL ) {
// could not open dsn
Expand Down Expand Up @@ -533,7 +534,11 @@ Rcpp::List CPL_read_ogr(Rcpp::CharacterVector datasource, Rcpp::CharacterVector

OGRLayer *poLayer;
if (! Rcpp::CharacterVector::is_na(query[0])) {
poLayer = poDS->ExecuteSQL(query[0], NULL, NULL);
if (dialect.size()) {
poLayer = poDS->ExecuteSQL(query[0], NULL, dialect[0]);
} else {
poLayer = poDS->ExecuteSQL(query[0], NULL, NULL);
}
if (poLayer == NULL)
Rcpp::stop("Query execution failed, cannot open layer.\n"); // #nocov
} else
Expand Down
18 changes: 18 additions & 0 deletions tests/testthat/test_read.R
Original file line number Diff line number Diff line change
Expand Up @@ -204,3 +204,21 @@ test_that("Missing data sources have useful error message (#967)", {
# delete temp file
file.remove(x)
})

test_that("SQLite dialect can be used in st_read (#1646)", {
  # Define a query (relying on spatial functions of the SQLite dialect) to
  # 1) filter the counties that intersect the centroid of nc, given as a
  #    point with SRID 32119;
  # 2) calculate the area of those counties
  # The query text is kept verbatim, hence the unindented lines.
  query = "
SELECT GEOMETRY, ST_Area(ST_Transform(GEOMETRY, 32119)) AS AREA_m2
FROM nc
WHERE ST_Intersects(ST_Transform(GEOMETRY, 32119), GeomFromText('POINT (573193 199429)', 32119))
"
  nc_sqlite = st_read(
    dsn = system.file("shape/nc.shp", package = "sf"),
    query = query,
    dialect = "SQLite",
    quiet = TRUE
  )
  # expect_equal() reports the actual row count on failure, whereas
  # expect_true(nrow(.) == 1L) would only report "not TRUE"
  expect_equal(nrow(nc_sqlite), 1L)
})