diff --git a/DESCRIPTION b/DESCRIPTION index 67f18b15..d299e238 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: wk Title: Lightweight Well-Known Geometry Parsing Version: 0.7.3.9000 -Authors@R: +Authors@R: c( person(given = "Dewey", family = "Dunnington", @@ -12,23 +12,27 @@ Authors@R: family = "Pebesma", role = c("aut"), email = "edzer.pebesma@uni-muenster.de", - comment = c(ORCID = "0000-0001-8049-7069")) + comment = c(ORCID = "0000-0001-8049-7069")), + person(given = "Anthony", + family = "North", + email = "anthony.jl.north@gmail.com", + role = c("ctb")) ) Maintainer: Dewey Dunnington Description: Provides a minimal R and C++ API for parsing well-known binary and well-known text representation of - geometries to and from R-native formats. + geometries to and from R-native formats. Well-known binary is compact and fast to parse; well-known text is human-readable and is useful for writing tests. These formats are - useful in R only if the information they contain can be - accessed in R, for which high-performance functions + useful in R only if the information they contain can be + accessed in R, for which high-performance functions are provided here. License: MIT + file LICENSE Encoding: UTF-8 Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.3 -Suggests: +Suggests: testthat (>= 3.0.0), vctrs (>= 0.3.0), sf, @@ -37,6 +41,6 @@ Suggests: URL: https://paleolimbot.github.io/wk/, https://github.com/paleolimbot/wk BugReports: https://github.com/paleolimbot/wk/issues Config/testthat/edition: 3 -Depends: +Depends: R (>= 2.10) LazyData: true diff --git a/NAMESPACE b/NAMESPACE index 4460f599..dc4b9829 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -465,6 +465,7 @@ export(wk_void) export(wk_void_handler) export(wk_writer) export(wkb) +export(wkb_to_hex) export(wkb_translate_wkb) export(wkb_translate_wkt) export(wkb_writer) diff --git a/R/pkg-vctrs.R b/R/pkg-vctrs.R index b41048e5..1cc1564c 100644 --- a/R/pkg-vctrs.R +++ b/R/pkg-vctrs.R @@ -13,6 +13,10 @@ vec_proxy.wk_wkb <- function(x, ...) { unclass(x) } +vec_proxy_equal.wk_wkb <- function(x, ...) { + wkb_to_hex(x) +} + vec_restore.wk_wkb <- function(x, to, ...) { crs_out <- attr(to, "crs", exact = TRUE) %||% attr(x, "crs", exact = TRUE) geodesic_out <- attr(to, "geodesic", exact = TRUE) %||% attr(x, "geodesic", exact = TRUE) diff --git a/R/wkb.R b/R/wkb.R index 18d366e5..aaceaa89 100644 --- a/R/wkb.R +++ b/R/wkb.R @@ -148,3 +148,19 @@ format.wk_wkb <- function(x, ...) { as.character.wk_wkb <- function(x, ...) { format(x, ...) } + + +#' Convert well-known binary to hex +#' +#' @param x A [wkb()] vector +#' +#' @return A hex encoded [wkb()] vector +#' @export +#' +#' @examples +#' x <- as_wkb(xyz(1:5, 6:10, 11:15)) +#' wkb_to_hex(x) +#' +wkb_to_hex <- function(x) { + .Call(wk_c_wkb_to_hex, as_wkb(x)) +} diff --git a/R/zzz.R b/R/zzz.R index 1093f9cb..08d2c127 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -30,6 +30,9 @@ s3_register("sf::st_bbox", "wk_grd") s3_register("sf::st_crs", "wk_grd") s3_register("sf::st_crs<-", "wk_grd") + + # wkb vec_proxy_equal + s3_register("vctrs::vec_proxy_equal", "wk_wkb") } .onUnload <- function (libpath) { diff --git a/man/wk-package.Rd b/man/wk-package.Rd index d70bebb6..7ff96e7a 100644 --- a/man/wk-package.Rd +++ b/man/wk-package.Rd @@ -25,5 +25,10 @@ Authors: \item Edzer Pebesma \email{edzer.pebesma@uni-muenster.de} (\href{https://orcid.org/0000-0001-8049-7069}{ORCID}) } +Other contributors: +\itemize{ + \item Anthony North \email{anthony.jl.north@gmail.com} [contributor] +} + } \keyword{internal} diff --git a/man/wkb_to_hex.Rd b/man/wkb_to_hex.Rd new file mode 100644 index 00000000..11272f6f --- /dev/null +++ b/man/wkb_to_hex.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/wkb.R +\name{wkb_to_hex} +\alias{wkb_to_hex} +\title{Convert well-known binary to hex} +\usage{ +wkb_to_hex(x) +} +\arguments{ +\item{x}{A \code{\link[=wkb]{wkb()}} vector} +} +\value{ +A hex encoded \code{\link[=wkb]{wkb()}} vector +} +\description{ +Convert well-known binary to hex +} +\examples{ +x <- as_wkb(xyz(1:5, 6:10, 11:15)) +wkb_to_hex(x) + +} diff --git a/src/init.c b/src/init.c index 3c6ddbc1..ca49861d 100644 --- a/src/init.c +++ b/src/init.c @@ -31,6 +31,7 @@ SEXP wk_c_trans_set_new(SEXP xy, SEXP use_z, SEXP use_m); SEXP wk_c_trans_filter_new(SEXP handler_xptr, SEXP trans_xptr); SEXP wk_c_wkb_is_na(SEXP geom); SEXP wk_c_wkb_is_raw_or_null(SEXP geom); +SEXP wk_c_wkb_to_hex(SEXP geom); SEXP wk_c_vertex_filter_new(SEXP handler_xptr, SEXP add_details); SEXP wk_c_handler_void_new(void); SEXP wk_c_handler_addr(SEXP xptr); @@ -66,6 +67,7 @@ static const R_CallMethodDef CallEntries[] = { {"wk_c_trans_filter_new", (DL_FUNC) &wk_c_trans_filter_new, 2}, {"wk_c_wkb_is_na", (DL_FUNC) &wk_c_wkb_is_na, 1}, {"wk_c_wkb_is_raw_or_null", (DL_FUNC) &wk_c_wkb_is_raw_or_null, 1}, + {"wk_c_wkb_to_hex", (DL_FUNC) &wk_c_wkb_to_hex, 1}, {"wk_c_vertex_filter_new", (DL_FUNC) &wk_c_vertex_filter_new, 2}, {"wk_c_handler_void_new", (DL_FUNC) &wk_c_handler_void_new, 0}, {"wk_c_handler_addr", (DL_FUNC) &wk_c_handler_addr, 1}, diff --git a/src/vctr.c b/src/vctr.c index 6ae9deb1..a7daa895 100644 --- a/src/vctr.c +++ b/src/vctr.c @@ -2,6 +2,8 @@ #include #include +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) + SEXP wk_c_wkb_is_na(SEXP geom) { R_xlen_t size = Rf_xlength(geom); SEXP result = PROTECT(Rf_allocVector(LGLSXP, size)); @@ -28,3 +30,53 @@ SEXP wk_c_wkb_is_raw_or_null(SEXP geom) { UNPROTECT(1); return result; } + +static R_xlen_t wk_max_length(const SEXP geom) { + const R_xlen_t size = Rf_xlength(geom); + R_xlen_t max = 0; + + for (R_xlen_t i = 0; i < size; i++) { + max = MAX(max, Rf_xlength(VECTOR_ELT(geom, i))); + } + + return max; +} + +static void wk_bin_to_hex(char* dst, const unsigned char* src, const R_xlen_t n) { + static const char hex[16] = "0123456789abcdef"; + + for (R_xlen_t i = 0; i < n; i++) { + const unsigned char byte = src[i]; + dst[2 * i] = hex[(byte >> 4) & 0xf]; + dst[2 * i + 1] = hex[byte & 0xf]; + } + + dst[2 * n] = '\0'; +} + +SEXP wk_c_wkb_to_hex(const SEXP geom) { + const R_xlen_t size = Rf_xlength(geom); + SEXP result = PROTECT(Rf_allocVector(STRSXP, size)); + + const R_xlen_t buf_size = wk_max_length(geom) * 2 + 1; + SEXP buf_shelter = PROTECT(Rf_allocVector(RAWSXP, buf_size)); + char* buf = (char*)RAW(buf_shelter); + + for (R_xlen_t i = 0; i < size; i++) { + if (((i + 1) % 1000) == 0) R_CheckUserInterrupt(); + + const SEXP item = VECTOR_ELT(geom, i); + const R_xlen_t item_len = Rf_xlength(item); + + if (item == R_NilValue) { + SET_STRING_ELT(result, i, NA_STRING); + continue; + } + + wk_bin_to_hex(buf, RAW(item), item_len); + SET_STRING_ELT(result, i, Rf_mkChar(buf)); + } + + UNPROTECT(2); + return result; +} diff --git a/tests/testthat/test-wkb.R b/tests/testthat/test-wkb.R index 80c447e6..778dd41f 100644 --- a/tests/testthat/test-wkb.R +++ b/tests/testthat/test-wkb.R @@ -108,3 +108,65 @@ test_that("examples as wkb roundtrip", { ) } }) + +test_that("wk_c_wkb_to_hex works", { + list_of_raw <- list(as.raw(0:255), raw(0), NULL) + expect_identical( + .Call(wk_c_wkb_to_hex, list_of_raw), + c(paste(sprintf("%02x", 0:255), collapse = ""), "", NA_character_) + ) +}) + +test_that("wkb_to_hex works", { + features <- wkt(c("POINT (0 0)", "LINESTRING (1 1, 2 2)", "POLYGON EMPTY", NA)) + + # little endian + wkb_little <- wk_handle(features, wkb_writer(endian = 1)) + hex_little <- c( + "010100000000000000000000000000000000000000", + "010200000002000000000000000000f03f000000000000f03f00000000000000400000000000000040", + "010300000000000000", + NA_character_ + ) + + expect_equal(wkb_to_hex(wkb_little), hex_little) + + # big endian + wkb_big <- wk_handle(features, wkb_writer(endian = 0)) + hex_big <- c( + "000000000100000000000000000000000000000000", + "0000000002000000023ff00000000000003ff000000000000040000000000000004000000000000000", + "000000000300000000", + NA_character_ + ) + + expect_equal(wkb_to_hex(wkb_big), hex_big) +}) + +test_that("vec_equal(wkb) works", { + points <- wkt(c("POINT (1 1)", "POINT (2 2)", "POINT (3 3)")) + + # little endian + wkb_little <- wk_handle(points, wkb_writer(endian = 1)) + hex_little <- c( + "0101000000000000000000f03f000000000000f03f", + "010100000000000000000000400000000000000040", + "010100000000000000000008400000000000000840" + ) + + expect_equal(vctrs::vec_proxy_equal(wkb_little), hex_little) + expect_equal(vctrs::vec_equal(wkb_little, wkb_little), c(TRUE, TRUE, TRUE)) + expect_equal(vctrs::vec_equal(wkb_little[1], wkb_little[2]), FALSE) + + # big endian + wkb_big <- wk_handle(points, wkb_writer(endian = 0)) + hex_big <- c( + "00000000013ff00000000000003ff0000000000000", + "000000000140000000000000004000000000000000", + "000000000140080000000000004008000000000000" + ) + + expect_equal(vctrs::vec_proxy_equal(wkb_big), hex_big) + expect_equal(vctrs::vec_equal(wkb_big, wkb_big), c(TRUE, TRUE, TRUE)) + expect_equal(vctrs::vec_equal(wkb_big[1], wkb_big[2]), FALSE) +})