Skip to content

Commit

Permalink
Update feather serializer; Add parquet serializer (#849)
Browse files Browse the repository at this point in the history
Co-authored-by: Neal Richardson <neal.p.richardson@gmail.com>
  • Loading branch information
pachadotdev and nealrichardson committed Mar 24, 2022
1 parent f444cfd commit 0c21b78
Show file tree
Hide file tree
Showing 11 changed files with 87 additions and 25 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ Suggests:
later,
readr,
yaml,
feather,
arrow,
future,
rstudioapi,
spelling,
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ export(parser_json)
export(parser_multi)
export(parser_none)
export(parser_octet)
export(parser_parquet)
export(parser_rds)
export(parser_read_file)
export(parser_text)
Expand Down Expand Up @@ -83,6 +84,7 @@ export(serializer_html)
export(serializer_htmlwidget)
export(serializer_jpeg)
export(serializer_json)
export(serializer_parquet)
export(serializer_pdf)
export(serializer_png)
export(serializer_print)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
## New features

* Introduces new GeoJSON serializer and parser. GeoJSON objects are parsed into `sf` objects and `sf` or `sfc` objects will be serialized into GeoJSON. (@josiahparry, #830)
* Update feather serializer to use the arrow package. The new default feather MIME type is `application/vnd.apache.arrow.file`. (@pachadotdev, #849)
* Add parquet serializer and parser by using the arrow package. (@pachadotdev, #849)

## Bug fixes

Expand Down
3 changes: 2 additions & 1 deletion R/content-types.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ knownContentTypes <- c(
dotx = "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
xlam = "application/vnd.ms-excel.addin.macroEnabled.12",
xlsb = "application/vnd.ms-excel.sheet.binary.macroEnabled.12",
feather = "application/feather",
feather = "application/vnd.apache.arrow.file",
parquet = "application/vnd.apache.parquet",
rds = "application/rds",
tsv = "application/tab-separated-values",
csv = "application/csv",
Expand Down
22 changes: 16 additions & 6 deletions R/parse-body.R
Original file line number Diff line number Diff line change
Expand Up @@ -480,18 +480,27 @@ parser_rds <- function(...) {
})
}

#' @describeIn parsers feather parser. See [feather::read_feather()] for more details.
#' @describeIn parsers feather parser. See [arrow::read_feather()] for more details.
#' @export
parser_feather <- function(...) {
parser_read_file(function(tmpfile) {
if (!requireNamespace("feather", quietly = TRUE)) {
stop("`feather` must be installed for `parser_feather` to work")
if (!requireNamespace("arrow", quietly = TRUE)) {
stop("`arrow` must be installed for `parser_feather` to work")
}
feather::read_feather(tmpfile, ...)
arrow::read_feather(tmpfile, ...)
})
}


#' @describeIn parsers parquet parser. See [arrow::read_parquet()] for more details.
#' @export
parser_parquet <- function(...) {
  # Reader handed to `parser_read_file()`. The `arrow` availability check
  # lives inside the reader, so it runs when the reader is invoked rather
  # than when the parser is created. `...` is forwarded to
  # `arrow::read_parquet()`.
  read_parquet_body <- function(tmpfile) {
    arrow_available <- requireNamespace("arrow", quietly = TRUE)
    if (!arrow_available) {
      stop("`arrow` must be installed for `parser_parquet` to work")
    }
    arrow::read_parquet(tmpfile, ...)
  }

  parser_read_file(read_parquet_body)
}

#' @describeIn parsers Octet stream parser. Returns the raw content.
#' @export
Expand Down Expand Up @@ -568,7 +577,8 @@ register_parsers_onLoad <- function() {
register_parser("octet", parser_octet, fixed = "application/octet-stream")
register_parser("form", parser_form, fixed = "application/x-www-form-urlencoded")
register_parser("rds", parser_rds, fixed = "application/rds")
register_parser("feather", parser_feather, fixed = "application/feather")
register_parser("feather", parser_feather, fixed = c("application/vnd.apache.arrow.file", "application/feather"))
register_parser("parquet", parser_parquet, fixed = "application/vnd.apache.parquet")
register_parser("text", parser_text, fixed = "text/plain", regex = "^text/")
register_parser("tsv", parser_tsv, fixed = c("application/tab-separated-values", "text/tab-separated-values"))
# yaml types: https://stackoverflow.com/a/38000954/591574
Expand Down
26 changes: 21 additions & 5 deletions R/serializer.R
Original file line number Diff line number Diff line change
Expand Up @@ -263,17 +263,32 @@ serializer_rds <- function(version = "2", ascii = FALSE, ..., type = "applicatio
})
}

#' @describeIn serializers feather serializer. See also: [feather::write_feather()]
#' @describeIn serializers feather serializer. See also: [arrow::write_feather()]
#' @export
serializer_feather <- function(type = "application/feather") {
if (!requireNamespace("feather", quietly = TRUE)) {
stop("`feather` must be installed for `serializer_feather` to work")
serializer_feather <- function(type = "application/vnd.apache.arrow.file") {
if (!requireNamespace("arrow", quietly = TRUE)) {
stop("`arrow` must be installed for `serializer_feather` to work")
}
serializer_write_file(
fileext = ".feather",
type = type,
write_fn = function(val, tmpfile) {
feather::write_feather(val, tmpfile)
arrow::write_feather(val, tmpfile)
}
)
}

#' @describeIn serializers parquet serializer. See also: [arrow::write_parquet()]
#' @export
serializer_parquet <- function(type = "application/vnd.apache.parquet") {
  # Fail at construction time if the optional `arrow` dependency is missing.
  has_arrow <- requireNamespace("arrow", quietly = TRUE)
  if (!has_arrow) {
    stop("`arrow` must be installed for `serializer_parquet` to work")
  }

  # Writer passed to `serializer_write_file()`: writes `val` as parquet to
  # the path it is given.
  write_parquet_file <- function(val, tmpfile) {
    arrow::write_parquet(val, tmpfile)
  }

  serializer_write_file(
    fileext = ".parquet",
    type = type,
    write_fn = write_parquet_file
  )
}
Expand Down Expand Up @@ -614,6 +629,7 @@ add_serializers_onLoad <- function() {
register_serializer("csv", serializer_csv)
register_serializer("tsv", serializer_tsv)
register_serializer("feather", serializer_feather)
register_serializer("parquet", serializer_parquet)
register_serializer("yaml", serializer_yaml)
register_serializer("geojson", serializer_geojson)

Expand Down
7 changes: 6 additions & 1 deletion man/parsers.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 7 additions & 2 deletions man/serializers.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 23 additions & 3 deletions tests/testthat/test-parse-body.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,25 +90,45 @@ test_that("Test tsv parser", {
})

test_that("Test feather parser", {
skip_if_not_installed("feather")
skip_if_not_installed("arrow")

tmp <- tempfile()
on.exit({
file.remove(tmp)
}, add = TRUE)

r_object <- iris
feather::write_feather(r_object, tmp)
arrow::write_feather(r_object, tmp)
val <- readBin(tmp, "raw", 10000)

parsed <- parse_body(val, "application/feather", make_parser("feather"))
parsed <- parse_body(val, "application/vnd.apache.arrow.file", make_parser("feather"))
# convert from feather tibble to data.frame
parsed <- as.data.frame(parsed, stringsAsFactors = FALSE)
attr(parsed, "spec") <- NULL

expect_equal(parsed, r_object)
})

# Round-trip check for the parquet parser: write `iris` to a parquet file,
# feed the raw bytes through `parse_body()` and compare with the original.
test_that("Test parquet parser", {
  skip_if_not_installed("arrow")

  tmp <- tempfile()
  on.exit({
    file.remove(tmp)
  }, add = TRUE)

  r_object <- iris
  arrow::write_parquet(r_object, tmp)
  # Read exactly as many bytes as the file holds instead of a fixed cap,
  # so a larger fixture can never be silently truncated.
  val <- readBin(tmp, "raw", n = file.info(tmp)$size)

  parsed <- parse_body(val, "application/vnd.apache.parquet", make_parser("parquet"))
  # convert from parquet tibble to data.frame
  parsed <- as.data.frame(parsed, stringsAsFactors = FALSE)
  attr(parsed, "spec") <- NULL

  expect_equal(parsed, r_object)
})

test_that("Test geojson parser", {
skip_if_not_installed("geojsonsf")
skip_if_not_installed("sf")
Expand Down
10 changes: 5 additions & 5 deletions tests/testthat/test-serializer-feather.R
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
context("feather serializer")

test_that("feather serializes properly", {
skip_if_not_installed("feather")
skip_if_not_installed("arrow")

d <- data.frame(a=1, b=2, c="hi")
val <- serializer_feather()(d, data.frame(), PlumberResponse$new(), stop)
expect_equal(val$status, 200L)
expect_equal(val$headers$`Content-Type`, "application/feather")
expect_equal(val$headers$`Content-Type`, "application/vnd.apache.arrow.file")

# can test by doing a full round trip if we believe the parser works via `test-parse-body.R`
parsed <- parse_body(val$body, "application/feather", make_parser("feather"))
parsed <- parse_body(val$body, "application/vnd.apache.arrow.file", make_parser("feather"))
# convert from feather tibble to data.frame
parsed <- as.data.frame(parsed, stringsAsFactors = FALSE)
attr(parsed, "spec") <- NULL
Expand All @@ -18,7 +18,7 @@ test_that("feather serializes properly", {
})

test_that("Errors call error handler", {
skip_if_not_installed("feather")
skip_if_not_installed("arrow")

errors <- 0
errHandler <- function(req, res, err){
Expand All @@ -31,7 +31,7 @@ test_that("Errors call error handler", {
})

test_that("Errors are rendered correctly with debug TRUE", {
skip_if_not_installed("feather")
skip_if_not_installed("arrow")

pr <- pr() %>% pr_get("/", function() stop("myerror"), serializer = serializer_feather()) %>% pr_set_debug(TRUE)
capture.output(res <- pr$serve(make_req(pr = pr), PlumberResponse$new("csv")))
Expand Down
3 changes: 2 additions & 1 deletion vignettes/rendering-output.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ Annotation | Content Type | Description/References
`@serializer rds` | `application/rds` | Object processed with `base::serialize()`
`@serializer csv` | `text/csv` | Object processed with `readr::format_csv()`
`@serializer tsv` | `text/tab-separated-values` | Object processed with `readr::format_tsv()`
`@serializer feather` | `application/feather` | Object processed with `feather::write_feather()`
`@serializer feather` | `application/vnd.apache.arrow.file` | Object processed with `arrow::write_feather()`
`@serializer parquet` | `application/vnd.apache.parquet` | Object processed with `arrow::write_parquet()`
`@serializer yaml` | `text/x-yaml` | Object processed with `yaml::as_yaml()`
`@serializer htmlwidget` | `text/html; charset=utf-8` | `htmlwidgets::saveWidget()`
`@serializer text` | `text/plain` | Text output processed by `as.character()`
Expand Down

0 comments on commit 0c21b78

Please sign in to comment.