Skip to content

Commit

Permalink
remove embedded nul bytes if present
Browse files Browse the repository at this point in the history
Signed-off-by: Jozef <jozef.hajnala@gmail.com>
  • Loading branch information
jozefhajnala committed Feb 2, 2020
1 parent 12b1d87 commit 8c86497
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 0 deletions.
4 changes: 4 additions & 0 deletions R/core_deserialize.R
Expand Up @@ -85,6 +85,10 @@ readString <- function(con) {

if (stringLen > 0) {
raw <- read_bin(con, raw(), stringLen, endian = "big")
if (is.element("00", raw)) {
warning("Input contains embedded nuls, removing.")
raw <- raw[raw != "00"]
}
string <- rawToChar(raw)
}

Expand Down
Binary file added tests/testthat/embeddednul/with_embedded_nul.csv
Binary file not shown.
12 changes: 12 additions & 0 deletions tests/testthat/test-read-write.R
Expand Up @@ -239,5 +239,17 @@ test_that("spark_read_csv() can read verbatim column types", {
c = list(name = "c", type = "DoubleType")
)
)
})

test_that("spark_read_csv() can read if embedded nuls present", {
  # Locate the binary fixture by searching recursively below the current
  # working directory; full.names = TRUE returns the path including the
  # directory that was searched.
  fpath <- dir(
    getwd(),
    pattern = "with_embedded_nul\\.csv$",
    full.names = TRUE,
    recursive = TRUE
  )
  # Fail with a clear message if the fixture is missing or ambiguous instead
  # of passing an empty / multi-element path on to Spark.
  expect_length(fpath, 1)

  df <- spark_read_csv(sc, name = "test_embedded_nul", path = fpath)

  # Collect exactly once: assert that the embedded-nul warning is raised and
  # keep the collected value for the content check below. This avoids a second
  # Spark round trip and avoids a blanket suppressWarnings() that would also
  # hide unrelated warnings. fixed = TRUE matches the message literally so the
  # trailing "." is not treated as a regex wildcard.
  expect_warning(
    collected <- df %>% collect(),
    "Input contains embedded nuls, removing.",
    fixed = TRUE
  )

  # With the nul bytes stripped, the single cell should hold the clean string.
  expect_equal(
    collected,
    data.frame(test = "teststring", stringsAsFactors = FALSE)
  )
})

0 comments on commit 8c86497

Please sign in to comment.