diff --git a/DESCRIPTION b/DESCRIPTION index 7d872aa..a15a3b4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -29,7 +29,8 @@ Imports: stringi, tibble, vctrs (>= 0.4.2), - zip + zip, + tools LinkingTo: cpp11 (>= 0.4.6) Suggests: diff --git a/R/read_ods.R b/R/read_ods.R index d5511d3..074fa3e 100644 --- a/R/read_ods.R +++ b/R/read_ods.R @@ -83,9 +83,6 @@ strings_as_factors = FALSE, verbose = FALSE, as_tibble = TRUE) { - if (missing(path) || !is.character(path)) { - stop("No file path was provided for the 'path' argument. Please provide a path to a file to import.", call. = FALSE) - } if (!file.exists(path)) { stop("file does not exist", call. = FALSE) } @@ -246,6 +243,30 @@ return(res) } +.determine_ods_format <- function(path, guess = FALSE) { # returns "ods" or "fods" for the file at `path` + ext <- tolower(tools::file_ext(path)) + formats <- c( + ods = "ods", + fods = "fods", + xml = "fods" + ) + if (!isTRUE(guess)) { # decide by extension; a missing or unknown extension falls back to "ods" + ext <- unname(formats[ext]) + if (is.na(ext)) { + return("ods") + } + return(ext) + } + zip_sig <- as.raw(c( # bytes "PK" 0x03 0x04 that the guess branch treats as the zip (ods) marker + 0x50, 0x4B, 0x03, 0x04 + )) + if (identical(zip_sig, readBin(path, n = 4, what = "raw"))) { + return("ods") + } else { + return("fods") + } +} + #' Read Data From (F)ODS File #' #' read_ods is a function to read a single sheet from an (f)ods file and return a data frame. For flat ods files (.fods or .xml), @@ -273,6 +294,10 @@ #' #' Default is `"unique"`. #' +#' @param ods_format character, must be "auto", "ods" or "fods". The default "auto" is to determine the format automatically. By default, the format is determined by file extension (a missing or unrecognized extension is treated as "ods"), unless `guess` is `TRUE`. +#' @param guess logical. Only used when `ods_format` is "auto". If `TRUE`, the file extension is ignored +#' and the format is guessed from the file signature ("magic number"): a file that starts with +#' the zip signature is read as "ods", anything else as "fods". Default is `FALSE`. #' @return A tibble (\code{tibble}) or data frame (\code{data.frame}) containing a representation of data in the (f)ods file. 
#' @author Peter Brohan , Chung-hong Chan , Gerrit-Jan Schutten #' @examples @@ -309,7 +334,16 @@ read_ods <- function(path, strings_as_factors = FALSE, verbose = FALSE, as_tibble = TRUE, - .name_repair = "unique") { + .name_repair = "unique", + ods_format = c("auto", "ods", "fods"), + guess = FALSE) { + ods_format <- match.arg(ods_format) + if (missing(path) || !is.character(path)) { + stop("No file path was provided for the 'path' argument. Please provide a path to a file to import.", call. = FALSE) + } + if (ods_format == "auto") { + ods_format <- .determine_ods_format(path, guess = guess) + } ## Should use match.call but there's a weird bug if one of the variable names is 'file' .read_ods(path = path, sheet = sheet, @@ -324,7 +358,7 @@ read_ods <- function(path, verbose = verbose, as_tibble = as_tibble, .name_repair = .name_repair, - flat = FALSE) + flat = ods_format == "fods") } #' @rdname read_ods diff --git a/man/read_ods.Rd b/man/read_ods.Rd index c05e080..320c711 100644 --- a/man/read_ods.Rd +++ b/man/read_ods.Rd @@ -18,7 +18,9 @@ read_ods( strings_as_factors = FALSE, verbose = FALSE, as_tibble = TRUE, - .name_repair = "unique" + .name_repair = "unique", + ods_format = c("auto", "ods", "fods"), + guess = FALSE ) read_fods( @@ -73,6 +75,12 @@ NULL, so that empty cells are treated as NA.} } Default is \code{"unique"}.} + +\item{ods_format}{character, must be "auto", "ods" or "fods". The default "auto" is to determine the format automatically. By default, the format is determined by file extension, unless \code{guess} is \code{TRUE}.} + +\item{guess}{logical. Only used when \code{ods_format} is "auto". If \code{TRUE}, the file extension is ignored +and the format is guessed from the file signature ("magic number"): a file that starts with +the zip signature is read as "ods", anything else as "fods". Default is \code{FALSE}.} } \value{ A tibble (\code{tibble}) or data frame (\code{data.frame}) containing a representation of data in the (f)ods file. 
diff --git a/tests/testthat/test_determine.R b/tests/testthat/test_determine.R new file mode 100644 index 0000000..2a932fd --- /dev/null +++ b/tests/testthat/test_determine.R @@ -0,0 +1,10 @@ +test_that(".determine_ods_format works", { + expect_equal(.determine_ods_format(readODS::write_ods(iris)), "ods") + expect_equal(.determine_ods_format(readODS::write_fods(iris)), "fods") + expect_equal(.determine_ods_format(readODS::write_ods(iris, tempfile(fileext = ".fods"))), "fods") + expect_equal(.determine_ods_format(readODS::write_ods(iris, tempfile(fileext = ".fods")), guess = TRUE), "ods") + expect_equal(.determine_ods_format(readODS::write_fods(iris, tempfile(fileext = ".xml"))), "fods") + expect_equal(.determine_ods_format(readODS::write_fods(iris), guess = TRUE), "fods") # guess mode reports "fods" when the zip signature is absent + expect_equal(.determine_ods_format(readODS::write_ods(iris, tempfile(fileext = ".xml"))), "fods") + expect_equal(.determine_ods_format(readODS::write_ods(iris, tempfile(fileext = ".xml")), guess = TRUE), "ods") +})