Skip to content

Commit

Permalink
Add fwf_cols function
Browse files Browse the repository at this point in the history
This adds a helper function `fwf_cols` that is a more intuitive way of specifying fixed width column start and end points. While `fwf_positions` requires three vectors for start, end, and names, `fwf_cols` accepts a named list of length-2 vectors of the column start and end positions.
  • Loading branch information
jrnold committed Feb 18, 2017
1 parent 32eb778 commit 59a1422
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 1 deletion.
1 change: 1 addition & 0 deletions NAMESPACE
Expand Up @@ -40,6 +40,7 @@ export(default_locale)
export(format_csv)
export(format_delim)
export(format_tsv)
export(fwf_cols)
export(fwf_empty)
export(fwf_positions)
export(fwf_widths)
Expand Down
31 changes: 30 additions & 1 deletion R/read_fwf.R
Expand Up @@ -20,13 +20,15 @@
#' fwf_sample <- readr_example("fwf-sample.txt")
#' cat(read_lines(fwf_sample))
#'
#' # You can specify column positions in three ways:
#' # You can specify column positions in four ways:
#' # 1. Guess based on position of empty columns
#' read_fwf(fwf_sample, fwf_empty(fwf_sample, col_names = c("first", "last", "state", "ssn")))
#' # 2. A vector of field widths
#' read_fwf(fwf_sample, fwf_widths(c(20, 10, 12), c("name", "state", "ssn")))
#' # 3. Paired vectors of start and end positions
#' read_fwf(fwf_sample, fwf_positions(c(1, 30), c(10, 42), c("name", "ssn")))
#' # 4. Named list of start and end positions
#' read_fwf(fwf_sample, fwf_cols(list(name = c(1, 30), ssn = c(10, 42))))
read_fwf <- function(file, col_positions, col_types = NULL,
locale = default_locale(), na = c("", "NA"),
comment = "", skip = 0, n_max = Inf,
Expand Down Expand Up @@ -112,3 +114,30 @@ fwf_positions <- function(start, end, col_names = NULL) {
col_names = col_names
)
}


#' @rdname read_fwf
#' @export
#' @param .cols Named list of numeric vectors. The names are the
#' column names, the numeric vectors must all have length two with the start
#' and end positions of the columns.
#' @param ... Additional named arguments added to the \code{.cols} list.
fwf_cols <- function(.cols, ...) {
.cols <- c(as.list(.cols), list(...))
if (!all(vapply(.cols, length, integer(1)) == 2L)) {
stop("All elements in `.cols` must have length 2.", call. = FALSE)
}
if (!all(vapply(.cols, is.numeric, logical(1)))) {
stop("All values in `.cols` must be integers.", call. = FALSE)
}
nms <- names(.cols) %||% rep("", length(.cols))
no_names <- which(nms == "")
nms[no_names] <- paste0("X", no_names)
if (any(nms == "")) {
stop("All elements of `.cols` must be named.")
}
start <- unname(vapply(.cols, `[`, numeric(1), 1L))
end <- unname(vapply(.cols, `[`, numeric(1), 2L))
col_names <- names(nms)
fwf_positions(start, end, col_names = nms)
}
29 changes: 29 additions & 0 deletions tests/testthat/test-read-fwf.R
Expand Up @@ -146,3 +146,32 @@ test_that("read_table can read from a pipe (552)", {
x <- read_table(pipe("echo a b c && echo 1 2 3 && echo 4 5 6"))
expect_equal(x$a, c(1, 4))
})

test_that("fwf_cols produces correct fwf_positions object", {
col_pos <- fwf_cols(list(a = c(1, 2), b = c(9, 12)), d = c(4, 6))
expect_equal(col_pos, fwf_positions(c(1, 9, 4),
c(2, 12, 6),
c("a", "b", "d")))

})

test_that("fwf_cols throws error with non-length 2 vectors", {
pattern <- "All elements in `\\.cols` must have length 2."
expect_error(fwf_cols(list(a = 1:2, b = 3)), pattern)
expect_error(fwf_cols(list(a = 1:3, b = 4:5)), pattern)
expect_error(fwf_cols(list(a = c(), b = 4:5)), pattern)
})

test_that("fwf_cols works with unnamed columns", {
col_pos <- expect_equal(
fwf_cols(list(c(1, 2), c(9, 12)), c(4, 6)),
fwf_positions(c(1, 9, 4),
c(2, 12, 6),
c("X1", "X2", "X3")))
col_pos <- expect_equal(
fwf_cols(list(a = c(1, 2), c(9, 12)), c(4, 6)),
fwf_positions(c(1, 9, 4),
c(2, 12, 6),
c("a", "X2", "X3")))
})

0 comments on commit 59a1422

Please sign in to comment.